diff --git a/.env b/.env new file mode 100644 index 00000000000..e3ececc2e54 --- /dev/null +++ b/.env @@ -0,0 +1,4 @@ +APP_IMAGE=gdcc/dataverse:unstable +POSTGRES_VERSION=13 +DATAVERSE_DB_USER=dataverse +SOLR_VERSION=9.3.0 diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index cf982992c03..b297dfc4ee8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,11 +1,13 @@ --- name: Bug report -about: Did you encounter something unexpected or incorrect in the Dataverse software? We'd like to hear about it! +about: Did you encounter something unexpected or incorrect in the Dataverse software? + We'd like to hear about it! title: '' labels: '' assignees: '' --- + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -XX:MaxPermSize=192m - -client - -Djava.awt.headless=true - -Djdk.corba.allowOutputStreamSubclass=true - -Djavax.xml.accessExternalSchema=all - -Djavax.management.builder.initial=com.sun.enterprise.v3.admin.AppServerMBeanServerBuilder - -XX:+UnlockDiagnosticVMOptions - -Djava.endorsed.dirs=${com.sun.aas.installRoot}/modules/endorsed${path.separator}${com.sun.aas.installRoot}/lib/endorsed - -Djava.security.policy=${com.sun.aas.instanceRoot}/config/server.policy - -Djava.security.auth.login.config=${com.sun.aas.instanceRoot}/config/login.conf - -Dcom.sun.enterprise.security.httpsOutboundKeyAlias=s1as - -Xmx512m - -Djavax.net.ssl.keyStore=${com.sun.aas.instanceRoot}/config/keystore.jks - -Djavax.net.ssl.trustStore=${com.sun.aas.instanceRoot}/config/cacerts.jks - -Djava.ext.dirs=${com.sun.aas.javaRoot}/lib/ext${path.separator}${com.sun.aas.javaRoot}/jre/lib/ext${path.separator}${com.sun.aas.instanceRoot}/lib/ext - -Djdbc.drivers=org.apache.derby.jdbc.ClientDriver - -DANTLR_USE_DIRECT_CLASS_LOADING=true - -Dcom.sun.enterprise.config.config_environment_factory_class=com.sun.enterprise.config.serverbeans.AppserverConfigEnvironmentFactory - - -Dorg.glassfish.additionalOSGiBundlesToStart=org.apache.felix.shell,org.apache.felix.gogo.runtime,org.apache.felix.gogo.shell,org.apache.felix.gogo.command,org.apache.felix.shell.remote,org.apache.felix.fileinstall - - - -Dosgi.shell.telnet.port=6666 - - -Dosgi.shell.telnet.maxconn=1 - - -Dosgi.shell.telnet.ip=127.0.0.1 - - -Dgosh.args=--nointeractive - - -Dfelix.fileinstall.dir=${com.sun.aas.installRoot}/modules/autostart/ - - -Dfelix.fileinstall.poll=5000 - - -Dfelix.fileinstall.log.level=2 - - -Dfelix.fileinstall.bundles.new.start=true - - -Dfelix.fileinstall.bundles.startTransient=true - - -Dfelix.fileinstall.disableConfigSave=false - - -XX:NewRatio=2 - - -Dcom.ctc.wstx.returnNullForDefaultNamespace=true - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -XX:MaxPermSize=192m - -server - -Djava.awt.headless=true - -Djdk.corba.allowOutputStreamSubclass=true - -XX:+UnlockDiagnosticVMOptions - -Djava.endorsed.dirs=${com.sun.aas.installRoot}/modules/endorsed${path.separator}${com.sun.aas.installRoot}/lib/endorsed - -Djava.security.policy=${com.sun.aas.instanceRoot}/config/server.policy - 
-Djava.security.auth.login.config=${com.sun.aas.instanceRoot}/config/login.conf - -Dcom.sun.enterprise.security.httpsOutboundKeyAlias=s1as - -Djavax.net.ssl.keyStore=${com.sun.aas.instanceRoot}/config/keystore.jks - -Djavax.net.ssl.trustStore=${com.sun.aas.instanceRoot}/config/cacerts.jks - -Djava.ext.dirs=${com.sun.aas.javaRoot}/lib/ext${path.separator}${com.sun.aas.javaRoot}/jre/lib/ext${path.separator}${com.sun.aas.instanceRoot}/lib/ext - -Djdbc.drivers=org.apache.derby.jdbc.ClientDriver - -DANTLR_USE_DIRECT_CLASS_LOADING=true - -Dcom.sun.enterprise.config.config_environment_factory_class=com.sun.enterprise.config.serverbeans.AppserverConfigEnvironmentFactory - -XX:NewRatio=2 - -Xmx512m - - -Dorg.glassfish.additionalOSGiBundlesToStart=org.apache.felix.shell,org.apache.felix.gogo.runtime,org.apache.felix.gogo.shell,org.apache.felix.gogo.command,org.apache.felix.fileinstall - - -Dosgi.shell.telnet.port=${OSGI_SHELL_TELNET_PORT} - - -Dosgi.shell.telnet.maxconn=1 - - -Dosgi.shell.telnet.ip=127.0.0.1 - - -Dgosh.args=--noshutdown -c noop=true - - -Dfelix.fileinstall.dir=${com.sun.aas.installRoot}/modules/autostart/ - - -Dfelix.fileinstall.poll=5000 - - -Dfelix.fileinstall.log.level=3 - - -Dfelix.fileinstall.bundles.new.start=true - - -Dfelix.fileinstall.bundles.startTransient=true - - -Dfelix.fileinstall.disableConfigSave=false - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/conf/docker-aio/dv/install/default.config b/conf/docker-aio/dv/install/default.config deleted file mode 100644 index 0b806a8714b..00000000000 --- a/conf/docker-aio/dv/install/default.config +++ /dev/null @@ -1,15 +0,0 @@ -HOST_DNS_ADDRESS localhost -GLASSFISH_DIRECTORY /opt/glassfish4 -ADMIN_EMAIL -MAIL_SERVER mail.hmdc.harvard.edu -POSTGRES_ADMIN_PASSWORD secret -POSTGRES_SERVER db -POSTGRES_PORT 5432 -POSTGRES_DATABASE dvndb -POSTGRES_USER dvnapp -POSTGRES_PASSWORD secret -SOLR_LOCATION idx -RSERVE_HOST localhost -RSERVE_PORT 6311 -RSERVE_USER rserve -RSERVE_PASSWORD rserve diff --git a/conf/docker-aio/dv/pg_hba.conf b/conf/docker-aio/dv/pg_hba.conf deleted file mode 100644 index 77feba5247d..00000000000 --- a/conf/docker-aio/dv/pg_hba.conf +++ /dev/null @@ -1,91 +0,0 @@ -# PostgreSQL Client Authentication Configuration File -# =================================================== -# -# Refer to the "Client Authentication" section in the PostgreSQL -# documentation for a complete description of this file. A short -# synopsis follows. -# -# This file controls: which hosts are allowed to connect, how clients -# are authenticated, which PostgreSQL user names they can use, which -# databases they can access. Records take one of these forms: -# -# local DATABASE USER METHOD [OPTIONS] -# host DATABASE USER ADDRESS METHOD [OPTIONS] -# hostssl DATABASE USER ADDRESS METHOD [OPTIONS] -# hostnossl DATABASE USER ADDRESS METHOD [OPTIONS] -# -# (The uppercase items must be replaced by actual values.) -# -# The first field is the connection type: "local" is a Unix-domain -# socket, "host" is either a plain or SSL-encrypted TCP/IP socket, -# "hostssl" is an SSL-encrypted TCP/IP socket, and "hostnossl" is a -# plain TCP/IP socket. -# -# DATABASE can be "all", "sameuser", "samerole", "replication", a -# database name, or a comma-separated list thereof. The "all" -# keyword does not match "replication". Access to replication -# must be enabled in a separate record (see example below). 
-# -# USER can be "all", a user name, a group name prefixed with "+", or a -# comma-separated list thereof. In both the DATABASE and USER fields -# you can also write a file name prefixed with "@" to include names -# from a separate file. -# -# ADDRESS specifies the set of hosts the record matches. It can be a -# host name, or it is made up of an IP address and a CIDR mask that is -# an integer (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that -# specifies the number of significant bits in the mask. A host name -# that starts with a dot (.) matches a suffix of the actual host name. -# Alternatively, you can write an IP address and netmask in separate -# columns to specify the set of hosts. Instead of a CIDR-address, you -# can write "samehost" to match any of the server's own IP addresses, -# or "samenet" to match any address in any subnet that the server is -# directly connected to. -# -# METHOD can be "trust", "reject", "md5", "password", "gss", "sspi", -# "krb5", "ident", "peer", "pam", "ldap", "radius" or "cert". Note that -# "password" sends passwords in clear text; "md5" is preferred since -# it sends encrypted passwords. -# -# OPTIONS are a set of options for the authentication in the format -# NAME=VALUE. The available options depend on the different -# authentication methods -- refer to the "Client Authentication" -# section in the documentation for a list of which options are -# available for which authentication methods. -# -# Database and user names containing spaces, commas, quotes and other -# special characters must be quoted. Quoting one of the keywords -# "all", "sameuser", "samerole" or "replication" makes the name lose -# its special character, and just match a database or username with -# that name. -# -# This file is read on server startup and when the postmaster receives -# a SIGHUP signal. If you edit the file on a running system, you have -# to SIGHUP the postmaster for the changes to take effect. You can -# use "pg_ctl reload" to do that. - -# Put your actual configuration here -# ---------------------------------- -# -# If you want to allow non-local connections, you need to add more -# "host" records. In that case you will also need to make PostgreSQL -# listen on a non-local interface via the listen_addresses -# configuration parameter, or via the -i or -h command line switches. - - - -# TYPE DATABASE USER ADDRESS METHOD - -# "local" is for Unix domain socket connections only -#local all all peer -local all all trust -# IPv4 local connections: -#host all all 127.0.0.1/32 trust -host all all 0.0.0.0/0 trust -# IPv6 local connections: -host all all ::1/128 trust -# Allow replication connections from localhost, by a user with the -# replication privilege. -#local replication postgres peer -#host replication postgres 127.0.0.1/32 ident -#host replication postgres ::1/128 ident diff --git a/conf/docker-aio/entrypoint.bash b/conf/docker-aio/entrypoint.bash deleted file mode 100755 index 236bb30f67a..00000000000 --- a/conf/docker-aio/entrypoint.bash +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash -export LANG=en_US.UTF-8 -sudo -u postgres /usr/pgsql-13/bin/pg_ctl start -D /var/lib/pgsql/13/data & -cd /opt/solr-8.11.1/ -# TODO: Run Solr as non-root and remove "-force". -bin/solr start -force -bin/solr create_core -c collection1 -d server/solr/collection1/conf -force - -# start apache, in both foreground and background... -apachectl -DFOREGROUND & - -# TODO: Run Payara as non-root. 
-cd /opt/payara5 -bin/asadmin start-domain --debug -sleep infinity - diff --git a/conf/docker-aio/httpd.conf b/conf/docker-aio/httpd.conf deleted file mode 100644 index 85c851d785f..00000000000 --- a/conf/docker-aio/httpd.conf +++ /dev/null @@ -1,27 +0,0 @@ - -Include conf.d/*.conf -Include conf.modules.d/*.conf -ServerName localhost -Listen 80 443 -PidFile run/httpd.pid -DocumentRoot "/var/www/html" -TypesConfig /etc/mime.types -User apache -Group apache - - - ServerName localhost - LogLevel debug - ErrorLog logs/error_log - LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined - CustomLog logs/access_log combined - - # proxy config (aka - what to send to glassfish or not) - ProxyPassMatch ^/Shibboleth.sso ! - ProxyPassMatch ^/shibboleth-ds ! - # pass everything else to Glassfish - ProxyPass / ajp://localhost:8009/ -# glassfish can be slow sometimes - ProxyTimeout 300 - - diff --git a/conf/docker-aio/install.bash b/conf/docker-aio/install.bash deleted file mode 100755 index 2b3275ad830..00000000000 --- a/conf/docker-aio/install.bash +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash -sudo -u postgres createuser --superuser dvnapp -#./entrypoint.bash & -unzip dvinstall.zip -cd dvinstall/ -echo "beginning installer" -./install -admin_email=dvAdmin@mailinator.com -y -f > install.out 2> install.err - -echo "installer complete" -cat install.err diff --git a/conf/docker-aio/prep_it.bash b/conf/docker-aio/prep_it.bash deleted file mode 100755 index adb257e43b1..00000000000 --- a/conf/docker-aio/prep_it.bash +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash - -# run through all the steps to setup docker-aio to run integration tests - -# hard-codes several assumptions: image is named dv0, container is named dv, port is 8084 - -# glassfish healthy/ready retries -n_wait=5 - -cd conf/docker-aio -./0prep_deps.sh -./1prep.sh -docker build -t dv0 -f c8.dockerfile . -# cleanup from previous runs if necessary -docker rm -f dv -# start container -docker run -d -p 8084:80 -p 8083:8080 -p 9010:9009 --name dv dv0 -# wait for glassfish to be healthy -i_wait=0 -d_wait=10 -while [ $i_wait -lt $n_wait ] -do - h=`docker inspect -f "{{.State.Health.Status}}" dv` - if [ "healthy" == "${h}" ]; then - break - else - sleep $d_wait - fi - i_wait=$(( $i_wait + 1 )) - -done -# try setupIT.bash -docker exec dv /opt/dv/setupIT.bash -err=$? -if [ $err -ne 0 ]; then - echo "error - setupIT failure" - exit 1 -fi -# configure DOI provider based on docker build arguments / environmental variables -docker exec dv /opt/dv/configure_doi.bash -err=$? -if [ $err -ne 0 ]; then - echo "error - DOI configuration failure" - exit 1 -fi -# handle config for the private url test (and things like publishing...) -./seturl.bash - - -cd ../.. -#echo "docker-aio ready to run integration tests ($i_retry)" -echo "docker-aio ready to run integration tests" -curl http://localhost:8084/api/info/version -echo $? - diff --git a/conf/docker-aio/readme.md b/conf/docker-aio/readme.md deleted file mode 100644 index ef4d3626cf0..00000000000 --- a/conf/docker-aio/readme.md +++ /dev/null @@ -1,60 +0,0 @@ -# Docker All-In-One - -First pass docker all-in-one image, intended for running integration tests against. -Also usable for normal development and system evaluation; not intended for production. - -### Requirements: - - java11 compiler, maven, make, wget, docker - -### Quickstart: - - in the root of the repository, run `./conf/docker-aio/prep_it.bash` - - if using DataCite test credentials, update the build args appropriately. 
- - if all goes well, you should see the results of the `api/info/version` endpoint, including the deployed build (eg `{"status":"OK","data":{"version":"4.8.6","build":"develop-c3e9f40"}}`). If not, you may need to read the non-quickstart instructions. - - run integration tests: `./conf/docker-aio/run-test-suite.sh` - ----- - -## More in-depth documentation: - - -### Initial setup (aka - do once): -- `cd conf/docker-aio` and run `./0prep_deps.sh` to created Payara and Solr tarballs in `conf/docker-aio/dv/deps`. - -### Per-build: - -> Note: If you encounter any issues, see the Troubleshooting section at the end of this document. - -#### Setup - -- `cd conf/docker-aio`, and run `./1prep.sh` to copy files for integration test data into docker build context; `1prep.sh` will also build the war file and installation zip file -- build the docker image: `docker build -t dv0 -f c8.dockerfile .` - -- Run image: `docker run -d -p 8083:8080 -p 8084:80 --name dv dv0` (aka - forward port 8083 locally to 8080 in the container for payara, and 8084 to 80 for apache); if you'd like to connect a java debugger to payara, use `docker run -d -p 8083:8080 -p 8084:80 -p 9010:9009 --name dv dv0` - -- Installation (integration test): `docker exec dv /opt/dv/setupIT.bash` - (Note that it's possible to customize the installation by editing `conf/docker-aio/default.config` and running `docker exec dv /opt/dv/install.bash` but for the purposes of integration testing, the `setupIT.bash` script above works fine.) - -- update `dataverse.siteUrl` (appears only necessary for `DatasetsIT.testPrivateUrl`): `docker exec dv /usr/local/glassfish4/bin/asadmin create-jvm-options "-Ddataverse.siteUrl=http\://localhost\:8084"` (or use the provided `seturl.bash`) - -#### Run integration tests: - -First, cd back to the root of the repo where the `pom.xml` file is (`cd ../..` assuming you're still in the `conf/docker-aio` directory). Then run the test suite with script below: - -`conf/docker-aio/run-test-suite.sh` - -There isn't any strict requirement on the local port (8083, 8084 in this doc), the name of the image (dv0) or container (dv), these can be changed as desired as long as they are consistent. - -### Troubleshooting Notes: - -* If Dataverse' build fails due to an error about `Module` being ambiguous, you might be using a Java 9 compiler. - -* If you see an error like this: - ``` - docker: Error response from daemon: Conflict. The container name "/dv" is already in use by container "5f72a45b68c86c7b0f4305b83ce7d663020329ea4e30fa2a3ce9ddb05223533d" - You have to remove (or rename) that container to be able to reuse that name. - ``` - run something like `docker ps -a | grep dv` to see the container left over from the last run and something like `docker rm 5f72a45b68c8` to remove it. Then try the `docker run` command above again. - -* `empty reply from server` or `Failed to connect to ::1: Cannot assign requested address` tend to indicate either that you haven't given payara enough time to start, or your docker setup is in an inconsistent state and should probably be restarted. - -* For manually fiddling around with the created dataverse, use user `dataverseAdmin` with password `admin1`. diff --git a/conf/docker-aio/run-test-suite.sh b/conf/docker-aio/run-test-suite.sh deleted file mode 100755 index 39809a7a50e..00000000000 --- a/conf/docker-aio/run-test-suite.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -# This is the canonical list of which "IT" tests are expected to pass. 
- -dvurl=$1 -if [ -z "$dvurl" ]; then - dvurl="http://localhost:8084" -fi - -integrationtests=$( - ServerName localhost - LogLevel debug - ErrorLog logs/error_log - LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined - CustomLog logs/access_log combined - - # proxy config (aka - what to send to glassfish or not) - ProxyPassMatch ^/Shibboleth.sso ! - ProxyPassMatch ^/shibboleth-ds ! - # pass everything else to Glassfish - ProxyPass / ajp://localhost:8009/ -# glassfish can be slow sometimes - ProxyTimeout 300 - - diff --git a/conf/docker-aio/testscripts/db.sh b/conf/docker-aio/testscripts/db.sh deleted file mode 100755 index f0a9e409fd7..00000000000 --- a/conf/docker-aio/testscripts/db.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -psql -U postgres -c "CREATE ROLE dvnapp PASSWORD 'secret' SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN" template1 -psql -U dvnapp -c 'CREATE DATABASE "dvndb" WITH OWNER = "dvnapp"' template1 diff --git a/conf/docker-aio/testscripts/install b/conf/docker-aio/testscripts/install deleted file mode 100755 index f87f180b554..00000000000 --- a/conf/docker-aio/testscripts/install +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh -export HOST_ADDRESS=localhost -export GLASSFISH_ROOT=/opt/payara5 -export FILES_DIR=/opt/payara5/glassfish/domains/domain1/files -export DB_NAME=dvndb -export DB_PORT=5432 -export DB_HOST=localhost -export DB_USER=dvnapp -export DB_PASS=secret -export RSERVE_HOST=localhost -export RSERVE_PORT=6311 -export RSERVE_USER=rserve -export RSERVE_PASS=rserve -export SMTP_SERVER=localhost -export MEM_HEAP_SIZE=2048 -export GLASSFISH_DOMAIN=domain1 -cd scripts/installer -#cp ../../conf/jhove/jhove.conf $GLASSFISH_ROOT/glassfish/domains/$GLASSFISH_DOMAIN/config/jhove.conf -cp /opt/dv/testdata/jhove.conf $GLASSFISH_ROOT/glassfish/domains/$GLASSFISH_DOMAIN/config/jhove.conf -cp /opt/dv/testdata/jhoveConfig.xsd $GLASSFISH_ROOT/glassfish/domains/$GLASSFISH_DOMAIN/config/jhoveConfig.xsd -./as-setup.sh dvndb diff --git a/conf/docker-aio/testscripts/post b/conf/docker-aio/testscripts/post deleted file mode 100755 index 0f292109d31..00000000000 --- a/conf/docker-aio/testscripts/post +++ /dev/null @@ -1,13 +0,0 @@ -#/bin/sh -cd scripts/api -./setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out -cd ../.. -psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/createsequence.sql -scripts/search/tests/publish-dataverse-root -#git checkout scripts/api/data/dv-root.json -scripts/search/tests/grant-authusers-add-on-root -scripts/search/populate-users -scripts/search/create-users -scripts/search/tests/create-all-and-test -scripts/search/tests/publish-spruce1-and-test -#java -jar downloads/schemaSpy_5.0.0.jar -t pgsql -host localhost -db dvndb -u postgres -p secret -s public -dp scripts/installer/pgdriver/postgresql-9.1-902.jdbc4.jar -o /var/www/html/schemaspy/latest diff --git a/conf/docker-dcm/.gitignore b/conf/docker-dcm/.gitignore deleted file mode 100644 index ac39981ce6a..00000000000 --- a/conf/docker-dcm/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.rpm -upload*.bash diff --git a/conf/docker-dcm/0prep.sh b/conf/docker-dcm/0prep.sh deleted file mode 100755 index 300aa39d567..00000000000 --- a/conf/docker-dcm/0prep.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -DCM_VERSION=0.5 -RSAL_VERSION=0.1 - -if [ ! -e dcm-${DCM_VERSION}-0.noarch.rpm ]; then - wget https://github.com/sbgrid/data-capture-module/releases/download/${DCM_VERSION}/dcm-${DCM_VERSION}-0.noarch.rpm -fi - -if [ ! 
-e rsal-${RSAL_VERSION}-0.noarch.rpm ] ;then - wget https://github.com/sbgrid/rsal/releases/download/${RSAL_VERSION}/rsal-${RSAL_VERSION}-0.noarch.rpm -fi diff --git a/conf/docker-dcm/c6client.dockerfile b/conf/docker-dcm/c6client.dockerfile deleted file mode 100644 index e4d1ae7da82..00000000000 --- a/conf/docker-dcm/c6client.dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -# build from repo root -FROM centos:6 -RUN yum install -y epel-release -RUN yum install -y rsync openssh-clients jq curl wget lynx -RUN useradd depositor -USER depositor -WORKDIR /home/depositor diff --git a/conf/docker-dcm/cfg/dcm/bashrc b/conf/docker-dcm/cfg/dcm/bashrc deleted file mode 100644 index 07137ab8471..00000000000 --- a/conf/docker-dcm/cfg/dcm/bashrc +++ /dev/null @@ -1,18 +0,0 @@ -# .bashrc - -# User specific aliases and functions - -alias rm='rm -i' -alias cp='cp -i' -alias mv='mv -i' - -# Source global definitions -if [ -f /etc/bashrc ]; then - . /etc/bashrc -fi - -# these are dummy values, obviously -export UPLOADHOST=dcmsrv -export DVAPIKEY=burrito -export DVHOSTINT=dvsrv -export DVHOST=dvsrv diff --git a/conf/docker-dcm/cfg/dcm/entrypoint-dcm.sh b/conf/docker-dcm/cfg/dcm/entrypoint-dcm.sh deleted file mode 100755 index 0db674bfac4..00000000000 --- a/conf/docker-dcm/cfg/dcm/entrypoint-dcm.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -/etc/init.d/sshd start -/etc/init.d/redis start -/etc/init.d/rq start -lighttpd -D -f /etc/lighttpd/lighttpd.conf diff --git a/conf/docker-dcm/cfg/dcm/healthcheck-dcm.sh b/conf/docker-dcm/cfg/dcm/healthcheck-dcm.sh deleted file mode 100755 index 3964a79391e..00000000000 --- a/conf/docker-dcm/cfg/dcm/healthcheck-dcm.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -r_rq=`/etc/init.d/rq status` -if [ "rq_worker running" != "$r_rq" ]; then - echo "rq failed" - exit 1 -fi -r_www=`/etc/init.d/lighttpd status` -e_www=$? -if [ 0 -ne $e_www ]; then - echo "lighttpd failed" - exit 2 -fi - diff --git a/conf/docker-dcm/cfg/dcm/rq-init-d b/conf/docker-dcm/cfg/dcm/rq-init-d deleted file mode 100755 index 093cd894376..00000000000 --- a/conf/docker-dcm/cfg/dcm/rq-init-d +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# chkconfig: 2345 90 60 -# description: rq worker script (single worker process) - -# example rq configuration file (to be placed in /etc/init.d) - -# works on cent6 - -DAEMON=rq_worker -DAEMON_PATH=/opt/dcm/gen/ -export UPLOADHOST=dcmsrv -VIRTUALENV= -LOGFILE=/var/log/${DAEMON}.log -PIDFILE=/var/run/${DAEMON}.pid - -case "$1" in -start) - printf "%-50s" "starting $DAEMON..." - cd $DAEMON_PATH - if [ ! 
-z "$VIRTUALENV" ]; then - source $VIRTUALENV/bin/activate - fi - rq worker normal --pid $PIDFILE > ${LOGFILE} 2>&1 & -;; -status) - if [ -f $PIDFILE ]; then - PID=`cat $PIDFILE` - if [ -z "`ps axf | grep ${PID} | grep -v grep`" ]; then - printf "%s\n" "$DAEMON not running, but PID file ($PIDFILE) exists" - else - echo "$DAEMON running" - fi - else - printf "%s\n" "$DAEMON not running" - fi -;; -stop) - printf "%-50s" "stopping $DAEMON" - if [ -f $PIDFILE ]; then - PID=`cat $PIDFILE` - kill -HUP $PID - rm -f $PIDFILE - else - printf "%s\n" "no PID file ($PIDFILE) - maybe not running" - fi -;; -restart) - $0 stop - $0 start -;; - -*) - echo "Usage: $0 {status|start|stop|restart}" - exit 1 -esac - diff --git a/conf/docker-dcm/cfg/dcm/test_install.sh b/conf/docker-dcm/cfg/dcm/test_install.sh deleted file mode 100755 index 3026ceb9fa5..00000000000 --- a/conf/docker-dcm/cfg/dcm/test_install.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -cp /etc/dcm/rq-init-d /etc/init.d/rq -cp /etc/dcm/lighttpd-conf-dcm /etc/lighttpd/lighttpd.conf -cp /etc/dcm/lighttpd-modules-dcm /etc/lighttpd/modules.conf -cp /etc/dcm/dcm-rssh.conf /etc/rssh.conf - diff --git a/conf/docker-dcm/cfg/rsal/entrypoint-rsal.sh b/conf/docker-dcm/cfg/rsal/entrypoint-rsal.sh deleted file mode 100755 index 92466c3bd4b..00000000000 --- a/conf/docker-dcm/cfg/rsal/entrypoint-rsal.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -#/usr/bin/rsync --no-detach --daemon --config /etc/rsyncd.conf -/usr/bin/rsync --daemon --config /etc/rsyncd.conf -lighttpd -D -f /etc/lighttpd/lighttpd.conf diff --git a/conf/docker-dcm/cfg/rsal/lighttpd-modules.conf b/conf/docker-dcm/cfg/rsal/lighttpd-modules.conf deleted file mode 100644 index cdb1438af82..00000000000 --- a/conf/docker-dcm/cfg/rsal/lighttpd-modules.conf +++ /dev/null @@ -1,174 +0,0 @@ -####################################################################### -## -## ansible managed -# -## Modules to load -## ----------------- -## -## at least mod_access and mod_accesslog should be loaded -## all other module should only be loaded if really neccesary -## -## - saves some time -## - saves memory -## -## the default module set contains: -## -## "mod_indexfile", "mod_dirlisting", "mod_staticfile" -## -## you dont have to include those modules in your list -## -## Modules, which are pulled in via conf.d/*.conf -## -## NOTE: the order of modules is important. 
-## -## - mod_accesslog -> conf.d/access_log.conf -## - mod_compress -> conf.d/compress.conf -## - mod_status -> conf.d/status.conf -## - mod_webdav -> conf.d/webdav.conf -## - mod_cml -> conf.d/cml.conf -## - mod_evhost -> conf.d/evhost.conf -## - mod_simple_vhost -> conf.d/simple_vhost.conf -## - mod_mysql_vhost -> conf.d/mysql_vhost.conf -## - mod_trigger_b4_dl -> conf.d/trigger_b4_dl.conf -## - mod_userdir -> conf.d/userdir.conf -## - mod_rrdtool -> conf.d/rrdtool.conf -## - mod_ssi -> conf.d/ssi.conf -## - mod_cgi -> conf.d/cgi.conf -## - mod_scgi -> conf.d/scgi.conf -## - mod_fastcgi -> conf.d/fastcgi.conf -## - mod_proxy -> conf.d/proxy.conf -## - mod_secdownload -> conf.d/secdownload.conf -## - mod_expire -> conf.d/expire.conf -## - -server.modules = ( - "mod_access", -# "mod_alias", -# "mod_auth", -# "mod_evasive", -# "mod_redirect", -# "mod_rewrite", -# "mod_setenv", -# "mod_usertrack", -) - -## -####################################################################### - -####################################################################### -## -## Config for various Modules -## - -## -## mod_ssi -## -#include "conf.d/ssi.conf" - -## -## mod_status -## -#include "conf.d/status.conf" - -## -## mod_webdav -## -#include "conf.d/webdav.conf" - -## -## mod_compress -## -#include "conf.d/compress.conf" - -## -## mod_userdir -## -#include "conf.d/userdir.conf" - -## -## mod_magnet -## -#include "conf.d/magnet.conf" - -## -## mod_cml -## -#include "conf.d/cml.conf" - -## -## mod_rrdtool -## -#include "conf.d/rrdtool.conf" - -## -## mod_proxy -## -#include "conf.d/proxy.conf" - -## -## mod_expire -## -#include "conf.d/expire.conf" - -## -## mod_secdownload -## -#include "conf.d/secdownload.conf" - -## -####################################################################### - -####################################################################### -## -## CGI modules -## - -## -## SCGI (mod_scgi) -## -#include "conf.d/scgi.conf" - -## -## FastCGI (mod_fastcgi) -## -#include "conf.d/fastcgi.conf" - -## -## plain old CGI (mod_cgi) -## -include "conf.d/cgi.conf" - -## -####################################################################### - -####################################################################### -## -## VHost Modules -## -## Only load ONE of them! -## ======================== -## - -## -## You can use conditionals for vhosts aswell. -## -## see http://www.lighttpd.net/documentation/configuration.html -## - -## -## mod_evhost -## -#include "conf.d/evhost.conf" - -## -## mod_simple_vhost -## -#include "conf.d/simple_vhost.conf" - -## -## mod_mysql_vhost -## -#include "conf.d/mysql_vhost.conf" - -## -####################################################################### diff --git a/conf/docker-dcm/cfg/rsal/lighttpd.conf b/conf/docker-dcm/cfg/rsal/lighttpd.conf deleted file mode 100644 index 5874d60eb48..00000000000 --- a/conf/docker-dcm/cfg/rsal/lighttpd.conf +++ /dev/null @@ -1,43 +0,0 @@ -## lighttpd configuration customized for RSAL; centos7 - -# refuse connections not from frontend or localhost -# DO NOT HAVE THIS OPEN TO THE WORLD!!! -#$HTTP["remoteip"] !~ "192.168.2.2|127.0.0.1" { -#url.access-deny = ("") -#} -server.breakagelog = "/var/log/lighttpd/breakage.log" - -####################################################################### -## -## Some Variable definition which will make chrooting easier. -## -## if you add a variable here. Add the corresponding variable in the -## chroot example aswell. 
-## -var.log_root = "/var/log/lighttpd" -var.server_root = "/opt/rsal/api" -var.state_dir = "/var/run" -var.home_dir = "/var/lib/lighttpd" -var.conf_dir = "/etc/lighttpd" - -var.cache_dir = "/var/cache/lighttpd" -var.socket_dir = home_dir + "/sockets" -include "modules.conf" -server.port = 80 -server.use-ipv6 = "disable" -server.username = "lighttpd" -server.groupname = "lighttpd" -server.document-root = server_root -server.pid-file = state_dir + "/lighttpd.pid" -server.errorlog = log_root + "/error.log" -include "conf.d/access_log.conf" -include "conf.d/debug.conf" -server.event-handler = "linux-sysepoll" -server.network-backend = "linux-sendfile" -server.stat-cache-engine = "simple" -server.max-connections = 1024 -static-file.exclude-extensions = ( ".php", ".pl", ".fcgi", ".scgi" ) -include "conf.d/mime.conf" -include "conf.d/dirlisting.conf" -server.follow-symlink = "enable" -server.upload-dirs = ( "/var/tmp" ) diff --git a/conf/docker-dcm/cfg/rsal/rsyncd.conf b/conf/docker-dcm/cfg/rsal/rsyncd.conf deleted file mode 100644 index 5a15ab28a12..00000000000 --- a/conf/docker-dcm/cfg/rsal/rsyncd.conf +++ /dev/null @@ -1,8 +0,0 @@ -lock file=/var/run/rsync.lock -log file=/var/log/rsyncd.log -pid file=/var/log/rsyncd.pid - -[10.5072] - path=/public/ - read only=yes - diff --git a/conf/docker-dcm/configure_dcm.sh b/conf/docker-dcm/configure_dcm.sh deleted file mode 100755 index 5b65b0a0314..00000000000 --- a/conf/docker-dcm/configure_dcm.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -echo "dcm configs on dv side to be done" - -# in homage to dataverse traditions, reset to insecure "burrito" admin API key -sudo -u postgres psql -c "update apitoken set tokenstring='burrito' where id=1;" dvndb -sudo -u postgres psql -c "update authenticateduser set superuser='t' where id=1;" dvndb - -# dataverse configs for DCM -curl -X PUT -d "SHA-1" "http://localhost:8080/api/admin/settings/:FileFixityChecksumAlgorithm" -curl -X PUT "http://localhost:8080/api/admin/settings/:UploadMethods" -d "dcm/rsync+ssh" -curl -X PUT "http://localhost:8080/api/admin/settings/:DataCaptureModuleUrl" -d "http://dcmsrv" - -# configure for RSAL downloads; but no workflows or RSAL yet -curl -X PUT "http://localhost:8080/api/admin/settings/:DownloadMethods" -d "rsal/rsync" - -# publish root dataverse -curl -X POST -H "X-Dataverse-key: burrito" "http://localhost:8080/api/dataverses/root/actions/:publish" - -# symlink `hold` volume -mkdir -p /usr/local/glassfish4/glassfish/domains/domain1/files/ -ln -s /hold /usr/local/glassfish4/glassfish/domains/domain1/files/10.5072 - -# need to set siteUrl -cd /usr/local/glassfish4 -bin/asadmin create-jvm-options "\"-Ddataverse.siteUrl=http\://localhost\:8084\"" diff --git a/conf/docker-dcm/configure_rsal.sh b/conf/docker-dcm/configure_rsal.sh deleted file mode 100755 index 5db43a34381..00000000000 --- a/conf/docker-dcm/configure_rsal.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -fn=rsal-workflow2.json -# needs an actual IP (vs a hostname) for whitelist -rsalip=`dig +short rsalsrv` - -# create workflow -curl -s -X POST -H "Content-type: application/json" -d @${fn} "http://localhost:8080/api/admin/workflows" - -# put rsal on the whitelist -curl -X PUT -d "127.0.0.1;${rsalip}" "http://localhost:8080/api/admin/workflows/ip-whitelist" - -# set workflow as default -curl -X PUT -d "1" "http://localhost:8080/api/admin/workflows/default/PrePublishDataset" - -# local access path -curl -X PUT -d "/hpc/storage" "http://localhost:8080/api/admin/settings/:LocalDataAccessPath" - -# storage sites -curl -X 
POST -H "Content-type: application/json" --upload-file site-primary.json "http://localhost:8080/api/admin/storageSites" -curl -X POST -H "Content-type: application/json" --upload-file site-remote.json "http://localhost:8080/api/admin/storageSites" diff --git a/conf/docker-dcm/create.bash b/conf/docker-dcm/create.bash deleted file mode 100755 index 58ae6e61dc7..00000000000 --- a/conf/docker-dcm/create.bash +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - - -# user creates dataset -k_d=burrito -dv_d=root -h=http://dvsrv - -fn=dataset.json -#dset_id=`curl -s -H "X-Dataverse-key: $k_d" -X POST --upload-file $fn $h/api/dataverses/$dv_d/datasets | jq .data.id` -r=`curl -s -H "X-Dataverse-key: $k_d" -X POST --upload-file $fn $h/api/dataverses/$dv_d/datasets` -echo $r -dset_id=`echo $r | jq .data.id` -echo "dataset created with id: $dset_id" - -if [ "null" == "${dset_id}" ]; then - echo "error - no dataset id from create command" - exit 1 -fi -echo "dataset created; internal/db id: ${dset_id}" - - diff --git a/conf/docker-dcm/dataset.json b/conf/docker-dcm/dataset.json deleted file mode 100644 index fb1b734ed40..00000000000 --- a/conf/docker-dcm/dataset.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "datasetVersion": { - "metadataBlocks": { - "citation": { - "displayName": "Citation Metadata", - "fields": [ - { - "typeName": "title", - "multiple": false, - "typeClass": "primitive", - "value": "DCM test dataset" - }, - { - "typeName": "productionDate", - "multiple": false, - "typeClass": "primitive", - "value": "2017-04-01" - }, - { - "typeName": "dsDescription", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "this would normally be a dataset large enough to require a DCM" - } - } - ] - }, - { - "typeName": "depositor", - "multiple": false, - "typeClass": "primitive", - "value": "Doc, Bob" - }, - { - "typeName": "producer", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "Prof, Arthor" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "LibraScholar" - } - } - ] - }, - { - "typeName": "author", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "Student, Carol" - } - , - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "LibraScholar" - } - }, - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "Doc, Bob" - } - , - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "LibraScholar" - } - } - - ] - }, - { - "typeName": "datasetContact", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "dsContact@mailinator.com" - } - } - ] - }, - { - "typeName": "subject", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Medicine, Health and Life Sciences" - ] - } - ] - } - } - } -} diff --git a/conf/docker-dcm/dcmsrv.dockerfile b/conf/docker-dcm/dcmsrv.dockerfile deleted file mode 100644 index 
9989fa3a89d..00000000000 --- a/conf/docker-dcm/dcmsrv.dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -# build from repo root -FROM centos:6 -RUN yum install -y epel-release -ARG RPMFILE=dcm-0.5-0.noarch.rpm -COPY ${RPMFILE} /tmp/ -COPY cfg/dcm/bashrc /root/.bashrc -COPY cfg/dcm/test_install.sh /root/ -RUN yum localinstall -y /tmp/${RPMFILE} -RUN pip install -r /opt/dcm/requirements.txt -RUN pip install awscli==1.15.75 -run export PATH=~/.local/bin:$PATH -RUN /root/test_install.sh -COPY cfg/dcm/rq-init-d /etc/init.d/rq -RUN useradd glassfish -COPY cfg/dcm/entrypoint-dcm.sh / -COPY cfg/dcm/healthcheck-dcm.sh / -EXPOSE 80 -EXPOSE 22 -VOLUME /hold -HEALTHCHECK CMD /healthcheck-dcm.sh -CMD ["/entrypoint-dcm.sh"] diff --git a/conf/docker-dcm/docker-compose.yml b/conf/docker-dcm/docker-compose.yml deleted file mode 100644 index 49d4467d349..00000000000 --- a/conf/docker-dcm/docker-compose.yml +++ /dev/null @@ -1,50 +0,0 @@ -# initial docker-compose file for combined Dataverse and DCM with shared filesystem - -version: '3' - -services: - dcmsrv: - build: - context: . - dockerfile: dcmsrv.dockerfile - container_name: dcmsrv - volumes: - - hold:/hold - rsalsrv: - build: - context: . - dockerfile: rsalsrv.dockerfile - container_name: rsalsrv -# image: rsalrepo_rsal - volumes: - - hold:/hold - - ./:/mnt - environment: - DV_HOST: http://dvsrv:8080 - DV_APIKEY: burrito - ports: - - "8889:80" - - "873:873" - dvsrv: - build: - context: . - dockerfile: dv0dcm.dockerfile - container_name: dvsrv - volumes: - - hold:/hold - - ./:/mnt - ports: - - "8083:8080" - - "8084:80" - client: - build: - context: . - dockerfile: c6client.dockerfile - command: sleep infinity - container_name: dcm_client - volumes: - - ./:/mnt - -volumes: - hold: - diff --git a/conf/docker-dcm/dv0dcm.dockerfile b/conf/docker-dcm/dv0dcm.dockerfile deleted file mode 100644 index 021534c8978..00000000000 --- a/conf/docker-dcm/dv0dcm.dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -# dv0 assumed to be image name for docker-aio -FROM dv0 -RUN yum install -y bind-utils -COPY configure_dcm.sh /opt/dv/ -COPY configure_rsal.sh /opt/dv/ -COPY rsal-workflow2.json site-primary.json site-remote.json /opt/dv/ -VOLUME /hold diff --git a/conf/docker-dcm/get_transfer.bash b/conf/docker-dcm/get_transfer.bash deleted file mode 100755 index 42080f536e1..00000000000 --- a/conf/docker-dcm/get_transfer.bash +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -# user gets transfer script - -dset_id=$1 -if [ -z "$dset_id" ]; then - echo "no dataset id specified, bailing out" - exit 1 -fi - -k_d=burrito -dv_d=root - -h=http://dvsrv - -#get upload script from DCM -wget --header "X-Dataverse-key: ${k_d}" ${h}/api/datasets/${dset_id}/dataCaptureModule/rsync -O upload-${dset_id}.bash - - diff --git a/conf/docker-dcm/publish_major.bash b/conf/docker-dcm/publish_major.bash deleted file mode 100755 index 6a3fd1288ca..00000000000 --- a/conf/docker-dcm/publish_major.bash +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -# publish dataset based on database id - -dset_id=$1 -if [ -z "$dset_id" ]; then - echo "no dataset id specified, bailing out" - exit 1 -fi - -k_d=burrito - -h=http://dvsrv - -curl -X POST -H "X-Dataverse-key: ${k_d}" "${h}/api/datasets/${dset_id}/actions/:publish?type=major" - - diff --git a/conf/docker-dcm/readme.md b/conf/docker-dcm/readme.md deleted file mode 100644 index 3e6a15e61d6..00000000000 --- a/conf/docker-dcm/readme.md +++ /dev/null @@ -1,26 +0,0 @@ -This docker-compose setup is intended for use in development, small scale evaluation, and potentially 
serve as an example of a working (although not production security level) configuration. - -Setup: - -- build docker-aio image with name dv0 as described in `../docker-aio` (don't start up the docker image or run setupIT.bash) -- work in the `conf/docker-dcm` directory for below commands -- download/prepare dependencies: `./0prep.sh` -- build dcm/dv0dcm images with docker-compose: `docker-compose -f docker-compose.yml build` -- start containers: `docker-compose -f docker-compose.yml up -d` -- wait for container to show "healthy" (aka - `docker ps`), then run dataverse app installation: `docker exec dvsrv /opt/dv/install.bash` -- for development, you probably want to use the `FAKE` DOI provider: `docker exec -it dvsrv /opt/dv/configure_doi.bash` -- configure dataverse application to use DCM: `docker exec -it dvsrv /opt/dv/configure_dcm.sh` -- configure dataverse application to use RSAL (if desired): `docker exec -it dvsrv /opt/dv/configure_rsal.sh` - -Operation: -The dataverse installation is accessible at `http://localhost:8084`. -The `dcm_client` container is intended to be used for executing transfer scripts, and `conf/docker-dcm` is available at `/mnt` inside the container; this container can be accessed with `docker exec -it dcm_client bash`. -The DCM cron job is NOT configured here; for development purposes the DCM checks can be run manually with `docker exec -it dcmsrv /opt/dcm/scn/post_upload.bash`. -The RSAL cron job is similarly NOT configured; for development purposes `docker exec -it rsalsrv /opt/rsal/scn/pub.py` can be run manually. - - -Cleanup: -- shutdown/cleanup `docker-compose -f docker-compose.yml down -v` - -For reference, this configuration was working with docker 17.09 / docker-compose 1.16. - diff --git a/conf/docker-dcm/rsal-workflow2.json b/conf/docker-dcm/rsal-workflow2.json deleted file mode 100644 index 322d3ecbcf7..00000000000 --- a/conf/docker-dcm/rsal-workflow2.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "name": "RSAL file move for publication", - "steps": [ - { - "provider":":internal", - "stepType":"log", - "parameters": { - "message": "Pre-http request" - } - }, - { - "provider":":internal", - "stepType":"http/sr", - "parameters": { - "url":"http://rsalsrv/rr.py", - "method":"POST", - "contentType":"text/plain", - "body":"${invocationId}\ndataset.id=${dataset.id}\ndataset.identifier=${dataset.identifier}\ndataset.globalId=${dataset.globalId}", - "expectedResponse":"OK.*", - "rollbackMethod":"DELETE" - } - }, - { - "provider":":internal", - "stepType":"log", - "parameters": { - "message": "Post-http request" - } - } - ] -} diff --git a/conf/docker-dcm/rsalsrv.dockerfile b/conf/docker-dcm/rsalsrv.dockerfile deleted file mode 100644 index 844432afe6b..00000000000 --- a/conf/docker-dcm/rsalsrv.dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM centos:7 -ARG RPMFILE=rsal-0.1-0.noarch.rpm -RUN yum update; yum install -y epel-release -COPY ${RPMFILE} /tmp/ -RUN yum localinstall -y /tmp/${RPMFILE} -COPY cfg/rsal/rsyncd.conf /etc/rsyncd.conf -COPY cfg/rsal/entrypoint-rsal.sh /entrypoint.sh -COPY cfg/rsal/lighttpd-modules.conf /etc/lighttpd/modules.conf -COPY cfg/rsal/lighttpd.conf /etc/lighttpd/lighttpd.conf -RUN mkdir -p /public/FK2 -RUN pip2 install -r /opt/rsal/scn/requirements.txt -#COPY doc/testdata/ /hold/ -ARG DV_HOST=http://dv_srv:8080 -ARG DV_API_KEY=burrito -ENV DV_HOST ${DV_HOST} -ENV DV_API_KEY ${DV_API_KEY} -EXPOSE 873 -EXPOSE 80 -HEALTHCHECK CMD curl --fail http://localhost/hw.py || exit 1 -CMD ["/entrypoint.sh"] diff --git 
a/conf/docker-dcm/site-primary.json b/conf/docker-dcm/site-primary.json deleted file mode 100644 index 35b217edffd..00000000000 --- a/conf/docker-dcm/site-primary.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "hostname": "rsalsrv", - "name": "LibraScholar University", - "primaryStorage": true, - "transferProtocols": "rsync,posix" -} diff --git a/conf/docker-dcm/site-remote.json b/conf/docker-dcm/site-remote.json deleted file mode 100644 index d47c3ef4dda..00000000000 --- a/conf/docker-dcm/site-remote.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "hostname": "remote.libra.research", - "name": "LibraResearch Institute", - "primaryStorage": false, - "transferProtocols": "rsync" -} diff --git a/conf/jhove/jhove.conf b/conf/jhove/jhove.conf index 5134ae0f81a..971c60acfaa 100644 --- a/conf/jhove/jhove.conf +++ b/conf/jhove/jhove.conf @@ -3,7 +3,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig - file:///usr/local/payara5/glassfish/domains/domain1/config/jhoveConfig.xsd"> + file:///usr/local/payara6/glassfish/domains/domain1/config/jhoveConfig.xsd"> /usr/local/src/jhove utf-8 /tmp diff --git a/conf/keycloak/docker-compose.yml b/conf/keycloak/docker-compose.yml new file mode 100644 index 00000000000..2776f6572df --- /dev/null +++ b/conf/keycloak/docker-compose.yml @@ -0,0 +1,15 @@ +version: "3.9" + +services: + + keycloak: + image: 'jboss/keycloak:16.1.1' + environment: + - KEYCLOAK_USER=kcadmin + - KEYCLOAK_PASSWORD=kcpassword + - KEYCLOAK_IMPORT=/tmp/oidc-realm.json + - KEYCLOAK_LOGLEVEL=DEBUG + ports: + - "8090:8080" + volumes: + - './oidc-realm.json:/tmp/oidc-realm.json' diff --git a/conf/keycloak/oidc-keycloak-auth-provider.json b/conf/keycloak/oidc-keycloak-auth-provider.json new file mode 100644 index 00000000000..7d09fe5f36e --- /dev/null +++ b/conf/keycloak/oidc-keycloak-auth-provider.json @@ -0,0 +1,8 @@ +{ + "id": "oidc-keycloak", + "factoryAlias": "oidc", + "title": "OIDC-Keycloak", + "subtitle": "OIDC-Keycloak", + "factoryData": "type: oidc | issuer: http://keycloak.mydomain.com:8090/realms/oidc-realm | clientId: oidc-client | clientSecret: ss6gE8mODCDfqesQaSG3gwUwZqZt547E", + "enabled": true +} diff --git a/conf/keycloak/oidc-realm.json b/conf/keycloak/oidc-realm.json new file mode 100644 index 00000000000..1b77f2b4384 --- /dev/null +++ b/conf/keycloak/oidc-realm.json @@ -0,0 +1,2108 @@ +{ + "id": "oidc-realm", + "realm": "oidc-realm", + "notBefore": 0, + "defaultSignatureAlgorithm": "RS256", + "revokeRefreshToken": false, + "refreshTokenMaxReuse": 0, + "accessTokenLifespan": 300, + "accessTokenLifespanForImplicitFlow": 900, + "ssoSessionIdleTimeout": 1800, + "ssoSessionMaxLifespan": 36000, + "ssoSessionIdleTimeoutRememberMe": 0, + "ssoSessionMaxLifespanRememberMe": 0, + "offlineSessionIdleTimeout": 2592000, + "offlineSessionMaxLifespanEnabled": false, + "offlineSessionMaxLifespan": 5184000, + "clientSessionIdleTimeout": 0, + "clientSessionMaxLifespan": 0, + "clientOfflineSessionIdleTimeout": 0, + "clientOfflineSessionMaxLifespan": 0, + "accessCodeLifespan": 60, + "accessCodeLifespanUserAction": 300, + "accessCodeLifespanLogin": 1800, + "actionTokenGeneratedByAdminLifespan": 43200, + "actionTokenGeneratedByUserLifespan": 300, + "oauth2DeviceCodeLifespan": 600, + "oauth2DevicePollingInterval": 5, + "enabled": true, + "sslRequired": "external", + "registrationAllowed": false, + "registrationEmailAsUsername": false, + "rememberMe": false, + "verifyEmail": 
false, + "loginWithEmailAllowed": true, + "duplicateEmailsAllowed": false, + "resetPasswordAllowed": false, + "editUsernameAllowed": false, + "bruteForceProtected": false, + "permanentLockout": false, + "maxFailureWaitSeconds": 900, + "minimumQuickLoginWaitSeconds": 60, + "waitIncrementSeconds": 60, + "quickLoginCheckMilliSeconds": 1000, + "maxDeltaTimeSeconds": 43200, + "failureFactor": 30, + "roles": { + "realm": [ + { + "id": "13d76240-fcf8-4361-9dbf-de268717cfb2", + "name": "uma_authorization", + "description": "${role_uma_authorization}", + "composite": false, + "clientRole": false, + "containerId": "oidc-realm", + "attributes": {} + }, + { + "id": "88b414c4-3516-4486-8f8b-a811ed0e0ce5", + "name": "default-roles-oidc-realm", + "description": "${role_default-roles}", + "composite": true, + "composites": { + "realm": [ + "offline_access", + "uma_authorization" + ] + }, + "clientRole": false, + "containerId": "oidc-realm", + "attributes": {} + }, + { + "id": "b907fd4e-0e54-461c-9411-3f736eef7d2f", + "name": "offline_access", + "description": "${role_offline-access}", + "composite": false, + "clientRole": false, + "containerId": "oidc-realm", + "attributes": {} + } + ], + "client": { + "realm-management": [ + { + "id": "39342ea9-0b4e-4841-8996-433759e9297f", + "name": "create-client", + "description": "${role_create-client}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "f8680034-617d-45d3-9801-7bf0d704c549", + "name": "manage-users", + "description": "${role_manage-users}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "b08e4cc3-71e2-4395-b66b-fb1277b48b88", + "name": "manage-realm", + "description": "${role_manage-realm}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "c15dc407-d012-43af-9a21-a2923e1d7b74", + "name": "manage-events", + "description": "${role_manage-events}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "66c07cb7-42cd-4155-8485-6cc7bd37cba9", + "name": "view-realm", + "description": "${role_view-realm}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "0419515f-4ab8-43ca-ac69-e842195813c0", + "name": "view-events", + "description": "${role_view-events}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "aa553d5a-b2dc-4f81-979a-2af0a019fee0", + "name": "impersonation", + "description": "${role_impersonation}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "9567e1e9-b755-43a8-93ed-d5929391316f", + "name": "manage-clients", + "description": "${role_manage-clients}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "e3dab69f-7323-4aad-bf98-8b7697f36d57", + "name": "query-users", + "description": "${role_query-users}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "ee8a4855-d0d5-4261-bdba-b419d304a824", + "name": "query-groups", + "description": "${role_query-groups}", + 
"composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "4f251212-e922-4ac0-9cce-3ada607648d2", + "name": "view-identity-providers", + "description": "${role_view-identity-providers}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "34e1dc59-a975-424f-887b-52465e184a4b", + "name": "realm-admin", + "description": "${role_realm-admin}", + "composite": true, + "composites": { + "client": { + "realm-management": [ + "create-client", + "manage-users", + "manage-realm", + "manage-events", + "view-realm", + "view-events", + "impersonation", + "manage-clients", + "query-users", + "view-identity-providers", + "query-groups", + "view-clients", + "view-users", + "manage-authorization", + "manage-identity-providers", + "query-realms", + "query-clients", + "view-authorization" + ] + } + }, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "d35aca04-0182-40d3-96b8-1ce5cc118729", + "name": "view-clients", + "description": "${role_view-clients}", + "composite": true, + "composites": { + "client": { + "realm-management": [ + "query-clients" + ] + } + }, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "7d3b28d5-471a-4b2b-bc80-56d4ff80fd28", + "name": "view-users", + "description": "${role_view-users}", + "composite": true, + "composites": { + "client": { + "realm-management": [ + "query-users", + "query-groups" + ] + } + }, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "651059eb-fc1a-4f8d-9ced-ed28b0a2f965", + "name": "manage-authorization", + "description": "${role_manage-authorization}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "73f447e9-def8-4214-8516-56571f2c6f65", + "name": "manage-identity-providers", + "description": "${role_manage-identity-providers}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "1b5f7c39-885e-4246-8cf5-25769544fc3d", + "name": "query-realms", + "description": "${role_query-realms}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "350da4c1-69d4-4557-a9a8-8ba760db0225", + "name": "query-clients", + "description": "${role_query-clients}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + }, + { + "id": "43d51082-6922-4765-8022-529d91a4603f", + "name": "view-authorization", + "description": "${role_view-authorization}", + "composite": false, + "clientRole": true, + "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "attributes": {} + } + ], + "security-admin-console": [], + "admin-cli": [], + "account-console": [], + "broker": [], + "oidc-client": [], + "account": [ + { + "id": "a163535c-71de-4b2d-9530-26b25eeb1c1e", + "name": "delete-account", + "description": "${role_delete-account}", + "composite": false, + "clientRole": true, + "containerId": "aed2e103-ee29-4d5c-a34e-1b8c65b7d537", + "attributes": {} + }, + { + "id": "851c6a9f-bce7-4c70-be82-084c25d61b25", + "name": "manage-account", + "composite": false, + "clientRole": true, + "containerId": 
"aed2e103-ee29-4d5c-a34e-1b8c65b7d537", + "attributes": {} + } + ] + } + }, + "groups": [], + "defaultRole": { + "id": "88b414c4-3516-4486-8f8b-a811ed0e0ce5", + "name": "default-roles-oidc-realm", + "description": "${role_default-roles}", + "composite": true, + "clientRole": false, + "containerId": "oidc-realm" + }, + "requiredCredentials": [ + "password" + ], + "otpPolicyType": "totp", + "otpPolicyAlgorithm": "HmacSHA1", + "otpPolicyInitialCounter": 0, + "otpPolicyDigits": 6, + "otpPolicyLookAheadWindow": 1, + "otpPolicyPeriod": 30, + "otpSupportedApplications": [ + "FreeOTP", + "Google Authenticator" + ], + "webAuthnPolicyRpEntityName": "keycloak", + "webAuthnPolicySignatureAlgorithms": [ + "ES256" + ], + "webAuthnPolicyRpId": "", + "webAuthnPolicyAttestationConveyancePreference": "not specified", + "webAuthnPolicyAuthenticatorAttachment": "not specified", + "webAuthnPolicyRequireResidentKey": "not specified", + "webAuthnPolicyUserVerificationRequirement": "not specified", + "webAuthnPolicyCreateTimeout": 0, + "webAuthnPolicyAvoidSameAuthenticatorRegister": false, + "webAuthnPolicyAcceptableAaguids": [], + "webAuthnPolicyPasswordlessRpEntityName": "keycloak", + "webAuthnPolicyPasswordlessSignatureAlgorithms": [ + "ES256" + ], + "webAuthnPolicyPasswordlessRpId": "", + "webAuthnPolicyPasswordlessAttestationConveyancePreference": "not specified", + "webAuthnPolicyPasswordlessAuthenticatorAttachment": "not specified", + "webAuthnPolicyPasswordlessRequireResidentKey": "not specified", + "webAuthnPolicyPasswordlessUserVerificationRequirement": "not specified", + "webAuthnPolicyPasswordlessCreateTimeout": 0, + "webAuthnPolicyPasswordlessAvoidSameAuthenticatorRegister": false, + "webAuthnPolicyPasswordlessAcceptableAaguids": [], + "users": [ + { + "username": "kcuser", + "enabled": true, + "totp": false, + "emailVerified": true, + "firstName": "Test", + "lastName": "Test", + "email": "test@test.com", + "credentials": [ + { + "type": "password", + "value": "kcpassword" + } + ] + } + ], + "scopeMappings": [ + { + "clientScope": "offline_access", + "roles": [ + "offline_access" + ] + } + ], + "clientScopeMappings": { + "account": [ + { + "client": "account-console", + "roles": [ + "manage-account" + ] + } + ] + }, + "clients": [ + { + "id": "aed2e103-ee29-4d5c-a34e-1b8c65b7d537", + "clientId": "account", + "name": "${client_account}", + "rootUrl": "${authBaseUrl}", + "baseUrl": "/realms/oidc-realm/account/", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + "clientAuthenticatorType": "client-secret", + "redirectUris": [ + "/realms/oidc-realm/account/*" + ], + "webOrigins": [], + "notBefore": 0, + "bearerOnly": false, + "consentRequired": false, + "standardFlowEnabled": true, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "publicClient": true, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": {}, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": false, + "nodeReRegistrationTimeout": 0, + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + }, + { + "id": "1e821c0e-f6b9-4324-9b23-e82b5431fb72", + "clientId": "account-console", + "name": "${client_account-console}", + "rootUrl": "${authBaseUrl}", + "baseUrl": "/realms/oidc-realm/account/", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + 
"clientAuthenticatorType": "client-secret", + "redirectUris": [ + "/realms/oidc-realm/account/*" + ], + "webOrigins": [], + "notBefore": 0, + "bearerOnly": false, + "consentRequired": false, + "standardFlowEnabled": true, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "publicClient": true, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": { + "pkce.code.challenge.method": "S256" + }, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": false, + "nodeReRegistrationTimeout": 0, + "protocolMappers": [ + { + "id": "397616ab-4124-4a13-92b6-317423e818a3", + "name": "audience resolve", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-resolve-mapper", + "consentRequired": false, + "config": {} + } + ], + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + }, + { + "id": "dddcc3e0-d742-422b-8b5f-84a292ea9d66", + "clientId": "admin-cli", + "name": "${client_admin-cli}", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + "clientAuthenticatorType": "client-secret", + "redirectUris": [], + "webOrigins": [], + "notBefore": 0, + "bearerOnly": false, + "consentRequired": false, + "standardFlowEnabled": false, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": true, + "serviceAccountsEnabled": false, + "publicClient": true, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": {}, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": false, + "nodeReRegistrationTimeout": 0, + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + }, + { + "id": "df6f6cd0-a046-492f-84ac-b4fe31909be4", + "clientId": "broker", + "name": "${client_broker}", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + "clientAuthenticatorType": "client-secret", + "redirectUris": [], + "webOrigins": [], + "notBefore": 0, + "bearerOnly": true, + "consentRequired": false, + "standardFlowEnabled": true, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "publicClient": false, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": {}, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": false, + "nodeReRegistrationTimeout": 0, + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + }, + { + "id": "c0af31b9-21aa-4e70-baf3-8d68850c4081", + "clientId": "oidc-client", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + "clientAuthenticatorType": "client-secret", + "secret": "ss6gE8mODCDfqesQaSG3gwUwZqZt547E", + "redirectUris": [ + "*" + ], + "webOrigins": [ + "+" + ], + "notBefore": 0, + "bearerOnly": false, + "consentRequired": false, + "standardFlowEnabled": true, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "publicClient": false, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": { + "saml.force.post.binding": "false", + "saml.multivalued.roles": "false", + "oauth2.device.authorization.grant.enabled": "false", + 
"use.jwks.url": "true", + "backchannel.logout.revoke.offline.tokens": "false", + "saml.server.signature.keyinfo.ext": "false", + "use.refresh.tokens": "true", + "jwt.credential.certificate": "MIICpTCCAY0CBgGE8V6o6TANBgkqhkiG9w0BAQsFADAWMRQwEgYDVQQDDAtvaWRjLWNsaWVudDAeFw0yMjEyMDgxMDUyMDNaFw0zMjEyMDgxMDUzNDNaMBYxFDASBgNVBAMMC29pZGMtY2xpZW50MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArUffTl+jXWzyY3T4VVtkiGyNnY+RgyAXUzz+dxT7wUQaYSiNPvmaxnio555pWjR403SRUjVxM8eJYgHK9s43qQWdheXBIHyLKaQfjVsTtSmHgFtPmjk+kweQs6fxUi5CNvtx4RTCaOK5wV8q5q1X7mb8cZ5+gLSx1f/pHtayFXMT75nV04aZKWgPztPz8w+QXUx9cuFY4OIiTdRbdyfr1oOiDtMbxxA22tggB/HSMVkSckT3LSPj7fJKJMPFYi/g1AXxGipX/q8XkmOBrvNePCpH0F/IZbC1vXEsDC6urfoijOdiZgPMobuADmWHPiw2zgCN8qa6QuLFaI+JduXT9QIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQCEOYRHkH8DnBucb+uN5c9U/fZY+mpglxzZvby7dGBXfVwLN+eP1kGcQPaFi+nshk7FgF4mR5/cmuAPZt+YBbgP0z37D49nB7S6sniwzfhCAAplOT4vmm+MjperTDsWFUGhQZJvN/jxqP2Xccw7N//ReYi7yOlmWhwGyqQyTi0ySbE3BY5eFvUKepekybYi/15XlyF8lwS2jH1MvnJAxAMNVpVUcP4wTnq/dOw5ybrVWF0mPnA8KVzTPuPE5nzZvZ3rkXQeEJTffIToR+T/DH/KTLXcNUtx4nG0ajJ0gM6iVAXGnKlI9Viq/M5Ese+52I6rQmxTsFMn57LNzKgMpWcE", + "oidc.ciba.grant.enabled": "false", + "use.jwks.string": "false", + "backchannel.logout.session.required": "false", + "client_credentials.use_refresh_token": "false", + "require.pushed.authorization.requests": "false", + "saml.client.signature": "false", + "id.token.as.detached.signature": "false", + "saml.assertion.signature": "false", + "saml.encrypt": "false", + "saml.server.signature": "false", + "exclude.session.state.from.auth.response": "false", + "saml.artifact.binding": "false", + "saml_force_name_id_format": "false", + "tls.client.certificate.bound.access.tokens": "false", + "saml.authnstatement": "false", + "display.on.consent.screen": "false", + "saml.onetimeuse.condition": "false" + }, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": true, + "nodeReRegistrationTimeout": -1, + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + }, + { + "id": "43ffb712-f233-48e2-ae79-d6993bac34a5", + "clientId": "realm-management", + "name": "${client_realm-management}", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + "clientAuthenticatorType": "client-secret", + "redirectUris": [], + "webOrigins": [], + "notBefore": 0, + "bearerOnly": true, + "consentRequired": false, + "standardFlowEnabled": true, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "publicClient": false, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": {}, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": false, + "nodeReRegistrationTimeout": 0, + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + }, + { + "id": "3747f98f-efbb-49ef-8238-a349bf5ab409", + "clientId": "security-admin-console", + "name": "${client_security-admin-console}", + "rootUrl": "${authAdminUrl}", + "baseUrl": "/admin/oidc-realm/console/", + "surrogateAuthRequired": false, + "enabled": true, + "alwaysDisplayInConsole": false, + "clientAuthenticatorType": "client-secret", + "redirectUris": [ + "/admin/oidc-realm/console/*" + ], + "webOrigins": [ + "+" + ], + "notBefore": 0, + "bearerOnly": false, + "consentRequired": false, + "standardFlowEnabled": true, + 
"implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "publicClient": true, + "frontchannelLogout": false, + "protocol": "openid-connect", + "attributes": { + "pkce.code.challenge.method": "S256" + }, + "authenticationFlowBindingOverrides": {}, + "fullScopeAllowed": false, + "nodeReRegistrationTimeout": 0, + "protocolMappers": [ + { + "id": "2fbdf6c9-ee69-4edc-b780-ec62aecfc519", + "name": "locale", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "locale", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "locale", + "jsonType.label": "String" + } + } + ], + "defaultClientScopes": [ + "web-origins", + "roles", + "profile", + "email" + ], + "optionalClientScopes": [ + "address", + "phone", + "offline_access", + "microprofile-jwt" + ] + } + ], + "clientScopes": [ + { + "id": "f76f507d-7d1c-495b-9504-47830b3834f1", + "name": "phone", + "description": "OpenID Connect built-in scope: phone", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "true", + "display.on.consent.screen": "true", + "consent.screen.text": "${phoneScopeConsentText}" + }, + "protocolMappers": [ + { + "id": "be849ec8-1747-4efb-bc00-beeaf44f11c8", + "name": "phone number verified", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "phoneNumberVerified", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "phone_number_verified", + "jsonType.label": "boolean" + } + }, + { + "id": "8e8600ec-4290-435d-b109-9f0547cb4a1d", + "name": "phone number", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "phoneNumber", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "phone_number", + "jsonType.label": "String" + } + } + ] + }, + { + "id": "54b87197-5309-4b2c-8ad9-f561a0fc178a", + "name": "role_list", + "description": "SAML role list", + "protocol": "saml", + "attributes": { + "consent.screen.text": "${samlRoleListScopeConsentText}", + "display.on.consent.screen": "true" + }, + "protocolMappers": [ + { + "id": "5fd831af-19a5-4a9c-b44f-2a806fae011c", + "name": "role list", + "protocol": "saml", + "protocolMapper": "saml-role-list-mapper", + "consentRequired": false, + "config": { + "single": "false", + "attribute.nameformat": "Basic", + "attribute.name": "Role" + } + } + ] + }, + { + "id": "2f85470d-8cb7-4f07-8602-47342d68af86", + "name": "web-origins", + "description": "OpenID Connect scope for add allowed web origins to the access token", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "false", + "display.on.consent.screen": "false", + "consent.screen.text": "" + }, + "protocolMappers": [ + { + "id": "c5d2aafc-f72d-4d7b-9d88-cd759f0e045e", + "name": "allowed web origins", + "protocol": "openid-connect", + "protocolMapper": "oidc-allowed-origins-mapper", + "consentRequired": false, + "config": {} + } + ] + }, + { + "id": "528face9-229a-4adf-98d8-68b1a22e880d", + "name": "microprofile-jwt", + "description": "Microprofile - JWT built-in scope", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "true", + "display.on.consent.screen": 
"false" + }, + "protocolMappers": [ + { + "id": "89240a7c-10f3-4e09-9d6b-41955b86c58d", + "name": "groups", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-realm-role-mapper", + "consentRequired": false, + "config": { + "multivalued": "true", + "userinfo.token.claim": "true", + "user.attribute": "foo", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "groups", + "jsonType.label": "String" + } + }, + { + "id": "15b6db72-4870-480e-a675-87f87df5f8a5", + "name": "upn", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-property-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "username", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "upn", + "jsonType.label": "String" + } + } + ] + }, + { + "id": "cdd11477-b02b-4886-bc6d-cf4b728ebc0e", + "name": "email", + "description": "OpenID Connect built-in scope: email", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "true", + "display.on.consent.screen": "true", + "consent.screen.text": "${emailScopeConsentText}" + }, + "protocolMappers": [ + { + "id": "627b9f4f-23d6-4480-adf4-264faf58de33", + "name": "email verified", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-property-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "emailVerified", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "email_verified", + "jsonType.label": "boolean" + } + }, + { + "id": "6a2adf2e-db2d-4ebe-8d48-f658f9b4a5ca", + "name": "email", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-property-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "email", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "email", + "jsonType.label": "String" + } + } + ] + }, + { + "id": "8f830142-b3f1-40f0-82e2-ceed68857a40", + "name": "roles", + "description": "OpenID Connect scope for add user roles to the access token", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "false", + "display.on.consent.screen": "true", + "consent.screen.text": "${rolesScopeConsentText}" + }, + "protocolMappers": [ + { + "id": "28a96dc6-c4dc-4aae-b316-28b56dccd077", + "name": "audience resolve", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-resolve-mapper", + "consentRequired": false, + "config": {} + }, + { + "id": "3e81050f-540e-4f3d-9abf-86406e484f76", + "name": "realm roles", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-realm-role-mapper", + "consentRequired": false, + "config": { + "user.attribute": "foo", + "access.token.claim": "true", + "claim.name": "realm_access.roles", + "jsonType.label": "String", + "multivalued": "true" + } + }, + { + "id": "13afa1f4-3fac-4c90-a9b4-e84e682f46e9", + "name": "client roles", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-client-role-mapper", + "consentRequired": false, + "config": { + "user.attribute": "foo", + "access.token.claim": "true", + "claim.name": "resource_access.${client_id}.roles", + "jsonType.label": "String", + "multivalued": "true" + } + } + ] + }, + { + "id": "3beac2fc-e947-408f-8422-ca9a1e66a258", + "name": "address", + "description": "OpenID Connect built-in scope: address", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "true", + 
"display.on.consent.screen": "true", + "consent.screen.text": "${addressScopeConsentText}" + }, + "protocolMappers": [ + { + "id": "12911891-db5c-4a35-80fa-555c5eda7e68", + "name": "address", + "protocol": "openid-connect", + "protocolMapper": "oidc-address-mapper", + "consentRequired": false, + "config": { + "user.attribute.formatted": "formatted", + "user.attribute.country": "country", + "user.attribute.postal_code": "postal_code", + "userinfo.token.claim": "true", + "user.attribute.street": "street", + "id.token.claim": "true", + "user.attribute.region": "region", + "access.token.claim": "true", + "user.attribute.locality": "locality" + } + } + ] + }, + { + "id": "8a29297a-e6f6-41ae-b25d-8a14236de535", + "name": "offline_access", + "description": "OpenID Connect built-in scope: offline_access", + "protocol": "openid-connect", + "attributes": { + "consent.screen.text": "${offlineAccessScopeConsentText}", + "display.on.consent.screen": "true" + } + }, + { + "id": "ce1622c5-701f-4e3e-9d2d-8dae0f07a295", + "name": "profile", + "description": "OpenID Connect built-in scope: profile", + "protocol": "openid-connect", + "attributes": { + "include.in.token.scope": "true", + "display.on.consent.screen": "true", + "consent.screen.text": "${profileScopeConsentText}" + }, + "protocolMappers": [ + { + "id": "98cc62b8-250a-4087-92da-bb0f0931e675", + "name": "full name", + "protocol": "openid-connect", + "protocolMapper": "oidc-full-name-mapper", + "consentRequired": false, + "config": { + "id.token.claim": "true", + "access.token.claim": "true", + "userinfo.token.claim": "true" + } + }, + { + "id": "b99c8c44-4cc9-4c87-a5a1-c14e64d472ae", + "name": "given name", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-property-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "firstName", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "given_name", + "jsonType.label": "String" + } + }, + { + "id": "903d5932-bdec-42bc-a53c-3cce93deaa1c", + "name": "zoneinfo", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "zoneinfo", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "zoneinfo", + "jsonType.label": "String" + } + }, + { + "id": "ccbdc095-28f7-4769-8261-2e32c7b6fab0", + "name": "picture", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "picture", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "picture", + "jsonType.label": "String" + } + }, + { + "id": "22a4a38c-f755-44f3-b847-803c7fb3cef5", + "name": "birthdate", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "birthdate", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "birthdate", + "jsonType.label": "String" + } + }, + { + "id": "78726920-b4e2-4ed2-b9e0-df38a7f82376", + "name": "updated at", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "updatedAt", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "updated_at", + 
"jsonType.label": "String" + } + }, + { + "id": "c64c6eb8-5cbe-4092-bf2c-dd02b8c0e0e8", + "name": "family name", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-property-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "lastName", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "family_name", + "jsonType.label": "String" + } + }, + { + "id": "306784d8-8da1-48d8-92a3-dccfff83bcaf", + "name": "middle name", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "middleName", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "middle_name", + "jsonType.label": "String" + } + }, + { + "id": "0ff127fa-774e-43a8-a1fc-47ea3f307aa1", + "name": "website", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "website", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "website", + "jsonType.label": "String" + } + }, + { + "id": "8989c6f8-25c5-4d02-aa06-25b3b77fc227", + "name": "profile", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "profile", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "profile", + "jsonType.label": "String" + } + }, + { + "id": "3b67000c-9cbf-43ee-9e05-26f560871897", + "name": "gender", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "gender", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "gender", + "jsonType.label": "String" + } + }, + { + "id": "c28b04de-2770-423e-9b9a-b3321d7300e2", + "name": "nickname", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "nickname", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "nickname", + "jsonType.label": "String" + } + }, + { + "id": "fd791ed4-d4ab-4df9-81b4-c69a3134bcab", + "name": "username", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-property-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "username", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "preferred_username", + "jsonType.label": "String" + } + }, + { + "id": "c7378ce5-3673-47b2-9ebc-92c772bebf9f", + "name": "locale", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": false, + "config": { + "userinfo.token.claim": "true", + "user.attribute": "locale", + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "locale", + "jsonType.label": "String" + } + } + ] + } + ], + "defaultDefaultClientScopes": [ + "web-origins", + "role_list", + "roles", + "email", + "profile" + ], + "defaultOptionalClientScopes": [ + "address", + "microprofile-jwt", + "offline_access", + "phone" + ], + "browserSecurityHeaders": { + "contentSecurityPolicyReportOnly": "", + "xContentTypeOptions": "nosniff", + "xRobotsTag": "none", + 
"xFrameOptions": "SAMEORIGIN", + "contentSecurityPolicy": "frame-src 'self'; frame-ancestors 'self'; object-src 'none';", + "xXSSProtection": "1; mode=block", + "strictTransportSecurity": "max-age=31536000; includeSubDomains" + }, + "smtpServer": {}, + "eventsEnabled": false, + "eventsListeners": [ + "jboss-logging" + ], + "enabledEventTypes": [], + "adminEventsEnabled": false, + "adminEventsDetailsEnabled": false, + "identityProviders": [], + "identityProviderMappers": [], + "components": { + "org.keycloak.services.clientregistration.policy.ClientRegistrationPolicy": [ + { + "id": "8e2d0c22-0627-4115-9f14-4225244333d9", + "name": "Trusted Hosts", + "providerId": "trusted-hosts", + "subType": "anonymous", + "subComponents": {}, + "config": { + "host-sending-registration-request-must-match": [ + "true" + ], + "client-uris-must-match": [ + "true" + ] + } + }, + { + "id": "45bdde87-a364-4d66-a12e-1a4fd42c85fb", + "name": "Full Scope Disabled", + "providerId": "scope", + "subType": "anonymous", + "subComponents": {}, + "config": {} + }, + { + "id": "7b7d3215-68d2-41db-bc0f-db0a45934a84", + "name": "Allowed Client Scopes", + "providerId": "allowed-client-templates", + "subType": "anonymous", + "subComponents": {}, + "config": { + "allow-default-scopes": [ + "true" + ] + } + }, + { + "id": "e067781a-6058-4f2b-9408-3390e9854cf8", + "name": "Consent Required", + "providerId": "consent-required", + "subType": "anonymous", + "subComponents": {}, + "config": {} + }, + { + "id": "296be954-8084-45c8-b6f3-94d53f7341f6", + "name": "Allowed Protocol Mapper Types", + "providerId": "allowed-protocol-mappers", + "subType": "anonymous", + "subComponents": {}, + "config": { + "allowed-protocol-mapper-types": [ + "saml-role-list-mapper", + "saml-user-property-mapper", + "oidc-usermodel-attribute-mapper", + "oidc-address-mapper", + "oidc-sha256-pairwise-sub-mapper", + "saml-user-attribute-mapper", + "oidc-usermodel-property-mapper", + "oidc-full-name-mapper" + ] + } + }, + { + "id": "b9a2a484-aee1-4633-aa37-a9ab2b74a239", + "name": "Allowed Client Scopes", + "providerId": "allowed-client-templates", + "subType": "authenticated", + "subComponents": {}, + "config": { + "allow-default-scopes": [ + "true" + ] + } + }, + { + "id": "016e4914-a32c-40fa-8aab-3eb25a411df5", + "name": "Max Clients Limit", + "providerId": "max-clients", + "subType": "anonymous", + "subComponents": {}, + "config": { + "max-clients": [ + "200" + ] + } + }, + { + "id": "a4fb2fa3-93b8-4497-8047-424f70f298c7", + "name": "Allowed Protocol Mapper Types", + "providerId": "allowed-protocol-mappers", + "subType": "authenticated", + "subComponents": {}, + "config": { + "allowed-protocol-mapper-types": [ + "oidc-sha256-pairwise-sub-mapper", + "oidc-full-name-mapper", + "saml-user-property-mapper", + "saml-role-list-mapper", + "oidc-usermodel-attribute-mapper", + "oidc-address-mapper", + "oidc-usermodel-property-mapper", + "saml-user-attribute-mapper" + ] + } + } + ], + "org.keycloak.keys.KeyProvider": [ + { + "id": "31b693fa-2b95-47a6-96a1-dfff868ca1df", + "name": "rsa-enc-generated", + "providerId": "rsa-enc-generated", + "subComponents": {}, + "config": { + "priority": [ + "100" + ], + "algorithm": [ + "RSA-OAEP" + ] + } + }, + { + "id": "f1e63d09-45a0-4382-8346-0408ee906649", + "name": "hmac-generated", + "providerId": "hmac-generated", + "subComponents": {}, + "config": { + "priority": [ + "100" + ], + "algorithm": [ + "HS256" + ] + } + }, + { + "id": "99084d92-06f5-4787-b932-a40b5377f3cb", + "name": "rsa-generated", + "providerId": 
"rsa-generated", + "subComponents": {}, + "config": { + "priority": [ + "100" + ] + } + }, + { + "id": "9887f1bf-b4f7-4646-9919-a9dbde13ce74", + "name": "aes-generated", + "providerId": "aes-generated", + "subComponents": {}, + "config": { + "priority": [ + "100" + ] + } + } + ] + }, + "internationalizationEnabled": false, + "supportedLocales": [], + "authenticationFlows": [ + { + "id": "a7f91199-178d-4399-8319-5063ffcc37b0", + "alias": "Account verification options", + "description": "Method with which to verity the existing account", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "idp-email-verification", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "ALTERNATIVE", + "priority": 20, + "flowAlias": "Verify Existing Account by Re-authentication", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "602533e3-f7a1-4e25-9a12-f3080eeccec3", + "alias": "Authentication Options", + "description": "Authentication options.", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "basic-auth", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "basic-auth-otp", + "authenticatorFlow": false, + "requirement": "DISABLED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "auth-spnego", + "authenticatorFlow": false, + "requirement": "DISABLED", + "priority": 30, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "ba7bcdfd-05c6-4da6-827b-24e3513bddbe", + "alias": "Browser - Conditional OTP", + "description": "Flow to determine if the OTP is required for the authentication", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "conditional-user-configured", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "auth-otp-form", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "d0f62327-ef2f-4561-8b5a-1f61faecdac0", + "alias": "Direct Grant - Conditional OTP", + "description": "Flow to determine if the OTP is required for the authentication", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "conditional-user-configured", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "direct-grant-validate-otp", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "f10b85d0-26ee-4648-b81b-80213b066d76", + "alias": "First broker login - Conditional OTP", + "description": "Flow to determine if the OTP is required for the authentication", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "conditional-user-configured", + "authenticatorFlow": false, + "requirement": 
"REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "auth-otp-form", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "d6af4ac0-f6bc-4197-bf01-6e2c321ecaad", + "alias": "Handle Existing Account", + "description": "Handle what to do if there is existing account with same email/username like authenticated identity provider", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "idp-confirm-link", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "REQUIRED", + "priority": 20, + "flowAlias": "Account verification options", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "501ab743-2e2f-427d-820f-14deed111b08", + "alias": "Reset - Conditional OTP", + "description": "Flow to determine if the OTP should be reset or not. Set to REQUIRED to force.", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "conditional-user-configured", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "reset-otp", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "e02c3a63-a09d-4dde-9f6c-22c95eef8534", + "alias": "User creation or linking", + "description": "Flow for the existing/non-existing user alternatives", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticatorConfig": "create unique user config", + "authenticator": "idp-create-user-if-unique", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "ALTERNATIVE", + "priority": 20, + "flowAlias": "Handle Existing Account", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "c348906d-6266-4e68-937e-8f3d15c66524", + "alias": "Verify Existing Account by Re-authentication", + "description": "Reauthentication of existing account", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "idp-username-password-form", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "CONDITIONAL", + "priority": 20, + "flowAlias": "First broker login - Conditional OTP", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "cf6ba166-43d5-4687-95c4-0a184ca08885", + "alias": "browser", + "description": "browser based authentication", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "auth-cookie", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "auth-spnego", + "authenticatorFlow": false, + "requirement": "DISABLED", + "priority": 20, + 
"userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "identity-provider-redirector", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 25, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "ALTERNATIVE", + "priority": 30, + "flowAlias": "forms", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "87cb4f25-9275-4617-9e95-63adf1ce3ece", + "alias": "clients", + "description": "Base authentication for clients", + "providerId": "client-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "client-secret", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "client-jwt", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "client-secret-jwt", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 30, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "client-x509", + "authenticatorFlow": false, + "requirement": "ALTERNATIVE", + "priority": 40, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "e75b99c5-c566-4009-b0ba-c73716bed254", + "alias": "direct grant", + "description": "OpenID Connect Resource Owner Grant", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "direct-grant-validate-username", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "direct-grant-validate-password", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "CONDITIONAL", + "priority": 30, + "flowAlias": "Direct Grant - Conditional OTP", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "8a97380c-0f70-45cb-a7b0-780eb70453ba", + "alias": "docker auth", + "description": "Used by Docker clients to authenticate against the IDP", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "docker-http-basic-authenticator", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "131e0aad-5422-4504-bafc-96be2fa44c34", + "alias": "first broker login", + "description": "Actions taken after first broker login with identity provider account, which is not yet linked to any Keycloak account", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticatorConfig": "review profile config", + "authenticator": "idp-review-profile", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "REQUIRED", + "priority": 20, + "flowAlias": "User creation or linking", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "e7d4b793-b3c2-4ec3-a2b1-04f7217e8f46", + "alias": "forms", + "description": "Username, password, otp and 
other auth forms.", + "providerId": "basic-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "auth-username-password-form", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "CONDITIONAL", + "priority": 20, + "flowAlias": "Browser - Conditional OTP", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "f59a7688-61a1-4ac9-a13a-03f92e022add", + "alias": "http challenge", + "description": "An authentication flow based on challenge-response HTTP Authentication Schemes", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "no-cookie-redirect", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "REQUIRED", + "priority": 20, + "flowAlias": "Authentication Options", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "80a7b0f5-abb3-4780-be58-4ed1dc3e50fa", + "alias": "registration", + "description": "registration flow", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "registration-page-form", + "authenticatorFlow": true, + "requirement": "REQUIRED", + "priority": 10, + "flowAlias": "registration form", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "f18231cf-b803-493b-9dd6-ee8fa602c861", + "alias": "registration form", + "description": "registration form", + "providerId": "form-flow", + "topLevel": false, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "registration-user-creation", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "registration-profile-action", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 40, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "registration-password-action", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 50, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "registration-recaptcha-action", + "authenticatorFlow": false, + "requirement": "DISABLED", + "priority": 60, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + }, + { + "id": "34ccfce6-1488-4db3-b90e-d98e8d8b2ae6", + "alias": "reset credentials", + "description": "Reset credentials for a user if they forgot their password or something", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "reset-credentials-choose-user", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "reset-credential-email", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 20, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticator": "reset-password", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 30, + "userSetupAllowed": false, + "autheticatorFlow": false + }, + { + "authenticatorFlow": true, + "requirement": "CONDITIONAL", + "priority": 40, + 
"flowAlias": "Reset - Conditional OTP", + "userSetupAllowed": false, + "autheticatorFlow": true + } + ] + }, + { + "id": "4468100c-fa83-4c16-8970-d53cb592f93a", + "alias": "saml ecp", + "description": "SAML ECP Profile Authentication Flow", + "providerId": "basic-flow", + "topLevel": true, + "builtIn": true, + "authenticationExecutions": [ + { + "authenticator": "http-basic-authenticator", + "authenticatorFlow": false, + "requirement": "REQUIRED", + "priority": 10, + "userSetupAllowed": false, + "autheticatorFlow": false + } + ] + } + ], + "authenticatorConfig": [ + { + "id": "c3bb087e-7fe9-4f13-b1bd-c2d7d1320054", + "alias": "create unique user config", + "config": { + "require.password.update.after.registration": "false" + } + }, + { + "id": "09820d9d-3c12-45f3-bc62-97b53f8a7efe", + "alias": "review profile config", + "config": { + "update.profile.on.first.login": "missing" + } + } + ], + "requiredActions": [ + { + "alias": "CONFIGURE_TOTP", + "name": "Configure OTP", + "providerId": "CONFIGURE_TOTP", + "enabled": true, + "defaultAction": false, + "priority": 10, + "config": {} + }, + { + "alias": "terms_and_conditions", + "name": "Terms and Conditions", + "providerId": "terms_and_conditions", + "enabled": false, + "defaultAction": false, + "priority": 20, + "config": {} + }, + { + "alias": "UPDATE_PASSWORD", + "name": "Update Password", + "providerId": "UPDATE_PASSWORD", + "enabled": true, + "defaultAction": false, + "priority": 30, + "config": {} + }, + { + "alias": "UPDATE_PROFILE", + "name": "Update Profile", + "providerId": "UPDATE_PROFILE", + "enabled": true, + "defaultAction": false, + "priority": 40, + "config": {} + }, + { + "alias": "VERIFY_EMAIL", + "name": "Verify Email", + "providerId": "VERIFY_EMAIL", + "enabled": true, + "defaultAction": false, + "priority": 50, + "config": {} + }, + { + "alias": "delete_account", + "name": "Delete Account", + "providerId": "delete_account", + "enabled": false, + "defaultAction": false, + "priority": 60, + "config": {} + }, + { + "alias": "update_user_locale", + "name": "Update User Locale", + "providerId": "update_user_locale", + "enabled": true, + "defaultAction": false, + "priority": 1000, + "config": {} + } + ], + "browserFlow": "browser", + "registrationFlow": "registration", + "directGrantFlow": "direct grant", + "resetCredentialsFlow": "reset credentials", + "clientAuthenticationFlow": "clients", + "dockerAuthenticationFlow": "docker auth", + "attributes": { + "cibaBackchannelTokenDeliveryMode": "poll", + "cibaExpiresIn": "120", + "cibaAuthRequestedUserHint": "login_hint", + "oauth2DeviceCodeLifespan": "600", + "clientOfflineSessionMaxLifespan": "0", + "oauth2DevicePollingInterval": "5", + "clientSessionIdleTimeout": "0", + "parRequestUriLifespan": "60", + "clientSessionMaxLifespan": "0", + "clientOfflineSessionIdleTimeout": "0", + "cibaInterval": "5" + }, + "keycloakVersion": "16.1.1", + "userManagedAccessAllowed": false, + "clientProfiles": { + "profiles": [] + }, + "clientPolicies": { + "policies": [] + } +} diff --git a/conf/keycloak/rm-keycloak.sh b/conf/keycloak/rm-keycloak.sh new file mode 100755 index 00000000000..ea29fbb37c0 --- /dev/null +++ b/conf/keycloak/rm-keycloak.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +if [ "$(docker ps -aq -f name=^/keycloak$)" ]; then + if [ "$(docker ps -aq -f status=running -f name=^/keycloak$)" ]; then + docker kill keycloak + fi + docker rm keycloak + echo "INFO - Keycloak container removed" +else + echo "INFO - No Keycloak container available to remove" +fi diff --git 
a/conf/keycloak/run-keycloak.sh b/conf/keycloak/run-keycloak.sh new file mode 100755 index 00000000000..effb37f91b8 --- /dev/null +++ b/conf/keycloak/run-keycloak.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +DOCKER_IMAGE="jboss/keycloak:16.1.1" +KEYCLOAK_USER="kcadmin" +KEYCLOAK_PASSWORD="kcpassword" +KEYCLOAK_PORT=8090 + +if [ ! "$(docker ps -q -f name=^/keycloak$)" ]; then + if [ "$(docker ps -aq -f status=exited -f name=^/keycloak$)" ]; then + echo "INFO - An exited Keycloak container already exists, restarting..." + docker start keycloak + echo "INFO - Keycloak container restarted" + else + docker run -d --name keycloak -p $KEYCLOAK_PORT:8080 -e KEYCLOAK_USER=$KEYCLOAK_USER -e KEYCLOAK_PASSWORD=$KEYCLOAK_PASSWORD -e KEYCLOAK_IMPORT=/tmp/oidc-realm.json -v "$(pwd)"/oidc-realm.json:/tmp/oidc-realm.json $DOCKER_IMAGE + echo "INFO - Keycloak container created and running" + fi +else + echo "INFO - Keycloak container is already running" +fi diff --git a/conf/solr/8.11.1/readme.md b/conf/solr/8.11.1/readme.md deleted file mode 100644 index 4457cf9a7df..00000000000 --- a/conf/solr/8.11.1/readme.md +++ /dev/null @@ -1 +0,0 @@ -Please see the dev guide for what to do with Solr config files. \ No newline at end of file diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/9.3.0/schema.xml similarity index 95% rename from conf/solr/8.11.1/schema.xml rename to conf/solr/9.3.0/schema.xml index 63312ab5d40..3c15b659c4e 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/9.3.0/schema.xml @@ -23,7 +23,7 @@ For more information, on how to customize this file, please see - http://lucene.apache.org/solr/guide/documents-fields-and-schema-design.html + https://solr.apache.org/guide/solr/latest/indexing-guide/schema-elements.html PERFORMANCE NOTE: this schema includes many optional features and should not be used for benchmarking. To improve performance one could @@ -38,7 +38,7 @@ catchall "text" field, and use that for searching. --> - + - - - - - - - - - + + @@ -163,7 +156,7 @@ - + @@ -200,7 +193,7 @@ - + @@ -208,9 +201,9 @@ - + - + @@ -218,16 +211,24 @@ - + - + + + + + + + + + + + + RESTRICTION: the glob-like pattern in the name attribute must have a "*" + only at the start or the end. --> @@ -662,19 +660,23 @@ - + + + + + + + - - - - + + @@ -720,43 +722,6 @@ field first in an ascending sort and last in a descending sort. 
--> - - - - - - - - - - - - - - - - + - + + + + + + + + + + + + + + + + + + + + + diff --git a/conf/solr/8.11.1/solrconfig.xml b/conf/solr/9.3.0/solrconfig.xml similarity index 67% rename from conf/solr/8.11.1/solrconfig.xml rename to conf/solr/9.3.0/solrconfig.xml index 3e4e5adc7b6..b89315cdaa9 100644 --- a/conf/solr/8.11.1/solrconfig.xml +++ b/conf/solr/9.3.0/solrconfig.xml @@ -1,1410 +1,1176 @@ - - - - - - - - - 7.3.0 - - - - - - - - - - - - - - - - - - - - ${solr.data.dir:} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.lock.type:native} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.ulog.dir:} - ${solr.ulog.numVersionBuckets:65536} - - - - - ${solr.autoCommit.maxTime:15000} - false - - - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - - - - - - - - 1024 - - - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - - - - - false - - - - - - - - - - - - - - - - - - - - - - explicit - 10 - edismax - 0.075 - - dvName^400 - authorName^180 - dvSubject^190 - dvDescription^180 - dvAffiliation^170 - title^130 - subject^120 - keyword^110 - topicClassValue^100 - dsDescriptionValue^90 - authorAffiliation^80 - publicationCitation^60 - producerName^50 - fileName^30 - fileDescription^30 - variableLabel^20 - variableName^10 - _text_^1.0 - - - dvName^200 - authorName^100 - dvSubject^100 - dvDescription^100 - dvAffiliation^100 - title^75 - subject^75 - keyword^75 - topicClassValue^75 - dsDescriptionValue^75 - authorAffiliation^75 - publicationCitation^75 - producerName^75 - - - - isHarvested:false^25000 - - - - - - - - - - - - - - - - - - explicit - json - true - - - - - - - - explicit - - - - - - _text_ - - - - - - - true - ignored_ - _text_ - - - - - - - - - text_general - - - - - - default - _text_ - solr.DirectSolrSpellChecker - - internal - - 0.5 - - 2 - - 1 - - 5 - - 4 - - 0.01 - - - - - - - - - - - - default - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - spellcheck - - - - - - - - - - true - - - tvComponent - - - - - - - - - - - - true - false - - - terms - - - - - - - - string - - - - - - explicit - - - elevator - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - 10 - .,!? - - - - - - - WORD - - - en - US - - - - - - - - - - - - - - [^\w-\.] 
- _ - - - - - - - yyyy-MM-dd'T'HH:mm:ss.SSSZ - yyyy-MM-dd'T'HH:mm:ss,SSSZ - yyyy-MM-dd'T'HH:mm:ss.SSS - yyyy-MM-dd'T'HH:mm:ss,SSS - yyyy-MM-dd'T'HH:mm:ssZ - yyyy-MM-dd'T'HH:mm:ss - yyyy-MM-dd'T'HH:mmZ - yyyy-MM-dd'T'HH:mm - yyyy-MM-dd HH:mm:ss.SSSZ - yyyy-MM-dd HH:mm:ss,SSSZ - yyyy-MM-dd HH:mm:ss.SSS - yyyy-MM-dd HH:mm:ss,SSS - yyyy-MM-dd HH:mm:ssZ - yyyy-MM-dd HH:mm:ss - yyyy-MM-dd HH:mmZ - yyyy-MM-dd HH:mm - yyyy-MM-dd - - - - - - - - - - - - - - - - - - - - - - - - - - - - - text/plain; charset=UTF-8 - - - - - ${velocity.template.base.dir:} - ${velocity.solr.resource.loader.enabled:true} - ${velocity.params.resource.loader.enabled:false} - - - - - 5 - - - - - - - - - - - - - - + + + + + + + + + 9.7 + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + + + + + + + ${solr.max.booleanClauses:1024} + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + edismax + 0.075 + + dvName^400 + authorName^180 + dvSubject^190 + dvDescription^180 + dvAffiliation^170 + title^130 + subject^120 + keyword^110 + topicClassValue^100 + dsDescriptionValue^90 + authorAffiliation^80 + publicationCitation^60 + producerName^50 + fileName^30 + fileDescription^30 + variableLabel^20 + variableName^10 + _text_^1.0 + + + dvName^200 + authorName^100 + dvSubject^100 + dvDescription^100 + dvAffiliation^100 + title^75 + subject^75 + keyword^75 + topicClassValue^75 + dsDescriptionValue^75 + authorAffiliation^75 + publicationCitation^75 + producerName^75 + + + + isHarvested:false^25000 + + + + + + + + explicit + json + true + + + + + + + _text_ + + + + + + + text_general + + + + + + default + _text_ + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + + + + + + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + [^\w-\.] + _ + + + + + + + yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z + yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z + yyyy-MM-dd HH:mm[:ss[.SSS]][z + yyyy-MM-dd HH:mm[:ss[,SSS]][z + [EEE, ]dd MMM yyyy HH:mm[:ss] z + EEEE, dd-MMM-yy HH:mm:ss z + EEE MMM ppd HH:mm:ss [z ]yyyy + + + + + java.lang.String + text_general + + *_str + 256 + + + true + + + java.lang.Boolean + booleans + + + java.util.Date + pdates + + + java.lang.Long + java.lang.Integer + plongs + + + java.lang.Number + pdoubles + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/conf/solr/8.11.1/update-fields.sh b/conf/solr/9.3.0/update-fields.sh similarity index 98% rename from conf/solr/8.11.1/update-fields.sh rename to conf/solr/9.3.0/update-fields.sh index 49ea8151c77..386c1ee4e87 100755 --- a/conf/solr/8.11.1/update-fields.sh +++ b/conf/solr/9.3.0/update-fields.sh @@ -2,6 +2,8 @@ set -euo pipefail +# [INFO]: Update a prepared Solr schema.xml for Dataverse with a given list of metadata fields + #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### # This script will # 1. 
take a file (or read it from STDIN) with all and definitions diff --git a/conf/vagrant/etc/shibboleth/attribute-map.xml b/conf/vagrant/etc/shibboleth/attribute-map.xml deleted file mode 100644 index f6386b620f5..00000000000 --- a/conf/vagrant/etc/shibboleth/attribute-map.xml +++ /dev/null @@ -1,141 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/conf/vagrant/etc/shibboleth/dataverse-idp-metadata.xml b/conf/vagrant/etc/shibboleth/dataverse-idp-metadata.xml deleted file mode 100644 index 67225b5e670..00000000000 --- a/conf/vagrant/etc/shibboleth/dataverse-idp-metadata.xml +++ /dev/null @@ -1,298 +0,0 @@ - - - - - - - - - - - - - - - - - - - - testshib.org - - TestShib Test IdP - TestShib IdP. Use this as a source of attributes - for your test SP. - https://www.testshib.org/images/testshib-transp.png - - - - - - - - MIIEDjCCAvagAwIBAgIBADANBgkqhkiG9w0BAQUFADBnMQswCQYDVQQGEwJVUzEV - MBMGA1UECBMMUGVubnN5bHZhbmlhMRMwEQYDVQQHEwpQaXR0c2J1cmdoMREwDwYD - VQQKEwhUZXN0U2hpYjEZMBcGA1UEAxMQaWRwLnRlc3RzaGliLm9yZzAeFw0wNjA4 - MzAyMTEyMjVaFw0xNjA4MjcyMTEyMjVaMGcxCzAJBgNVBAYTAlVTMRUwEwYDVQQI - EwxQZW5uc3lsdmFuaWExEzARBgNVBAcTClBpdHRzYnVyZ2gxETAPBgNVBAoTCFRl - c3RTaGliMRkwFwYDVQQDExBpZHAudGVzdHNoaWIub3JnMIIBIjANBgkqhkiG9w0B - AQEFAAOCAQ8AMIIBCgKCAQEArYkCGuTmJp9eAOSGHwRJo1SNatB5ZOKqDM9ysg7C - yVTDClcpu93gSP10nH4gkCZOlnESNgttg0r+MqL8tfJC6ybddEFB3YBo8PZajKSe - 3OQ01Ow3yT4I+Wdg1tsTpSge9gEz7SrC07EkYmHuPtd71CHiUaCWDv+xVfUQX0aT - NPFmDixzUjoYzbGDrtAyCqA8f9CN2txIfJnpHE6q6CmKcoLADS4UrNPlhHSzd614 - kR/JYiks0K4kbRqCQF0Dv0P5Di+rEfefC6glV8ysC8dB5/9nb0yh/ojRuJGmgMWH - gWk6h0ihjihqiu4jACovUZ7vVOCgSE5Ipn7OIwqd93zp2wIDAQABo4HEMIHBMB0G - A1UdDgQWBBSsBQ869nh83KqZr5jArr4/7b+QazCBkQYDVR0jBIGJMIGGgBSsBQ86 - 9nh83KqZr5jArr4/7b+Qa6FrpGkwZzELMAkGA1UEBhMCVVMxFTATBgNVBAgTDFBl - bm5zeWx2YW5pYTETMBEGA1UEBxMKUGl0dHNidXJnaDERMA8GA1UEChMIVGVzdFNo - aWIxGTAXBgNVBAMTEGlkcC50ZXN0c2hpYi5vcmeCAQAwDAYDVR0TBAUwAwEB/zAN - BgkqhkiG9w0BAQUFAAOCAQEAjR29PhrCbk8qLN5MFfSVk98t3CT9jHZoYxd8QMRL - I4j7iYQxXiGJTT1FXs1nd4Rha9un+LqTfeMMYqISdDDI6tv8iNpkOAvZZUosVkUo - 93pv1T0RPz35hcHHYq2yee59HJOco2bFlcsH8JBXRSRrJ3Q7Eut+z9uo80JdGNJ4 - /SJy5UorZ8KazGj16lfJhOBXldgrhppQBb0Nq6HKHguqmwRfJ+WkxemZXzhediAj - Geka8nz8JjwxpUjAiSWYKLtJhGEaTqCYxCCX2Dw+dOTqUzHOZ7WKv4JXPK5G/Uhr - 8K/qhmFT2nIQi538n6rVYLeWj8Bbnl+ev0peYzxFyF5sQA== - - - - - - - - - - - - - - - urn:mace:shibboleth:1.0:nameIdentifier - urn:oasis:names:tc:SAML:2.0:nameid-format:transient - - - - - - - - - - - - - - - - MIIEDjCCAvagAwIBAgIBADANBgkqhkiG9w0BAQUFADBnMQswCQYDVQQGEwJVUzEV - MBMGA1UECBMMUGVubnN5bHZhbmlhMRMwEQYDVQQHEwpQaXR0c2J1cmdoMREwDwYD - VQQKEwhUZXN0U2hpYjEZMBcGA1UEAxMQaWRwLnRlc3RzaGliLm9yZzAeFw0wNjA4 - MzAyMTEyMjVaFw0xNjA4MjcyMTEyMjVaMGcxCzAJBgNVBAYTAlVTMRUwEwYDVQQI - EwxQZW5uc3lsdmFuaWExEzARBgNVBAcTClBpdHRzYnVyZ2gxETAPBgNVBAoTCFRl - c3RTaGliMRkwFwYDVQQDExBpZHAudGVzdHNoaWIub3JnMIIBIjANBgkqhkiG9w0B - AQEFAAOCAQ8AMIIBCgKCAQEArYkCGuTmJp9eAOSGHwRJo1SNatB5ZOKqDM9ysg7C - yVTDClcpu93gSP10nH4gkCZOlnESNgttg0r+MqL8tfJC6ybddEFB3YBo8PZajKSe - 3OQ01Ow3yT4I+Wdg1tsTpSge9gEz7SrC07EkYmHuPtd71CHiUaCWDv+xVfUQX0aT - NPFmDixzUjoYzbGDrtAyCqA8f9CN2txIfJnpHE6q6CmKcoLADS4UrNPlhHSzd614 - kR/JYiks0K4kbRqCQF0Dv0P5Di+rEfefC6glV8ysC8dB5/9nb0yh/ojRuJGmgMWH - gWk6h0ihjihqiu4jACovUZ7vVOCgSE5Ipn7OIwqd93zp2wIDAQABo4HEMIHBMB0G - A1UdDgQWBBSsBQ869nh83KqZr5jArr4/7b+QazCBkQYDVR0jBIGJMIGGgBSsBQ86 - 
9nh83KqZr5jArr4/7b+Qa6FrpGkwZzELMAkGA1UEBhMCVVMxFTATBgNVBAgTDFBl - bm5zeWx2YW5pYTETMBEGA1UEBxMKUGl0dHNidXJnaDERMA8GA1UEChMIVGVzdFNo - aWIxGTAXBgNVBAMTEGlkcC50ZXN0c2hpYi5vcmeCAQAwDAYDVR0TBAUwAwEB/zAN - BgkqhkiG9w0BAQUFAAOCAQEAjR29PhrCbk8qLN5MFfSVk98t3CT9jHZoYxd8QMRL - I4j7iYQxXiGJTT1FXs1nd4Rha9un+LqTfeMMYqISdDDI6tv8iNpkOAvZZUosVkUo - 93pv1T0RPz35hcHHYq2yee59HJOco2bFlcsH8JBXRSRrJ3Q7Eut+z9uo80JdGNJ4 - /SJy5UorZ8KazGj16lfJhOBXldgrhppQBb0Nq6HKHguqmwRfJ+WkxemZXzhediAj - Geka8nz8JjwxpUjAiSWYKLtJhGEaTqCYxCCX2Dw+dOTqUzHOZ7WKv4JXPK5G/Uhr - 8K/qhmFT2nIQi538n6rVYLeWj8Bbnl+ev0peYzxFyF5sQA== - - - - - - - - - - - - - - - - urn:mace:shibboleth:1.0:nameIdentifier - urn:oasis:names:tc:SAML:2.0:nameid-format:transient - - - - - TestShib Two Identity Provider - TestShib Two - http://www.testshib.org/testshib-two/ - - - Nate - Klingenstein - ndk@internet2.edu - - - - - - - - - - - - - - - - - - - - - - - - - TestShib Test SP - TestShib SP. Log into this to test your machine. - Once logged in check that all attributes that you expected have been - released. - https://www.testshib.org/images/testshib-transp.png - - - - - - - - MIIEPjCCAyagAwIBAgIBADANBgkqhkiG9w0BAQUFADB3MQswCQYDVQQGEwJVUzEV - MBMGA1UECBMMUGVubnN5bHZhbmlhMRMwEQYDVQQHEwpQaXR0c2J1cmdoMSIwIAYD - VQQKExlUZXN0U2hpYiBTZXJ2aWNlIFByb3ZpZGVyMRgwFgYDVQQDEw9zcC50ZXN0 - c2hpYi5vcmcwHhcNMDYwODMwMjEyNDM5WhcNMTYwODI3MjEyNDM5WjB3MQswCQYD - VQQGEwJVUzEVMBMGA1UECBMMUGVubnN5bHZhbmlhMRMwEQYDVQQHEwpQaXR0c2J1 - cmdoMSIwIAYDVQQKExlUZXN0U2hpYiBTZXJ2aWNlIFByb3ZpZGVyMRgwFgYDVQQD - Ew9zcC50ZXN0c2hpYi5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIB - AQDJyR6ZP6MXkQ9z6RRziT0AuCabDd3x1m7nLO9ZRPbr0v1LsU+nnC363jO8nGEq - sqkgiZ/bSsO5lvjEt4ehff57ERio2Qk9cYw8XCgmYccVXKH9M+QVO1MQwErNobWb - AjiVkuhWcwLWQwTDBowfKXI87SA7KR7sFUymNx5z1aoRvk3GM++tiPY6u4shy8c7 - vpWbVfisfTfvef/y+galxjPUQYHmegu7vCbjYP3On0V7/Ivzr+r2aPhp8egxt00Q - XpilNai12LBYV3Nv/lMsUzBeB7+CdXRVjZOHGuQ8mGqEbsj8MBXvcxIKbcpeK5Zi - JCVXPfarzuriM1G5y5QkKW+LAgMBAAGjgdQwgdEwHQYDVR0OBBYEFKB6wPDxwYrY - StNjU5P4b4AjBVQVMIGhBgNVHSMEgZkwgZaAFKB6wPDxwYrYStNjU5P4b4AjBVQV - oXukeTB3MQswCQYDVQQGEwJVUzEVMBMGA1UECBMMUGVubnN5bHZhbmlhMRMwEQYD - VQQHEwpQaXR0c2J1cmdoMSIwIAYDVQQKExlUZXN0U2hpYiBTZXJ2aWNlIFByb3Zp - ZGVyMRgwFgYDVQQDEw9zcC50ZXN0c2hpYi5vcmeCAQAwDAYDVR0TBAUwAwEB/zAN - BgkqhkiG9w0BAQUFAAOCAQEAc06Kgt7ZP6g2TIZgMbFxg6vKwvDL0+2dzF11Onpl - 5sbtkPaNIcj24lQ4vajCrrGKdzHXo9m54BzrdRJ7xDYtw0dbu37l1IZVmiZr12eE - Iay/5YMU+aWP1z70h867ZQ7/7Y4HW345rdiS6EW663oH732wSYNt9kr7/0Uer3KD - 9CuPuOidBacospDaFyfsaJruE99Kd6Eu/w5KLAGG+m0iqENCziDGzVA47TngKz2v - PVA+aokoOyoz3b53qeti77ijatSEoKjxheBWpO+eoJeGq/e49Um3M2ogIX/JAlMa - Inh+vYSYngQB2sx9LGkR9KHaMKNIGCDehk93Xla4pWJx1w== - - - - - - - - - - - - - - - - - - - - - urn:oasis:names:tc:SAML:2.0:nameid-format:transient - urn:mace:shibboleth:1.0:nameIdentifier - - - - - - - - - - - - - - - - - - - - TestShib Two Service Provider - TestShib Two - http://www.testshib.org/testshib-two/ - - - Nate - Klingenstein - ndk@internet2.edu - - - - - - - diff --git a/conf/vagrant/etc/shibboleth/shibboleth2.xml b/conf/vagrant/etc/shibboleth/shibboleth2.xml deleted file mode 100644 index 946e73bdf6a..00000000000 --- a/conf/vagrant/etc/shibboleth/shibboleth2.xml +++ /dev/null @@ -1,85 +0,0 @@ - - - - - - - - - - - - - - - - - - - - SAML2 SAML1 - - - - SAML2 Local - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/conf/vagrant/etc/yum.repos.d/epel-apache-maven.repo b/conf/vagrant/etc/yum.repos.d/epel-apache-maven.repo deleted file mode 100644 index 1e0f8200040..00000000000 --- 
a/conf/vagrant/etc/yum.repos.d/epel-apache-maven.repo +++ /dev/null @@ -1,15 +0,0 @@ -# Place this file in your /etc/yum.repos.d/ directory - -[epel-apache-maven] -name=maven from apache foundation. -baseurl=http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-$releasever/$basearch/ -enabled=1 -skip_if_unavailable=1 -gpgcheck=0 - -[epel-apache-maven-source] -name=maven from apache foundation. - Source -baseurl=http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-$releasever/SRPMS -enabled=0 -skip_if_unavailable=1 -gpgcheck=0 diff --git a/conf/vagrant/etc/yum.repos.d/shibboleth.repo b/conf/vagrant/etc/yum.repos.d/shibboleth.repo deleted file mode 100644 index adf42185d8a..00000000000 --- a/conf/vagrant/etc/yum.repos.d/shibboleth.repo +++ /dev/null @@ -1,9 +0,0 @@ -[shibboleth] -name=Shibboleth (rockylinux8) -# Please report any problems to https://shibboleth.atlassian.net/jira -type=rpm-md -mirrorlist=https://shibboleth.net/cgi-bin/mirrorlist.cgi/rockylinux8 -gpgcheck=1 -gpgkey=https://shibboleth.net/downloads/service-provider/RPMS/repomd.xml.key - https://shibboleth.net/downloads/service-provider/RPMS/cantor.repomd.xml.key -enabled=1 diff --git a/conf/vagrant/var/lib/pgsql/data/pg_hba.conf b/conf/vagrant/var/lib/pgsql/data/pg_hba.conf deleted file mode 100644 index e3244686066..00000000000 --- a/conf/vagrant/var/lib/pgsql/data/pg_hba.conf +++ /dev/null @@ -1,74 +0,0 @@ -# PostgreSQL Client Authentication Configuration File -# =================================================== -# -# Refer to the "Client Authentication" section in the -# PostgreSQL documentation for a complete description -# of this file. A short synopsis follows. -# -# This file controls: which hosts are allowed to connect, how clients -# are authenticated, which PostgreSQL user names they can use, which -# databases they can access. Records take one of these forms: -# -# local DATABASE USER METHOD [OPTIONS] -# host DATABASE USER CIDR-ADDRESS METHOD [OPTIONS] -# hostssl DATABASE USER CIDR-ADDRESS METHOD [OPTIONS] -# hostnossl DATABASE USER CIDR-ADDRESS METHOD [OPTIONS] -# -# (The uppercase items must be replaced by actual values.) -# -# The first field is the connection type: "local" is a Unix-domain socket, -# "host" is either a plain or SSL-encrypted TCP/IP socket, "hostssl" is an -# SSL-encrypted TCP/IP socket, and "hostnossl" is a plain TCP/IP socket. -# -# DATABASE can be "all", "sameuser", "samerole", a database name, or -# a comma-separated list thereof. -# -# USER can be "all", a user name, a group name prefixed with "+", or -# a comma-separated list thereof. In both the DATABASE and USER fields -# you can also write a file name prefixed with "@" to include names from -# a separate file. -# -# CIDR-ADDRESS specifies the set of hosts the record matches. -# It is made up of an IP address and a CIDR mask that is an integer -# (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that specifies -# the number of significant bits in the mask. Alternatively, you can write -# an IP address and netmask in separate columns to specify the set of hosts. -# -# METHOD can be "trust", "reject", "md5", "password", "gss", "sspi", "krb5", -# "ident", "pam", "ldap" or "cert". Note that "password" sends passwords -# in clear text; "md5" is preferred since it sends encrypted passwords. -# -# OPTIONS are a set of options for the authentication in the format -# NAME=VALUE. 
The available options depend on the different authentication -# methods - refer to the "Client Authentication" section in the documentation -# for a list of which options are available for which authentication methods. -# -# Database and user names containing spaces, commas, quotes and other special -# characters must be quoted. Quoting one of the keywords "all", "sameuser" or -# "samerole" makes the name lose its special character, and just match a -# database or username with that name. -# -# This file is read on server startup and when the postmaster receives -# a SIGHUP signal. If you edit the file on a running system, you have -# to SIGHUP the postmaster for the changes to take effect. You can use -# "pg_ctl reload" to do that. - -# Put your actual configuration here -# ---------------------------------- -# -# If you want to allow non-local connections, you need to add more -# "host" records. In that case you will also need to make PostgreSQL listen -# on a non-local interface via the listen_addresses configuration parameter, -# or via the -i or -h command line switches. -# - - - -# TYPE DATABASE USER CIDR-ADDRESS METHOD - -# "local" is for Unix domain socket connections only -local all all trust -# IPv4 local connections: -host all all 127.0.0.1/32 trust -# IPv6 local connections: -host all all ::1/128 trust diff --git a/conf/vagrant/var/www/dataverse/error-documents/503.html b/conf/vagrant/var/www/dataverse/error-documents/503.html deleted file mode 100644 index 95a7dea4107..00000000000 --- a/conf/vagrant/var/www/dataverse/error-documents/503.html +++ /dev/null @@ -1 +0,0 @@ -

Custom "site is unavailable" 503 page.

diff --git a/doc/JAVADOC_GUIDE.md b/doc/JAVADOC_GUIDE.md index 8001abda248..997c40e1624 100644 --- a/doc/JAVADOC_GUIDE.md +++ b/doc/JAVADOC_GUIDE.md @@ -88,7 +88,7 @@ Here's a better approach: /** The dataverse we move the dataset from */ private Dataverse sourceDataverse; - /** The dataverse we movet the dataset to */ + /** The dataverse we move the dataset to */ private Dataverse destinationDataverse; diff --git a/doc/mergeParty/readme.md b/doc/mergeParty/readme.md index 061673fffa0..6f3af8511dc 100644 --- a/doc/mergeParty/readme.md +++ b/doc/mergeParty/readme.md @@ -73,10 +73,10 @@ Note that before we were asking `isGuest` and now we ask `isAuthenticated`, so t ## Other Added Things ### Settings bean -Settings (in `edu.harvard.iq.dataverse.settings`) are where the application stores its more complex, admin-editable configuration. Technically, its a persistent `Map`, that can be accessed via API (`edu.harvard.iq.dataverse.api.Admin`, on path `{server}/api/s/settings`). Currenly used for the signup mechanism. +Settings (in `edu.harvard.iq.dataverse.settings`) are where the application stores its more complex, admin-editable configuration. Technically, its a persistent `Map`, that can be accessed via API (`edu.harvard.iq.dataverse.api.Admin`, on path `{server}/api/s/settings`). Currently used for the signup mechanism. ### Admin API -Accessible under url `{server}/api/s/`, API calls to this bean should be editing confugurations, allowing full indexing and more. The idea behing putting all of them under the `/s/` path is that we can later block these calls using a filter. This way, we could, say, allow access from localhost only. Or, we could block this completely based on some environemnt variable. +Accessible under url `{server}/api/s/`, API calls to this bean should be editing configurations, allowing full indexing and more. The idea behind putting all of them under the `/s/` path is that we can later block these calls using a filter. This way, we could, say, allow access from localhost only. Or, we could block this completely based on some environment variable. ### `setup-all.sh` script A new script that sets up the users and the dataverses, sets the system up for built-in signup, and then indexes the dataverses using solr. Requires the [jq utility](http://stedolan.github.io/jq/). On Macs with [homebrew](http://brew.sh) installed, getting this utility is a `brew install jq` command away. diff --git a/doc/release-notes/5.12.1-release-notes.md b/doc/release-notes/5.12.1-release-notes.md new file mode 100644 index 00000000000..aa8896660f3 --- /dev/null +++ b/doc/release-notes/5.12.1-release-notes.md @@ -0,0 +1,115 @@ +# Dataverse Software 5.12.1 + +This release brings new features, enhancements, and bug fixes to the Dataverse Software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights + +### Bug Fix for "Internal Server Error" When Creating a New Remote Account + +Unfortunately, as of 5.11 new remote users have seen "Internal Server Error" when creating an account (or checking notifications just after creating an account). Remote users are those who log in with institutional (Shibboleth), OAuth (ORCID, GitHub, or Google) or OIDC providers. + +This is a transient error that can be worked around by reloading the browser (or logging out and back in again) but it's obviously a very poor user experience and a bad first impression. 
This bug is the primary reason we are putting out this patch release. Other features and bug fixes are coming along for the ride. + +### Ability to Disable OAuth Sign Up While Allowing Existing Accounts to Log In + +A new option called `:AllowRemoteAuthSignUp` has been added providing a mechanism for disabling new account signups for specific OAuth2 authentication providers (Orcid, GitHub, Google etc.) while still allowing logins for already-existing accounts using this authentication method. + +See the [Installation Guide](https://guides.dataverse.org/en/5.12.1/installation/config.html#allowremoteauthsignup) for more information on the setting. + +### Production Date Now Used for Harvested Datasets in Addition to Distribution Date (`oai_dc` format) + +Fix the year displayed in citation for harvested dataset, especially for `oai_dc` format. + +For normal datasets, the date used is the "citation date" which is by default the publication date (the first release date) unless you [change it](https://guides.dataverse.org/en/5.12.1/api/native-api.html#set-citation-date-field-type-for-a-dataset). + +However, for a harvested dataset, the distribution date was used instead and this date is not always present in the harvested metadata. + +Now, the production date is used for harvested dataset in addition to distribution date when harvesting with the `oai_dc` format. + +### Publication Date Now Used for Harvested Dataset if Production Date is Not Set (`oai_dc` format) + +For exports and harvesting in `oai_dc` format, if "Production Date" is not set, "Publication Date" is now used instead. This change is reflected in the [Dataverse 4+ Metadata Crosswalk][] linked from the [Appendix][] of the User Guide. + +[Dataverse 4+ Metadata Crosswalk]: https://docs.google.com/spreadsheets/d/10Luzti7svVTVKTA-px27oq3RxCUM-QbiTkm8iMd5C54/edit#gid=1901625433&range=K7 +[Appendix]: https://guides.dataverse.org/en/5.12.1/user/appendix.html + +## Major Use Cases and Infrastructure Enhancements + +Changes and fixes in this release include: + +- Users creating an account by logging in with Shibboleth, OAuth, or OIDC should not see errors. (Issue 9029, PR #9030) +- When harvesting datasets, I want the Production Date if I can't get the Distribution Date (PR #8732) +- When harvesting datasets, I want the Publication Date if I can't get the Production Date (PR #8733) +- As a sysadmin I'd like to disable (temporarily or permanently) sign ups from OAuth providers while allowing existing users to continue to log in from that provider (PR #9112) +- As a C/C++ developer I want to use Dataverse APIs (PR #9070) + +## New DB Settings + +The following DB settings have been added: + +- `:AllowRemoteAuthSignUp` + +See the [Database Settings](https://guides.dataverse.org/en/5.12.1/installation/config.html#database-settings) section of the Guides for more information. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [5.12.1 Milestone](https://github.com/IQSS/dataverse/milestone/106?closed=1) in GitHub. + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. + +## Installation + +If this is a new installation, please see our [Installation Guide](https://guides.dataverse.org/en/5.12.1/installation/). 
Please also contact us to get added to the [Dataverse Project Map](https://guides.dataverse.org/en/5.10/installation/config.html#putting-your-dataverse-installation-on-the-map-at-dataverse-org) if you have not done so already. + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +0\. These instructions assume that you've already successfully upgraded from Dataverse Software 4.x to Dataverse Software 5 following the instructions in the [Dataverse Software 5 Release Notes](https://github.com/IQSS/dataverse/releases/tag/v5.0). After upgrading from the 4.x series to 5.0, you should progress through the other 5.x releases before attempting the upgrade to 5.12.1. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +```shell +export PAYARA=/usr/local/payara5 +``` + +(or `setenv PAYARA /usr/local/payara5` if you are using a `csh`-like shell) + +1\. Undeploy the previous version + +```shell + $PAYARA/bin/asadmin list-applications + $PAYARA/bin/asadmin undeploy dataverse<-version> +``` + +2\. Stop Payara + +```shell + service payara stop + rm -rf $PAYARA/glassfish/domains/domain1/generated +``` + +6\. Start Payara + +```shell + service payara start +``` + +7\. Deploy this version. + +```shell + $PAYARA/bin/asadmin deploy dataverse-5.12.1.war +``` + +8\. Restart payara + +```shell + service payara stop + service payara start +``` + +## Upcoming Versions of Payara + +With the recent release of Payara 6 ([Payara 6.2022.1](https://github.com/payara/Payara/releases/tag/payara-server-6.2022.1) being the first version), the days of free-to-use Payara 5.x Platform Community versions [are numbered](https://blog.payara.fish/whats-new-in-the-november-2022-payara-platform-release). Specifically, Payara's blog post says, "Payara Platform Community 5.2022.4 has been released today as the penultimate Payara 5 Community release." + +Given the end of free-to-use Payara 5 versions, we plan to get the Dataverse software working on Payara 6 (#8305), which will require substantial efforts from the IQSS team and community members, as this also means shifting our app to be a [Jakarta EE 10](https://jakarta.ee/release/10/) application (upgrading from EE 8). We are currently working out the details and will share news as soon as we can. Rest assured we will do our best to provide you with a smooth transition. You can follow along in Issue #8305 and related pull requests and you are, of course, very welcome to participate by testing and otherwise contributing, as always. diff --git a/doc/release-notes/5.13-release-notes.md b/doc/release-notes/5.13-release-notes.md new file mode 100644 index 00000000000..5e1741aec7e --- /dev/null +++ b/doc/release-notes/5.13-release-notes.md @@ -0,0 +1,262 @@ +# Dataverse Software 5.13 + +This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights + +### Schema.org Improvements (Some Backward Incompatibility) + +The Schema.org metadata used as an export format and also embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations for Google Dataset Search. 
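If you maintain integrations that consume this metadata, one way to review the new structure is to fetch the Schema.org export for a published dataset via the metadata export API. A minimal sketch (the server URL and DOI below are placeholders):

```shell
# Fetch the Schema.org JSON-LD export for a published dataset
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_ID=doi:10.70122/FK2/EXAMPLE
curl "$SERVER_URL/api/datasets/export?exporter=schema.org&persistentId=$PERSISTENT_ID"
```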
+ +Please be advised that these improvements have the chance to break integrations that rely on the old, less compliant structure. For details see the "backward incompatibility" section below. (Issue #7349) + +### Folder Uploads via Web UI (dvwebloader, S3 only) + +For installations using S3 for storage and with direct upload enabled, a new tool called [DVWebloader](https://github.com/gdcc/dvwebloader) can be enabled that allows web users to upload a folder with a hierarchy of files and subfolders while retaining the relative paths of files (similarly to how the DVUploader tool does it on the command line, but with the convenience of using the browser UI). See [Folder Upload](https://guides.dataverse.org/en/5.13/user/dataset-management.html#folder-upload) in the User Guide for details. (PR #9096) + +### Long Descriptions of Collections (Dataverses) are Now Truncated + +Like datasets, long descriptions of collections (dataverses) are now truncated by default but can be expanded with a "read full description" button. (PR #9222) + +### License Sorting + +Licenses as shown in the dropdown in UI can be now sorted by the superusers. See [Sorting Licenses](https://guides.dataverse.org/en/5.13/installation/config.html#sorting-licenses) section of the Installation Guide for details. (PR #8697) + +### Metadata Field Production Location Now Repeatable, Facetable, and Enabled for Advanced Search + +Depositors can now click the plus sign to enter multiple instances of the metadata field "Production Location" in the citation metadata block. Additionally this field now appears on the Advanced Search page and can be added to the list of search facets. (PR #9254) + +### Support for NetCDF and HDF5 Files + + NetCDF and HDF5 files are now detected based on their content rather than just their file extension. Both "classic" NetCDF 3 files and more modern NetCDF 4 files are detected based on content. Detection for older HDF4 files is only done through the file extension ".hdf", as before. + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. There is a new NcML previewer available in the [dataverse-previewers](https://github.com/gdcc/dataverse-previewers) repo. + +An [extractNcml](https://guides.dataverse.org/en/5.13/api/native-api.html#extract-ncml) API endpoint has been added, especially for installations with existing NetCDF and HDF5 files. After upgrading, they can iterate through these files and try to extract an NcML file. + +See the [NetCDF and HDF5](https://guides.dataverse.org/en/5.13/user/dataset-management.html#netcdf-and-hdf5) section of the User Guide for details. (PR #9239) + +### Support for .eln Files (Electronic Laboratory Notebooks) + +The [.eln file format](https://github.com/TheELNConsortium/TheELNFileFormat) is used by Electronic Laboratory Notebooks as an exchange format for experimental protocols, results, sample descriptions, etc... + +### Improved Security for External Tools + +External tools can now be configured to use signed URLs to access the Dataverse API as an alternative to API tokens. This eliminates the need for tools to have access to the user's API token in order to access draft or restricted datasets and datafiles. Signed URLs can be transferred via POST or via a callback when triggering a tool via GET. See [Authorization Options](https://guides.dataverse.org/en/5.13/api/external-tools.html#authorization-options) in the External Tools documentation for details. 
(PR #9001) + +### Geospatial Search (API Only) + +Geospatial search is supported via the Search API using two new [parameters](https://guides.dataverse.org/en/5.13/api/search.html#parameters): `geo_point` and `geo_radius`. + +The fields that are geospatially indexed are "West Longitude", "East Longitude", "North Latitude", and "South Latitude" from the "Geographic Bounding Box" field in the geospatial metadata block. (PR #8239) + +### Reproducibility and Code Execution with Binder + +Binder has been added to the list of external tools that can be added to a Dataverse installation. From the dataset page, you can launch Binder, which spins up a computational environment in which you can explore the code and data in the dataset, or write new code, such as a Jupyter notebook. (PR #9341) + +### CodeMeta (Software) Metadata Support (Experimental) + +Experimental support for research software metadata deposits has been added. + +By adding a metadata block for [CodeMeta](https://codemeta.github.io), we take another step toward adding first class support of diverse FAIR objects, such as research software and computational workflows. + +There is more work underway to make Dataverse installations around the world "research software ready." + +**Note:** Like the metadata block for computational workflows before, CodeMeta is listed under [Experimental Metadata](https://guides.dataverse.org/en/5.13/user/appendix.html#experimental-metadata) in the guides. Experimental means it's brand new, opt-in, and might need future tweaking based on experience of usage in the field. We hope for feedback from installations on the new metadata block to optimize and lift it from the experimental stage. (PR #7877) + +### Mechanism Added for Stopping a Harvest in Progress + +It is now possible for a sysadmin to stop a long-running harvesting job. See [Harvesting Clients](https://guides.dataverse.org/en/5.13/admin/harvestclients.html#how-to-stop-a-harvesting-run-in-progress) in the Admin Guide for more information. (PR #9187) + +### API Endpoint Listing Metadata Block Details has been Extended + +The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: + +- `controlledVocabularyValues` - All possible values for fields with a controlled vocabulary. For example, the values "Agricultural Sciences", "Arts and Humanities", etc. for the "Subject" field. +- `isControlledVocabulary`: Whether or not this field has a controlled vocabulary. +- `multiple`: Whether or not the field supports multiple values. + +See [Metadata Blocks](https://guides.dataverse.org/en/5.13/api/native-api.html#metadata-blocks-api) in the API Guide for details. (PR #9213) + +### Advanced Database Settings + +You can now enable advanced database connection pool configurations useful for debugging and monitoring as well as other settings. Of particular interest may be `sslmode=require`, though installations already setting this parameter in the Postgres connection string will need to move it to `dataverse.db.parameters`. See the new [Database Persistence](https://guides.dataverse.org/en/5.13/installation/config.html#database-persistence) section of the Installation Guide for details. 
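For example, because `dataverse.db.parameters` is a MicroProfile Config option, it can be supplied either as a JVM option or as an environment variable. A minimal sketch, assuming a default Payara location and the usual MicroProfile name mapping; see the Database Persistence section linked above for the authoritative details:

```shell
# As a JVM option (asadmin may require escaping of special characters in the value)
/usr/local/payara5/bin/asadmin create-jvm-options '-Ddataverse.db.parameters=sslmode=require'

# Or as an environment variable read by MicroProfile Config
export DATAVERSE_DB_PARAMETERS='sslmode=require'
```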
(PR #8915) + +### Support for Cleaning up Leftover Files in Dataset Storage + +Experimental feature: the leftover files stored in the Dataset storage location that are not in the file list of that Dataset, but are named following the Dataverse technical convention for dataset files, can be removed with the new [Cleanup Storage of a Dataset](https://guides.dataverse.org/en/5.13/api/native-api.html#cleanup-storage-of-a-dataset) API endpoint. + +### OAI Server Bug Fixed + +A bug introduced in 5.12 was preventing the Dataverse OAI server from serving incremental harvesting requests from clients. It was fixed in this release (PR #9316). + +## Major Use Cases and Infrastructure Enhancements + +Changes and fixes in this release not already mentioned above include: + +- Administrators can configure an alternative storage location where files uploaded via the UI are temporarily stored during the transfer from client to server. (PR #8983, See also [Configuration Guide](http://guides.dataverse.org/en/5.13/installation/config.html#temporary-upload-file-storage)) +- To improve performance, Dataverse estimates download counts. This release includes an update that makes the estimate more accurate. (PR #8972) +- Direct upload and out-of-band uploads can now be used to replace multiple files with one API call (complementing the prior ability to add multiple new files). (PR #9018) +- A persistent identifier, [CSRT](https://www.cstr.cn/search/specification/), is added to the Related Publication field's ID Type child field. For datasets published with CSRT IDs, Dataverse will also include them in the datasets' Schema.org metadata exports. (Issue #8838) +- Datasets that are part of linked dataverse collections will now be displayed in their linking dataverse collections. + +## New JVM Options and MicroProfile Config Options + +The following JVM option is now available: + +- `dataverse.personOrOrg.assumeCommaInPersonName` - the default is false + +The following MicroProfile Config options are now available (these can be treated as JVM options): + +- `dataverse.files.uploads` - alternative storage location of generated temporary files for UI file uploads +- `dataverse.api.signing-secret` - used by signed URLs +- `dataverse.solr.host` +- `dataverse.solr.port` +- `dataverse.solr.protocol` +- `dataverse.solr.core` +- `dataverse.solr.path` +- `dataverse.rserve.host` + +The following existing JVM options are now available via MicroProfile Config: + +- `dataverse.siteUrl` +- `dataverse.fqdn` +- `dataverse.files.directory` +- `dataverse.rserve.host` +- `dataverse.rserve.port` +- `dataverse.rserve.user` +- `dataverse.rserve.password` +- `dataverse.rserve.tempdir` + +## Notes for Developers and Integrators + +See the "Backward Incompatibilities" section below. + +## Backward Incompatibilities + +### Schema.org + +The following changes have been made to Schema.org exports (necessary for the improvements mentioned above): + +- Descriptions are now joined and truncated to less than 5K characters. +- The "citation"/"text" key has been replaced by a "citation"/"name" key. +- File entries now have the mimetype reported as 'encodingFormat' rather than 'fileFormat' to better conform with the Schema.org specification for DataDownload entries. Download URLs are now sent for all files unless the dataverse.files.hide-schema-dot-org-download-urls setting is set to true. 
+- Author/creators now have an @type of Person or Organization and any affiliation (affiliation for Person, parentOrganization for Organization) is now an object of @type Organization + +### License Files + +License files are now required to contain the new "sortOrder" column. When attempting to create a new license without this field, an error would be returned. See [Configuring Licenses](https://guides.dataverse.org/en/5.13/installation/config.html#configuring-licenses) section of the Installation Guide for reference. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [5.13 milestone](https://github.com/IQSS/dataverse/milestone/107?closed=1) on GitHub. + +## Installation + +If this is a new installation, please see our [Installation Guide](https://guides.dataverse.org/en/5.13/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/5.13/installation/intro.html#getting-help) if you need it! + +After your installation has gone into production, you are welcome to add it to our [map of installations](https://dataverse.org/installations) by opening an issue in the [dataverse-installations](https://github.com/IQSS/dataverse-installations) repo. + +## Upgrade Instructions + +0\. These instructions assume that you've already successfully upgraded from version 4.x to 5.0 of the Dataverse software following the instructions in the [release notes for version 5.0](https://github.com/IQSS/dataverse/releases/tag/v5.0). After upgrading from the 4.x series to 5.0, you should progress through the other 5.x releases before attempting the upgrade to 5.13. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 5 is installed in `/usr/local/payara5`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara5` + +(or `setenv PAYARA /usr/local/payara5` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin list-applications` +- `$PAYARA/bin/asadmin undeploy dataverse<-version>` + +2\. Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-5.13.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\. Reload citation metadata block + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.13/citation.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` + +If you are running an English-only installation, you are finished with the citation block. Otherwise, download the updated citation.properties file and place in the [`dataverse.lang.directory`](https://guides.dataverse.org/en/5.13/installation/config.html#configuring-the-lang-directory). + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.13/citation.properties` +- `cp citation.properties /home/dataverse/langBundles` + +7\. Replace Solr schema.xml to allow multiple production locations and support for geospatial indexing to be used. 
See specific instructions below for those installations without custom metadata blocks (1a) and those with custom metadata blocks (1b). + +Note: with this release support for indexing of the experimental workflow metadata block has been removed from the standard schema.xml. +If you are using the workflow metadata block be sure to follow the instructions in step 7b) below to maintain support for indexing workflow metadata. + +7a\. For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually service solr stop, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.13/installation/prerequisites.html#solr-init-script) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-8.11.1/server/solr/collection1/conf` + +- Start solr instance (usually service solr start, depending on Solr/OS) + +7b\. For installations with custom or experimental metadata blocks: + +- Stop solr instance (usually service solr stop, depending on solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.13/installation/prerequisites.html#solr-init-script) + +- Edit the following line to your schema.xml (to indicate that productionPlace is now multiValued='true"): + + `` + +- Add the following lines to your schema.xml to add support for geospatial indexing: + + `` + `` + `` + `` + `` + `` + `` + +- Restart Solr instance (usually service solr start, depending on solr/OS) + +### Optional Upgrade Step: Reindex Linked Dataverse Collections + +Datasets that are part of linked dataverse collections will now be displayed in +their linking dataverse collections. In order to fix the display of collections +that have already been linked you must re-index the linked collections. This +query will provide a list of commands to re-index the effected collections: + +``` +select 'curl http://localhost:8080/api/admin/index/dataverses/' +|| tmp.dvid from (select distinct dataverse_id as dvid +from dataverselinkingdataverse) as tmp +``` + +The result of the query will be a list of re-index commands such as: + +`curl http://localhost:8080/api/admin/index/dataverses/633` + +where '633' is the id of the linked collection. + +### Optional Upgrade Step: Run File Detection on .eln Files + +Now that .eln files are recognized, you can run the [Redetect File Type](https://guides.dataverse.org/en/5.13/api/native-api.html#redetect-file-type) API on them to switch them from "unknown" to "ELN Archive". Afterward, you can reindex these files to make them appear in search facets. diff --git a/doc/release-notes/5.14-release-notes.md b/doc/release-notes/5.14-release-notes.md new file mode 100644 index 00000000000..ef2a3b59659 --- /dev/null +++ b/doc/release-notes/5.14-release-notes.md @@ -0,0 +1,404 @@ +# Dataverse Software 5.14 + +(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/5.14-release-notes.md) + +This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +Please note that, as an experiment, the sections of this release note are organized in a different order. The Upgrade and Installation sections are at the top, with the detailed sections highlighting new features and fixes further down. 
+ +## Installation + +If this is a new installation, please see our [Installation Guide](https://guides.dataverse.org/en/5.14/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/5.14/installation/intro.html#getting-help) if you need it! + +After your installation has gone into production, you are welcome to add it to our [map of installations](https://dataverse.org/installations) by opening an issue in the [dataverse-installations](https://github.com/IQSS/dataverse-installations) repo. + +## Upgrade Instructions + +0\. These instructions assume that you are upgrading from 5.13. If you are running an earlier version, the only safe way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to 5.14. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 5 is installed in `/usr/local/payara5`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara5` + +(or `setenv PAYARA /usr/local/payara5` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin undeploy dataverse-5.13` + +2\. Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-5.14.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\. Update the Citation metadata block: (the update makes the field Series repeatable) + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.14/citation.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` + +If you are running an English-only installation, you are finished with the citation block. Otherwise, download the updated citation.properties file and place it in the [`dataverse.lang.directory`](https://guides.dataverse.org/en/5.14/installation/config.html#configuring-the-lang-directory); `/home/dataverse/langBundles` used in the example below. + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.14/citation.properties` +- `cp citation.properties /home/dataverse/langBundles` + +7\. Upate Solr schema.xml to allow multiple series to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). + +7a\. For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-8.11.1/server/solr/collection1/conf` + +- Start Solr instance (usually `service solr start`, depending on Solr/OS) + +7b\. 
For installations with custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- There are 2 ways to regenerate the schema: Either by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed): +``` + wget https://raw.githubusercontent.com/IQSS/dataverse/master/conf/solr/8.11.1/update-fields.sh + chmod +x update-fields.sh + curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-8.8.1/server/solr/collection1/conf/schema.xml +``` +OR, alternatively, you can edit the following lines in your schema.xml by hand as follows (to indicate that series and its components are now `multiValued="true"`): +``` + + + +``` + +- Restart Solr instance (usually `service solr restart` depending on solr/OS) + +8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). + +9\. If your installation did not have :FilePIDsEnabled set, you will need to set it to true to keep file PIDs enabled: + + curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:FilePIDsEnabled + +10\. If your installation uses Handles as persistent identifiers (instead of DOIs): remember to upgrade your Handles service installation to a currently supported version. + +Generally, Handles is known to be working reliably even when running older versions that haven't been officially supported in years. We still recommend to check on your service and make sure to upgrade to a supported version (the latest version is 9.3.1, https://www.handle.net/hnr-source/handle-9.3.1-distribution.tar.gz, as of writing this). An older version may be running for you seemingly just fine, but do keep in mind that it may just stop working unexpectedly at any moment, because of some incompatibility introduced in a Java rpm upgrade, or anything similarly unpredictable. + +Handles is also very good about backward incompatibility. Meaning, in most cases you can simply stop the old version, unpack the new version from the distribution and start it on the existing config and database files, and it'll just keep working. However, it is a good idea to keep up with the recommended format upgrades, for the sake of efficiency and to avoid any unexpected surprises, should they finally decide to drop the old database format, for example. The two specific things we recommend: 1) Make sure your service is using a json version of the `siteinfo` bundle (i.e., if you are still using `siteinfo.bin`, convert it to `siteinfo.json` and remove the binary file from the service directory) and 2) Make sure you are using the newer bdbje database format for your handles catalog (i.e., if you still have the files `handles.jdb` and `nas.jdb` in your server directory, convert them to the new format). Follow the simple conversion instructions in the file README.txt in the Handles software distribution. Make sure to stop the service before converting the files and make sure to have a full backup of the existing server directory, just in case. Do not hesitate to contact the Handles support with any questions you may have, as they are very responsive and helpful. 
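As a concrete illustration of step 8 above, the batch re-export described in the Admin Guide can typically be triggered on the server itself with a call along these lines (a sketch; see the linked guide for other options, such as re-exporting a single dataset):

```shell
# Re-export the cached metadata export files for all published datasets
curl http://localhost:8080/api/admin/metadata/reExportAll
```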
+ +## New JVM Options and MicroProfile Config Options + +The following PID provider options are now available. See the section "Changes to PID Provider JVM Settings" below for more information. + +- `dataverse.pid.datacite.mds-api-url` +- `dataverse.pid.datacite.rest-api-url` +- `dataverse.pid.datacite.username` +- `dataverse.pid.datacite.password` +- `dataverse.pid.handlenet.key.path` +- `dataverse.pid.handlenet.key.passphrase` +- `dataverse.pid.handlenet.index` +- `dataverse.pid.permalink.base-url` +- `dataverse.pid.ezid.api-url` +- `dataverse.pid.ezid.username` +- `dataverse.pid.ezid.password` + +The following MicroProfile Config options have been added as part of [Signposting](https://signposting.org/) support. See the section "Signposting for Dataverse" below for details. + +- `dataverse.signposting.level1-author-limit` +- `dataverse.signposting.level1-item-limit` + +The following JVM options are described in the "Creating datasets with incomplete metadata through API" section below. + +- `dataverse.api.allow-incomplete-metadata` +- `dataverse.ui.show-validity-filter` +- `dataverse.ui.allow-review-for-incomplete` + +The following JVM/MicroProfile setting is for External Exporters. See "Mechanism Added for Adding External Exporters" below. + +- `dataverse.spi.export.directory` + +The following JVM/MicroProfile settings are for handling of support emails. See "Contact Email Improvements" below. + +- `dataverse.mail.support-email` +- `dataverse.mail.cc-support-on-contact-emails` + +The following JVM/MicroProfile setting is for extracting a geospatial bounding box even if S3 direct upload is enabled. + +- `dataverse.netcdf.geo-extract-s3-direct-upload` + +## Backward Incompatibilities + +The following list of potential backward incompatibilities references the sections of the "Detailed Release Highlights..." portion of the document further below where the corresponding changes are explained in detail. + +### Using the new External Exporters framework + +Care should be taken when replacing Dataverse's internal metadata export formats as third party code, including other third party Exporters, may depend on the contents of those export formats. When replacing an existing format, one must also remember to delete the cached metadata export files or run the reExport command for the metadata exports of existing datasets to be updated. + +See "Mechanism Added for Adding External Exporters". + +### Publishing via API + +When publishing a dataset via API, it now mirrors the UI behavior by requiring that the dataset has either a standard license configured, or has valid Custom Terms of Use (if allowed by the instance). Attempting to publish a dataset without such **will fail with an error message**. + +See "Handling of license information fixed in the API" for guidance on how to ensure that datasets created or updated via native API have a license configured. + + + +## Detailed Release Highlights, New Features and Use Case Scenarios + +### For Dataverse developers, support for running Dataverse in Docker (experimental) + +Developers can experiment with running Dataverse in Docker: (PR #9439) + +This is an image developers build locally (or can pull from Docker Hub). It is not meant for production use! 
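For orientation, pulling the application image from Docker Hub might look like the following. The image name and tag here are assumptions for illustration; treat the container guide linked below as authoritative:

```shell
# Pull the application image (name/tag are assumptions); it still needs companion
# containers such as PostgreSQL and Solr, as described in the container guide.
docker pull gdcc/dataverse:unstable
```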
+ +To provide a complete container-based local development environment, developers can deploy a Dataverse container from +the new image in addition to other containers for necessary dependencies: +https://guides.dataverse.org/en/5.14/container/dev-usage.html + +Please note that with this emerging solution we will sunset older tooling like `docker-aio` and `docker-dcm`. +We envision more testing possibilities in the future, to be discussed as part of the +[Dataverse Containerization Working Group](https://ct.gdcc.io). There is no sunsetting roadmap yet, but you have been warned. +If there is some specific feature of these tools you would like to be kept, please [reach out](https://ct.gdcc.io). + +### Indexing performance improved + +Noticeable improvements in performance, especially for large datasets containing thousands of files. +Uploading files one by one to the dataset is much faster now, allowing uploading thousands of files in an acceptable timeframe. Not only uploading a file, but all edit operations on datasets containing many files, got faster. +Performance tweaks include indexing of the datasets in the background and optimizations in the amount of the indexing operations needed. Furthermore, updates to the dateset no longer wait for ingesting to finish. Ingesting was already running in the background, but it took a lock, preventing updating the dataset and degrading performance for datasets containing many files. (PR #9558) + +### For installations using MDC (Make Data Count), it is now possible to display both the MDC metrics and the legacy access counts, generated before MDC was enabled. + +This is enabled via the new setting `:MDCStartDate` that specifies the cutoff date. If a dataset has any legacy access counts collected prior to that date, those numbers will be displayed in addition to any MDC numbers recorded since then. (PR #6543) + +### Changes to PID Provider JVM Settings + +In preparation for a future feature to use multiple PID providers at the same time, all JVM settings for PID providers +have been enabled to be configured using MicroProfile Config. In the same go, they were renamed to match the name +of the provider to be configured. + +Please watch your log files for deprecation warnings. Your old settings will be picked up, but you should migrate +to the new names to avoid unnecessary log clutter and get prepared for more future changes. An example message +looks like this: + +``` +[#|2023-03-31T16:55:27.992+0000|WARNING|Payara 5.2022.5|edu.harvard.iq.dataverse.settings.source.AliasConfigSource|_ThreadID=30;_ThreadName=RunLevelControllerThread-1680281704925;_TimeMillis=1680281727992;_LevelValue=900;| + Detected deprecated config option doi.username in use. Please update your config to use dataverse.pid.datacite.username.|#] +``` + +Here is a list of the new settings: + +- dataverse.pid.datacite.mds-api-url +- dataverse.pid.datacite.rest-api-url +- dataverse.pid.datacite.username +- dataverse.pid.datacite.password +- dataverse.pid.handlenet.key.path +- dataverse.pid.handlenet.key.passphrase +- dataverse.pid.handlenet.index +- dataverse.pid.permalink.base-url +- dataverse.pid.ezid.api-url +- dataverse.pid.ezid.username +- dataverse.pid.ezid.password + +See also https://guides.dataverse.org/en/5.14/installation/config.html#persistent-identifiers-and-publishing-datasets (multiple PRs: #8823 #8828) + +### Signposting for Dataverse + +This release adds [Signposting](https://signposting.org) support to Dataverse to improve machine discoverability of datasets and files. 
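One quick way to see Signposting in action is to inspect the HTTP `Link` headers returned with a dataset landing page. A minimal sketch (server and DOI are placeholders):

```shell
# Dump the response headers for a dataset page and look for Signposting "Link" headers
curl -s -o /dev/null -D - \
  "https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/EXAMPLE" \
  | grep -i '^link'
```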
(PR #8424) + +The following MicroProfile Config options are now available (these can be treated as JVM options): + +- dataverse.signposting.level1-author-limit +- dataverse.signposting.level1-item-limit + +Signposting is described in more detail in a new page in the Admin Guide on discoverability: https://guides.dataverse.org/en/5.14/admin/discoverability.html + +### Permalinks support + +Dataverse now optionally supports PermaLinks, a type of persistent identifier that does not involve a global registry service. PermaLinks are appropriate for Intranet deployment and catalog use cases. (PR #8674) + + +### Creating datasets with incomplete metadata through API + +It is now possible to create a dataset with some nominally mandatory metadata fields left unpopulated. For details on the use case that lead to this feature see issue #8822 and PR #8940. + +The create dataset API call (POST to /api/dataverses/#dataverseId/datasets) is extended with the "doNotValidate" parameter. However, in order to be able to create a dataset with incomplete metadata, the Solr configuration must be updated first with the new "schema.xml" file (do not forget to run the metadata fields update script when you use custom metadata). Reindexing is optional, but recommended. Also, even when this feature is not used, it is recommended to update the Solr configuration and reindex the metadata. Finally, this new feature can be activated with the "dataverse.api.allow-incomplete-metadata" JVM option. + +You can also enable a valid/incomplete metadata filter in the "My Data" page using the "dataverse.ui.show-validity-filter" JVM option. By default, this filter is not shown. When you wish to use this filter, you must reindex the datasets first, otherwise datasets with valid metadata will not be shown in the results. + +It is not possible to publish datasets with incomplete or incomplete metadata. By default, you also cannot send such datasets for review. If you wish to enable sending for review of datasets with incomplete metadata, turn on the "dataverse.ui.allow-review-for-incomplete" JVM option. + +In order to customize the wording and add translations to the UI sections extended by this feature, you can edit the "Bundle.properties" file and the localized versions of that file. The property keys used by this feature are: +- incomplete +- valid +- dataset.message.incomplete.warning +- mydataFragment.validity +- dataverses.api.create.dataset.error.mustIncludeAuthorName + +### Registering PIDs (DOIs or Handles) for files in select collections + +It is now possible to configure registering PIDs for files in individual collections. + +For example, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See the [:FilePIDsEnabled](https://guides.dataverse.org/en/5.14/installation/config.html#filepidsenabled) section of the Configuration guide for details. (PR #9614) + +### Mechanism Added for Adding External Exporters + +It is now possible for third parties to develop and share code to provide new metadata export formats for Dataverse. Export formats can be made available via the Dataverse UI and API or configured for use in Harvesting. Dataverse now provides developers with a separate dataverse-spi JAR file that contains the Java interfaces and classes required to create a new metadata Exporter. 
Once a new Exporter has been created and packaged as a JAR file, administrators can use it by specifying a local directory for third party Exporters, dropping then Exporter JAR there, and restarting Payara. This mechanism also allows new Exporters to replace any of Dataverse's existing metadata export formats. (PR #9175). See also https://guides.dataverse.org/en/5.14/developers/metadataexport.html + +#### Backward Incompatibilities + +Care should be taken when replacing Dataverse's internal metadata export formats as third party code, including other third party Exporters may depend on the contents of those export formats. When replacing an existing format, one must also remember to delete the cached metadata export files or run the reExport command for the metadata exports of existing datasets to be updated. + +#### New JVM/MicroProfile Settings + +dataverse.spi.export.directory - specifies a directory, readable by the Dataverse server. Any Exporter JAR files placed in this directory will be read by Dataverse and used to add/replace the specified metadata format. + +### Contact Email Improvements + +Email sent from the contact forms to the contact(s) for a collection, dataset, or datafile can now optionally be cc'd to a support email address. The support email address can be changed from the default :SystemEmail address to a separate :SupportEmail address. When multiple contacts are listed, the system will now send one email to all contacts (with the optional cc if configured) instead of separate emails to each contact. Contact names with a comma that refer to Organizations will no longer have the name parts reversed in the email greeting. A new protected/admin feedback API has been added. (PR #9186) See https://guides.dataverse.org/en/5.14/api/native-api.html#send-feedback-to-contact-s + +#### New JVM/MicroProfile Settings + +dataverse.mail.support-email - allows a separate email, distinct from the :SystemEmail to be used as the to address in emails from the contact form/ feedback api. +dataverse.mail.cc-support-on-contact-emails - include the support email address as a CC: entry when contact/feedback emails are sent to the contacts for a collection, dataset, or datafile. + +### Support for Grouping Dataset Files by Folder and Category Tag + +Dataverse now supports grouping dataset files by folder and/or optionally by Tag/Category. The default for whether to order by folder can be changed via :OrderByFolder. Ordering by category must be enabled by an administrator via the :CategoryOrder parameter which is used to specify which tags appear first (e.g. to put Documentation files before Data or Code files, etc.) These Group-By options work with the existing sort options, i.e. sorting alphabetically means that files within each folder or tag group will be sorted alphabetically. :AllowUsersToManageOrdering can be set to true to allow users to turn folder ordering and category ordering (if enabled) on or off in the current dataset view. (PR #9204) + +#### New Settings + +:CategoryOrder - a comma separated list of Category/Tag names defining the order in which files with those tags should be displayed. The setting can include custom tag names along with the pre-defined defaults ( Documentation, Data, and Code, which can be overridden by the ::FileCategories setting.) 
+:OrderByFolder - defaults to true - whether to group files in the same folder together +:AllowUserManagementOfOrder - default false - allow users to toggle ordering on/off in the dataset display + +### Metadata field Series now repeatable + +This enhancement allows depositors to define multiple instances of the metadata field Series in the Citation Metadata block. + +Data contained in a dataset may belong to multiple series. Making the field repeatable makes it possible to reflect this fact in the dataset metadata. (PR #9256) + +### Guides in PDF Format + +An experimental version of the guides in PDF format is available at (PR #9474) + +Advice for anyone who wants to help improve the PDF is available at https://guides.dataverse.org/en/5.14/developers/documentation.html#pdf-version-of-the-guides + +### Datasets API extended + +The following APIs have been added: (PR #9592) + +- `/api/datasets/summaryFieldNames` +- `/api/datasets/privateUrlDatasetVersion/{privateUrlToken}` +- `/api/datasets/privateUrlDatasetVersion/{privateUrlToken}/citation` +- `/api/datasets/{datasetId}/versions/{version}/citation` + +### Extra fields included in the JSON metadata + +The following fields are now available in the native JSON output: + +- `alternativePersistentId` +- `publicationDate` +- `citationDate` + +(PR #9657) + + +### Files downloaded from Binder are now in their original format. + +For example, data.dta (a Stata file) will be downloaded instead of data.tab (the archival version Dataverse creates as part of a successful ingest). (PR #9483) + +This should make it easier to write code to reproduce results as the dataset authors and subsequent researchers are likely operating on the original file format rather that the format that Dataverse creates. + +For details, see #9374, , and . + +### Handling of license information fixed in the API + +(PR #9568) + +When publishing a dataset via API, it now requires the dataset to either have a standard license configured, or have valid Custom Terms of Use (if allowed by the instance). Attempting to publish a dataset without such **will fail with an error message**. This introduces a backward incompatibility, and if you have scripts that automatically create, update and publish datasets, this last step may start failing. Because, unfortunately, there were some problems with the datasets APIs that made it difficult to manage licenses, so an API user was likely to end up with a dataset missing either of the above. In this release we have addressed it by making the following fixes: + +We fixed the incompatibility between the format in which license information was *exported* in json, and the format the create and update APIs were expecting it for *import* (https://github.com/IQSS/dataverse/issues/9155). This means that the following json format can now be imported: +``` +"license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" +} +``` +However, for the sake of backward compatibility the old format +``` +"license" : "CC0 1.0" +``` +will be accepted as well. + +We have added the default license (CC0) to the model json file that we provide and recommend to use as the model in the Native API Guide (https://github.com/IQSS/dataverse/issues/9364). + +And we have corrected the misleading language in the same guide where we used to recommend to users that they select, edit and re-import only the `.metadataBlocks` fragment of the json metadata representing the latest version. 
There are in fact other useful pieces of information that need to be preserved in the update (such as the `"license"` section above). So the recommended way of creating base json for updates via the API is to select *everything but* the `"files"` section, with (for example) the following `jq` command: + +``` +jq '.data | del(.files)' +``` + +Please see the [Update Metadata For a Dataset](https://guides.dataverse.org/en/5.14/api/native-api.html#update-metadata-for-a-dataset) section of our Native Api guide for more information. + + +### New External Tool Type and Implementation + +With this release a new experimental external tool type has been added to the Dataverse Software. The tool type is "query" and its first implementation is an experimental tool named [Ask the Data](https://github.com/IQSS/askdataverse) which allows users to ask natural language queries of tabular files in Dataverse. More information is available in the External Tools section of the guides. (PR #9737) See https://guides.dataverse.org/en/5.14/admin/external-tools.html#file-level-query-tools + +### Default Value for File PIDs registration has changed + +The default for whether PIDs are registered for files or not is now false. + +Installations where file PIDs were enabled by default will have to add the :FilePIDsEnabled = true setting to maintain the existing functionality. + +See Step 9 of the upgrade instructions: + + If your installation did not have :FilePIDsEnabled set, you will need to set it to true to keep file PIDs enabled: + + curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:FilePIDsEnabled + + +It is now possible to allow File PIDs to be enabled/disabled per collection. See the [:AllowEnablingFilePIDsPerCollection](https://guides.dataverse.org/en/latest/installation/config.html#allowenablingfilepidspercollection) section of the Configuration guide for details. + +For example, registration of PIDs for files can now be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. + + +### Changes and fixes in this release not already mentioned above include: + +- An endpoint for deleting a file has been added to the native API: https://guides.dataverse.org/en/5.14/api/native-api.html#deleting-files (PR #9383) +- A date column has been added to the restricted file access request overview, indicating when the earliest request by that user was made. An issue was fixed where where the request list was not updated when a request was approved or rejected. (PR #9257) +- Changes made in v5.13 and v5.14 in multiple PRs to improve the embedded Schema.org metadata in dataset pages will only be propagated to the Schema.Org JSON-LD metadata export if a reExportAll() is done. (PR #9102) +- It is now possible to write external vocabulary scripts that target a single child field in a metadata block. Example scripts are now available at https://github.com/gdcc/dataverse-external-vocab-support that can be configured to support lookup from the Research Orgnaization Registry (ROR) for the Author Affiliation Field and for the CrossRef Funding Registry (Fundreg) in the Funding Information/Agency field, both in the standard Citation metadata block. Application if these scripts to other fields, and the development of other scripts targetting child fields are now possible (PR #9402) +- Dataverse now supports requiring a secret key to add or edit metadata in specified "system" metadata blocks. 
Changing the metadata in such system metadata blocks is not allowed without the key and is currently only allowed via API. (PR #9388) +- An attempt will be made to extract a geospatial bounding box (west, south, east, north) from NetCDF and HDF5 files and then insert these values into the geospatial metadata block, if enabled. (#9541) See https://guides.dataverse.org/en/5.14/user/dataset-management.html#geospatial-bounding-box +- A file previewer called H5Web is now available for exploring and visualizing NetCDF and HDF5 files. (PR #9600) See https://guides.dataverse.org/en/5.14/user/dataset-management.html#h5web-previewer +- Two file previewers for GeoTIFF and Shapefiles are now available for visualizing geotiff image files and zipped Shapefiles on a map. See https://github.com/gdcc/dataverse-previewers +- A new alternative to set up the Dataverse dependencies for the development environment through Docker Compose. (PR #9417) +- A new alternative, explained in the documentation, to build the Sphinx guides through a Docker container. (PR #9417) +- A container called "configbaker" has been added that configures Dataverse while running in containers. This allows developers to spin up Dataverse with a single command. (PR #9574) +- Direct upload via the Dataverse UI will now support any algorithm configured via the `:FileFixityChecksumAlgorithm` setting. External apps using the direct upload API can now query Dataverse to discover which algorithm should be used. Sites that have been using an algorithm other than MD5 and direct upload and/or dvwebloader may want to use the `/api/admin/updateHashValues` call (see https://guides.dataverse.org/en/5.14/installation/config.html?highlight=updatehashvalues#filefixitychecksumalgorithm) to replace any MD5 hashes on existing files. (PR #9482) +- The OAI_ORE metadata export (and hence the archival Bag for a dataset) now includes information about file embargoes. (PR #9698) +- The DatasetFieldType attribute "displayFormat" is now returned by the API. (PR #9668) +- An API named "MyData" has been available for years but is newly documented. It is used to get a list of the objects (datasets, collections or datafiles) that an authenticated user can modify. (PR #9596) +- A Go client library for Dataverse APIs is now available. See https://guides.dataverse.org/en/5.14/api/client-libraries.html +- A feature flag called "api-session-auth" has been added temporarily to aid in the development of the new frontend (#9063) but will be removed once bearer tokens (#9229) have been implemented. There is a security risk (CSRF) in enabling this flag! Do not use it in production! For more information, see https://guides.dataverse.org/en/5.14/installation/config.html#feature-flags +- A feature flag called "api-bearer-auth" has been added. This allows OIDC user accounts to send authenticated API requests using Bearer Tokens. Note: This feature is limited to OIDC! For more information, see https://guides.dataverse.org/en/5.14/installation/config.html#feature-flags (PR #9591) + + +## Complete List of Changes + +For the complete list of code changes in this release, see the [5.14 milestone](https://github.com/IQSS/dataverse/milestone/108?closed=1) on GitHub. diff --git a/doc/release-notes/6.0-release-notes.md b/doc/release-notes/6.0-release-notes.md new file mode 100644 index 00000000000..df916216f5b --- /dev/null +++ b/doc/release-notes/6.0-release-notes.md @@ -0,0 +1,300 @@ +# Dataverse 6.0 + +This is a platform upgrade release. Payara, Solr, and Java have been upgraded.
No features have been added to the Dataverse software itself. Only a handful of bugs were fixed. + +Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project! + +## Release Highlights (Major Upgrades, Breaking Changes) + +This release contains major upgrades to core components. Detailed upgrade instructions can be found below. + +### Runtime + +- The required Java version has been increased from version 11 to 17. + - See PR #9764 for details. +- Payara application server has been upgraded to version 6.2023.8. + - This is a required update. + - Please note that Payara Community 5 has reached [end of life](https://www.payara.fish/products/payara-platform-product-lifecycle/) + - See PR #9685 and PR #9795 for details. +- Solr has been upgraded to version 9.3.0. + - See PR #9787 for details. +- PostgreSQL 13 remains the tested and supported version. + - That said, the installer and Flyway have been upgraded to support PostgreSQL 14 and 15. See the [PostgreSQL](https://guides.dataverse.org/en/6.0/installation/prerequisites.html#postgresql) section of the Installation Guide and PR #9877 for details. + +### Development + +- Removal of Vagrant and Docker All In One (docker-aio), deprecated in Dataverse v5.14. See PR #9838 and PR #9685 for details. +- All tests have been migrated to use JUnit 5 exclusively from now on. See PR #9796 for details. + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 5.14. + +### Upgrade from Java 11 to Java 17 + +Java 17 is now required for Dataverse. Solr can run under Java 11 or Java 17 but the latter is recommended. In preparation for the Java upgrade, stop both Dataverse/Payara and Solr. + +1. Undeploy Dataverse, if deployed, using the unprivileged service account. + + `sudo -u dataverse /usr/local/payara5/bin/asadmin list-applications` + + `sudo -u dataverse /usr/local/payara5/bin/asadmin undeploy dataverse-5.14` + +1. Stop Payara 5. + + `sudo -u dataverse /usr/local/payara5/bin/asadmin stop-domain` + +1. Stop Solr 8. + + `sudo systemctl stop solr.service` + +1. Install Java 17. + + Assuming you are using RHEL or a derivative such as Rocky Linux: + + `sudo yum install java-17-openjdk` + +1. Set Java 17 as the default. + + Assuming you are using RHEL or a derivative such as Rocky Linux: + + `sudo alternatives --config java` + +1. Test that Java 17 is the default. + + `java -version` + +### Upgrade from Payara 5 to Payara 6 + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. 
For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +1. Download Payara 6.2023.8. + + `curl -L -O https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip` + +1. Unzip it to /usr/local (or your preferred location). + + `sudo unzip payara-6.2023.8.zip -d /usr/local/` + +1. Change ownership of the unzipped Payara to your "service" user ("dataverse" by default). + + `sudo chown -R dataverse /usr/local/payara6` + +1. Undeploy Dataverse, if deployed, using the unprivileged service account. + + `sudo -u dataverse /usr/local/payara5/bin/asadmin list-applications` + + `sudo -u dataverse /usr/local/payara5/bin/asadmin undeploy dataverse-5.14` + +1. Stop Payara 5, if running. + + `sudo -u dataverse /usr/local/payara5/bin/asadmin stop-domain` + +1. Copy Dataverse-related lines from Payara 5 to Payara 6 domain.xml. + + `sudo -u dataverse cp /usr/local/payara6/glassfish/domains/domain1/config/domain.xml /usr/local/payara6/glassfish/domains/domain1/config/domain.xml.orig` + + `sudo egrep 'dataverse|doi' /usr/local/payara5/glassfish/domains/domain1/config/domain.xml > lines.txt` + + `sudo vi /usr/local/payara6/glassfish/domains/domain1/config/domain.xml` + + The lines will appear in two sections, examples shown below (but your content will vary). + + Section 1: system properties (under ``) + + ``` + + + + + + ``` + + Note: if you used the Dataverse installer, you won't have a `dataverse.db.password` property. See "Create password aliases" below. + + Section 2: JVM options (under ``, the one under ``, not under ``) + + ``` + -Ddataverse.files.directory=/usr/local/dvn/data + -Ddataverse.files.file.type=file + -Ddataverse.files.file.label=file + -Ddataverse.files.file.directory=/usr/local/dvn/data + -Ddataverse.rserve.host=localhost + -Ddataverse.rserve.port=6311 + -Ddataverse.rserve.user=rserve + -Ddataverse.rserve.password=rserve + -Ddataverse.auth.password-reset-timeout-in-minutes=60 + -Ddataverse.timerServer=true + -Ddataverse.fqdn=dev1.dataverse.org + -Ddataverse.siteUrl=https://dev1.dataverse.org + -Ddataverse.files.storage-driver-id=file + -Ddoi.username=testaccount + -Ddoi.password=notmypassword + -Ddoi.baseurlstring=https://mds.test.datacite.org/ + -Ddoi.dataciterestapiurlstring=https://api.test.datacite.org + ``` + +1. Check the `Xmx` setting in `domain.xml`. + + Under `/usr/local/payara6/glassfish/domains/domain1/config/domain.xml`, check the `Xmx` setting under ``, where you put the JVM options, not the one under ``. Note that there are two such settings, and you want to adjust the one in the stanza with Dataverse options. This sets the JVM heap size; a good rule of thumb is half of your system's total RAM. You may specify the value in MB (`8192m`) or GB (`8g`). + +1. Copy `jhove.conf` and `jhoveConfig.xsd` from Payara 5, edit and change `payara5` to `payara6`. + + `sudo cp /usr/local/payara5/glassfish/domains/domain1/config/jhove* /usr/local/payara6/glassfish/domains/domain1/config/` + + `sudo chown dataverse /usr/local/payara6/glassfish/domains/domain1/config/jhove*` + + `sudo -u dataverse vi /usr/local/payara6/glassfish/domains/domain1/config/jhove.conf` + +1. Copy logos from Payara 5 to Payara 6. + + These logos are for collections (dataverses). + + `sudo -u dataverse cp -r /usr/local/payara5/glassfish/domains/domain1/docroot/logos /usr/local/payara6/glassfish/domains/domain1/docroot` + +1. If you are using Make Data Count (MDC), edit :MDCLogPath. 
+ + Your `:MDCLogPath` database setting might be pointing to a Payara 5 directory such as `/usr/local/payara5/glassfish/domains/domain1/logs`. If so, edit this to be Payara 6. You'll probably want to copy your logs over as well. + +1. Update systemd unit file (or other init system) from `/usr/local/payara5` to `/usr/local/payara6`, if applicable. + +1. Start Payara. + + `sudo -u dataverse /usr/local/payara6/bin/asadmin start-domain` + +1. Create a Java mail resource, replacing "localhost" for mailhost with your mail relay server, and replacing "localhost" for fromaddress with the FQDN of your Dataverse server. + + `sudo -u dataverse /usr/local/payara6/bin/asadmin create-javamail-resource --mailhost "localhost" --mailuser "dataversenotify" --fromaddress "do-not-reply@localhost" mail/notifyMailSession` + +1. Create password aliases for your database, rserve and datacite jvm-options, if you're using them. + + `echo "AS_ADMIN_ALIASPASSWORD=yourDBpassword" > /tmp/dataverse.db.password.txt` + + `sudo -u dataverse /usr/local/payara6/bin/asadmin create-password-alias --passwordfile /tmp/dataverse.db.password.txt` + + When you are prompted "Enter the value for the aliasname operand", enter `dataverse.db.password` + + You should see "Command create-password-alias executed successfully." + + You'll want to perform similar commands for `rserve_password_alias` and `doi_password_alias` if you're using Rserve and/or DataCite. + +1. Enable workaround for FISH-7722. + + The following workaround is for https://github.com/payara/Payara/issues/6337 + + `sudo -u dataverse /usr/local/payara6/bin/asadmin create-jvm-options --add-opens=java.base/java.io=ALL-UNNAMED` + +1. Create the network listener on port 8009. + + `sudo -u dataverse /usr/local/payara6/bin/asadmin create-network-listener --protocol http-listener-1 --listenerport 8009 --jkenabled true jk-connector` + +1. Deploy the Dataverse 6.0 war file. + + `sudo -u dataverse /usr/local/payara6/bin/asadmin deploy /path/to/dataverse-6.0.war` + +1. Check that you get a version number from Dataverse. + + This is just a sanity check that Dataverse has been deployed properly. + + `curl http://localhost:8080/api/info/version` + +1. Perform one final Payara restart to ensure that timers are initialized properly. + + `sudo -u dataverse /usr/local/payara6/bin/asadmin stop-domain` + + `sudo -u dataverse /usr/local/payara6/bin/asadmin start-domain` + +### Upgrade from Solr 8 to 9 + +Solr has been upgraded to Solr 9. You must install Solr fresh and reindex. You cannot use your old `schema.xml` because the format has changed. + +The instructions below are copied from https://guides.dataverse.org/en/6.0/installation/prerequisites.html#installing-solr and tweaked a bit for an upgrade scenario. + +We assume that you already have a user called "solr" (from the instructions above), added during your initial installation of Solr. We also assume that you have already stopped Solr 8 as explained in the instructions above about upgrading Java. + +1. Become the "solr" user and then download and configure Solr. + + `su - solr` + + `cd /usr/local/solr` + + `wget https://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz` + + `tar xvzf solr-9.3.0.tgz` + + `cd solr-9.3.0` + + `cp -r server/solr/configsets/_default server/solr/collection1` + +1. Unzip "dvinstall.zip" from this release. Unzip it into /tmp. Then copy the following files into place. 
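+ +   For example, the unzip step might look like this (just a sketch, assuming the release's `dvinstall.zip` has been downloaded to `/tmp`): + +   `cd /tmp && unzip dvinstall.zip`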
+ + `cp /tmp/dvinstall/schema*.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf` + + `cp /tmp/dvinstall/solrconfig.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf` + +1. A Dataverse installation requires a change to the jetty.xml file that ships with Solr. + + Edit `/usr/local/solr/solr-9.3.0/server/etc/jetty.xml`, increasing `requestHeaderSize` from `8192` to `102400` + +1. Tell Solr to create the core "collection1" on startup. + + `echo "name=collection1" > /usr/local/solr/solr-9.3.0/server/solr/collection1/core.properties` + +1. Update your init script. + + Your init script may be located at `/etc/systemd/system/solr.service`, for example. Update the path to Solr to be `/usr/local/solr/solr-9.3.0`. + +1. Start Solr using your init script and check collection1. + + The collection1 check below should print out fields Dataverse uses like "dsDescription". + + `systemctl start solr.service` + + `curl http://localhost:8983/solr/collection1/schema/fields` + +1. Reindex Solr. + + For details, see https://guides.dataverse.org/en/6.0/admin/solr-search-index.html but here is the reindex command: + + `curl http://localhost:8080/api/admin/index` + +1. If you have custom metadata blocks installed, you must update your Solr `schema.xml` to include your custom fields. + + For details, please see https://guides.dataverse.org/en/6.0/admin/metadatacustomization.html#updating-the-solr-schema + + At a high level you will be copying custom fields from the output of http://localhost:8080/api/admin/index/solr/schema or using a script to automate this. + +## Potential Archiver Incompatibilities with Payara 6 + +The [Google Cloud and DuraCloud archivers](https://guides.dataverse.org/en/5.14/installation/config.html#bagit-export) may not work in Dataverse 6.0. + +This is due to the archivers' dependence on libraries that include classes in `javax.* packages` that are no longer available. If these classes are actually used when the archivers run, the archivers would fail. As these two archivers require additional setup, they have not been tested in 6.0. Community members using these archivers or considering their use are encouraged to test them with 6.0 and report any errors and/or provide fixes for them that can be included in future releases. + +## Bug Fix for Dataset Templates with Custom Terms of Use + +A bug was fixed for the following scenario: + +- Create a template with custom terms. +- Set that template as the default. +- Try to create a dataset. +- A 500 error appears before the form to create dataset is even shown. + +For more details, see issue #9825 and PR #9892 + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.0 Milestone](https://github.com/IQSS/dataverse/milestone/109?closed=1) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. diff --git a/doc/release-notes/8732-date-in-citation-harvested-datasets.md b/doc/release-notes/8732-date-in-citation-harvested-datasets.md deleted file mode 100644 index 85f2d24a8a9..00000000000 --- a/doc/release-notes/8732-date-in-citation-harvested-datasets.md +++ /dev/null @@ -1,7 +0,0 @@ -Fix the year displayed in citation for harvested dataset, specialy for oai_dc format. 
- -For normal datasets, the date used is the "citation date" which is by default the publication date (the first release date) (https://guides.dataverse.org/en/latest/api/native-api.html?highlight=citationdate#set-citation-date-field-type-for-a-dataset). - -But for a harvested dataset, the distribution date is used instead and this date is not always present in the harvested metadata. With oai_dc format the date tag if used as production date. - -Now, the production date is used for harvested dataset in addition to distribution date. \ No newline at end of file diff --git a/doc/release-notes/8733-oai_dc-date.md b/doc/release-notes/8733-oai_dc-date.md deleted file mode 100644 index a2a09f361d3..00000000000 --- a/doc/release-notes/8733-oai_dc-date.md +++ /dev/null @@ -1,4 +0,0 @@ -For exports and harvesting in `oai_dc` format, if "Production Date" is not set, "Publication Date" is now used instead. This change is reflected in the [Dataverse 4+ Metadata Crosswalk][] linked from the [Appendix][] of the User Guide. - -[Dataverse 4+ Metadata Crosswalk]: https://docs.google.com/spreadsheets/d/10Luzti7svVTVKTA-px27oq3RxCUM-QbiTkm8iMd5C54/edit#gid=1901625433&range=K7 -[Appendix]: https://guides.dataverse.org/en/latest/user/appendix.html diff --git a/doc/shib/shib.md b/doc/shib/shib.md index 2c178a93f35..9cff6d827e7 100644 --- a/doc/shib/shib.md +++ b/doc/shib/shib.md @@ -82,11 +82,7 @@ Run `service httpd restart`. ## Update/verify files under /etc/shibboleth -For /etc/shibboleth/shibboleth2.xml use the version from https://github.com/IQSS/dataverse/blob/master/conf/vagrant/etc/shibboleth/shibboleth2.xml but replace "pdurbin.pagekite.me" with the "shibtest.dataverse.org". - -Put https://github.com/IQSS/dataverse/blob/master/conf/vagrant/etc/shibboleth/dataverse-idp-metadata.xml at /etc/shibboleth/dataverse-idp-metadata.xml - -Put https://github.com/IQSS/dataverse/blob/master/conf/vagrant/etc/shibboleth/attribute-map.xml at +Get files from the Installation Guide. After making these changes, run `service shibd restart` and `service httpd restart`. diff --git a/doc/sphinx-guides/SphinxRSTCheatSheet.md b/doc/sphinx-guides/SphinxRSTCheatSheet.md index 1ccd293080c..300260cb5b1 100755 --- a/doc/sphinx-guides/SphinxRSTCheatSheet.md +++ b/doc/sphinx-guides/SphinxRSTCheatSheet.md @@ -10,7 +10,7 @@ RST Cheat Sheet for Sphinx v 1.2.2 | Bold text | **text** | | | Italics/emphasis | *text* | | | literal | ``literal`` | | -| Internal cross-reference link | See section 5.3.1 of Sphinx documentationand example below | See section 5.3.1 of Sphinx documentationand example below | +| Internal cross-reference link | See section 5.3.1 of Sphinx documentation and example below | See section 5.3.1 of Sphinx documentation and example below | | code block | .. code-block:: guess | Allows for code blocks to be displayed properly | For more cheats please visit the [RST cheat sheet google doc] (https://docs.google.com/document/d/105H3iwPwgnPqwuMJI7q-h6FLtXV_EUCiwq2P13lADgA/edit?usp=sharing) \ No newline at end of file diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 4488c54cd5e..028f07d11cb 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -1,5 +1,10 @@ -# current version as of this writing -Sphinx==3.5.4 +# Developers, please use Python 3.9 or lower to build the guides. 
+# For your convenience, a solution for Python 3.10 is provided below +# but we would prefer that you use the same version of Sphinx +# (below on the < 3.10 line) that is used to build the production guides. +Sphinx==3.5.4 ; python_version < '3.10' +Sphinx==5.3.0 ; python_version >= '3.10' + # Necessary workaround for ReadTheDocs for Sphinx 3.x - unnecessary as of Sphinx 4.5+ Jinja2>=3.0.2,<3.1 diff --git a/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml b/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml index 4f338905751..26144544d9e 100644 --- a/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml +++ b/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml @@ -1,8 +1,8 @@ # currently no other option but to have daily logs and have year-month-day format in the name with # 4-digit year and 2-digit month and day -# /usr/local/payara5/glassfish/domains/domain1/logs/counter_2019-01-11.log +# /usr/local/payara6/glassfish/domains/domain1/logs/counter_2019-01-11.log #log_name_pattern: sample_logs/counter_(yyyy-mm-dd).log -log_name_pattern: /usr/local/payara5/glassfish/domains/domain1/logs/mdc/counter_(yyyy-mm-dd).log +log_name_pattern: /usr/local/payara6/glassfish/domains/domain1/logs/mdc/counter_(yyyy-mm-dd).log # path_types regular expressions allow matching to classify page urls as either an investigation or request # based on specific URL structure for your system. diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 61db5dfed93..8543300dd2c 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -1,5 +1,7 @@ -Tool Type Scope Description -Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. -Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, and GeoJSON - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers -Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions. 
+Tool Type Scope Description +Data Explorer explore file "A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse." +Whole Tale explore dataset "A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_." +Binder explore dataset Binder allows you to spin up custom computing environments in the cloud (including Jupyter notebooks) with the files from your dataset. `Installation instructions `_ are in the Data Exploration Lab girder_ythub project. +File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" +Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." +Ask the Data query file Ask the Data is an experimental tool that allows you to ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation.
diff --git a/doc/sphinx-guides/source/_static/api/add-license.json b/doc/sphinx-guides/source/_static/api/add-license.json index 969d6d58dab..a9d5dd34093 100644 --- a/doc/sphinx-guides/source/_static/api/add-license.json +++ b/doc/sphinx-guides/source/_static/api/add-license.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by/4.0", "shortDescription": "Creative Commons Attribution 4.0 International License.", "iconUrl": "https://i.creativecommons.org/l/by/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 2 } diff --git a/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json b/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json index ea0922dadc8..c81c5b32aab 100644 --- a/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json +++ b/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json @@ -2,7 +2,7 @@ "typeName": "subject", "value": ["Astronomy and Astrophysics", "Agricultural Sciences", -"Arts and Humanities", "Physics"] +"Arts and Humanities", "Physics", "Mathematical Sciences"] } diff --git a/doc/sphinx-guides/source/_static/api/dataset-update-metadata.json b/doc/sphinx-guides/source/_static/api/dataset-update-metadata.json index 6e499d4e164..dcb3e136907 100644 --- a/doc/sphinx-guides/source/_static/api/dataset-update-metadata.json +++ b/doc/sphinx-guides/source/_static/api/dataset-update-metadata.json @@ -1,4 +1,8 @@ { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, "metadataBlocks": { "citation": { "displayName": "Citation Metadata", diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 05eaadc3458..3b155fc7e55 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -34,7 +34,8 @@ LastProducer1, FirstProducer1 LastProducer2, FirstProducer2 1003-01-01 - ProductionPlace + ProductionPlace One + ProductionPlace Two SoftwareName1 SoftwareName2 GrantInformationGrantNumber1 @@ -51,8 +52,12 @@ 1002-01-01 - SeriesName - SeriesInformation + SeriesName One + SeriesInformation One + + + SeriesName Two + SeriesInformation Two @@ -88,12 +93,12 @@ 10 20 - 30 - 40 + 40 + 30 - 80 - 70 + 70 + 80 60 50 diff --git a/scripts/vagrant/counter-processor-config.yaml b/doc/sphinx-guides/source/_static/developers/counter-processor-config.yaml similarity index 100% rename from scripts/vagrant/counter-processor-config.yaml rename to doc/sphinx-guides/source/_static/developers/counter-processor-config.yaml diff --git a/doc/sphinx-guides/source/_static/docsdataverse_org.css b/doc/sphinx-guides/source/_static/docsdataverse_org.css index e4afe89e217..da4ba06ddd4 100755 --- a/doc/sphinx-guides/source/_static/docsdataverse_org.css +++ b/doc/sphinx-guides/source/_static/docsdataverse_org.css @@ -68,7 +68,7 @@ a.headerlink { #sidebar.bs-sidenav { background-color: #f8d5b8; } -#sidebar.bs-sidenav .nav > li > a:hover, #sidebar.bs-sidenav .nav > li > a:focus { +#sidebar.bs-sidenav .nav > li > a:hover, #sidebar.bs-sidenav .nav > li > a:focus, #sidebar.bs-sidenav .nav > li > a.current { background-color: #fbf4c5; border-right: 1px solid #dbd8e0; text-decoration: none; diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root index 1de94331523..b9ef9960318 100755 --- 
a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root +++ b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root @@ -4,7 +4,7 @@ set -e -ASADMIN=/usr/local/payara5/bin/asadmin +ASADMIN=/usr/local/payara6/bin/asadmin case "$1" in start) diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service index 7c457e615d8..19bb190e740 100755 --- a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service +++ b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service @@ -3,7 +3,7 @@ # description: Payara App Server set -e -ASADMIN=/usr/local/payara5/bin/asadmin +ASADMIN=/usr/local/payara6/bin/asadmin APP_SERVER_USER=dataverse case "$1" in diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr index 7ca04cdff3f..9cf8902eb14 100755 --- a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr +++ b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr @@ -5,7 +5,7 @@ # chkconfig: 35 92 08 # description: Starts and stops Apache Solr -SOLR_DIR="/usr/local/solr/solr-8.11.1" +SOLR_DIR="/usr/local/solr/solr-9.3.0" SOLR_COMMAND="bin/solr" SOLR_ARGS="-m 1g -j jetty.host=127.0.0.1" SOLR_USER=solr diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml b/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml index 41bf4709ba9..3960d003ad2 100644 --- a/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml +++ b/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml @@ -18,7 +18,7 @@ https://wiki.shibboleth.net/confluence/display/SHIB2/NativeSPConfiguration - + SAML2 SAML1 diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service index c8c82f6d6b2..c8efcb9c6f9 100644 --- a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service +++ b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service @@ -4,9 +4,9 @@ After = syslog.target network.target [Service] Type = forking -ExecStart = /usr/bin/java -jar /usr/local/payara5/glassfish/lib/client/appserver-cli.jar start-domain -ExecStop = /usr/bin/java -jar /usr/local/payara5/glassfish/lib/client/appserver-cli.jar stop-domain -ExecReload = /usr/bin/java -jar /usr/local/payara5/glassfish/lib/client/appserver-cli.jar restart-domain +ExecStart = /usr/bin/java -jar /usr/local/payara6/glassfish/lib/client/appserver-cli.jar start-domain +ExecStop = /usr/bin/java -jar /usr/local/payara6/glassfish/lib/client/appserver-cli.jar stop-domain +ExecReload = /usr/bin/java -jar /usr/local/payara6/glassfish/lib/client/appserver-cli.jar restart-domain User=dataverse LimitNOFILE=32768 Environment="LANG=en_US.UTF-8" diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service index d89ee108377..0b8a8528490 100644 --- a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service +++ b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service @@ -5,9 +5,9 @@ After = syslog.target network.target remote-fs.target nss-lookup.target 
[Service] User = solr Type = forking -WorkingDirectory = /usr/local/solr/solr-8.11.1 -ExecStart = /usr/local/solr/solr-8.11.1/bin/solr start -m 1g -j "jetty.host=127.0.0.1" -ExecStop = /usr/local/solr/solr-8.11.1/bin/solr stop +WorkingDirectory = /usr/local/solr/solr-9.3.0 +ExecStart = /usr/local/solr/solr-9.3.0/bin/solr start -m 1g -j "jetty.host=127.0.0.1" +ExecStop = /usr/local/solr/solr-9.3.0/bin/solr stop LimitNOFILE=65000 LimitNPROC=65000 Restart=on-failure diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json new file mode 100644 index 00000000000..b188520dabb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json @@ -0,0 +1,26 @@ +{ + "displayName": "AuxFileViewer", + "description": "Show an auxiliary file from a dataset file.", + "toolName": "auxPreviewer", + "scope": "file", + "types": [ + "preview" + ], + "toolUrl": "https://example.com/AuxFileViewer.html", + "toolParameters": { + "queryParameters": [ + { + "fileid": "{fileId}" + } + ] + }, + "requirements": { + "auxFilesExist": [ + { + "formatTag": "myFormatTag", + "formatVersion": "0.1" + } + ] + }, + "contentType": "application/foobar" +} diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json index e30c067a86b..22dd6477cb4 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json @@ -12,8 +12,16 @@ "PID": "{datasetPid}" }, { - "apiToken": "{apiToken}" + "locale":"{localeCode}" } ] - } + }, + "allowedApiCalls": [ + { + "name":"retrieveDatasetJson", + "httpMethod":"GET", + "urlTemplate":"/api/v1/datasets/{datasetId}", + "timeOut":10 + } + ] } diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json index 14f71a280b3..2b6a0b8e092 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json @@ -1,6 +1,6 @@ { "displayName": "Fabulous File Tool", - "description": "Fabulous Fun for Files!", + "description": "A non-existent tool that is fabulous fun for files!", "toolName": "fabulous", "scope": "file", "types": [ @@ -9,14 +9,26 @@ ], "toolUrl": "https://fabulousfiletool.com", "contentType": "text/tab-separated-values", + "httpMethod":"GET", "toolParameters": { "queryParameters": [ { "fileid": "{fileId}" }, { - "key": "{apiToken}" + "datasetPid": "{datasetPid}" + }, + { + "locale":"{localeCode}" } ] - } + }, + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 + } + ] } diff --git a/doc/sphinx-guides/source/_static/installation/files/usr/local/payara5/glassfish/domains/domain1/config/logging.properties b/doc/sphinx-guides/source/_static/installation/files/usr/local/payara5/glassfish/domains/domain1/config/logging.properties deleted file mode 100644 index 4054c794452..00000000000 --- 
a/doc/sphinx-guides/source/_static/installation/files/usr/local/payara5/glassfish/domains/domain1/config/logging.properties +++ /dev/null @@ -1,166 +0,0 @@ -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. -# -# Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. -# -# The contents of this file are subject to the terms of either the GNU -# General Public License Version 2 only ("GPL") or the Common Development -# and Distribution License("CDDL") (collectively, the "License"). You -# may not use this file except in compliance with the License. You can -# obtain a copy of the License at -# https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html -# or packager/legal/LICENSE.txt. See the License for the specific -# language governing permissions and limitations under the License. -# -# When distributing the software, include this License Header Notice in each -# file and include the License file at packager/legal/LICENSE.txt. -# -# GPL Classpath Exception: -# Oracle designates this particular file as subject to the "Classpath" -# exception as provided by Oracle in the GPL Version 2 section of the License -# file that accompanied this code. -# -# Modifications: -# If applicable, add the following below the License Header, with the fields -# enclosed by brackets [] replaced by your own identifying information: -# "Portions Copyright [year] [name of copyright owner]" -# -# Contributor(s): -# If you wish your version of this file to be governed by only the CDDL or -# only the GPL Version 2, indicate your decision by adding "[Contributor] -# elects to include this software in this distribution under the [CDDL or GPL -# Version 2] license." If you don't indicate a single choice of license, a -# recipient has the option to distribute your version of this file under -# either the CDDL, the GPL Version 2 or to extend the choice of license to -# its licensees as provided above. However, if you add GPL Version 2 code -# and therefore, elected the GPL Version 2 license, then the option applies -# only if the new code is made subject to such option by the copyright -# holder. 
-# -# Portions Copyright [2016-2021] [Payara Foundation and/or its affiliates] - -#GlassFish logging.properties list -#Update June 13 2012 - -#All attributes details -handlers=java.util.logging.ConsoleHandler -handlerServices=com.sun.enterprise.server.logging.GFFileHandler,com.sun.enterprise.server.logging.SyslogHandler -java.util.logging.ConsoleHandler.formatter=com.sun.enterprise.server.logging.UniformLogFormatter -java.util.logging.FileHandler.count=1 -java.util.logging.FileHandler.formatter=java.util.logging.XMLFormatter -java.util.logging.FileHandler.limit=50000 -java.util.logging.FileHandler.pattern=%h/java%u.log -com.sun.enterprise.server.logging.GFFileHandler.compressOnRotation=false -com.sun.enterprise.server.logging.GFFileHandler.excludeFields= -com.sun.enterprise.server.logging.GFFileHandler.file=${com.sun.aas.instanceRoot}/logs/server.log -com.sun.enterprise.server.logging.GFFileHandler.flushFrequency=1 -com.sun.enterprise.server.logging.GFFileHandler.formatter=com.sun.enterprise.server.logging.ODLLogFormatter -com.sun.enterprise.server.logging.GFFileHandler.level=ALL -com.sun.enterprise.server.logging.GFFileHandler.logStandardStreams=true -com.sun.enterprise.server.logging.GFFileHandler.logtoConsole=false -com.sun.enterprise.server.logging.GFFileHandler.logtoFile=true -com.sun.enterprise.server.logging.GFFileHandler.maxHistoryFiles=0 -com.sun.enterprise.server.logging.GFFileHandler.multiLineMode=true -com.sun.enterprise.server.logging.GFFileHandler.retainErrorsStasticsForHours=0 -com.sun.enterprise.server.logging.GFFileHandler.rotationLimitInBytes=2000000 -com.sun.enterprise.server.logging.GFFileHandler.rotationOnDateChange=false -com.sun.enterprise.server.logging.GFFileHandler.rotationTimelimitInMinutes=0 -com.sun.enterprise.server.logging.SyslogHandler.level=ALL -com.sun.enterprise.server.logging.SyslogHandler.useSystemLogging=false -log4j.logger.org.hibernate.validator.util.Version=warn -com.sun.enterprise.server.logging.UniformLogFormatter.ansiColor=true - -#Payara Notification logging properties -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.compressOnRotation=false -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.file=${com.sun.aas.instanceRoot}/logs/notification.log -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.formatter=com.sun.enterprise.server.logging.ODLLogFormatter -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.logtoFile=true -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.maxHistoryFiles=0 -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.rotationLimitInBytes=2000000 -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.rotationOnDateChange=false -fish.payara.enterprise.server.logging.PayaraNotificationFileHandler.rotationTimelimitInMinutes=0 -fish.payara.deprecated.jsonlogformatter.underscoreprefix=false - -#All log level details - -.level=INFO -ShoalLogger.level=CONFIG -com.hazelcast.level=WARNING -java.util.logging.ConsoleHandler.level=FINEST -javax.enterprise.resource.corba.level=INFO -javax.enterprise.resource.javamail.level=INFO -javax.enterprise.resource.jdo.level=INFO -javax.enterprise.resource.jms.level=INFO -javax.enterprise.resource.jta.level=INFO -javax.enterprise.resource.resourceadapter.level=INFO -javax.enterprise.resource.sqltrace.level=FINE -javax.enterprise.resource.webcontainer.jsf.application.level=INFO -javax.enterprise.resource.webcontainer.jsf.config.level=INFO 
-javax.enterprise.resource.webcontainer.jsf.context.level=INFO -javax.enterprise.resource.webcontainer.jsf.facelets.level=INFO -javax.enterprise.resource.webcontainer.jsf.lifecycle.level=INFO -javax.enterprise.resource.webcontainer.jsf.managedbean.level=INFO -javax.enterprise.resource.webcontainer.jsf.renderkit.level=INFO -javax.enterprise.resource.webcontainer.jsf.resource.level=INFO -javax.enterprise.resource.webcontainer.jsf.taglib.level=INFO -javax.enterprise.resource.webcontainer.jsf.timing.level=INFO -javax.enterprise.system.container.cmp.level=INFO -javax.enterprise.system.container.ejb.level=INFO -javax.enterprise.system.container.ejb.mdb.level=INFO -javax.enterprise.system.container.web.level=INFO -javax.enterprise.system.core.classloading.level=INFO -javax.enterprise.system.core.config.level=INFO -javax.enterprise.system.core.level=INFO -javax.enterprise.system.core.security.level=INFO -javax.enterprise.system.core.selfmanagement.level=INFO -javax.enterprise.system.core.transaction.level=INFO -javax.enterprise.system.level=INFO -javax.enterprise.system.ssl.security.level=INFO -javax.enterprise.system.tools.admin.level=INFO -javax.enterprise.system.tools.backup.level=INFO -javax.enterprise.system.tools.deployment.common.level=WARNING -javax.enterprise.system.tools.deployment.dol.level=WARNING -javax.enterprise.system.tools.deployment.level=INFO -javax.enterprise.system.util.level=INFO -javax.enterprise.system.webservices.registry.level=INFO -javax.enterprise.system.webservices.rpc.level=INFO -javax.enterprise.system.webservices.saaj.level=INFO -javax.level=INFO -javax.mail.level=INFO -javax.org.glassfish.persistence.level=INFO -org.apache.catalina.level=INFO -org.apache.coyote.level=INFO -org.apache.jasper.level=INFO -org.eclipse.persistence.session.level=INFO -org.glassfish.admingui.level=INFO -org.glassfish.naming.level=INFO -org.jvnet.hk2.osgiadapter.level=INFO - -javax.enterprise.resource.corba.level=INFO -javax.enterprise.resource.jta.level=INFO -javax.enterprise.system.webservices.saaj.level=INFO -javax.enterprise.system.container.ejb.level=INFO -javax.enterprise.system.container.ejb.mdb.level=INFO -javax.enterprise.resource.javamail.level=INFO -javax.enterprise.system.webservices.rpc.level=INFO -javax.enterprise.system.container.web.level=INFO -javax.enterprise.resource.jms.level=INFO -javax.enterprise.system.webservices.registry.level=INFO -javax.enterprise.resource.webcontainer.jsf.application.level=INFO -javax.enterprise.resource.webcontainer.jsf.resource.level=INFO -javax.enterprise.resource.webcontainer.jsf.config.level=INFO -javax.enterprise.resource.webcontainer.jsf.context.level=INFO -javax.enterprise.resource.webcontainer.jsf.facelets.level=INFO -javax.enterprise.resource.webcontainer.jsf.lifecycle.level=INFO -javax.enterprise.resource.webcontainer.jsf.managedbean.level=INFO -javax.enterprise.resource.webcontainer.jsf.renderkit.level=INFO -javax.enterprise.resource.webcontainer.jsf.taglib.level=INFO -javax.enterprise.resource.webcontainer.jsf.timing.level=INFO -javax.org.glassfish.persistence.level=INFO -javax.enterprise.system.tools.backup.level=INFO -javax.mail.level=INFO -org.glassfish.admingui.level=INFO -org.glassfish.naming.level=INFO -org.eclipse.persistence.session.level=INFO -javax.enterprise.system.tools.deployment.dol.level=WARNING -javax.enterprise.system.tools.deployment.common.level=WARNING diff --git a/doc/sphinx-guides/source/_static/util/clear_timer.sh b/doc/sphinx-guides/source/_static/util/clear_timer.sh index 1d9966e4e07..641b2695084 100755 --- 
a/doc/sphinx-guides/source/_static/util/clear_timer.sh +++ b/doc/sphinx-guides/source/_static/util/clear_timer.sh @@ -8,7 +8,7 @@ # if you'd like to avoid that. # directory where Payara is installed -PAYARA_DIR=/usr/local/payara5 +PAYARA_DIR=/usr/local/payara6 # directory within Payara (defaults) DV_DIR=${PAYARA_DIR}/glassfish/domains/domain1 diff --git a/doc/sphinx-guides/source/_static/util/counter_daily.sh b/doc/sphinx-guides/source/_static/util/counter_daily.sh index a12439d9cf8..674972b18f2 100644 --- a/doc/sphinx-guides/source/_static/util/counter_daily.sh +++ b/doc/sphinx-guides/source/_static/util/counter_daily.sh @@ -1,7 +1,7 @@ #! /bin/bash COUNTER_PROCESSOR_DIRECTORY="/usr/local/counter-processor-0.1.04" -MDC_LOG_DIRECTORY="/usr/local/payara5/glassfish/domains/domain1/logs/mdc" +MDC_LOG_DIRECTORY="/usr/local/payara6/glassfish/domains/domain1/logs/mdc" # counter_daily.sh diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index a961ac0b067..170807d3d67 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -15,7 +15,7 @@ Dataverse collections have to be empty to delete them. Navigate to the Dataverse Move a Dataverse Collection ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Moves a Dataverse collection whose id is passed to a new Dataverse collection whose id is passed. The Dataverse collection alias also may be used instead of the id. If the moved Dataverse collection has a guestbook, template, metadata block, link, or featured Dataverse collection that is not compatible with the destination Dataverse collection, you will be informed and given the option to force the move and remove the association. Only accessible to superusers. :: +Moves a Dataverse collection whose id is passed to an existing Dataverse collection whose id is passed. The Dataverse collection alias also may be used instead of the id. If the moved Dataverse collection has a guestbook, template, metadata block, link, or featured Dataverse collection that is not compatible with the destination Dataverse collection, you will be informed and given the option to force the move and remove the association. Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/dataverses/$id/move/$destination-id @@ -118,6 +118,28 @@ Creates a link between a dataset and a Dataverse collection (see the :ref:`datas curl -H "X-Dataverse-key: $API_TOKEN" -X PUT http://$SERVER/api/datasets/$linked-dataset-id/link/$linking-dataverse-alias +List Collections that are Linked from a Dataset +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Lists the link(s) created between a dataset and a Dataverse collection (see the :ref:`dataset-linking` section of the User Guide for more information). :: + + curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/datasets/$linked-dataset-id/links + +It returns a list in the following format: + +.. code-block:: json + + { + "status": "OK", + "data": { + "dataverses that link to dataset id 56782": [ + "crc990 (id 18802)" + ] + } + } + +.. 
_unlink-a-dataset: + Unlink a Dataset ^^^^^^^^^^^^^^^^ @@ -131,15 +153,35 @@ Mint a PID for a File That Does Not Have One In the following example, the database id of the file is 42:: export FILE_ID=42 - curl http://localhost:8080/api/admin/$FILE_ID/registerDataFile + curl "http://localhost:8080/api/admin/$FILE_ID/registerDataFile" + +This method will return a FORBIDDEN response if minting of file PIDs is not enabled for the collection the file is in. (Note that it is possible to have file PIDs enabled for a specific collection, even when it is disabled for the Dataverse installation as a whole. See :ref:`collection-attributes-api` in the Native API Guide.) + +Mint PIDs for all unregistered published files in the specified collection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Mint PIDs for Files That Do Not Have Them -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The following API will register the PIDs for all the yet unregistered published files in the datasets **directly within the collection** specified by its alias:: -If you have a large number of files, you might want to consider miniting PIDs for files individually using the ``registerDataFile`` endpoint above in a for loop, sleeping between each registration:: + curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}" + +It will not attempt to register the datafiles in its sub-collections, so this call will need to be repeated on any sub-collections where files need to be registered as well. +File-level PID registration must be enabled on the collection. (Note that it is possible to have it enabled for a specific collection, even when it is disabled for the Dataverse installation as a whole. See :ref:`collection-attributes-api` in the Native API Guide.) + +This API will sleep for 1 second between registration calls by default. A longer sleep interval can be specified with an optional ``sleep=`` parameter:: + + curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}?sleep=5" + +Mint PIDs for ALL unregistered files in the database +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following API will attempt to register the PIDs for all the published files in your instance, in collections that allow file PIDs, that do not yet have them:: curl http://localhost:8080/api/admin/registerDataFileAll +The application will attempt to sleep for 1 second between registration attempts so as not to overload your persistent identifier service provider. Note that if you have a large number of files that need to be registered in your Dataverse, you may want to consider minting file PIDs within individual collections, or even for individual files using the ``registerDataFiles`` and/or ``registerDataFile`` endpoints above in a loop, with a longer sleep interval between calls. + + + Mint a New DOI for a Dataset with a Handle ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/admin/discoverability.rst b/doc/sphinx-guides/source/admin/discoverability.rst new file mode 100644 index 00000000000..767bb55bce6 --- /dev/null +++ b/doc/sphinx-guides/source/admin/discoverability.rst @@ -0,0 +1,76 @@ +Discoverability +=============== + +Datasets are made discoverable by a variety of methods. + +.. contents:: |toctitle| + :local: + +DataCite Integration +-------------------- + +If you are using `DataCite `_ as your DOI provider, when datasets are published, metadata is pushed to DataCite, where it can be searched. For more information, see :ref:`:DoiProvider` in the Installation Guide.
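+ +For example, after a dataset is published, you can check what DataCite received for it by querying the DataCite REST API directly. This is only a sketch: the DOI below is a placeholder to replace with one minted by your own installation, and installations registering DOIs in the DataCite test environment should query ``https://api.test.datacite.org`` instead:: + +  curl "https://api.datacite.org/dois/10.1234/EXAMPLE"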
+ +OAI-PMH (Harvesting) +-------------------- + +The Dataverse software supports a protocol called OAI-PMH that facilitates harvesting dataset metadata from one system into another. For details on harvesting, see the :doc:`harvestserver` section. + +Machine-Readable Metadata on Dataset Landing Pages +-------------------------------------------------- + +As recommended in `A Data Citation Roadmap for Scholarly Data Repositories `_, the Dataverse software embeds metadata on dataset landing pages in a variety of machine-readable ways. + +Dublin Core HTML Meta Tags +++++++++++++++++++++++++++ + +The HTML source of a dataset landing page includes "DC" (Dublin Core) ``<meta>`` tags, as well as Schema.org JSON-LD metadata such as the following:: + + {"@context":"http://schema.org","@type":"Dataset","@id":"https://doi.org/... + + +.. _discovery-sign-posting: + +Signposting ++++++++++++ + +The Dataverse software supports `Signposting `_. This allows machines to request more information about a dataset through the `Link `_ HTTP header. + +There are 2 Signposting profile levels, level 1 and level 2. In this implementation, + * Level 1 links are shown `as recommended `_ in the "Link" + HTTP header, which can be fetched by sending an HTTP HEAD request, e.g. ``curl -I https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.5072/FK2/KPY4ZC``. + The number of author and file links in the level 1 header can be configured as described below. + * The level 2 linkset can be fetched by visiting the dedicated linkset page for + that artifact. The link can be seen in level 1 links with key name ``rel="linkset"``. + +Note: Authors without author link will not be counted nor shown in any profile/linkset. +The following configuration options are available: + +- :ref:`dataverse.signposting.level1-author-limit` + + Sets the max number of authors to be shown in `level 1` profile. + If the number of authors (with identifier URLs) exceeds this value, no author links will be shown in `level 1` profile. + The default is 5. + +- :ref:`dataverse.signposting.level1-item-limit` + + Sets the max number of items/files which will be shown in `level 1` profile. Datasets with + too many files will not show any file links in `level 1` profile. They will be shown in `level 2` linkset only. + The default is 5. + +See also :ref:`signposting-api` in the API Guide. + +Additional Discoverability Through Integrations +----------------------------------------------- + +See :ref:`integrations-discovery` in the Integrations section for additional discovery methods you can enable. diff --git a/doc/sphinx-guides/source/admin/external-tools.rst b/doc/sphinx-guides/source/admin/external-tools.rst index ad6181a867a..67075e986bb 100644 --- a/doc/sphinx-guides/source/admin/external-tools.rst +++ b/doc/sphinx-guides/source/admin/external-tools.rst @@ -92,7 +92,15 @@ File Level Preview Tools File level preview tools allow the user to see a preview of the file contents without having to download it. -When a file has a preview available, a preview icon will appear next to that file in the file listing on the dataset page. On the file page itself, the preview will appear in a Preview tab either immediately or once a guestbook has been filled in or terms, if any, have been agreed to. +When a file has a preview available, a preview icon will appear next to that file in the file listing on the dataset page.
On the file page itself, the preview will appear in a Preview tab (renamed File Tools, if multiple tools are available) either immediately or once a guestbook has been filled in or terms, if any, have been agreed to. + +File Level Query Tools +++++++++++++++++++++++++ + +File level query tools allow the user to ask questions (e.g. natural language queries) of a data table's contents without having to download it. + +When a file has a query tool available, a query icon will appear next to that file in the file listing on the dataset page. On the file page itself, the query tool will appear in a Query tab (renamed File Tools, if multiple tools are available) either immediately or once a guestbook has been filled in or terms, if any, have been agreed to. + File Level Configure Tools ++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index c655d5af763..59fc4dc2c64 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,9 +21,29 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. +Please note that in some rare cases this GUI may fail to create a client because of some unexpected errors during these real time exchanges with an OAI server that is otherwise known to be valid. For example, in the past we have had issues with servers offering very long lists of sets (*really* long, in the thousands). To allow an admin to still be able to create a client in a situation like that, we provide the REST API that will do so without attempting any validation in real time. This obviously makes it the responsibility of the admin to supply the values that are definitely known to be valid - a working OAI url, the name of a set that does exist on the server, and/or a supported metadata format. See the :ref:`managing-harvesting-clients-api` section of the :doc:`/api/native-api` guide for more information. + +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. 
screen, in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which are sent in Step 2 of creating or editing a client. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. + + +How to Stop a Harvesting Run in Progress +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some harvesting jobs, especially the initial full harvest of a very large set - such as the default set of public datasets at IQSS - can take many hours. In case it is necessary to terminate such a long-running job, the following mechanism is provided (note that it is only available to a sysadmin with shell access to the application server): Create an empty file in the domain logs directory with the following name: ``stopharvest_<nickname>.<pid>``, where ``<nickname>`` is the nickname of the harvesting client and ``<pid>`` is the process id of the Application Server (Payara). This flag file needs to be owned by the same user that's running Payara, so that the application can remove it after stopping the job in progress. + +For example: + +.. code-block:: bash + + sudo touch /usr/local/payara6/glassfish/domains/domain1/logs/stopharvest_bigarchive.70916 + sudo chown dataverse /usr/local/payara6/glassfish/domains/domain1/logs/stopharvest_bigarchive.70916 + +Note: If the application server is stopped and restarted, any running harvesting jobs will be killed but may remain marked as in progress in the database. We thus recommend using the mechanism here to stop ongoing harvests prior to a server restart. + + What if a Run Fails? ~~~~~~~~~~~~~~~~~~~~ -Each harvesting client run logs a separate file per run to the app server's default logging directory (``/usr/local/payara5/glassfish/domains/domain1/logs/`` unless you've changed it). Look for filenames in the format ``harvest_TARGET_YYYY_MM_DD_timestamp.log`` to get a better idea of what's going wrong. +Each harvesting client run logs a separate file per run to the app server's default logging directory (``/usr/local/payara6/glassfish/domains/domain1/logs/`` unless you've changed it). Look for filenames in the format ``harvest_TARGET_YYYY_MM_DD_timestamp.log`` to get a better idea of what's going wrong. Note that you'll want to run a minimum of Dataverse Software 4.6, optimally 4.18 or beyond, for the best OAI-PMH interoperability. diff --git a/doc/sphinx-guides/source/admin/harvestserver.rst b/doc/sphinx-guides/source/admin/harvestserver.rst index 88004d9dc5f..773e048aa76 100644 --- a/doc/sphinx-guides/source/admin/harvestserver.rst +++ b/doc/sphinx-guides/source/admin/harvestserver.rst @@ -18,7 +18,7 @@ If you want to learn more about OAI-PMH, you could take a look at or the `OAI-PMH protocol definition `_. You might consider adding your OAI-enabled Dataverse installation to -`this shared list `_ +`this shared list `_ of such instances. The email portion of :ref:`systemEmail` will be visible via OAI-PMH (from the "Identify" verb). @@ -115,10 +115,10 @@ Some useful examples of search queries to define OAI sets: ``keywordValue:censorship`` -Important: New SOLR schema required! +Important: New Solr schema required! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In order to be able to define OAI sets, your SOLR server must be upgraded with the search schema that came with release 4.5 (or later), and all your local datasets must be re-indexed, once the new schema is installed. 
+In order to be able to define OAI sets, your Solr server must be upgraded with the search schema that came with release 4.5 (or later), and all your local datasets must be re-indexed, once the new schema is installed. OAI Set updates --------------- diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst index b97d9161d50..ac81aa737a7 100755 --- a/doc/sphinx-guides/source/admin/index.rst +++ b/doc/sphinx-guides/source/admin/index.rst @@ -14,6 +14,7 @@ This guide documents the functionality only available to superusers (such as "da dashboard external-tools + discoverability harvestclients harvestserver metadatacustomization diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index f6ca34bf3d4..21adf8338d9 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -14,10 +14,14 @@ A variety of integrations are oriented toward making it easier for your research GitHub ++++++ -Dataverse integration with GitHub is implemented via a Dataverse Uploader GitHub Action. It is a reusable, composite workflow for uploading a git repository or subdirectory into a dataset on a target Dataverse installation. The action is customizable, allowing users to choose to replace a dataset, add to the dataset, publish it or leave it as a draft version on Dataverse. The action provides some metadata to the dataset, such as the origin GitHub repository, and it preserves the directory tree structure. +GitHub can be integrated with a Dataverse installation in multiple ways. + +One Dataverse integration is implemented via a Dataverse Uploader GitHub Action. It is a reusable, composite workflow for uploading a git repository or subdirectory into a dataset on a target Dataverse installation. The action is customizable, allowing users to choose to replace a dataset, add to the dataset, publish it or leave it as a draft version in the Dataverse installation. The action provides some metadata to the dataset, such as the origin GitHub repository, and it preserves the directory tree structure. For instructions on using Dataverse Uploader GitHub Action, visit https://github.com/marketplace/actions/dataverse-uploader-action +In addition to the Dataverse Uploader GitHub Action, the :ref:`integrations-dashboard` also enables a pull of data from GitHub to a dataset. + Dropbox +++++++ @@ -28,7 +32,11 @@ Open Science Framework (OSF) The Center for Open Science's Open Science Framework (OSF) is an open source software project that facilitates open collaboration in science research across the lifespan of a scientific project. -For instructions on depositing data from OSF to your Dataverse installation, your researchers can visit https://help.osf.io/hc/en-us/articles/360019737314-Connect-Dataverse-to-a-Project +OSF can be integrated with a Dataverse installation in multiple ways. + +Researcher can configure OSF itself to deposit to your Dataverse installation by following `instructions from OSF `_. + +In addition to the method mentioned above, the :ref:`integrations-dashboard` also enables a pull of data from OSF to a dataset. RSpace ++++++ @@ -57,7 +65,7 @@ their research results and retain links to imported and exported data. Users can organize their data in "Datasets", which can be exported to a Dataverse installation via the command-line interface (CLI). 
-Renku dataset documentation: https://renku-python.readthedocs.io/en/latest/reference/commands.html#module-renku.cli.dataset +Renku documentation: https://renku-python.readthedocs.io Flagship deployment of the Renku platform: https://renkulab.io @@ -77,6 +85,41 @@ SampleDB is a web-based electronic lab notebook (ELN) with a focus on flexible m For instructions on using the Dataverse export, you can visit https://scientific-it-systems.iffgit.fz-juelich.de/SampleDB/administrator_guide/dataverse_export.html +REDCap ++++++ + +REDCap is a web-based application to capture data for clinical research and create databases and projects. + +The :ref:`integrations-dashboard` enables a pull of data from REDCap to a dataset in Dataverse. + +GitLab ++++++ + +GitLab is an open source Git repository and platform that provides free open and private repositories, issue-following capabilities, and wikis for collaborative software development. + +The :ref:`integrations-dashboard` enables a pull of data from GitLab to a dataset in Dataverse. + +iRODS ++++++ + +iRODS is an open source, metadata-driven data management system that is accessible through a host of different clients. + +The :ref:`integrations-dashboard` enables a pull of data from iRODS to a dataset in Dataverse. + +.. _integrations-dashboard: + +Integrations Dashboard +++++++++++++++++++++++ + +The integrations dashboard is software developed by the Dataverse community to enable easy data transfer from an existing data management platform to a dataset in a Dataverse collection. + +Instead of trying to set up Dataverse plug-ins in existing tools and systems to push data to a Dataverse installation, the dashboard works in reverse by being a portal to pull data from tools such as iRODS and GitHub into a dataset. + +Its aim is to make integrations more flexible and less dependent on the cooperation of the systems being integrated with. You can use it to either create a dataset from scratch and add metadata after files have been transferred, or you can use it to compare what is already in an existing dataset to make updating files in datasets easier. + +Its goal is to make the dashboard adjustable for a Dataverse installation's needs and easy to connect other systems to as well. + +The integrations dashboard is currently in development. A preview and more information can be found at: `rdm-integration GitHub repository `_ Embedding Data on Websites -------------------------- @@ -104,6 +147,8 @@ Compute Button The "Compute" button is still highly experimental and has special requirements such as use of a Swift object store, but it is documented under "Setting up Compute" in the :doc:`/installation/config` section of the Installation Guide. +.. _wholetale: + Whole Tale ++++++++++ @@ -111,12 +156,18 @@ Whole Tale `import data from a Dataverse installation `_ via identifier (e.g., DOI, URI, etc) or through the External Tools integration. For installation instructions, see the :doc:`external-tools` section or the `Integration `_ section of the Whole Tale User Guide. +.. _binder: + Binder ++++++ -Researchers can launch Jupyter Notebooks, RStudio, and other computational environments by entering the DOI of a dataset in a Dataverse installation on https://mybinder.org +Researchers can launch Jupyter Notebooks, RStudio, and other computational environments by entering the DOI of a dataset in a Dataverse installation at https://mybinder.org + +A Binder button can also be added to every dataset page to launch Binder from there. 
Instructions on enabling this feature can be found under :doc:`external-tools`. + +Additionally, institutions can self-host `BinderHub `_ (the software that powers mybinder.org), which lists the Dataverse software as one of the supported `repository providers `_. -Institutions can self host BinderHub. The Dataverse Project is one of the supported `repository providers `_. +.. _renku: Renku +++++ @@ -134,15 +185,12 @@ Avgidea Data Search Researchers can use a Google Sheets add-on to search for a Dataverse installation's CSV data and then import that data into a sheet. See `Avgidea Data Search `_ for details. +.. _integrations-discovery: + Discoverability --------------- -Integration with `DataCite `_ is built in to the Dataverse Software. When datasets are published, metadata is sent to DataCite. You can further increase the discoverability of your datasets by setting up additional integrations. - -OAI-PMH (Harvesting) -++++++++++++++++++++ - -The Dataverse Software supports a protocol called OAI-PMH that facilitates harvesting datasets from one system into another. For details on harvesting, see the :doc:`harvestserver` section. +A number of built-in features related to data discovery are listed under :doc:`discoverability`, but you can further increase the discoverability of your data by setting up integrations. SHARE +++++ diff --git a/doc/sphinx-guides/source/admin/make-data-count.rst b/doc/sphinx-guides/source/admin/make-data-count.rst index 8a96e949ff9..fe32af6649a 100644 --- a/doc/sphinx-guides/source/admin/make-data-count.rst +++ b/doc/sphinx-guides/source/admin/make-data-count.rst @@ -72,7 +72,8 @@ Enable or Disable Display of Make Data Count Metrics By default, when MDC logging is enabled (when ``:MDCLogPath`` is set), your Dataverse installation will display MDC metrics instead of its internal (legacy) metrics. You can avoid this (e.g. to collect MDC metrics for some period of time before starting to display them) by setting ``:DisplayMDCMetrics`` to false. -The following discussion assumes ``:MDCLogPath`` has been set to ``/usr/local/payara5/glassfish/domains/domain1/logs/mdc`` +The following discussion assumes ``:MDCLogPath`` has been set to ``/usr/local/payara6/glassfish/domains/domain1/logs/mdc`` +You can also decide to display MDC metrics along with Dataverse's traditional download counts from the time before MDC was enabled. To do this, set the :ref:`:MDCStartDate` to when you started MDC logging. Configure Counter Processor ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -102,7 +103,7 @@ Soon we will be setting up a cron job to run nightly but we start with a single * If you are running Counter Processor for the first time in the middle of a month, you will need to create blank log files for the previous days, e.g.: - * ``cd /usr/local/payara5/glassfish/domains/domain1/logs/mdc`` + * ``cd /usr/local/payara6/glassfish/domains/domain1/logs/mdc`` * ``touch counter_2019-02-01.log`` @@ -146,7 +147,9 @@ Configuring Your Dataverse Installation for Make Data Count Citations Please note: as explained in the note above about limitations, this feature is not available to Dataverse installations that use Handles. -To configure your Dataverse installation to pull citations from the test vs. production DataCite server see :ref:`doi.dataciterestapiurlstring` in the Installation Guide. +To configure your Dataverse installation to pull citations from the test vs. +production DataCite server see :ref:`dataverse.pid.datacite.rest-api-url` in +the Installation Guide. 
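As an illustration of the kind of calls this configuration supports (the endpoint paths below are an assumption based on the admin and native APIs and may differ from the exact commands documented for your version), pulling citations from DataCite for a single dataset and then reading them back can look something like this:

.. code-block:: bash

  export SERVER_URL=http://localhost:8080
  export DOI=doi:10.5072/FK2/BL2IBM

  # Ask this Dataverse installation to pull citations for one dataset from DataCite
  curl -X POST "$SERVER_URL/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=$DOI"

  # Read back the citations stored for that dataset
  curl "$SERVER_URL/api/datasets/:persistentId/makeDataCount/citations?persistentId=$DOI"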
Please note that in the curl example, Bash environment variables are used with the idea that you can set a few environment variables and copy and paste the examples as is. For example, "$DOI" could become "doi:10.5072/FK2/BL2IBM" by issuing the following export command from Bash: diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index ff1b265cef7..4f737bd730b 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -95,6 +95,11 @@ Each of the three main sections own sets of properties: | displayName | Acts as a brief label for display related to this | Should be relatively brief. The limit is 256 character, | | | #metadataBlock. | but very long names might cause display problems. | +----------------+---------------------------------------------------------+---------------------------------------------------------+ +| displayFacet | Label displayed in the search area when this | Should be brief. Long names will cause display problems | +| | #metadataBlock is configured as a search facet | in the search area. | +| | for a collection. See | | +| | :ref:`the API `. | | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ | blockURI | Associates the properties in a block with an external | The citation #metadataBlock has the blockURI | | | URI. | https://dataverse.org/schema/citation/ which assigns a | | | Properties will be assigned the | default global URI to terms such as | @@ -386,12 +391,16 @@ Metadata Block Setup Now that you understand the TSV format used for metadata blocks, the next step is to attempt to make improvements to existing metadata blocks or create entirely new metadata blocks. For either task, you should have a Dataverse Software development environment set up for testing where you can drop the database frequently while you make edits to TSV files. Once you have tested your TSV files, you should consider making a pull request to contribute your improvement back to the community. +.. _exploring-metadata-blocks: + Exploring Metadata Blocks ~~~~~~~~~~~~~~~~~~~~~~~~~ -In addition to studying the TSV files themselves you might find the following highly experimental and subject-to-change API endpoints useful to understand the metadata blocks that have already been loaded into your Dataverse installation: +In addition to studying the TSV files themselves you will probably find the :ref:`metadata-blocks-api` API helpful in getting a structured dump of metadata blocks in JSON format. -You can get a dump of metadata fields (yes, the output is odd, please open a issue) like this: +There are also a few older, highly experimental, and subject-to-change API endpoints under the "admin" API documented below but the public API above is preferred. + +You can get a dump of metadata fields like this: ``curl http://localhost:8080/api/admin/datasetfield`` @@ -404,13 +413,10 @@ Setting Up a Dev Environment for Testing You have several options for setting up a dev environment for testing metadata block changes: -- Vagrant: See the :doc:`/developers/tools` section of the Developer Guide. -- docker-aio: See https://github.com/IQSS/dataverse/tree/develop/conf/docker-aio +- Docker: See :doc:`/container/index`. - AWS deployment: See the :doc:`/developers/deployment` section of the Developer Guide. 
- Full dev environment: See the :doc:`/developers/dev-environment` section of the Developer Guide. -To get a clean environment in Vagrant, you'll be running ``vagrant destroy``. In Docker, you'll use ``docker rm``. For a full dev environment or AWS installation, you might find ``rebuild`` and related scripts at ``scripts/deploy/phoenix.dataverse.org`` useful. - Editing TSV files ~~~~~~~~~~~~~~~~~ @@ -448,12 +454,16 @@ metadatablock.name=(the value of **name** property from #metadatablock) metadatablock.displayName=(the value of **displayName** property from #metadatablock) +metadatablock.displayFacet=(the value of **displayFacet** property from #metadatablock) + example: metadatablock.name=citation metadatablock.displayName=Citation Metadata +metadatablock.displayFacet=Citation + #datasetField (field) properties ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasetfieldtype.(the value of **name** property from #datasetField).title=(the value of **title** property from #datasetField) @@ -490,6 +500,8 @@ Running a curl command like "load" example above should make the new custom meta ``curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "[\"journal\",\"geospatial\"]" http://localhost:8080/api/dataverses/:root/metadatablocks`` +.. _update-solr-schema: + Updating the Solr Schema ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -501,7 +513,7 @@ the Solr schema configuration, including any enabled metadata schemas: ``curl "http://localhost:8080/api/admin/index/solr/schema"`` -You can use :download:`update-fields.sh <../../../../conf/solr/8.11.1/update-fields.sh>` to easily add these to the +You can use :download:`update-fields.sh <../../../../conf/solr/9.3.0/update-fields.sh>` to easily add these to the Solr schema you installed for your Dataverse installation. The script needs a target XML file containing your Solr schema. (See the :doc:`/installation/prerequisites/` section of @@ -525,7 +537,7 @@ from some place else than your Dataverse installation). Please note that reconfigurations of your Solr index might require a re-index. Usually release notes indicate a necessary re-index, but for your custom metadata you will need to keep track on your own. -Please note also that if you are going to make a pull request updating ``conf/solr/8.11.1/schema.xml`` with fields you have +Please note also that if you are going to make a pull request updating ``conf/solr/9.3.0/schema.xml`` with fields you have added, you should first load all the custom metadata blocks in ``scripts/api/data/metadatablocks`` (including ones you don't care about) to create a complete list of fields. (This might change in the future.) @@ -565,7 +577,7 @@ In general, the external vocabulary support mechanism may be a better choice for The specifics of the user interface for entering/selecting a vocabulary term and how that term is then displayed are managed by third-party Javascripts. The initial Javascripts that have been created provide auto-completion, displaying a list of choices that match what the user has typed so far, but other interfaces, such as displaying a tree of options for a hierarchical vocabulary, are possible. Similarly, existing scripts do relatively simple things for displaying a term - showing the term's name in the appropriate language and providing a link to an external URL with more information, but more sophisticated displays are possible. 
-Scripts supporting use of vocabularies from services supporting the SKOMOS protocol (see https://skosmos.org) and retrieving ORCIDs (from https:/orcid.org) are available https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) +Scripts supporting use of vocabularies from services supporting the Skosmos protocol (see https://skosmos.org) and retrieving ORCIDs (from https://orcid.org) are available at https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) Configuration involves specifying which fields are to be mapped, whether free-text entries are allowed, which vocabulary(ies) should be used, what languages those vocabulary(ies) are available in, and several service protocol and service instance specific parameters. These are all defined in the :ref:`:CVocConf <:CVocConf>` setting as a JSON array. Details about the required elements as well as example JSON arrays are available at https://github.com/gdcc/dataverse-external-vocab-support, along with an example metadata block that can be used for testing. @@ -573,6 +585,58 @@ The scripts required can be hosted locally or retrieved dynamically from https:/ Please note that in addition to the :ref:`:CVocConf` described above, an alternative is the :ref:`:ControlledVocabularyCustomJavaScript` setting. +Protecting MetadataBlocks +------------------------- + +Dataverse can be configured to only allow entries for a metadata block to be changed (created, edited, deleted) by entities that know a defined secret key. +Metadata blocks protected by such a key are referred to as "System" metadata blocks. +A primary use case for system metadata blocks is to handle metadata created by third-party tools interacting with Dataverse where unintended changes to the metadata could cause a failure. Examples might include archiving systems or workflow engines. +To protect an existing metadatablock, one must set a key (recommended to be long and un-guessable) for that block: + +dataverse.metadata.block-system-metadata-keys.<block name>=<key value> + +This can be done using system properties (see :ref:`jvm-options`), environment variables, or other MicroProfile Config mechanisms supported by the app server. + `See Payara docs for supported sources `_. Note that a Payara restart may be required to enable the new option. + +For these secret keys, Payara password aliases are recommended. + + Alias creation example using the codemeta metadata block (actual name: codeMeta20): + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=1234ChangeMeToSomethingLong" > /tmp/key.txt + asadmin create-password-alias --passwordfile /tmp/key.txt dataverse.metadata.block-system-metadata-keys.codeMeta20 + rm /tmp/key.txt + + Alias deletion example for the codemeta metadata block (removes protected status): + + .. code-block:: shell + + asadmin delete-password-alias dataverse.metadata.block-system-metadata-keys.codeMeta20 + +A Payara restart is required after these example commands. + +When protected via a key, a metadata block will not be shown in the user interface when a dataset is being created or when metadata is being edited. Entries in such a system metadata block will be shown to users, consistent with Dataverse's design in which all metadata in published datasets is publicly visible. 
+ +Note that protecting a block with required fields, or using a template with an entry in a protected block, will make it impossible to create a new dataset via the user interface. Also note that for this reason protecting the citation metadatablock is not recommended. (Creating a dataset also automatically sets the date of deposit field in the citation block, which would be prohibited if the citation block is protected.) + +To remove protected status and return a block to working normally, remove the associated key. + +To add metadata to a system metadata block via API, one must include an additional key of the form + +mdkey.<block name>=<key value> + +as an HTTP header or query parameter (case sensitive) for each system metadata block in any API call in which metadata values are changed in that block. Multiple keys are allowed if more than one system metadatablock is being changed in a given API call. + +For example, following the :ref:`Add Dataset Metadata ` example from the :doc:`/developers/dataset-semantic-metadata-api`: + +.. code-block:: bash + + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -H 'mdkey.codeMeta20:1234ChangeMeToSomethingLong' -d '{"codeVersion": "1.0.0", "@context":{"codeVersion": "https://schema.org/softwareVersion"}}' "$SERVER_URL/api/datasets/$DATASET_ID/metadata" + + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"codeVersion": "1.0.1", "@context":{"codeVersion": "https://schema.org/softwareVersion"}}' "$SERVER_URL/api/datasets/$DATASET_ID/metadata?mdkey.codeMeta20=1234ChangeMeToSomethingLong&replace=true" + + Tips from the Dataverse Community --------------------------------- diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 78b8c8ce223..200c3a3e342 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -57,3 +57,13 @@ Downloading Metadata via API ---------------------------- The :doc:`/api/native-api` section of the API Guide explains how end users can download the metadata formats above via API. + +Exporter Configuration +---------------------- + +Two exporters - Schema.org JSON-LD and OpenAIRE - use an algorithm to determine whether an author or contact name belongs to a person or an organization. While the algorithm works well, there are cases in which it makes mistakes, usually inferring that an organization is a person. + +The Dataverse software implements two jvm-options that can be used to tune the algorithm: + +- :ref:`dataverse.personOrOrg.assumeCommaInPersonName` - boolean, default false. If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. +- :ref:`dataverse.personOrOrg.orgPhraseArray` - a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization, so adding it to this list ensures that names containing it are treated as organizations. diff --git a/doc/sphinx-guides/source/admin/solr-search-index.rst b/doc/sphinx-guides/source/admin/solr-search-index.rst index 5685672eceb..e6f7b588ede 100644 --- a/doc/sphinx-guides/source/admin/solr-search-index.rst +++ b/doc/sphinx-guides/source/admin/solr-search-index.rst @@ -1,7 +1,7 @@ Solr Search Index ================= -A Dataverse installation requires Solr to be operational at all times. 
If you stop Solr, you should see a error about this on the root Dataverse installation page, which is powered by the search index Solr provides. You can set up Solr by following the steps in our Installation Guide's :doc:`/installation/prerequisites` and :doc:`/installation/config` sections explaining how to configure it. This section you're reading now is about the care and feeding of the search index. PostgreSQL is the "source of truth" and the Dataverse installation will copy data from PostgreSQL into Solr. For this reason, the search index can be rebuilt at any time. Depending on the amount of data you have, this can be a slow process. You are encouraged to experiment with production data to get a sense of how long a full reindexing will take. +A Dataverse installation requires Solr to be operational at all times. If you stop Solr, you should see an error about this on the root Dataverse installation page, which is powered by the search index Solr provides. You can set up Solr by following the steps in our Installation Guide's :doc:`/installation/prerequisites` and :doc:`/installation/config` sections explaining how to configure it. This section you're reading now is about the care and feeding of the search index. PostgreSQL is the "source of truth" and the Dataverse installation will copy data from PostgreSQL into Solr. For this reason, the search index can be rebuilt at any time. Depending on the amount of data you have, this can be a slow process. You are encouraged to experiment with production data to get a sense of how long a full reindexing will take. .. contents:: Contents: :local: @@ -9,7 +9,7 @@ A Dataverse installation requires Solr to be operational at all times. If you st Full Reindex ------------- -There are two ways to perform a full reindex of the Dataverse installation search index. Starting with a "clear" ensures a completely clean index but involves downtime. Reindexing in place doesn't involve downtime but does not ensure a completely clean index. +There are two ways to perform a full reindex of the Dataverse installation search index. Starting with a "clear" ensures a completely clean index but involves downtime. Reindexing in place doesn't involve downtime but does not ensure a completely clean index (e.g. stale entries from destroyed datasets can remain in the index). Clear and Reindex +++++++++++++++++ @@ -22,7 +22,7 @@ Get a list of all database objects that are missing in Solr, and Solr documents ``curl http://localhost:8080/api/admin/index/status`` -Remove all Solr documents that are orphaned (ie not associated with objects in the database): +Remove all Solr documents that are orphaned (i.e. not associated with objects in the database): ``curl http://localhost:8080/api/admin/index/clear-orphans`` @@ -36,7 +36,7 @@ Please note that the moment you issue this command, it will appear to end users Start Async Reindex ~~~~~~~~~~~~~~~~~~~ -Please note that this operation may take hours depending on the amount of data in your system. This known issue is being tracked at https://github.com/IQSS/dataverse/issues/50 +Please note that this operation may take hours depending on the amount of data in your system and whether or not your installation is using full-text indexing. More information on this, as well as some reference times, can be found at https://github.com/IQSS/dataverse/issues/50. 
``curl http://localhost:8080/api/admin/index`` @@ -60,7 +60,7 @@ If indexing stops, this command should pick up where it left off based on which Manual Reindexing ----------------- -If you have made manual changes to a dataset in the database or wish to reindex a dataset that solr didn't want to index properly, it is possible to manually reindex Dataverse collections and datasets. +If you have made manual changes to a dataset in the database or wish to reindex a dataset that Solr didn't want to index properly, it is possible to manually reindex Dataverse collections and datasets. Reindexing Dataverse Collections ++++++++++++++++++++++++++++++++ @@ -69,7 +69,7 @@ Dataverse collections must be referenced by database object ID. If you have dire ``select id from dataverse where alias='dataversealias';`` -should work, or you may click the Dataverse Software's "Edit" menu and look for dataverseId= in the URLs produced by the drop-down. Then, to re-index: +should work, or you may click the Dataverse Software's "Edit" menu and look for *dataverseId=* in the URLs produced by the drop-down. Then, to re-index: ``curl http://localhost:8080/api/admin/index/dataverses/135`` @@ -89,7 +89,7 @@ To re-index a dataset by its database ID: Manually Querying Solr ---------------------- -If you suspect something isn't indexed properly in solr, you may bypass the Dataverse installation's web interface and query the command line directly to verify what solr returns: +If you suspect something isn't indexed properly in Solr, you may bypass the Dataverse installation's web interface and query the command line directly to verify what Solr returns: ``curl "http://localhost:8983/solr/collection1/select?q=dsPersistentId:doi:10.15139/S3/HFV0AO"`` diff --git a/doc/sphinx-guides/source/admin/troubleshooting.rst b/doc/sphinx-guides/source/admin/troubleshooting.rst index 9f085ba90cd..acbdcaae17e 100644 --- a/doc/sphinx-guides/source/admin/troubleshooting.rst +++ b/doc/sphinx-guides/source/admin/troubleshooting.rst @@ -53,15 +53,13 @@ Long-Running Ingest Jobs Have Exhausted System Resources Ingest is both CPU- and memory-intensive, and depending on your system resources and the size and format of tabular data files uploaded, may render your Dataverse installation unresponsive or nearly inoperable. It is possible to cancel these jobs by purging the ingest queue. -``/usr/local/payara5/mq/bin/imqcmd -u admin query dst -t q -n DataverseIngest`` will query the DataverseIngest destination. The password, unless you have changed it, matches the username. +``/usr/local/payara6/mq/bin/imqcmd -u admin query dst -t q -n DataverseIngest`` will query the DataverseIngest destination. The password, unless you have changed it, matches the username. -``/usr/local/payara5/mq/bin/imqcmd -u admin purge dst -t q -n DataverseIngest`` will purge the DataverseIngest queue, and prompt for your confirmation. +``/usr/local/payara6/mq/bin/imqcmd -u admin purge dst -t q -n DataverseIngest`` will purge the DataverseIngest queue, and prompt for your confirmation. Finally, list destinations to verify that the purge was successful: -``/usr/local/payara5/mq/bin/imqcmd -u admin list dst`` - -If you are still running Glassfish, substitute glassfish4 for payara5 above. If you have installed your Dataverse installation in some other location, adjust the above paths accordingly. +``/usr/local/payara6/mq/bin/imqcmd -u admin list dst`` .. 
_troubleshooting-payara: @@ -73,7 +71,7 @@ Payara Finding the Payara Log File ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``/usr/local/payara5/glassfish/domains/domain1/logs/server.log`` is the main place to look when you encounter problems (assuming you installed Payara in the default directory). Hopefully an error message has been logged. If there's a stack trace, it may be of interest to developers, especially they can trace line numbers back to a tagged version or commit. Send more of the stack trace (the entire file if possible) to developers who can help (see "Getting Help", below) and be sure to say which version of the Dataverse Software you have installed. +``/usr/local/payara6/glassfish/domains/domain1/logs/server.log`` is the main place to look when you encounter problems (assuming you installed Payara in the default directory). Hopefully an error message has been logged. If there's a stack trace, it may be of interest to developers, especially if they can trace line numbers back to a tagged version or commit. Send more of the stack trace (the entire file if possible) to developers who can help (see "Getting Help", below) and be sure to say which version of the Dataverse Software you have installed. .. _increase-payara-logging: diff --git a/doc/sphinx-guides/source/admin/user-administration.rst b/doc/sphinx-guides/source/admin/user-administration.rst index 608a8ab2b72..a21263f6f17 100644 --- a/doc/sphinx-guides/source/admin/user-administration.rst +++ b/doc/sphinx-guides/source/admin/user-administration.rst @@ -57,9 +57,9 @@ See :ref:`deactivate-a-user` Confirm Email ------------- -A Dataverse installation encourages builtin/local users to verify their email address upon signup or email change so that sysadmins can be assured that users can be contacted. +A Dataverse installation encourages builtin/local users to verify their email address upon sign up or email change so that sysadmins can be assured that users can be contacted. -The app will send a standard welcome email with a URL the user can click, which, when activated, will store a ``lastconfirmed`` timestamp in the ``authenticateduser`` table of the database. Any time this is "null" for a user (immediately after signup and/or changing of their Dataverse installation email address), their current email on file is considered to not be verified. The link that is sent expires after a time (the default is 24 hours), but this is configurable by a superuser via the ``:MinutesUntilConfirmEmailTokenExpires`` config option. +The app will send a standard welcome email with a URL the user can click, which, when activated, will store a ``lastconfirmed`` timestamp in the ``authenticateduser`` table of the database. Any time this is "null" for a user (immediately after sign up and/or changing of their Dataverse installation email address), their current email on file is considered to not be verified. The link that is sent expires after a time (the default is 24 hours), but this is configurable by a superuser via the ``:MinutesUntilConfirmEmailTokenExpires`` config option. Should users' URL token expire, they will see a "Verify Email" button on the account information page to send another URL. 
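For example, a superuser could lengthen that expiration window via the database settings API (a sketch; the 2880-minute value, i.e. 48 hours, is only an illustration):

.. code-block:: bash

  # Allow email confirmation links to remain valid for 48 hours (2880 minutes)
  curl -X PUT -d 2880 http://localhost:8080/api/admin/settings/:MinutesUntilConfirmEmailTokenExpires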
diff --git a/doc/sphinx-guides/source/api/apps.rst b/doc/sphinx-guides/source/api/apps.rst index 5573056051c..a498c62d3d4 100755 --- a/doc/sphinx-guides/source/api/apps.rst +++ b/doc/sphinx-guides/source/api/apps.rst @@ -113,6 +113,16 @@ Dataverse Software on Android makes use of a Dataverse installation's Search API https://github.com/IQSS/dataverse-android +Go +-- + +Integrations Dashboard +~~~~~~~~~~~~~~~~~~~~~~ + +The integrations dashboard is software developed by the Dataverse community to enable easy data transfer from an existing data management platform to a dataset in a Dataverse collection. See :ref:`integrations-dashboard` for details. + +https://github.com/libis/rdm-integration + PHP --- diff --git a/doc/sphinx-guides/source/api/auth.rst b/doc/sphinx-guides/source/api/auth.rst index a10de14de5a..bbc81b595e3 100644 --- a/doc/sphinx-guides/source/api/auth.rst +++ b/doc/sphinx-guides/source/api/auth.rst @@ -63,3 +63,20 @@ Resetting Your API Token ------------------------ You can reset your API Token from your account page in your Dataverse installation as described in the :doc:`/user/account` section of the User Guide. + +.. _bearer-tokens: + +Bearer Tokens +------------- + +Bearer tokens are defined in `RFC 6750`_ and can be used as an alternative to API tokens if your installation has been set up to use them (see :ref:`bearer-token-auth` in the Installation Guide). + +.. _RFC 6750: https://tools.ietf.org/html/rfc6750 + +To test if bearer tokens are working, you can try something like the following (using the :ref:`User Information` API endpoint), substituting in parameters for your installation and user. + +.. code-block:: bash + + export TOKEN=`curl -s -X POST --location "http://keycloak.mydomain.com:8090/realms/oidc-realm/protocol/openid-connect/token" -H "Content-Type: application/x-www-form-urlencoded" -d "username=kcuser&password=kcpassword&grant_type=password&client_id=oidc-client&client_secret=ss6gE8mODCDfqesQaSG3gwUwZqZt547E" | jq '.access_token' -r | tr -d "\n"` + + curl -H "Authorization: Bearer $TOKEN" http://localhost:8080/api/users/:me diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index 634f03a8125..62069f62c23 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -1,54 +1,75 @@ Client Libraries ================ -Currently there are client libraries for Python, Javascript, R, Java, and Julia that can be used to develop against Dataverse Software APIs. We use the term "client library" on this page but "Dataverse Software SDK" (software development kit) is another way of describing these resources. They are designed to help developers express Dataverse Software concepts more easily in the languages listed below. For support on any of these client libraries, please consult each project's README. +Listed below are a variety of client libraries to help you interact with Dataverse APIs from Python, R, Javascript, etc. -Because a Dataverse installation is a SWORD server, additional client libraries exist for Java, Ruby, and PHP per the :doc:`/api/sword` page. +To get support for any of these client libraries, please consult each project's README. .. contents:: |toctitle| :local: -Python ------- +C/C++ +----- -There are two Python modules for interacting with Dataverse Software APIs. +https://github.com/aeonSolutions/OpenScience-Dataverse-API-C-library is the official C/C++ library for Dataverse APIs. 
-`pyDataverse `_ primarily allows developers to manage Dataverse collections, datasets and datafiles. Its intention is to help with data migrations and DevOps activities such as testing and configuration management. The module is developed by `Stefan Kasberger `_ from `AUSSDA - The Austrian Social Science Data Archive `_. +This C/C++ library was created and is currently maintained by `Miguel T. `_ To learn how to install and use it, see the project's `wiki page `_. + +Go +-- +https://github.com/libis/rdm-dataverse-go-api is a Go API library that can be used in your project by simply adding ``github.com/libis/rdm-dataverse-go-api`` as a dependency in your ``go.mod`` file. See the GitHub page for more details and usage examples. -`dataverse-client-python `_ had its initial release in 2015. `Robert Liebowitz `_ created this library while at the `Center for Open Science (COS) `_ and the COS uses it to integrate the `Open Science Framework (OSF) `_ with a Dataverse installation via an add-on which itself is open source and listed on the :doc:`/api/apps` page. +Java +---- + +https://github.com/IQSS/dataverse-client-java is the official Java library for Dataverse APIs. + +`Richard Adams `_ from `ResearchSpace `_ created and maintains this library. Javascript ---------- -https://github.com/IQSS/dataverse-client-javascript is the official Javascript package for Dataverse Software APIs. It can be found on npm at https://www.npmjs.com/package/js-dataverse 
The module is developed by `Stefan Kasberger `_ from `AUSSDA - The Austrian Social Science Data Archive `_. + +`dataverse-client-python `_ had its initial release in 2015. `Robert Liebowitz `_ created this library while at the `Center for Open Science (COS) `_ and the COS uses it to integrate the `Open Science Framework (OSF) `_ with Dataverse installations via an add-on which itself is open source and listed on the :doc:`/api/apps` page. + +`Pooch `_ is a Python library that allows library and application developers to download data. Among other features, it takes care of various protocols, caching in OS-specific locations, checksum verification and adds optional features like progress bars or log messages. Among other popular repositories, Pooch supports Dataverse in the sense that you can reference Dataverse-hosted datasets by just a DOI and Pooch will determine the data repository type, query the Dataverse API for contained files and checksums, giving you an easy interface to download them. + R - -https://github.com/IQSS/dataverse-client-r is the official R package for Dataverse Software APIs. The latest release can be installed from `CRAN `_. +https://github.com/IQSS/dataverse-client-r is the official R package for Dataverse APIs. The latest release can be installed from `CRAN `_. The R client can search and download datasets. It is useful when automatically (instead of manually) downloading data files as part of a script. For bulk edit and upload operations, we currently recommend pyDataverse. The package is currently maintained by `Shiro Kuriwaki `_. It was originally created by `Thomas Leeper `_ and then formerly maintained by `Will Beasley `_. -Java ----- - -https://github.com/IQSS/dataverse-client-java is the official Java library for Dataverse Software APIs. - -`Richard Adams `_ from `ResearchSpace `_ created and maintains this library. Ruby ---- -https://github.com/libis/dataverse_api is a Ruby gem for Dataverse Software APIs. It is registered as a library on Rubygems (https://rubygems.org/search?query=dataverse). +https://github.com/libis/dataverse_api is a Ruby gem for Dataverse APIs. It is registered as a library on Rubygems (https://rubygems.org/search?query=dataverse). The gem is created and maintained by the LIBIS team (https://www.libis.be) at the University of Leuven (https://www.kuleuven.be). - -Julia ------ - -https://github.com/gaelforget/Dataverse.jl is the official Julia package for Dataverse Software APIs. It can be found on JuliaHub (https://juliahub.com/ui/Packages/Dataverse/xWAqY/) and leverages pyDataverse to provide an interface to Dataverse's data access API and native API. Dataverse.jl provides a few additional functionalities with documentation (https://gaelforget.github.io/Dataverse.jl/dev/) and a demo notebook (https://gaelforget.github.io/Dataverse.jl/dev/notebook.html). - -It was created and is maintained by `Gael Forget `_. diff --git a/doc/sphinx-guides/source/api/curation-labels.rst b/doc/sphinx-guides/source/api/curation-labels.rst index 36950a37eb3..0675eeec398 100644 --- a/doc/sphinx-guides/source/api/curation-labels.rst +++ b/doc/sphinx-guides/source/api/curation-labels.rst @@ -93,3 +93,22 @@ To get the list of allowed curation labels allowed for a given Dataset curl -H X-Dataverse-key:$API_TOKEN "$SERVER_URL/api/datasets/:persistentId/allowedCurationLabels?persistentId=$DATASET_PID" You should expect a 200 ("OK") response with a comma-separated list of allowed labels contained in a JSON 'data' object. 
+ + +Get a Report on the Curation Status of All Datasets +--------------------------------------------------- + +To get a CSV file listing the curation label assigned to each Dataset with a draft version, along with the creation and last modification dates, and list of those with permissions to publish the version. + +This API call is restricted to superusers. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + + Example: Get the report + + curl -H X-Dataverse-key:$API_TOKEN "$SERVER_URL/api/datasets/listCurationStates" + +You should expect a 200 ("OK") response with a CSV formatted response. diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index d72a6f62004..05affaf975e 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -39,7 +39,7 @@ How External Tools Are Presented to Users An external tool can appear in your Dataverse installation in a variety of ways: -- as an explore, preview, or configure option for a file +- as an explore, preview, query or configure option for a file - as an explore option for a dataset - as an embedded preview on the file landing page @@ -53,15 +53,21 @@ External tools must be expressed in an external tool manifest file, a specific J Examples of Manifests +++++++++++++++++++++ -Let's look at two examples of external tool manifests (one at the file level and one at the dataset level) before we dive into how they work. +Let's look at a few examples of external tool manifests (both at the file level and at the dataset level) before we dive into how they work. + +.. _tools-for-files: External Tools for Files ^^^^^^^^^^^^^^^^^^^^^^^^ -:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level both an "explore" tool and a "preview" tool that operates on tabular files: +:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level (both an "explore" tool and a "preview" tool) that operates on tabular files: .. literalinclude:: ../_static/installation/files/root/external-tools/fabulousFileTool.json +:download:`auxFileTool.json <../_static/installation/files/root/external-tools/auxFileTool.json>` is a file level preview tool that operates on auxiliary files associated with a data file (note the "requirements" section): + +.. literalinclude:: ../_static/installation/files/root/external-tools/auxFileTool.json + External Tools for Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -86,13 +92,15 @@ Terminology scope Whether the external tool appears and operates at the **file** level or the **dataset** level. Note that a file level tool much also specify the type of file it operates on (see "contentType" below). - types Whether the external tool is an **explore** tool, a **preview** tool, a **configure** tool or any combination of these (multiple types are supported for a single tool). Configure tools require an API token because they make changes to data files (files within datasets). Configure tools are currently not supported at the dataset level. The older "type" keyword that allows you to pass a single type as a string is deprecated but still supported. + types Whether the external tool is an **explore** tool, a **preview** tool, a **query** tool, a **configure** tool or any combination of these (multiple types are supported for a single tool). 
Configure tools require an API token because they make changes to data files (files within datasets). Configure tools are currently not supported at the dataset level. The older "type" keyword that allows you to pass a single type as a string is deprecated but still supported. toolUrl The **base URL** of the tool before query parameters are added. contentType File level tools operate on a specific **file type** (content type or MIME type such as "application/pdf") and this must be specified. Dataset level tools do not use contentType. - toolParameters **Query parameters** are supported and described below. + toolParameters **httpMethod**, **queryParameters**, and **allowedApiCalls** are supported and described below. + + httpMethod Either ``GET`` or ``POST``. queryParameters **Key/value combinations** that can be appended to the toolUrl. For example, once substitution takes place (described below) the user may be redirected to ``https://fabulousfiletool.com?fileId=42&siteUrl=http://demo.dataverse.org``. @@ -102,6 +110,20 @@ Terminology reserved words A **set of strings surrounded by curly braces** such as ``{fileId}`` or ``{datasetId}`` that will be inserted into query parameters. See the table below for a complete list. + allowedApiCalls An array of objects defining callbacks the tool is allowed to make to the Dataverse API. If the dataset or file being accessed is not public, the callback URLs will be signed to allow the tool access for a defined time. + + allowedApiCalls name A name the tool will use to identify this callback URL such as ``retrieveDataFile``. + + allowedApiCalls urlTemplate The relative URL for the callback using reserved words to indicate where values should by dynamically substituted such as ``/api/v1/datasets/{datasetId}``. + + allowedApiCalls httpMethod Which HTTP method the specified callback uses such as ``GET`` or ``POST``. + + allowedApiCalls timeOut For non-public datasets and datafiles, how many minutes the signed URLs given to the tool should be valid for. Must be an integer. + + requirements **Resources your tool needs to function.** For now, the only requirement you can specify is that one or more auxiliary files exist (see auxFilesExist in the :ref:`tools-for-files` example). Currently, requirements only apply to preview tools. If the requirements are not met, the preview tool is not shown. + + auxFilesExist **An array containing formatTag and formatVersion pairs** for each auxiliary file that your tool needs to download to function properly. For example, a required aux file could have a ``formatTag`` of "NcML" and a ``formatVersion`` of "1.0". See also :doc:`/developers/aux-file-support`. + toolName A **name** of an external tool that is used to differentiate between external tools and also used in bundle.properties for localization in the Dataverse installation web interface. For example, the toolName for Data Explorer is ``explorer``. For the Data Curation Tool the toolName is ``dct``. This is an optional parameter in the manifest JSON file. =========================== ========== @@ -131,6 +153,25 @@ Reserved Words ``{localeCode}`` optional The code for the language ("en" for English, "fr" for French, etc.) that user has selected from the language toggle in a Dataverse installation. See also :ref:`i18n`. =========================== ========== =========== +.. _api-exttools-auth: + +Authorization Options ++++++++++++++++++++++ + +When called for datasets or data files that are not public (i.e. 
in a draft dataset or for a restricted file), external tools are allowed access via the user's credentials. This is accomplished by one of two mechanisms: + +* Signed URLs (more secure, recommended) + + - Configured via the ``allowedApiCalls`` section of the manifest. The tool will be provided with signed URLs allowing the specified access to the given dataset or datafile for the specified amount of time. The tool will not be able to access any other datasets or files the user may have access to and will not be able to make calls other than those specified. + - For tools invoked via a GET call, Dataverse will include a callback query parameter with a Base64 encoded value. The decoded value is a signed URL that can be called to retrieve a JSON response containing all of the queryParameters and allowedApiCalls specified in the manifest. + - For tools invoked via POST, Dataverse will send a JSON body including the requested queryParameters and allowedApiCalls. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. + +* API Token (deprecated, less secure, not recommended) + + - Configured via the ``queryParameters`` by including an ``{apiToken}`` value. When this is present Dataverse will send the user's apiToken to the tool. With the user's API token, the tool can perform any action via the Dataverse API that the user could. External tools configured via this method should be assessed for their trustworthiness. + - For tools invoked via GET, this will be done via a query parameter in the request URL which could be cached in the browser's history. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. + - For tools invoked via POST, Dataverse will send a JSON body including the apiToken. + Internationalization of Your External Tool ++++++++++++++++++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index c465b726421..a6f6c259a25 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -11,7 +11,7 @@ Servers You Can Test With Rather than using a production Dataverse installation, API users are welcome to use http://demo.dataverse.org for testing. You can email support@dataverse.org if you have any trouble with this server. -If you would rather have full control over your own test server, deployments to AWS, Docker, Vagrant, and more are covered in the :doc:`/developers/index` and the :doc:`/installation/index`. +If you would rather have full control over your own test server, deployments to AWS, Docker, and more are covered in the :doc:`/developers/index` and the :doc:`/installation/index`. Getting an API Token -------------------- @@ -52,6 +52,20 @@ If you ever want to check an environment variable, you can "echo" it like this: echo $SERVER_URL +With curl version 7.56.0 and higher, it is recommended to use --form-string with an outer quote rather than the -F flag without an outer quote. + +For example, the curl command parameter below might cause an error such as ``warning: garbage at end of field specification: ,"categories":["Data"]}``. + +.. code-block:: bash + + -F jsonData={\"description\":\"My description.\",\"categories\":[\"Data\"]} + +Instead, use --form-string with an outer quote. See https://github.com/curl/curl/issues/2022 + +.. 
code-block:: bash + + --form-string 'jsonData={"description":"My description.","categories":["Data"]}' + If you don't like curl, don't have curl, or want to use a different programming language, you are encouraged to check out the Python, Javascript, R, and Java options in the :doc:`client-libraries` section. .. _curl: https://curl.haxx.se diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 6a878d73a98..28ac33ea228 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -72,7 +72,7 @@ Return Formats There are a number of API calls that provide time series, information reported per item (e.g. per dataset, per file, by subject, by category, and by file Mimetype), or both (time series per item). Because these calls all report more than a single number, the API provides two optional formats for the return that can be selected by specifying an HTTP Accept Header for the desired format: -* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or it's id/pid (for a dataset or datafile). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. +* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or its id/pid (for a dataset or datafile, which may or may not have a PID). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. * Example: ``curl -H 'Accept:application/json' https://demo.dataverse.org/api/info/metrics/downloads/monthly`` @@ -120,7 +120,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/makeDataCount/viewsT Endpoint Table -------------- -The following table lists the available metrics endpoints (not including the Make Data Counts endpoints a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. +The following table lists the available metrics endpoints (not including the Make Data Counts endpoints for a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. ..
csv-table:: Metrics Endpoints @@ -158,8 +158,8 @@ The following table lists the available metrics endpoints (not including the Mak /api/info/metrics/uniquedownloads,"pid, count",json,collection subtree,published,y,total count of unique users who have downloaded from the datasets in scope,The use case for this metric (uniquedownloads) is to more fairly assess which datasets are getting downloaded/used by only counting each users who downloads any file from a dataset as one count (versus downloads of multiple files or repeat downloads counting as multiple counts which adds a bias for large datasets and/or use patterns where a file is accessed repeatedly for new analyses) /api/info/metrics/uniquedownloads/monthly,"date, pid, count","json, csv",collection subtree,published,y,monthly cumulative timeseries of unique user counts for datasets in the dataverse scope, /api/info/metrics/uniquedownloads/toMonth/{yyyy-MM},"pid, count",json,collection subtree,published,y,cumulative count of unique users who have downloaded from the datasets in scope through specified month, - /api/info/metrics/filedownloads/monthly,"date, count, id, pid","json, csv",collection subtree,published,y,"monthly cumulative timeseries by file id, pid from first date of first entry to now","unique downloads (as defined above) per month by file (id, pid) sorted in decreasing order of counts" /api/info/metrics/uniquefiledownloads,"count by id, pid","json, csv",collection subtree,published,y,as of now/totals,unique download counts per file id. PIDs are also included in output if they exist + /api/info/metrics/uniquefiledownloads/monthly,"date, count, id, pid","json, csv",collection subtree,published,y,"monthly cumulative timeseries by file id, pid from first date of first entry to now","unique downloads per month by file (id, pid) sorted in decreasing order of counts" /api/info/metrics/uniquefiledownloads/toMonth/{yyyy-MM},"count by id, pid","json, csv",collection subtree,published,y,cumulative up to month specified,unique download counts per file id to the specified month. 
PIDs are also included in output if they exist /api/info/metrics/tree,"id, ownerId, alias, depth, name, children",json,collection subtree,published,y,"tree of dataverses starting at the root or a specified parentAlias with their id, owner id, alias, name, a computed depth, and array of children dataverses","underlying code can also include draft dataverses, this is not currently accessible via api, depth starts at 0" /api/info/metrics/tree/toMonth/{yyyy-MM},"id, ownerId, alias, depth, name, children",json,collection subtree,published,y,"tree of dataverses in existence as of specified date starting at the root or a specified parentAlias with their id, owner id, alias, name, a computed depth, and array of children dataverses","underlying code can also include draft dataverses, this is not currently accessible via api, depth starts at 0" diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 93e1c36f179..4d9466703e4 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -56,13 +56,13 @@ Next you need to figure out the alias or database id of the "parent" Dataverse c export SERVER_URL=https://demo.dataverse.org export PARENT=root - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$PARENT --upload-file dataverse-complete.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$PARENT" --upload-file dataverse-complete.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root --upload-file dataverse-complete.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root" --upload-file dataverse-complete.json You should expect an HTTP 200 response and JSON beginning with "status":"OK" followed by a representation of the newly-created Dataverse collection. @@ -80,13 +80,13 @@ To view a published Dataverse collection: export SERVER_URL=https://demo.dataverse.org export ID=root - curl $SERVER_URL/api/dataverses/$ID + curl "$SERVER_URL/api/dataverses/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/dataverses/root + curl "https://demo.dataverse.org/api/dataverses/root" To view an unpublished Dataverse collection: @@ -96,13 +96,13 @@ To view an unpublished Dataverse collection: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root" Delete a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -117,13 +117,13 @@ Deletes the Dataverse collection whose database ID or alias is given: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X DELETE $SERVER_URL/api/dataverses/$ID + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/dataverses/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X DELETE https://demo.dataverse.org/api/dataverses/root + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/dataverses/root" .. _show-contents-of-a-dataverse-api: @@ -140,13 +140,13 @@ Show Contents of a Dataverse Collection export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/contents + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/contents" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/contents + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/contents" Report the data (file) size of a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -159,13 +159,13 @@ Shows the combined size in bytes of all the files uploaded into the Dataverse co export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/storagesize + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storagesize" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/storagesize + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/storagesize" The size of published and unpublished files will be summed both in the Dataverse collection specified and beneath all its sub-collections, recursively. By default, only the archival files are counted - i.e., the files uploaded by users (plus the tab-delimited versions generated for tabular data files on ingest). If the optional argument ``includeCached=true`` is specified, the API will also add the sizes of all the extra files generated and cached by the Dataverse installation - the resized thumbnail versions for image files, the metadata exports for published datasets, etc. @@ -181,13 +181,13 @@ All the roles defined directly in the Dataverse collection identified by ``id``: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/roles + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/roles" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/roles + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/roles" List Facets Configured for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -200,13 +200,13 @@ List Facets Configured for a Dataverse Collection export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/facets + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/facets" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/facets + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/facets" Set Facets for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -219,16 +219,18 @@ Assign search facets for a given Dataverse collection identified by ``id``: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/dataverses/$ID/facets --upload-file dataverse-facets.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/facets" --upload-file dataverse-facets.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/facets --upload-file dataverse-facets.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/facets" --upload-file dataverse-facets.json Where :download:`dataverse-facets.json <../_static/api/dataverse-facets.json>` contains a JSON encoded list of metadata keys (e.g. ``["authorName","authorAffiliation"]``). +.. _metadata-block-facet-api: + List Metadata Block Facets Configured for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -240,13 +242,13 @@ List Metadata Block Facets Configured for a Dataverse Collection export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/metadatablockfacets + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/metadatablockfacets" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/metadatablockfacets + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/metadatablockfacets" Set Metadata Block Facets for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -263,13 +265,13 @@ To clear the metadata blocks set by a parent collection, submit an empty array ( export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" $SERVER_URL/api/dataverses/$ID/metadatablockfacets --upload-file metadata-block-facets.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" "$SERVER_URL/api/dataverses/$ID/metadatablockfacets" --upload-file metadata-block-facets.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" https://demo.dataverse.org/api/dataverses/root/metadatablockfacets --upload-file metadata-block-facets.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-type:application/json" "https://demo.dataverse.org/api/dataverses/root/metadatablockfacets" --upload-file metadata-block-facets.json Where :download:`metadata-block-facets.json <../_static/api/metadata-block-facets.json>` contains a JSON encoded list of metadata block names (e.g. ``["socialscience","geospatial"]``). This endpoint supports an empty list (e.g. ``[]``) @@ -288,13 +290,15 @@ When updating the root to false, it will clear any metadata block facets from th export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X POST -H "Content-type:application/json" $SERVER_URL/api/dataverses/$ID/metadatablockfacets/isRoot -d 'true' + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" "$SERVER_URL/api/dataverses/$ID/metadatablockfacets/isRoot" -d 'true' The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" https://demo.dataverse.org/api/dataverses/root/metadatablockfacets/isRoot -d 'true' + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-type:application/json" "https://demo.dataverse.org/api/dataverses/root/metadatablockfacets/isRoot" -d 'true' + +.. _create-role-in-collection: Create a New Role in a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -307,24 +311,15 @@ Creates a new role under Dataverse collection ``id``. Needs a json file with the export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$ID/roles --upload-file roles.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/roles" --upload-file roles.json The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" https://demo.dataverse.org/api/dataverses/root/roles --upload-file roles.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-type:application/json" "https://demo.dataverse.org/api/dataverses/root/roles" --upload-file roles.json -Where ``roles.json`` looks like this:: - - { - "alias": "sys1", - "name": “Restricted System Role”, - "description": “A person who may only add datasets.”, - "permissions": [ - "AddDataset" - ] - } +For ``roles.json`` see :ref:`json-representation-of-a-role` .. note:: Only a Dataverse installation account with superuser permissions is allowed to create roles in a Dataverse Collection. @@ -341,13 +336,13 @@ List all the role assignments at the given Dataverse collection: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/assignments + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/assignments" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/assignments + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/assignments" Assign Default Role to User Creating a Dataset in a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -361,13 +356,13 @@ Assign a default role to a user creating a dataset in a Dataverse collection ``i export ID=root export ROLE_ALIAS=curator - curl -H X-Dataverse-key:$API_TOKEN -X PUT $SERVER_URL/api/dataverses/$ID/defaultContributorRole/$ROLE_ALIAS + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$ID/defaultContributorRole/$ROLE_ALIAS" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X PUT https://demo.dataverse.org/api/dataverses/root/defaultContributorRole/curator + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/root/defaultContributorRole/curator" Note: You may use "none" as the ``ROLE_ALIAS``. This will prevent a user who creates a dataset from having any role on that dataset. It is not recommended for Dataverse collections with human contributors. @@ -384,13 +379,13 @@ Assigns a new role, based on the POSTed JSON: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X POST -H "Content-Type: application/json" $SERVER_URL/api/dataverses/$ID/assignments --upload-file role.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" "$SERVER_URL/api/dataverses/$ID/assignments" --upload-file role.json The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-Type: application/json" https://demo.dataverse.org/api/dataverses/root/assignments --upload-file role.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-Type: application/json" "https://demo.dataverse.org/api/dataverses/root/assignments" --upload-file role.json POSTed JSON example (the content of ``role.json`` file):: @@ -413,13 +408,13 @@ Delete the assignment whose id is ``$id``: export ID=root export ASSIGNMENT_ID=6 - curl -H X-Dataverse-key:$API_TOKEN -X DELETE $SERVER_URL/api/dataverses/$ID/assignments/$ASSIGNMENT_ID + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/dataverses/$ID/assignments/$ASSIGNMENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X DELETE https://demo.dataverse.org/api/dataverses/root/assignments/6 + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/dataverses/root/assignments/6" List Metadata Blocks Defined on a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -436,13 +431,13 @@ Please note that an API token is only required if the Dataverse collection has n export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/metadatablocks + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/metadatablocks" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/metadatablocks + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/metadatablocks" Define Metadata Blocks for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -461,13 +456,13 @@ The metadata blocks that are available with a default Dataverse installation are export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$ID/metadatablocks -H \"Content-type:application/json\" --upload-file define-metadatablocks.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/metadatablocks" -H "Content-type:application/json" --upload-file define-metadatablocks.json The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" --upload-file define-metadatablocks.json https://demo.dataverse.org/api/dataverses/root/metadatablocks + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-type:application/json" --upload-file define-metadatablocks.json "https://demo.dataverse.org/api/dataverses/root/metadatablocks" Determine if a Dataverse Collection Inherits Its Metadata Blocks from Its Parent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -480,13 +475,13 @@ Get whether the Dataverse collection is a metadata block root, or does it uses i export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/metadatablocks/isRoot + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/metadatablocks/isRoot" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/metadatablocks/isRoot + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/metadatablocks/isRoot" Configure a Dataverse Collection to Inherit Its Metadata Blocks from Its Parent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -500,13 +495,13 @@ values are ``true`` and ``false`` (both are valid JSON expressions): export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X PUT $SERVER_URL/api/dataverses/$ID/metadatablocks/isRoot + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$ID/metadatablocks/isRoot" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X PUT https://demo.dataverse.org/api/dataverses/root/metadatablocks/isRoot + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/root/metadatablocks/isRoot" .. note:: Previous endpoints ``$SERVER/api/dataverses/$id/metadatablocks/:isRoot`` and ``POST http://$SERVER/api/dataverses/$id/metadatablocks/:isRoot?key=$apiKey`` are deprecated, but supported. @@ -526,7 +521,61 @@ To create a dataset, you must supply a JSON file that contains at least the foll - Description Text - Subject -As a starting point, you can download :download:`dataset-finch1.json <../../../../scripts/search/tests/data/dataset-finch1.json>` and modify it to meet your needs. (:download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-finch1_fr.json>` is a variant of this file that includes setting the metadata language (see :ref:`:MetadataLanguages`) to French (fr). In addition to this minimal example, you can download :download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-create-new-all-default-fields.json>` which populates all of the metadata fields that ship with a Dataverse installation.) +Submit Incomplete Dataset +^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Note:** This feature requires :ref:`dataverse.api.allow-incomplete-metadata` to be enabled and your Solr +Schema to be up-to-date with the ``datasetValid`` field. + +Providing a ``.../datasets?doNotValidate=true`` query parameter turns off the validation of metadata. 
+In this case, only the "Author Name" is required. For example, a minimal JSON file would look like this: + +.. code-block:: json + :name: dataset-incomplete.json + + { + "datasetVersion": { + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + } + ], + "displayName": "Citation Metadata" + } + } + } + } + +The following is an example HTTP call with deactivated validation: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export PARENT=root + export SERVER_URL=https://demo.dataverse.org + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$PARENT/datasets?doNotValidate=true" --upload-file dataset-incomplete.json -H 'Content-type:application/json' + +**Note:** You may learn about an instance's support for deposition of incomplete datasets via :ref:`info-incomplete-metadata`. + +Submit Dataset +^^^^^^^^^^^^^^ + +As a starting point, you can download :download:`dataset-finch1.json <../../../../scripts/search/tests/data/dataset-finch1.json>` and modify it to meet your needs. (:download:`dataset-finch1_fr.json <../../../../scripts/api/data/dataset-finch1_fr.json>` is a variant of this file that includes setting the metadata language (see :ref:`:MetadataLanguages`) to French (fr). In addition to this minimal example, you can download :download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-create-new-all-default-fields.json>` which populates all of the metadata fields that ship with a Dataverse installation.) The curl command below assumes you have kept the name "dataset-finch1.json" and that this file is in your current working directory. @@ -540,7 +589,7 @@ Next you need to figure out the alias or database id of the "parent" Dataverse c export PARENT=root export SERVER_URL=https://demo.dataverse.org - curl -H X-Dataverse-key:$API_TOKEN -X POST "$SERVER_URL/api/dataverses/$PARENT/datasets" --upload-file dataset-finch1.json -H 'Content-type:application/json' + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$PARENT/datasets" --upload-file dataset-finch1.json -H 'Content-type:application/json' The fully expanded example above (without the environment variables) looks like this: @@ -552,6 +601,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -566,13 +617,13 @@ To import a dataset with an existing persistent identifier (PID), the dataset's export DATAVERSE_ID=root export PERSISTENT_IDENTIFIER=doi:ZZ7/MOSEISLEYDB94 - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets/:import?pid=$PERSISTENT_IDENTIFIER&release=yes --upload-file dataset.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets/:import?pid=$PERSISTENT_IDENTIFIER&release=yes" --upload-file dataset.json The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/datasets/:import?pid=doi:ZZ7/MOSEISLEYDB94&release=yes --upload-file dataset.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/datasets/:import?pid=doi:ZZ7/MOSEISLEYDB94&release=yes" --upload-file dataset.json The ``pid`` parameter holds a persistent identifier (such as a DOI or Handle). The import will fail if no PID is provided, or if the provided PID fails validation. @@ -607,13 +658,13 @@ To import a dataset with an existing persistent identifier (PID), you have to pr export DATAVERSE_ID=root export PERSISTENT_IDENTIFIER=doi:ZZ7/MOSEISLEYDB94 - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets/:importddi?pid=$PERSISTENT_IDENTIFIER&release=yes --upload-file ddi_dataset.xml + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets/:importddi?pid=$PERSISTENT_IDENTIFIER&release=yes" --upload-file ddi_dataset.xml The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/datasets/:importddi?pid=doi:ZZ7/MOSEISLEYDB94&release=yes --upload-file ddi_dataset.xml + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/datasets/:importddi?pid=doi:ZZ7/MOSEISLEYDB94&release=yes" --upload-file ddi_dataset.xml The optional ``pid`` parameter holds a persistent identifier (such as a DOI or Handle). The import will fail if the provided PID fails validation. @@ -643,13 +694,13 @@ In order to publish a Dataverse collection, you must know either its "alias" (wh export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$ID/actions/:publish + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/actions/:publish" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/actions/:publish + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/actions/:publish" You should expect a 200 ("OK") response and JSON output. @@ -672,13 +723,31 @@ In order to retrieve the Guestbook Responses for a Dataverse collection, you mus export GUESTBOOK_ID=1 export FILENAME=myResponses.csv - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/guestbookResponses?guestbookId=$GUESTBOOK_ID -o $FILENAME + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/guestbookResponses?guestbookId=$GUESTBOOK_ID" -o $FILENAME The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/guestbookResponses?guestbookId=1 -o myResponses.csv + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/guestbookResponses?guestbookId=1" -o myResponses.csv + +.. _collection-attributes-api: + +Change Collection Attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/attribute/$ATTRIBUTE?value=$VALUE" + +The following attributes are supported: + +* ``alias`` Collection alias +* ``name`` Name +* ``description`` Description +* ``affiliation`` Affiliation +* ``filePIDsEnabled`` ("true" or "false") Restricted to use by superusers and only when the :ref:`:AllowEnablingFilePIDsPerCollection <:AllowEnablingFilePIDsPerCollection>` setting is true. Enables or disables registration of file-level PIDs in datasets within the collection (overriding the instance-wide setting). + Datasets -------- @@ -705,13 +774,13 @@ Example: Getting the dataset whose DOI is *10.5072/FK2/J8SJZB*: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:$API_TOKEN" "https://demo.dataverse.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB" Getting its draft version: @@ -720,29 +789,28 @@ Getting its draft version: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB - curl -H "X-Dataverse-key:$API_TOKEN" http://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER + curl -H "X-Dataverse-key:$API_TOKEN" "http://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:$API_TOKEN" "https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB" - -|CORS| Show the dataset whose id is passed: +|CORS| Show the dataset whose database id is passed: .. code-block:: bash export SERVER_URL=https://demo.dataverse.org - export ID=408730 + export ID=24 - curl $SERVER_URL/api/datasets/$ID + curl "$SERVER_URL/api/datasets/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/408730 + curl "https://demo.dataverse.org/api/datasets/24" The dataset id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). @@ -756,13 +824,13 @@ List Versions of a Dataset export SERVER_URL=https://demo.dataverse.org export ID=24 - curl $SERVER_URL/api/datasets/$ID/versions + curl "$SERVER_URL/api/datasets/$ID/versions" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl https://demo.dataverse.org/api/datasets/24/versions + curl "https://demo.dataverse.org/api/datasets/24/versions" It returns a list of versions with their metadata, and file list: @@ -827,13 +895,13 @@ Get Version of a Dataset export ID=24 export VERSION=1.0 - curl $SERVER_URL/api/datasets/$ID/versions/$VERSION + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/24/versions/1.0 + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0" .. _export-dataset-metadata-api: @@ -850,13 +918,13 @@ See also :ref:`batch-exports-through-the-api` and the note below: export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB export METADATA_FORMAT=ddi - curl $SERVER_URL/api/datasets/export?exporter=$METADATA_FORMAT&persistentId=PERSISTENT_IDENTIFIER + curl "$SERVER_URL/api/datasets/export?exporter=$METADATA_FORMAT&persistentId=$PERSISTENT_IDENTIFIER" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/export?exporter=ddi&persistentId=doi:10.5072/FK2/J8SJZB + curl "https://demo.dataverse.org/api/datasets/export?exporter=ddi&persistentId=doi:10.5072/FK2/J8SJZB" .. note:: Supported exporters (export formats) are ``ddi``, ``oai_ddi``, ``dcterms``, ``oai_dc``, ``schema.org`` , ``OAI_ORE`` , ``Datacite``, ``oai_datacite`` and ``dataverse_json``. Descriptive names can be found under :ref:`metadata-export-formats` in the User Guide. @@ -882,13 +950,13 @@ List Files in a Dataset export ID=24 export VERSION=1.0 - curl $SERVER_URL/api/datasets/$ID/versions/$VERSION/files + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION/files" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/24/versions/1.0/files + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" View Dataset Files and Folders as a Directory Index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -897,9 +965,9 @@ View Dataset Files and Folders as a Directory Index .. code-block:: bash - curl $SERVER_URL/api/datasets/${ID}/dirindex/ + curl "$SERVER_URL/api/datasets/${ID}/dirindex/" # or - curl ${SERVER_URL}/api/datasets/:persistentId/dirindex?persistentId=doi:${PERSISTENT_ID} + curl "${SERVER_URL}/api/datasets/:persistentId/dirindex?persistentId=doi:${PERSISTENT_ID}" Optional parameters: @@ -997,13 +1065,13 @@ List All Metadata Blocks for a Dataset export ID=24 export VERSION=1.0 - curl $SERVER_URL/api/datasets/$ID/versions/$VERSION/metadata + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION/metadata" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/24/versions/1.0/metadata + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/metadata" List Single Metadata Block for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1017,13 +1085,13 @@ List Single Metadata Block for a Dataset export VERSION=1.0 export METADATA_BLOCK=citation - curl $SERVER_URL/api/datasets/$ID/versions/$VERSION/metadata/$METADATA_BLOCK + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION/metadata/$METADATA_BLOCK" The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl https://demo.dataverse.org/api/datasets/24/versions/1.0/metadata/citation + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/metadata/citation" Update Metadata For a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1040,15 +1108,15 @@ For example, after making your edits, your JSON file might look like :download:` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/BCCP9Z - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER --upload-file dataset-update-metadata.json + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER" --upload-file dataset-update-metadata.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/BCCP9Z --upload-file dataset-update-metadata.json + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/BCCP9Z" --upload-file dataset-update-metadata.json -Note that in the example JSON file above, there is a single JSON object with ``metadataBlocks`` as a key. When you download a representation of your dataset in JSON format, the ``metadataBlocks`` object you need is nested inside another object called ``datasetVersion``. To extract just the ``metadataBlocks`` key when downloading a JSON representation, you can use a tool such as ``jq`` like this: +Note that in the example JSON file above, there are only two JSON objects with the ``license`` and ``metadataBlocks`` keys respectively. When you download a representation of your latest dataset version in JSON format, these objects will be nested inside another object called ``data`` in the API response. Keep in mind that there may be more objects in there, in addition to ``license`` and ``metadataBlocks``, that you may need to preserve and re-import as well. Basically, you need everything in there except for the ``files``. This can be achieved by downloading the metadata and selecting the sections you need with a JSON tool such as ``jq``, like this: .. code-block:: bash @@ -1056,15 +1124,18 @@ Note that in the example JSON file above, there is a single JSON object with ``m export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/BCCP9Z - curl -H "X-Dataverse-key: $API_TOKEN" $SERVER_URL/api/datasets/:persistentId/versions/:latest?persistentId=$PERSISTENT_IDENTIFIER | jq '.data | {metadataBlocks: .metadataBlocks}' > dataset-update-metadata.json - + curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/versions/:latest?persistentId=$PERSISTENT_IDENTIFIER" | jq '.data | del(.files)' > dataset-update-metadata.json + The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/datasets/:persistentId/versions/:latest?persistentId=doi:10.5072/FK2/BCCP9Z | jq '.data | {metadataBlocks: .metadataBlocks}' > dataset-update-metadata.json + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/:persistentId/versions/:latest?persistentId=doi:10.5072/FK2/BCCP9Z" | jq '.data | del(.files)' > dataset-update-metadata.json + + +Now you can edit the JSON produced by the command above with a text editor of your choice, for example with ``vi`` as shown below. -Now that the resulting JSON file only contains the ``metadataBlocks`` key, you can edit the JSON such as with ``vi`` in the example below:: +Note that you don't need to edit the top-level fields such as ``versionNumber``, ``minorVersionNumber``, ``versionState`` or any of the time stamps - these will be automatically updated as needed by the API:: vi dataset-update-metadata.json @@ -1083,13 +1154,13 @@ Alternatively to replacing an entire dataset version with its JSON representatio export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/BCCP9Z - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/editMetadata/?persistentId=$PERSISTENT_IDENTIFIER --upload-file dataset-add-metadata.json + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/datasets/:persistentId/editMetadata/?persistentId=$PERSISTENT_IDENTIFIER" --upload-file dataset-add-metadata.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/:persistentId/editMetadata/?persistentId=doi:10.5072/FK2/BCCP9Z --upload-file dataset-add-metadata.json + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/:persistentId/editMetadata/?persistentId=doi:10.5072/FK2/BCCP9Z" --upload-file dataset-add-metadata.json You may also replace existing metadata in dataset fields with the following (adding the parameter replace=true): @@ -1099,13 +1170,13 @@ You may also replace existing metadata in dataset fields with the following (add export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/BCCP9Z - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/editMetadata?persistentId=$PERSISTENT_IDENTIFIER&replace=true --upload-file dataset-update-metadata.json + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/datasets/:persistentId/editMetadata?persistentId=$PERSISTENT_IDENTIFIER&replace=true" --upload-file dataset-update-metadata.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/:persistentId/editMetadata/?persistentId=doi:10.5072/FK2/BCCP9Z&replace=true --upload-file dataset-update-metadata.json + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/:persistentId/editMetadata/?persistentId=doi:10.5072/FK2/BCCP9Z&replace=true" --upload-file dataset-update-metadata.json For these edits your JSON file need only include those dataset fields which you would like to edit.
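For illustration only, a minimal sketch of such a file might look like the following (the exact structure should be taken from the downloadable sample referenced below; ``subtitle`` is simply used here as an example of a citation block field name):

.. code-block:: json

  {
    "fields": [
      {
        "typeName": "subtitle",
        "value": "An updated subtitle"
      }
    ]
  }
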
A sample JSON file may be downloaded here: :download:`dataset-edit-metadata-sample.json <../_static/api/dataset-edit-metadata-sample.json>` @@ -1120,13 +1191,13 @@ You may delete some of the metadata of a dataset version by supplying a file wit export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/BCCP9Z - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/deleteMetadata/?persistentId=$PERSISTENT_IDENTIFIER --upload-file dataset-delete-author-metadata.json + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/datasets/:persistentId/deleteMetadata/?persistentId=$PERSISTENT_IDENTIFIER" --upload-file dataset-delete-author-metadata.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/:persistentId/deleteMetadata/?persistentId=doi:10.5072/FK2/BCCP9Z --upload-file dataset-delete-author-metadata.json + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/:persistentId/deleteMetadata/?persistentId=doi:10.5072/FK2/BCCP9Z" --upload-file dataset-delete-author-metadata.json For these deletes your JSON file must include an exact match of those dataset fields which you would like to delete. A sample JSON file may be downloaded here: :download:`dataset-delete-author-metadata.json <../_static/api/dataset-delete-author-metadata.json>` @@ -1175,13 +1246,13 @@ Deletes the draft version of dataset ``$ID``. Only the draft version can be dele export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID/versions/:draft + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/versions/:draft" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/versions/:draft + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/versions/:draft" Set Citation Date Field Type for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1196,13 +1267,13 @@ Note that the dataset citation date field type must be a date field. export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB export DATASET_FIELD_TYPE_NAME=dateOfDeposit - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/citationdate?persistentId=$PERSISTENT_IDENTIFIER --data "$DATASET_FIELD_TYPE_NAME" + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/datasets/:persistentId/citationdate?persistentId=$PERSISTENT_IDENTIFIER" --data "$DATASET_FIELD_TYPE_NAME" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/:persistentId/citationdate?persistentId=doi:10.5072/FK2/J8SJZB --data "dateOfDeposit" + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/:persistentId/citationdate?persistentId=doi:10.5072/FK2/J8SJZB" --data "dateOfDeposit" Revert Citation Date Field Type to Default for Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1215,13 +1286,13 @@ Restores the default citation date field type, ``:publicationDate``, for a given export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/:persistentId/citationdate?persistentId=$PERSISTENT_IDENTIFIER + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/:persistentId/citationdate?persistentId=$PERSISTENT_IDENTIFIER" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/:persistentId/citationdate?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/:persistentId/citationdate?persistentId=doi:10.5072/FK2/J8SJZB" .. _list-roles-on-a-dataset-api: @@ -1236,13 +1307,13 @@ Lists all role assignments on a given dataset: export SERVER_URL=https://demo.dataverse.org export ID=2347 - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/datasets/$ID/assignments + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/$ID/assignments" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/datasets/2347/assignments + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/2347/assignments" .. _assign-role-on-a-dataset-api: @@ -1257,13 +1328,13 @@ Assigns a new role, based on the POSTed JSON: export SERVER_URL=https://demo.dataverse.org export ID=2347 - curl -H X-Dataverse-key:$API_TOKEN -X POST -H "Content-Type: application/json" $SERVER_URL/api/datasets/$ID/assignments --upload-file role.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" "$SERVER_URL/api/datasets/$ID/assignments" --upload-file role.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-Type: application/json" https://demo.dataverse.org/api/datasets/2347/assignments --upload-file role.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-Type: application/json" "https://demo.dataverse.org/api/datasets/2347/assignments" --upload-file role.json POSTed JSON example (the content of ``role.json`` file):: @@ -1286,13 +1357,13 @@ Delete the assignment whose id is ``$id``: export ID=2347 export ASSIGNMENT_ID=6 - curl -H X-Dataverse-key:$API_TOKEN -X DELETE $SERVER_URL/api/datasets/$ID/assignments/$ASSIGNMENT_ID + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/assignments/$ASSIGNMENT_ID" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X DELETE https://demo.dataverse.org/api/datasets/2347/assignments/6 + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/2347/assignments/6" Create a Private URL for a Dataset @@ -1306,20 +1377,20 @@ Create a Private URL (must be able to manage dataset permissions): export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/datasets/$ID/privateUrl + curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$SERVER_URL/api/datasets/$ID/privateUrl" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/24/privateUrl" If Anonymized Access has been enabled on a Dataverse installation (see the :ref:`:AnonymizedFieldTypeNames` setting), an optional 'anonymizedAccess' query parameter is allowed. Setting anonymizedAccess=true in your call will create a PrivateURL that only allows an anonymized view of the Dataset (see :ref:`privateurl`). .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl?anonymizedAccess=true + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/24/privateUrl?anonymizedAccess=true" Get the Private URL for a Dataset @@ -1333,13 +1404,13 @@ Get a Private URL from a dataset (if available): export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" $SERVER_URL/api/datasets/$ID/privateUrl + curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/$ID/privateUrl" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/datasets/24/privateUrl + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/privateUrl" Delete the Private URL from a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1352,13 +1423,13 @@ Delete a Private URL from a dataset (if it exists): export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID/privateUrl + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/privateUrl" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/privateUrl + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/privateUrl" .. _add-file-api: @@ -1385,13 +1456,13 @@ In the curl example below, all of the above are specified but they are optional. 
export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB - curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F "file=@$FILENAME" -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -F file=@data.tsv -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -F file=@data.tsv -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB" You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded file. @@ -1509,7 +1580,46 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB" -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + +.. _cleanup-storage-api: + +Cleanup storage of a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is an experimental feature and should be tested on your system before using it in production. +Also, make sure that your backups are up-to-date before using this on production servers. +It is advised to first call this method with the ``dryrun`` parameter set to ``true`` before actually deleting the files. +This will allow you to manually inspect the files that would be deleted if that parameter is set to ``false`` or is omitted (a list of the files that would be deleted is provided in the response). + +If your Dataverse installation has been configured to support direct uploads, or in some other situations, +you could end up with some files in the storage of a dataset that are not linked to that dataset directly. Most commonly, this could +happen when an upload fails in the middle of a transfer, i.e. 
if a user does a UI direct upload and leaves the page without hitting cancel or save, +Dataverse doesn't know and doesn't clean up the files. Similarly in the direct upload API, if the final /addFiles call isn't done, the files are abandoned. + +All the files stored in the Dataset storage location that are not in the file list of that Dataset (and follow the naming pattern of the dataset files) can be removed, as shown in the example below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + export DRYRUN=true + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/:persistentId/cleanStorage?persistentId=$PERSISTENT_ID&dryrun=$DRYRUN" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/datasets/:persistentId/cleanStorage?persistentId=doi:10.5072/FK2/J8SJZB&dryrun=true" + +Adding Files To a Dataset via Other Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some circumstances, it may be useful to move or copy files into Dataverse's storage manually or via external tools and then add them to a dataset (i.e. without involving Dataverse in the file transfer itself). +Two API calls are available for this use case to add files to a dataset or to replace files that were already in the dataset. +These calls were developed as part of Dataverse's direct upload mechanism and are detailed in :doc:`/developers/s3-direct-upload-api`. Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1522,13 +1632,13 @@ Shows the combined size in bytes of all the files uploaded into the dataset ``id export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/datasets/$ID/storagesize + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/$ID/storagesize" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/datasets/24/storagesize + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/storagesize" The size of published and unpublished files will be summed in the dataset specified. By default, only the archival files are counted - i.e., the files uploaded by users (plus the tab-delimited versions generated for tabular data files on ingest). If the optional argument ``includeCached=true`` is specified, the API will also add the sizes of all the extra files generated and cached by the Dataverse installation - the resized thumbnail versions for image files, the metadata exports for published datasets, etc. Because this deals with unpublished files the token supplied must have permission to view unpublished drafts. @@ -1546,13 +1656,13 @@ Shows the combined size in bytes of all the files available for download from ve export ID=24 export VERSIONID=1.0 - curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/datasets/$ID/versions/$VERSIONID/downloadsize + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/$ID/versions/$VERSIONID/downloadsize" The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize" The size of all files available for download will be returned. If :draft is passed as versionId the token supplied must have permission to view unpublished drafts. A token is not required for published datasets. Also restricted files will be included in this total regardless of whether the user has access to download the restricted file(s). @@ -1578,6 +1688,8 @@ The fully expanded example above (without environment variables) looks like this The people who need to review the dataset (often curators or journal editors) can check their notifications periodically via API to see if any new datasets have been submitted for review and need their attention. See the :ref:`Notifications` section for details. Alternatively, these curators can simply check their email or notifications to know when datasets have been submitted (or resubmitted) for review. +.. _return-a-dataset: + Return a Dataset to Author ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1605,6 +1717,8 @@ The fully expanded example above (without environment variables) looks like this The review process can sometimes resemble a tennis match, with the authors submitting and resubmitting the dataset over and over until the curators are satisfied. Each time the curators send a "reason for return" via API, that reason is persisted into the database, stored at the dataset version level. +The :ref:`send-feedback` API call may be useful as a way to move the conversation to email. However, note that these emails go to contacts (versus authors) and there is no database record of the email contents. (:ref:`dataverse.mail.cc-support-on-contact-email` will send a copy of these emails to the support email address which would provide a record.) + Link a Dataset ~~~~~~~~~~~~~~ @@ -1617,13 +1731,13 @@ Creates a link between a dataset and a Dataverse collection (see :ref:`dataset-l export DATASET_ID=24 export DATAVERSE_ID=test - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/$DATASET_ID/link/$DATAVERSE_ID + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/datasets/$DATASET_ID/link/$DATAVERSE_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/24/link/test + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/24/link/test" Dataset Locks ~~~~~~~~~~~~~ @@ -1638,13 +1752,13 @@ To check if a dataset is locked: export SERVER_URL=https://demo.dataverse.org export ID=24 - curl $SERVER_URL/api/datasets/$ID/locks + curl "$SERVER_URL/api/datasets/$ID/locks" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/24/locks + curl "https://demo.dataverse.org/api/datasets/24/locks" Optionally, you can check if there's a lock of a specific type on the dataset: @@ -1694,13 +1808,13 @@ The following API end point will lock a Dataset with a lock of specified type. 
N export ID=24 export LOCK_TYPE=Ingest - curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/datasets/$ID/lock/$LOCK_TYPE + curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$SERVER_URL/api/datasets/$ID/lock/$LOCK_TYPE" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/lock/Ingest + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/24/lock/Ingest" Use the following API to unlock the dataset, by deleting all the locks currently on the dataset. Note that this requires “superuser” credentials: @@ -1710,13 +1824,13 @@ Use the following API to unlock the dataset, by deleting all the locks currently export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID/locks + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/locks" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/locks + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/locks" Or, to delete a lock of the type specified only. Note that this requires “superuser” credentials: @@ -1727,13 +1841,13 @@ Or, to delete a lock of the type specified only. Note that this requires “supe export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export ID=24 export LOCK_TYPE=finalizePublication - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID/locks?type=$LOCK_TYPE + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/locks?type=$LOCK_TYPE" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/locks?type=finalizePublication + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/locks?type=finalizePublication" If the dataset is not locked (or if there is no lock of the specified type), the API will exit with a warning message. @@ -1882,13 +1996,13 @@ Delete the dataset whose id is passed: export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID" The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24 + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24" Delete Published Dataset ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1901,13 +2015,13 @@ Normally published datasets should not be deleted, but there exists a "destroy" export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/:persistentId/destroy/?persistentId=$PERSISTENT_ID + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/:persistentId/destroy/?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/:persistentId/destroy/?persistentId=doi:10.5072/FK2/AAA000 + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/:persistentId/destroy/?persistentId=doi:10.5072/FK2/AAA000" Delete with dataset identifier: @@ -1917,13 +2031,13 @@ Delete with dataset identifier: export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID/destroy + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/destroy" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/destroy + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/destroy" Calling the destroy endpoint is permanent and irreversible. It will remove the dataset and its datafiles, then re-index the parent Dataverse collection in Solr. This endpoint requires the API token of a superuser. @@ -2029,10 +2143,167 @@ Archiving is an optional feature that may be configured for a Dataverse installa curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/:persistentId/$VERSION/archivalStatus?persistentId=$PERSISTENT_IDENTIFIER" +Get External Tool Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call is intended as a callback that can be used by :doc:`/installation/external-tools` to retrieve signed Urls necessary for their interaction with Dataverse. +It can be called directly as well. + +The response is a JSON object described in the :doc:`/api/external-tools` section of the API guide. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export VERSION=1.0 + export TOOL_ID=1 + + curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/toolparams/$TOOL_ID?persistentId=$PERSISTENT_IDENTIFIER" + +.. _signposting-api: + +Retrieve Signposting Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dataverse supports :ref:`discovery-sign-posting` as a discovery mechanism. +Signposting involves the addition of a `Link `__ HTTP header providing summary information on GET and HEAD requests to retrieve the dataset page and a separate /linkset API call to retrieve additional information. 
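+
+For a quick look at the header itself, you can issue a HEAD request against the dataset landing page. This is a minimal sketch, assuming the standard ``dataset.xhtml?persistentId=...`` landing page URL pattern and the example DOI used elsewhere in this section:
+
+.. code-block:: bash
+
+   export SERVER_URL=https://demo.dataverse.org
+   export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG
+
+   # -I sends a HEAD request; the Signposting summary is returned in the "Link" response header
+   curl -I "$SERVER_URL/dataset.xhtml?persistentId=$PERSISTENT_IDENTIFIER"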
+ +Here is an example of a "Link" header: + +``Link: ;rel="cite-as", ;rel="describedby";type="application/vnd.citationstyles.csl+json",;rel="describedby";type="application/json+ld", ;rel="type",;rel="type", https://demo.dataverse.org/api/datasets/:persistentId/versions/1.0/customlicense?persistentId=doi:10.5072/FK2/YD5QDG;rel="license", ; rel="linkset";type="application/linkset+json"`` + +The URL for linkset information is discoverable under the ``rel="linkset";type="application/linkset+json"`` entry in the "Link" header, such as in the example above. + +The response includes a JSON object conforming to the `Signposting `__ specification. +Signposting is not supported for draft dataset versions. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + export VERSION=1.0 + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/linkset?persistentId=$PERSISTENT_IDENTIFIER" + +Get Dataset By Private URL Token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2 + + curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN" + +Get Citation +~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + export VERSION=1.0 + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/citation?persistentId=$PERSISTENT_IDENTIFIER" + +Get Citation by Private URL Token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2 + + curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN/citation" + +.. _get-dataset-summary-field-names: + +Get Summary Field Names +~~~~~~~~~~~~~~~~~~~~~~~ + +See :ref:`:CustomDatasetSummaryFields` in the Installation Guide for how the list of dataset fields that summarize a dataset can be customized. Here's how to list them: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/datasets/summaryFieldNames" Files ----- +Get JSON Representation of a File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. + +Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB" + +You may get the draft version of an unpublished file if you pass an API token with view draft permissions: + +..
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB" + + +|CORS| Show the file whose id is passed: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=408730 + + curl "$SERVER_URL/api/files/$ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/files/408730" + +You may get the draft version of a published file if you pass an API token with view draft permissions and use the draft path parameter: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/draft/?persistentId=$PERSISTENT_IDENTIFIER" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/draft/?persistentId=doi:10.5072/FK2/J8SJZB" + +The file id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). + Adding Files ~~~~~~~~~~~~ @@ -2080,13 +2351,13 @@ A curl example using an ``id`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X PUT -d true $SERVER_URL/api/files/$ID/restrict + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT -d true "$SERVER_URL/api/files/$ID/restrict" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT -d true https://demo.dataverse.org/api/files/24/restrict + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT -d true "https://demo.dataverse.org/api/files/24/restrict" A curl example using a ``pid`` @@ -2096,7 +2367,7 @@ A curl example using a ``pid`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X PUT -d true $SERVER_URL/api/files/:persistentId/restrict?persistentId=$PERSISTENT_ID + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT -d true "$SERVER_URL/api/files/:persistentId/restrict?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: @@ -2117,13 +2388,13 @@ A curl example using an ``ID``: export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/files/$ID/uningest + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/uningest" The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/files/24/uningest + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/uningest" A curl example using a ``PERSISTENT_ID``: @@ -2156,13 +2427,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/files/$ID/reingest + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/reingest" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/files/24/reingest + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/reingest" A curl example using a ``PERSISTENT_ID`` @@ -2172,7 +2443,7 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/files/:persistentId/reingest?persistentId=$PERSISTENT_ID + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/reingest?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: @@ -2230,6 +2501,47 @@ Currently the following methods are used to detect file types: - The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. - The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. +.. _extractNcml: + +Extract NcML +~~~~~~~~~~~~ + +As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. + +This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. + +Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + +A curl example using a PID: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + Replacing Files ~~~~~~~~~~~~~~~ @@ -2245,7 +2557,7 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@file.extension' -F 'jsonData={json}' $SERVER_URL/api/files/$ID/replace + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@file.extension' -F 'jsonData={json}' "$SERVER_URL/api/files/$ID/replace" The fully expanded example above (without environment variables) looks like this: @@ -2253,7 +2565,7 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -F 'file=@data.tsv' \ -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}' \ - https://demo.dataverse.org/api/files/24/replace + "https://demo.dataverse.org/api/files/24/replace" A curl example using a ``PERSISTENT_ID`` @@ -2274,10 +2586,16 @@ The fully expanded example above (without environment variables) looks like this -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}' \ "https://demo.dataverse.org/api/files/:persistentId/replace?persistentId=doi:10.5072/FK2/AAA000" -Getting File Metadata -~~~~~~~~~~~~~~~~~~~~~ +Deleting Files +~~~~~~~~~~~~~~ -Provides a json representation of the file metadata for an existing file where ``ID`` is the database id of the file to get metadata from or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. +Delete an existing file where ``ID`` is the database id of the file to delete or ``PERSISTENT_ID`` is the persistent id (DOI or Handle, if it exists) of the file. + +Note that the behavior of deleting files depends on whether the dataset has ever been published. + +- If the dataset has never been published, the file will be deleted forever. +- If the dataset has been published, the file is deleted from the draft (and future published versions). +- If the dataset has been published, the deleted file can still be downloaded because it was part of a published version. A curl example using an ``ID`` @@ -2287,13 +2605,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl $SERVER_URL/api/files/$ID/metadata + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/files/24/metadata + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/24" A curl example using a ``PERSISTENT_ID`` @@ -2303,15 +2621,18 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl "$SERVER_URL/api/files/:persistentId/metadata?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/:persistentId?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: ..
code-block:: bash - curl "https://demo.dataverse.org/api/files/:persistentId/metadata?persistentId=doi:10.5072/FK2/AAA000" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/:persistentId?persistentId=doi:10.5072/FK2/AAA000" -The current draft can also be viewed if you have permissions and pass your API token +Getting File Metadata +~~~~~~~~~~~~~~~~~~~~~ + +Provides a json representation of the file metadata for an existing file where ``ID`` is the database id of the file to get metadata from or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. A curl example using an ``ID`` @@ -2321,13 +2642,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/$ID/metadata/draft + curl "$SERVER_URL/api/files/$ID/metadata" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/24/metadata/draft + curl "https://demo.dataverse.org/api/files/24/metadata" A curl example using a ``PERSISTENT_ID`` @@ -2337,58 +2658,50 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/metadata/draft?persistentId=$PERSISTENT_ID" + curl "$SERVER_URL/api/files/:persistentId/metadata?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/metadata/draft?persistentId=doi:10.5072/FK2/AAA000" - -Note: The ``id`` returned in the json response is the id of the file metadata version. - + curl "https://demo.dataverse.org/api/files/:persistentId/metadata?persistentId=doi:10.5072/FK2/AAA000" +The current draft can also be viewed if you have permissions and pass your API token -Adding File Metadata -~~~~~~~~~~~~~~~~~~~~ +A curl example using an ``ID`` -This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. +.. code-block:: bash -The jsonData object includes values for: + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/metadata/draft" - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +The fully expanded example above (without environment variables) looks like this: -.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. +.. code-block:: bash -A curl example using an ``PERSISTENT_ID`` + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/24/metadata/draft" -* ``SERVER_URL`` - e.g. 
https://demo.dataverse.org -* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. -* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` +A curl example using a ``PERSISTENT_ID`` .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/metadata/draft?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/metadata/draft?persistentId=doi:10.5072/FK2/AAA000" + +Note: The ``id`` returned in the json response is the id of the file metadata version. 
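+
+If you need that file metadata ``id`` for a later call (for example, the per-file "Get External Tool Parameters" endpoint described further down, which takes a FILEMETADATA_ID), one way to capture it is with ``jq``. This is a minimal sketch, assuming ``jq`` is installed and that the ``id`` field sits at the top level of the response; adjust the filter if your installation wraps the payload in a ``data`` envelope:
+
+.. code-block:: bash
+
+   export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+   export SERVER_URL=https://demo.dataverse.org
+   export ID=24
+
+   # store the file metadata id of the draft version for later use
+   export FILEMETADATA_ID=$(curl -s -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/metadata/draft" | jq -r '.id')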
+ Updating File Metadata ~~~~~~~~~~~~~~~~~~~~~~ @@ -2405,7 +2718,7 @@ A curl example using an ``ID`` curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"restrict":false}' \ - $SERVER_URL/api/files/$ID/metadata + "$SERVER_URL/api/files/$ID/metadata" The fully expanded example above (without environment variables) looks like this: @@ -2413,7 +2726,7 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"restrict":false}' \ - http://demo.dataverse.org/api/files/24/metadata + "http://demo.dataverse.org/api/files/24/metadata" A curl example using a ``PERSISTENT_ID`` @@ -2453,13 +2766,13 @@ A curl example using an ``ID`` export ID=24 export FILE=dct.xml - curl -H "X-Dataverse-key:$API_TOKEN" -X PUT $SERVER_URL/api/edit/$ID --upload-file $FILE + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/edit/$ID" --upload-file $FILE The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/edit/24 --upload-file dct.xml + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/edit/24" --upload-file dct.xml You can download :download:`dct.xml <../../../../src/test/resources/xml/dct.xml>` from the example above to see what the XML looks like. @@ -2477,13 +2790,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/$ID/prov-json + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/prov-json" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/24/prov-json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/24/prov-json" A curl example using a ``PERSISTENT_ID`` @@ -2512,13 +2825,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/$ID/prov-freeform + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/prov-freeform" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/24/prov-freeform + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/24/prov-freeform" A curl example using a ``PERSISTENT_ID`` @@ -2549,7 +2862,7 @@ A curl example using an ``ID`` export ENTITY_NAME="..." 
export FILE_PATH=provenance.json - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/files/$ID/prov-json?entityName=$ENTITY_NAME -H "Content-type:application/json" --upload-file $FILE_PATH + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/prov-json?entityName=$ENTITY_NAME" -H "Content-type:application/json" --upload-file $FILE_PATH The fully expanded example above (without environment variables) looks like this: @@ -2589,13 +2902,13 @@ A curl example using an ``ID`` export ID=24 export FILE_PATH=provenance.json - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/files/$ID/prov-freeform -H "Content-type:application/json" --upload-file $FILE_PATH + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/prov-freeform" -H "Content-type:application/json" --upload-file $FILE_PATH The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/files/24/prov-freeform -H "Content-type:application/json" --upload-file provenance.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/prov-freeform" -H "Content-type:application/json" --upload-file provenance.json A curl example using a ``PERSISTENT_ID`` @@ -2614,7 +2927,7 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/prov-freeform?persistentId=doi:10.5072/FK2/AAA000" -H "Content-type:application/json" --upload-file provenance.json -See a sample JSON file :download:`file-provenance.json <../_static/api/file-provenance.json>` from http://openprovenance.org (c.f. Huynh, Trung Dong and Moreau, Luc (2014) ProvStore: a public provenance repository. At 5th International Provenance and Annotation Workshop (IPAW'14), Cologne, Germany, 09-13 Jun 2014. pp. 275-277). +See a sample JSON file :download:`file-provenance.json <../_static/api/file-provenance.json>` from https://openprovenance.org (c.f. Huynh, Trung Dong and Moreau, Luc (2014) ProvStore: a public provenance repository. At 5th International Provenance and Annotation Workshop (IPAW'14), Cologne, Germany, 09-13 Jun 2014. pp. 275-277). Delete Provenance JSON for an uploaded file ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2627,13 +2940,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE $SERVER_URL/api/files/$ID/prov-json + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/$ID/prov-json" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/files/24/prov-json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/24/prov-json" A curl example using a ``PERSISTENT_ID`` @@ -2662,7 +2975,7 @@ Starting with the release 4.10 the size of the saved original file (for an inges export SERVER_URL=https://localhost - curl $SERVER_URL/api/admin/datafiles/integrity/fixmissingoriginalsizes + curl "$SERVER_URL/api/admin/datafiles/integrity/fixmissingoriginalsizes" with limit parameter: @@ -2677,18 +2990,52 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl https://localhost/api/admin/datafiles/integrity/fixmissingoriginalsizes" + curl "https://localhost/api/admin/datafiles/integrity/fixmissingoriginalsizes" with limit parameter: .. code-block:: bash - curl https://localhost/api/admin/datafiles/integrity/fixmissingoriginalsizes?limit=10" + curl "https://localhost/api/admin/datafiles/integrity/fixmissingoriginalsizes?limit=10" Note the optional "limit" parameter. Without it, the API will attempt to populate the sizes for all the saved originals that don't have them in the database yet. Otherwise it will do so for the first N such datafiles. By default, the admin API calls are blocked and can only be called from localhost. See more details in :ref:`:BlockedApiEndpoints <:BlockedApiEndpoints>` and :ref:`:BlockedApiPolicy <:BlockedApiPolicy>` settings in :doc:`/installation/config`. +Get External Tool Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call is intended as a callback that can be used by :doc:`/installation/external-tools` to retrieve signed Urls necessary for their interaction with Dataverse. +It can be called directly as well. (Note that the required FILEMETADATA_ID is the "id" returned in the JSON response from the /api/files/$FILE_ID/metadata call.) + +The response is a JSON object described in the :doc:`/api/external-tools` section of the API guide. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=3 + export FILEMETADATA_ID=1 + export TOOL_ID=1 + + curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/files/$FILE_ID/metadata/$FILEMETADATA_ID/toolparams/$TOOL_ID" + +.. _get-fixity-algorithm: + +Get Fixity Algorithm +~~~~~~~~~~~~~~~~~~~~~~ + +This API call can be used to discover the configured fixity/checksum algorithm being used by a Dataverse installation (as configured by - :ref:`:FileFixityChecksumAlgorithm`). +Currently, the possible values are MD5, SHA-1, SHA-256, and SHA-512. +This algorithm will be used when the Dataverse software manages a file upload and should be used by external clients uploading files to a Dataverse instance. (Existing files may or may not have checksums with this algorithm.) + +.. 
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/files/fixityAlgorithm" + + Users Token Management ---------------------- @@ -2699,21 +3046,21 @@ Find a Token's Expiration Date In order to obtain the expiration date of a token use:: - curl -H X-Dataverse-key:$API_TOKEN -X GET $SERVER_URL/api/users/token + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/users/token" Recreate a Token ~~~~~~~~~~~~~~~~ In order to obtain a new token use:: - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/users/token/recreate + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/users/token/recreate" Delete a Token ~~~~~~~~~~~~~~ In order to delete a token use:: - curl -H X-Dataverse-key:$API_TOKEN -X DELETE $SERVER_URL/api/users/token + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/users/token" @@ -2740,26 +3087,14 @@ Optionally, you may use a third query parameter "sendEmailNotification=false" to Roles ----- -Create a New Role in a Dataverse Collection -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A role is a set of permissions. -Creates a new role under Dataverse collection ``id``. Needs a json file with the role description: - -.. code-block:: bash +.. _json-representation-of-a-role: - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export ID=root - - curl -H X-Dataverse-key:$API_TOKEN -X POST -H "Content-type:application/json" $SERVER_URL/api/dataverses/$ID/roles --upload-file roles.json - -The fully expanded example above (without environment variables) looks like this: - -.. code-block:: bash - - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" https://demo.dataverse.org/api/dataverses/root/roles --upload-file roles.json +JSON Representation of a Role +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Where ``roles.json`` looks like this:: +The JSON representation of a role (``roles.json``) looks like this:: { "alias": "sys1", @@ -2770,8 +3105,12 @@ Where ``roles.json`` looks like this:: ] } -.. note:: Only a Dataverse installation account with superuser permissions is allowed to create roles in a Dataverse Collection. +.. note:: alias is constrained to a length of 16 characters +Create Role +~~~~~~~~~~~ + +Roles can be created globally (:ref:`create-global-role`) or for individual Dataverse collections (:ref:`create-role-in-collection`). Show Role ~~~~~~~~~ @@ -2791,13 +3130,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE $SERVER_URL/api/roles/$ID + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/roles/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/roles/24 + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/roles/24" A curl example using a Role alias ``ALIAS`` @@ -2813,7 +3152,7 @@ The fully expanded example above (without environment variables) looks like this .. 
code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/roles/:alias?alias=roleAlias + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/roles/:alias?alias=roleAlias" Explicit Groups @@ -2906,13 +3245,13 @@ Show Dataverse Software Version and Build Number export SERVER_URL=https://demo.dataverse.org - curl $SERVER_URL/api/info/version + curl "$SERVER_URL/api/info/version" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/info/version + curl "https://demo.dataverse.org/api/info/version" Show Dataverse Installation Server Name ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2925,13 +3264,13 @@ Get the server name. This is useful when a Dataverse installation is composed of export SERVER_URL=https://demo.dataverse.org - curl $SERVER_URL/api/info/server + curl "$SERVER_URL/api/info/server" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/info/server + curl "https://demo.dataverse.org/api/info/server" Show Custom Popup Text for Publishing Datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2944,13 +3283,13 @@ For now, only the value for the :ref:`:DatasetPublishPopupCustomText` setting fr export SERVER_URL=https://demo.dataverse.org - curl $SERVER_URL/api/info/settings/:DatasetPublishPopupCustomText + curl "$SERVER_URL/api/info/settings/:DatasetPublishPopupCustomText" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/info/settings/:DatasetPublishPopupCustomText + curl "https://demo.dataverse.org/api/info/settings/:DatasetPublishPopupCustomText" Get API Terms of Use URL ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2963,30 +3302,76 @@ Get API Terms of Use. The response contains the text value inserted as API Terms export SERVER_URL=https://demo.dataverse.org - curl $SERVER_URL/api/info/apiTermsOfUse + curl "$SERVER_URL/api/info/apiTermsOfUse" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/info/apiTermsOfUse + curl "https://demo.dataverse.org/api/info/apiTermsOfUse" + +.. _info-incomplete-metadata: + +Show Support Of Incomplete Metadata Deposition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Learn if an instance has been configured to allow deposition of incomplete datasets via the API. +See also :ref:`create-dataset-command` and :ref:`dataverse.api.allow-incomplete-metadata` + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/info/settings/incompleteMetadataViaApi" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/info/settings/incompleteMetadataViaApi" + + +.. _metadata-blocks-api: Metadata Blocks --------------- +See also :ref:`exploring-metadata-blocks`. + Show Info About All Metadata Blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Lists brief info about all metadata blocks registered in the system:: +|CORS| Lists brief info about all metadata blocks registered in the system. + +.. 
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/metadatablocks" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash - GET http://$SERVER/api/metadatablocks + curl "https://demo.dataverse.org/api/metadatablocks" Show Info About Single Metadata Block ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Return data about the block whose ``identifier`` is passed. ``identifier`` can either be the block's id, or its name:: +|CORS| Return data about the block whose ``identifier`` is passed, including allowed controlled vocabulary values. ``identifier`` can either be the block's database id, or its name (i.e. "citation"). + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export IDENTIFIER=citation + + curl "$SERVER_URL/api/metadatablocks/$IDENTIFIER" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash - GET http://$SERVER/api/metadatablocks/$identifier + curl "https://demo.dataverse.org/api/metadatablocks/citation" .. _Notifications: @@ -3002,7 +3387,7 @@ Each user can get a dump of their notifications by passing in their API token: .. code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/notifications/all + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/notifications/all" Delete Notification by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3013,7 +3398,7 @@ Each user can delete notifications by passing in their API token and specifying export NOTIFICATION_ID=555 - curl -H X-Dataverse-key:$API_TOKEN -X DELETE "$SERVER_URL/api/notifications/$NOTIFICATION_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/notifications/$NOTIFICATION_ID" Get All Muted In-app Notifications by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3022,7 +3407,7 @@ Each user can get a list of their muted in-app notification types by passing in .. code-block:: bash - curl -H X-Dataverse-key:$API_TOKEN -X GET "$SERVER_URL/api/notifications/mutedNotifications" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/notifications/mutedNotifications" Mute In-app Notification by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3033,7 +3418,7 @@ Each user can mute in-app notifications by passing in their API token and specif export NOTIFICATION_TYPE=ASSIGNROLE - curl -H X-Dataverse-key:$API_TOKEN -X PUT "$SERVER_URL/api/notifications/mutedNotifications/$NOTIFICATION_TYPE" + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/notifications/mutedNotifications/$NOTIFICATION_TYPE" Unmute In-app Notification by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3044,7 +3429,7 @@ Each user can unmute in-app notifications by passing in their API token and spec export NOTIFICATION_TYPE=ASSIGNROLE - curl -H X-Dataverse-key:$API_TOKEN -X DELETE "$SERVER_URL/api/notifications/mutedNotifications/$NOTIFICATION_TYPE" + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/notifications/mutedNotifications/$NOTIFICATION_TYPE" Get All Muted Email Notifications by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3053,7 +3438,7 @@ Each user can get a list of their muted email notification types by passing in t .. 
code-block:: bash - curl -H X-Dataverse-key:$API_TOKEN -X GET "$SERVER_URL/api/notifications/mutedEmails" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/notifications/mutedEmails" Mute Email Notification by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3064,7 +3449,7 @@ Each user can mute email notifications by passing in their API token and specify export NOTIFICATION_TYPE=ASSIGNROLE - curl -H X-Dataverse-key:$API_TOKEN -X PUT "$SERVER_URL/api/notifications/mutedEmails/$NOTIFICATION_TYPE" + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/notifications/mutedEmails/$NOTIFICATION_TYPE" Unmute Email Notification by User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3075,7 +3460,7 @@ Each user can unmute email notifications by passing in their API token and speci export NOTIFICATION_TYPE=ASSIGNROLE - curl -H X-Dataverse-key:$API_TOKEN -X DELETE "$SERVER_URL/api/notifications/mutedEmails/$NOTIFICATION_TYPE" + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/notifications/mutedEmails/$NOTIFICATION_TYPE" .. _User Information: @@ -3087,9 +3472,9 @@ Get User Information in JSON Format Each user can get a dump of their basic information in JSON format by passing in their API token:: - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/users/:me + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/users/:me" + -.. _pids-api: Managing Harvesting Server and Sets ----------------------------------- @@ -3134,7 +3519,7 @@ An example JSON file would look like this:: export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - curl -H X-Dataverse-key:$API_TOKEN -X POST "$SERVER_URL/api/harvest/server/oaisets/add" --upload-file harvestset-finch.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/harvest/server/oaisets/add" --upload-file harvestset-finch.json The fully expanded example above (without the environment variables) looks like this: @@ -3169,7 +3554,7 @@ An example JSON file would look like this:: export SERVER_URL=https://demo.dataverse.org export SPECNAME=ffAuthor - curl -H X-Dataverse-key:$API_TOKEN -X PUT "$SERVER_URL/api/harvest/server/oaisets/$SPECNAME" --upload-file modify-harvestset-finch.json + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/harvest/server/oaisets/$SPECNAME" --upload-file modify-harvestset-finch.json The fully expanded example above (without the environment variables) looks like this: @@ -3190,7 +3575,7 @@ To delete a harvesting set, use the set's database name. For example, to delete export SERVER_URL=https://demo.dataverse.org export SPECNAME=ffAuthor - curl -H X-Dataverse-key:$API_TOKEN -X DELETE "$SERVER_URL/api/harvest/server/oaisets/$SPECNAME" + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/harvest/server/oaisets/$SPECNAME" The fully expanded example above (without the environment variables) looks like this: @@ -3200,6 +3585,158 @@ The fully expanded example above (without the environment variables) looks like Only users with superuser permissions may delete harvesting sets. + +.. _managing-harvesting-clients-api: + +Managing Harvesting Clients +--------------------------- + +The following API can be used to create and manage "Harvesting Clients". A Harvesting Client is a configuration entry that allows your Dataverse installation to harvest and index metadata from a specific remote location, either regularly, on a configured schedule, or on a one-off basis. For more information, see the :doc:`/admin/harvestclients` section of the Admin Guide. 
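+
+As a quick orientation before the individual endpoints below, a minimal sketch of the list call documented in the next subsection might look like this (the token and server values are placeholders):
+
+.. code-block:: bash
+
+   export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+   export SERVER_URL=http://localhost:8080
+
+   # list every harvesting client configured on this installation
+   curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/harvest/clients/"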
+ +List All Configured Harvesting Clients +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Shows all the Harvesting Clients configured:: + + GET http://$SERVER/api/harvest/clients/ + +Show a Specific Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Shows a Harvesting Client with a defined nickname:: + + GET http://$SERVER/api/harvest/clients/$nickname + +.. code-block:: bash + + curl "http://localhost:8080/api/harvest/clients/myclient" + + { + "status": "OK", + "data": { + "lastDatasetsFailed": "22", + "lastDatasetsDeleted": "0", + "metadataFormat": "oai_dc", + "archiveDescription": "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data.", + "archiveUrl": "https://dataverse.foo.edu", + "harvestUrl": "https://dataverse.foo.edu/oai", + "style": "dataverse", + "type": "oai", + "dataverseAlias": "fooData", + "nickName": "myClient", + "set": "fooSet", + "schedule": "none", + "status": "inActive", + "lastHarvest": "Thu Oct 13 14:48:57 EDT 2022", + "lastResult": "SUCCESS", + "lastSuccessful": "Thu Oct 13 14:48:57 EDT 2022", + "lastNonEmpty": "Thu Oct 13 14:48:57 EDT 2022", + "lastDatasetsHarvested": "137" + } + } + + +Create a Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To create a new harvesting client:: + + POST http://$SERVER/api/harvest/clients/$nickname + +``nickName`` is the name identifying the new client. It should be alpha-numeric and may also contain -, _, or %, but no spaces. It must also be unique in the installation. + +You must supply a JSON file that describes the configuration, similarly to the output of the GET API above. The following fields are mandatory: + +- dataverseAlias: The alias of an existing collection where harvested datasets will be deposited +- harvestUrl: The URL of the remote OAI archive +- archiveUrl: The URL of the remote archive that will be used in the redirect links pointing back to the archival locations of the harvested records. It may or may not be on the same server as the harvestUrl above. If this OAI archive is another Dataverse installation, it will be the same URL as harvestUrl minus the "/oai". For example: https://demo.dataverse.org/ vs. https://demo.dataverse.org/oai +- metadataFormat: A supported metadata format. As of this writing, the supported formats are "oai_dc", "oai_ddi" and "dataverse_json". + +The following optional fields are supported: + +- archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." +- set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". +- style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. + +Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored.
For example, as of writing this there is no way to configure a harvesting schedule via this API. + +An example JSON file would look like this:: + + { + "nickName": "zenodo", + "dataverseAlias": "zenodoHarvested", + "harvestUrl": "https://zenodo.org/oai2d", + "archiveUrl": "https://zenodo.org", + "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", + "metadataFormat": "oai_dc", + "customHeaders": "x-oai-api-key: xxxyyyzzz", + "set": "user-lmops" + } + +Something important to keep in mind about this API is that, unlike the harvesting clients GUI, it will create a client with the values supplied without making any attempts to validate them in real time. In other words, for the `harvestUrl` it will accept anything that looks like a well-formed url, without making any OAI calls to verify that the name of the set and/or the metadata format entered are supported by it. This is by design, to give an admin an option to still be able to create a client, in a rare case when it cannot be done via the GUI because of some real time failures in an exchange with an otherwise valid OAI server. This however puts the responsibility on the admin to supply the values already confirmed to be valid. + + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=http://localhost:8080 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-Type: application/json" "$SERVER_URL/api/harvest/clients/zenodo" --upload-file client.json + +The fully expanded example above (without the environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-Type: application/json" "http://localhost:8080/api/harvest/clients/zenodo" --upload-file "client.json" + + { + "status": "OK", + "data": { + "metadataFormat": "oai_dc", + "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", + "archiveUrl": "https://zenodo.org", + "harvestUrl": "https://zenodo.org/oai2d", + "style": "default", + "type": "oai", + "dataverseAlias": "zenodoHarvested", + "nickName": "zenodo", + "set": "user-lmops", + "schedule": "none", + "status": "inActive", + "lastHarvest": "N/A", + "lastSuccessful": "N/A", + "lastNonEmpty": "N/A", + "lastDatasetsHarvested": "N/A", + "lastDatasetsDeleted": "N/A" + } + } + +Only users with superuser permissions may create or configure harvesting clients. + +Modify a Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to the API above, using the same JSON format, but run on an existing client and using the PUT method instead of POST. + +Delete a Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Self-explanatory: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "http://localhost:8080/api/harvest/clients/$nickName" + +Only users with superuser permissions may delete harvesting clients. + + + +.. _pids-api: + PIDs ---- @@ -3218,13 +3755,13 @@ Get information on a PID, especially its "state" such as "draft" or "findable". 
export SERVER_URL=https://demo.dataverse.org export PID=doi:10.70122/FK2/9BXT5O - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/pids?persistentId=$PID + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/pids?persistentId=$PID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/pids?persistentId=doi:10.70122/FK2/9BXT5O + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/pids?persistentId=doi:10.70122/FK2/9BXT5O" List Unreserved PIDs ~~~~~~~~~~~~~~~~~~~~ @@ -3238,14 +3775,14 @@ Get a list of PIDs that have not been reserved on the PID provider side. This ca export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/pids/unreserved + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/pids/unreserved" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/pids/unreserved + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/pids/unreserved" Reserve a PID ~~~~~~~~~~~~~ @@ -3260,13 +3797,13 @@ Reserved a PID for a dataset. A superuser API token is required. export SERVER_URL=https://demo.dataverse.org export PID=doi:10.70122/FK2/9BXT5O - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/pids/:persistentId/reserve?persistentId=$PID + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/pids/:persistentId/reserve?persistentId=$PID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/pids/:persistentId/reserve?persistentId=doi:10.70122/FK2/9BXT5O + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/pids/:persistentId/reserve?persistentId=doi:10.70122/FK2/9BXT5O" Delete a PID ~~~~~~~~~~~~ @@ -3281,13 +3818,13 @@ Delete PID (this is only possible for PIDs that are in the "draft" state) and wi export SERVER_URL=https://demo.dataverse.org export PID=doi:10.70122/FK2/9BXT5O - curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE $SERVER_URL/api/pids/:persistentId/delete?persistentId=$PID + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/pids/:persistentId/delete?persistentId=$PID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X DELETE https://demo.dataverse.org/api/pids/:persistentId/delete?persistentId=doi:10.70122/FK2/9BXT5O + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/pids/:persistentId/delete?persistentId=doi:10.70122/FK2/9BXT5O" .. _admin: @@ -3337,7 +3874,7 @@ Note that HTML can be included in banner messages. 
Add a Banner Message:: - curl -H "Content-type:application/json" -X POST http://$SERVER/api/admin/bannerMessage --upload-file messages.json + curl -H "Content-type:application/json" -X POST "http://$SERVER/api/admin/bannerMessage" --upload-file messages.json Where ``messages.json`` looks like this:: @@ -3357,15 +3894,15 @@ Where ``messages.json`` looks like this:: Get a list of active Banner Messages:: - curl -X GET http://$SERVER/api/admin/bannerMessage + curl -X GET "http://$SERVER/api/admin/bannerMessage" Delete a Banner Message by its id:: - curl -X DELETE http://$SERVER/api/admin/bannerMessage/$id + curl -X DELETE "http://$SERVER/api/admin/bannerMessage/$id" Deactivate a Banner Message by its id (allows you to hide a message while retaining information about which users have dismissed the banner):: - curl -X PUT http://$SERVER/api/admin/bannerMessage/$id/deactivate + curl -X PUT "http://$SERVER/api/admin/bannerMessage/$id/deactivate" List Authentication Provider Factories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3418,7 +3955,7 @@ Check whether an authentication proider is enabled:: The body of the request should be either ``true`` or ``false``. Content type has to be ``application/json``, like so:: - curl -H "Content-type: application/json" -X POST -d"false" http://localhost:8080/api/admin/authenticationProviders/echo-dignified/:enabled + curl -H "Content-type: application/json" -X POST -d"false" "http://localhost:8080/api/admin/authenticationProviders/echo-dignified/:enabled" Delete an Authentication Provider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3434,13 +3971,25 @@ List all global roles in the system. :: GET http://$SERVER/api/admin/roles +.. _create-global-role: + Create Global Role ~~~~~~~~~~~~~~~~~~ Creates a global role in the Dataverse installation. The data POSTed are assumed to be a role JSON. :: POST http://$SERVER/api/admin/roles - + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/admin/roles" --upload-file roles.json + +``roles.json`` see :ref:`json-representation-of-a-role` + Delete Global Role ~~~~~~~~~~~~~~~~~~ @@ -3452,13 +4001,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE $SERVER_URL/api/admin/roles/$ID + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/admin/roles/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/admin/roles/24 + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/admin/roles/24" A curl example using a Role alias ``ALIAS`` @@ -3474,7 +4023,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/admin/roles/:alias?alias=roleAlias + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/admin/roles/:alias?alias=roleAlias" List Users ~~~~~~~~~~ @@ -3492,7 +4041,7 @@ List users with the options to search and "page" through results. 
Only accessibl export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/admin/list-users + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/admin/list-users" # sort by createdtime (the creation time of the account) curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/admin/list-users?sortKey=createdtime" @@ -3501,7 +4050,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/admin/list-users + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/admin/list-users" # sort by createdtime (the creation time of the account) curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/admin/list-users?sortKey=createdtime" @@ -3652,7 +4201,7 @@ If a user has created multiple accounts and has been performed actions under bot POST https://$SERVER/api/users/$toMergeIdentifier/mergeIntoUser/$continuingIdentifier -Example: ``curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://demo.dataverse.org/api/users/jsmith2/mergeIntoUser/jsmith`` +Example: ``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "http://demo.dataverse.org/api/users/jsmith2/mergeIntoUser/jsmith"`` This action moves account data from jsmith2 into the account jsmith and deletes the account of jsmith2. @@ -3667,7 +4216,7 @@ Changes identifier for user in ``AuthenticatedUser``, ``BuiltinUser``, ``Authent POST http://$SERVER/api/users/$oldIdentifier/changeIdentifier/$newIdentifier -Example: ``curl -H "X-Dataverse-key: $API_TOKEN" -X POST https://demo.dataverse.org/api/users/johnsmith/changeIdentifier/jsmith`` +Example: ``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "https://demo.dataverse.org/api/users/johnsmith/changeIdentifier/jsmith"`` This action changes the identifier of user johnsmith to jsmith. @@ -3707,13 +4256,13 @@ Deactivates a user. A superuser API token is not required but the command will o export SERVER_URL=http://localhost:8080 export USERNAME=jdoe - curl -X POST $SERVER_URL/api/admin/authenticatedUsers/$USERNAME/deactivate + curl -X POST "$SERVER_URL/api/admin/authenticatedUsers/$USERNAME/deactivate" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -X POST http://localhost:8080/api/admin/authenticatedUsers/jdoe/deactivate + curl -X POST "http://localhost:8080/api/admin/authenticatedUsers/jdoe/deactivate" The database ID of the user can be passed instead of the username. @@ -3722,7 +4271,7 @@ The database ID of the user can be passed instead of the username. export SERVER_URL=http://localhost:8080 export USERID=42 - curl -X POST $SERVER_URL/api/admin/authenticatedUsers/id/$USERID/deactivate + curl -X POST "$SERVER_URL/api/admin/authenticatedUsers/id/$USERID/deactivate" Note: A primary purpose of most Dataverse installations is to serve an archive. In the archival space, there are best practices around the tracking of data access and the tracking of modifications to data and metadata. In support of these key workflows, a simple mechanism to delete users that have performed edit or access actions in the system is not provided. Providing a Deactivate User endpoint for users who have taken certain actions in the system alongside a Delete User endpoint to remove users that haven't taken certain actions in the system is by design. 
@@ -3761,13 +4310,13 @@ Show the traces that the user has left in the system, such as datasets created, export SERVER_URL=https://demo.dataverse.org export USERNAME=jdoe - curl -H "X-Dataverse-key:$API_TOKEN" -X GET $SERVER_URL/api/users/$USERNAME/traces + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/users/$USERNAME/traces" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X GET https://demo.dataverse.org/api/users/jdoe/traces + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/users/jdoe/traces" Remove All Roles from a User ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3782,13 +4331,13 @@ Removes all roles from the user. This is equivalent of clicking the "Remove All export SERVER_URL=https://demo.dataverse.org export USERNAME=jdoe - curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/users/$USERNAME/removeRoles + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/users/$USERNAME/removeRoles" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST http://localhost:8080/api/users/jdoe/removeRoles + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "http://localhost:8080/api/users/jdoe/removeRoles" List Role Assignments of a Role Assignee ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3852,11 +4401,11 @@ Datafile Integrity Recalculate the check sum value value of a datafile, by supplying the file's database id and an algorithm (Valid values for $ALGORITHM include MD5, SHA-1, SHA-256, and SHA-512):: - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/admin/computeDataFileHashValue/{fileId}/algorithm/$ALGORITHM + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/admin/computeDataFileHashValue/{fileId}/algorithm/$ALGORITHM" Validate an existing check sum value against one newly calculated from the saved file:: - curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/admin/validateDataFileHashValue/{fileId} + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/admin/validateDataFileHashValue/{fileId}" .. _dataset-files-validation-api: @@ -3869,7 +4418,7 @@ The following validates all the physical files in the dataset specified, by reca It will report the specific files that have failed the validation. For example:: - curl http://localhost:8080/api/admin/validate/dataset/files/:persistentId/?persistentId=doi:10.5072/FK2/XXXXX + curl "http://localhost:8080/api/admin/validate/dataset/files/:persistentId/?persistentId=doi:10.5072/FK2/XXXXX" {"dataFiles": [ {"datafileId":2658,"storageIdentifier":"file://123-aaa","status":"valid"}, {"datafileId":2659,"storageIdentifier":"file://123-bbb","status":"invalid","errorMessage":"Checksum mismatch for datafile id 2669"}, @@ -3879,6 +4428,26 @@ It will report the specific files that have failed the validation. For example:: These are only available to super users. +.. _UpdateChecksums: + +Update Checksums To Use New Algorithm +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The fixity algorithm used on existing files can be changed by a superuser using this API call. An optional query parameter (num) can be used to limit the number of updates attempted (i.e. to do processing in batches). 
+The API call will only update the algorithm and checksum for a file if the existing checksum can be validated against the file. +Statistics concerning the updates are returned in the response to the API call, with details in the log. +The primary use for this API call is to update existing files after the algorithm used when uploading new files is changed - see :ref:`:FileFixityChecksumAlgorithm`. +Allowed values are MD5, SHA-1, SHA-256, and SHA-512. + +.. code-block:: bash + + export ALG=SHA-256 + export BATCHSIZE=1 + + curl "http://localhost:8080/api/admin/updateHashValues/$ALG" + curl "http://localhost:8080/api/admin/updateHashValues/$ALG?num=$BATCHSIZE" + + .. _dataset-validation-api: Dataset Validation ------------------ Validate the dataset and its components (DatasetVersion, FileMetadatas, etc.) for constraint violations:: - curl $SERVER_URL/api/admin/validate/dataset/{datasetId} + curl "$SERVER_URL/api/admin/validate/dataset/{datasetId}" if validation fails, will report the specific database entity and the offending value. For example:: @@ -3896,7 +4465,7 @@ If the optional argument ``variables=true`` is specified, the API will also vali Validate all the datasets in the Dataverse installation, report any constraint violations found:: - curl $SERVER_URL/api/admin/validate/datasets + curl "$SERVER_URL/api/admin/validate/datasets" If the optional argument ``variables=true`` is specified, the API will also validate the metadata associated with any tabular data files. (For example: an invalid or empty variable name). Note that validating all the tabular metadata may significantly increase the run time of the full validation pass. @@ -4005,41 +4574,48 @@ View the list of standard license terms that can be selected for a dataset: .. code-block:: bash export SERVER_URL=https://demo.dataverse.org - curl $SERVER_URL/api/licenses + curl "$SERVER_URL/api/licenses" View the details of the standard license with the database ID specified in ``$ID``: .. code-block:: bash export ID=1 - curl $SERVER_URL/api/licenses/$ID + curl "$SERVER_URL/api/licenses/$ID" -Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. If you are interested in adding a Creative Commons license, you are encouarged to use the JSON files under :ref:`adding-creative-commons-licenses`: +Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. The sort order field is mandatory. If you are interested in adding a Creative Commons license, you are encouraged to use the JSON files under :ref:`adding-creative-commons-licenses`: .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - curl -X POST -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN --data-binary @add-license.json $SERVER_URL/api/licenses + curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$API_TOKEN" --data-binary @add-license.json "$SERVER_URL/api/licenses" Superusers can change whether an existing license is active (usable for new dataset versions) or inactive (only allowed on already-published versions) specified by the license ``$ID``: ..
code-block:: bash export STATE=true - curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:active/$STATE + curl -X PUT -H 'Content-Type: application/json' -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/licenses/$ID/:active/$STATE" Superusers may change the default license by specifying the license ``$ID``: .. code-block:: bash - curl -X PUT -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/default/$ID + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/licenses/default/$ID" Superusers can delete a license, provided it is not in use, by the license ``$ID``: .. code-block:: bash - curl -X DELETE -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID + curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/licenses/$ID" + +Superusers can change the sorting order of a license specified by the license ``$ID``: + +.. code-block:: bash + + export SORT_ORDER=100 + curl -X PUT -H 'Content-Type: application/json' -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/licenses/$ID/:sortOrder/$SORT_ORDER" List Dataset Templates ~~~~~~~~~~~~~~~~~~~~~~ @@ -4063,13 +4639,106 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -X DELETE $SERVER_URL/api/admin/template/$ID + curl -X DELETE "$SERVER_URL/api/admin/template/$ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -X DELETE https://demo.dataverse.org/api/admin/template/24 - - + curl -X DELETE "https://demo.dataverse.org/api/admin/template/24" + +.. _api-native-signed-url: + +Request Signed URL +~~~~~~~~~~~~~~~~~~ + +Dataverse has the ability to create signed URLs for its API calls. +A signature, which is valid only for the specific API call and only for a specified duration, allows the call to proceed with the authentication of the specified user. +It is intended as an alternative to the use of an API key (which is valid for a long time period and can be used with any API call). +Signed URLs were developed to support External Tools but may be useful in other scenarios where Dataverse or a third-party tool needs to delegate limited access to another user or tool. +This API call allows a Dataverse superuser to generate a signed URL for such scenarios. +The JSON input parameter required is an object with the following keys: + +- ``url`` - the exact URL to sign, including the API version number and all query parameters +- ``timeOut`` - how long in minutes the signature should be valid for, default is 10 minutes +- ``httpMethod`` - which HTTP method is required, default is GET +- ``user`` - the user identifier for the account associated with this signature, the default is the superuser making the call. The API call will succeed/fail based on whether the specified user has the required permissions. + +A curl example allowing access to a dataset's metadata + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export API_KEY=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export JSON='{"url":"https://demo.dataverse.org/api/v1/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB","timeOut":5,"user":"alberteinstein"}' + + curl -H "X-Dataverse-key:$API_KEY" -H 'Content-Type:application/json' -d "$JSON" "$SERVER_URL/api/admin/requestSignedUrl" + +Please see :ref:`dataverse.api.signature-secret` for the configuration option to add a shared secret, enabling extra +security. + + +..
_send-feedback: + +Send Feedback To Contact(s) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call allows sending an email to the contacts for a collection, dataset, or datafile, or to the support email address when no object is specified. +The call is protected by the normal /admin API protections (limited to localhost or requiring a separate key), but does not otherwise limit the sending of emails. +Administrators should be sure only trusted applications have access to avoid the potential for spam. + +The call is a POST with a JSON object as input with four keys: + +- "targetId" - the id of the collection, dataset, or datafile. Persistent ids and collection aliases are not supported. (Optional) +- "subject" - the email subject line +- "body" - the email body to send +- "fromEmail" - the email to list in the reply-to field. (Dataverse always sends mail from the system email, but does it "on behalf of" and with a reply-to for the specified user.) + +A curl example using an ``ID`` + +.. code-block:: bash + + export SERVER_URL=http://localhost + export JSON='{"targetId":24, "subject":"Data Question", "body":"Please help me understand your data. Thank you!", "fromEmail":"dataverseSupport@mailinator.com"}' + + curl -X POST -H 'Content-Type:application/json' -d "$JSON" "$SERVER_URL/api/admin/feedback" + +Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. + + +MyData +------ + +The MyData API is used to get a list of just the datasets, dataverses or datafiles an authenticated user can edit. + +A curl example listing objects + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ROLE_IDS=6 + export DVOBJECT_TYPES=Dataset + export PUBLISHED_STATES=Unpublished + export PER_PAGE=10 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/mydata/retrieve?role_ids=$ROLE_IDS&dvobject_types=$DVOBJECT_TYPES&published_states=$PUBLISHED_STATES&per_page=$PER_PAGE" + +Parameters: + +``role_ids`` Roles are customizable. Standard roles include: + +- ``1`` = Admin +- ``2`` = File Downloader +- ``3`` = Dataverse + Dataset Creator +- ``4`` = Dataverse Creator +- ``5`` = Dataset Creator +- ``6`` = Contributor +- ``7`` = Curator +- ``8`` = Member + +``dvobject_types`` Type of object, several possible values among ``DataFile``, ``Dataset`` and ``Dataverse``. + +``published_states`` State of the object, several possible values among ``Published``, ``Unpublished``, ``Draft``, ``Deaccessioned`` and ``In+Review``. + +``per_page`` Number of results returned per page. + diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index d5e56543fb1..b941064f173 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -35,6 +35,8 @@ show_relevance boolean Whether or not to show details of which fields were ma show_facets boolean Whether or not to show facets that can be operated on by the "fq" parameter. False by default. See :ref:`advanced search example `. fq string A filter query on the search term. Multiple "fq" parameters can be used. See :ref:`advanced search example `. show_entity_ids boolean Whether or not to show the database IDs of the search results (for developer use). +geo_point string Latitude and longitude in the form ``geo_point=42.3,-71.1``. You must supply ``geo_radius`` as well. See also :ref:`geospatial-search`.
+geo_radius string Radial distance in kilometers from ``geo_point`` (which must be supplied as well) such as ``geo_radius=1.5``. metadata_fields string Includes the requested fields for each dataset in the response. Multiple "metadata_fields" parameters can be used to include several fields. The value must be in the form "{metadata_block_name}:{field_name}" to include a specific field from a metadata block (see :ref:`example `) or "{metadata_field_set_name}:\*" to include all the fields for a metadata block (see :ref:`example `). "{field_name}" cannot be a subfield of a compound field. If "{field_name}" is a compound field, all subfields are included. =============== ======= =========== diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 880ed561720..7ff17eb45ed 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '5.12' +version = '6.0' # The full version, including alpha/beta/rc tags. -release = '5.12' +release = '6.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/sphinx-guides/source/container/app-image.rst b/doc/sphinx-guides/source/container/app-image.rst new file mode 100644 index 00000000000..29f6d6ac1d4 --- /dev/null +++ b/doc/sphinx-guides/source/container/app-image.rst @@ -0,0 +1,217 @@ +Dataverse Application Image +=========================== + +The application image is a layer on top of the base image and contains the Dataverse software. + +.. contents:: |toctitle| + :local: + +An "application image" offers you a deployment ready Dataverse application running on the underlying +application server, which is provided by the :doc:`base-image`. Its sole purpose is to bundle the application +and any additional material necessary to successfully jumpstart the application. + +Until all :ref:`jvm-options` are *MicroProfile Config* enabled, it also adds the necessary scripting glue to +configure the applications domain during booting the application server. See :ref:`app-tunables`. + +Within the main repository, you may find the application image's files at ``/src/main/docker``. +This is the same Maven module providing a Dataverse WAR file for classic installations, and uses the +`Maven Docker Plugin `_ to build and ship the image within a special Maven profile. + +**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) +efforts. + + + +Supported Image Tags +++++++++++++++++++++ + +This image is sourced from the main upstream code `repository of the Dataverse software `_. +Development and maintenance of the `image's code `_ happens there +(again, by the community). + +.. note:: + Please note that this image is not (yet) available from Docker Hub. You need to build local to use + (see below). Follow https://github.com/IQSS/dataverse/issues/9444 for new developments. 
+ + + +Image Contents +++++++++++++++ + +The application image builds by convention upon the :doc:`base image ` and provides: + +- Dataverse class files +- Resource files +- Dependency JAR files +- `JHove `_ configuration +- Script to configure the application server domain for :ref:`jvm-options` not yet *MicroProfile Config* enabled. + +The image is provided as a multi-arch image to support the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). (Easy to extend.) + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. + +Simply execute the Maven modules packaging target with activated "container" profile from the projects Git root to +compile the Java code and build the image: + +``mvn -Pct clean package`` + +Some additional notes, using Maven parameters to change the build and use ...: + +- | ... a different tag only: add ``-Dapp.image.tag=tag``. + | *Note:* default is ``unstable`` +- | ... a different image name and tag: add ``-Dapp.image=name:tag``. + | *Note:* default is ``gdcc/dataverse:${app.image.tag}`` +- ... a different image registry than Docker Hub: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) +- | ... a different base image tag: add ``-Dbase.image.tag=tag`` + | *Note:* default is ``unstable`` +- | ... a different base image: add ``-Dbase.image=name:tag`` + | *Note:* default is ``gdcc/base:${base.image.tag}``. See also :doc:`base-image` for more details on it. + +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +See note above at "Supported Image Tags". + +.. _app-multiarch: + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using `Maven Docker Plugin's BuildX mode `_. + +Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of +the Docker machine's CPU. + +Only ``mvn -Pct clean deploy -Ddocker.platforms=linux/amd64,linux/arm64`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + + + +.. _app-tunables: + +Tunables +++++++++ + +The :doc:`base-image` provides a long list of possible options to tune many aspects of the application server, and, +as the application image builds upon it, :ref:`Base Image Tunables ` apply to it as well. + +In addition, the application image provides the following tunables: + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 10 50 + :header-rows: 1 + + * - Env. variable + - Default + - Type + - Description + * - ``MP_CONFIG_PROFILE`` + - ``ct`` + - String + - Set to switch the activated *MicroProfile Config Profile*. Note that certain defaults will not apply any longer. + See :ref:`:ApplicationServerSettings` for details. + * - ``dataverse_*`` and ``doi_*`` + - \- + - String + - Configure any :ref:`jvm-options` not yet *MicroProfile Config* enabled with this magic trick. 
+ + 1. Simply pick a JVM option from the list and replace any ``.`` with ``_``. + 2. Replace any ``-`` in the option name with ``__``. + * - ``DATAVERSE_MAIL_HOST`` + - ``smtp`` + - String + - A hostname (w/o port!) where to reach a Mail MTA on port 25. + * - ``DATAVERSE_MAIL_USER`` + - ``dataversenotify`` + - String + - A username to use with the Mail MTA + * - ``DATAVERSE_MAIL_FROM`` + - ``dataverse@localhost`` + - Mail address + - The "From" field for all outbound mail. Make sure to set :ref:`systemEmail` to the same value or no mail will + be sent. + + +Note that the script ``init_2_configure.sh`` will apply a few very important defaults to enable quick usage +by a) activating the scheduled tasks timer, b) add local file storage if not disabled, and c) a sensible password +reset timeout: + +.. code-block:: shell + + dataverse_auth_password__reset__timeout__in__minutes=60 + dataverse_timerServer=true + dataverse_files_storage__driver__id=local + + if dataverse_files_storage__driver__id = "local" then + dataverse_files_local_type=file + dataverse_files_local_label=Local + dataverse_files_local_directory=${STORAGE_DIR}/store + + + +.. _app-locations: + +Locations ++++++++++ + +There are only a few important additions to the list of `locations by the base image `_. +Please make sure to back these locations with volumes or tmpfs to avoid writing data into the overlay filesystem, which +will significantly hurt performance. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Location + - Value + - Description + * - ``${STORAGE_DIR}`` + - ``/dv`` + - Defined by base image. Either back this folder or, if suitable, the locations below it with volumes + or tmpfs. + * - ``${STORAGE_DIR}/uploads`` + - ``/dv/uploads`` + - See :ref:`dataverse.files.uploads` for a detailed description. + * - ``${STORAGE_DIR}/temp`` + - ``/dv/temp`` + - See :ref:`dataverse.files.directory` for a detailed description. + * - ``${STORAGE_DIR}/store`` + - ``/dv/store`` + - Important when using the default provided local storage option (see above and :ref:`storage-files-dir`) + * - ``/tmp`` + - \- + - Location for temporary files, see also :ref:`temporary-file-storage` + + + +Exposed Ports ++++++++++++++ + +See base image :ref:`exposed port `. + + + +Entry & Extension Points +++++++++++++++++++++++++ + +The application image makes use of the base image provided system to execute scripts on boot, see :ref:`base-entrypoint`. +See there for potential extension of this image in your own derivative. diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst new file mode 100644 index 00000000000..1a47a8fc413 --- /dev/null +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -0,0 +1,366 @@ +Application Base Image +====================== + +The base image contains Payara and other dependencies that the Dataverse software runs on. It is the foundation for the :doc:`app-image`. Note that some dependencies, such as PostgreSQL and Solr, run in their own containers and are not part of the base image. + +.. contents:: |toctitle| + :local: + +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc. is all done +at this layer, to make the application image focus on the app itself. 
+ +**NOTE: The base image does not contain the Dataverse application itself.** + +Within the main repository, you may find the base image's files at ``/modules/container-base``. +This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** +IQSS will not offer you support how to deploy or run it, please reach out to the community (:ref:`support`) for help on using it. +You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) +efforts. + +Supported Image Tags +++++++++++++++++++++ + +This image is sourced from the main upstream code `repository of the Dataverse software `_. +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. + (`Dockerfile `__) + + + +Image Contents +++++++++++++++ + +The base image provides: + +- `Eclipse Temurin JRE using Java 17 `_ +- `Payara Community Application Server `_ +- CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) +- Linux tools for analysis, monitoring and so on +- `Jattach `__ (attach to running JVM) +- `wait-for `__ (tool to "wait for" a service to be available) +- `dumb-init `__ (see :ref:`below ` for details) + +This image is created as a "multi-arch image", see :ref:`below `. + +It inherits (is built on) an Ubuntu environment from the upstream +`base image of Eclipse Temurin `_. +You are free to change the JRE/JDK image to your liking (see below). + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. + +Simply execute the Maven modules packaging target with activated "container" profile. Either from the projects Git root: + +``mvn -Pct -f modules/container-base install`` + +Or move to the module and execute: + +``cd modules/container-base && mvn -Pct install`` + +Some additional notes, using Maven parameters to change the build and use ...: + +- | ... a different tag only: add ``-Dbase.image.tag=tag``. + | *Note:* default is ``unstable`` +- | ... a different image name and tag: add ``-Dbase.image=name:tag``. + | *Note:* default is ``gdcc/base:${base.image.tag}`` +- ... a different image registry than Docker Hub: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) +- ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. +- | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). + | *Note:* must resolve to an available image tag ``A-jre`` of Eclipse Temurin! + (See also `Docker Hub search example `_) +- ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an + image available local or remote. +- ... 
a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) + +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed +to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to +its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. + +*Note:* For the Github Action to be able to push to Docker Hub, two repository secrets +(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. + +.. _base-multiarch: + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using `Maven Docker Plugin's BuildX mode `_. + +Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of +the Docker machine's CPU. + +Only ``mvn -Pct deploy`` will trigger building on all enabled architectures (and will try to push the images to a +registry, which is Docker Hub by default). + +You can specify which architectures you would like to build for and include by them as a comma separated list: +``mvn -Pct deploy -Ddocker.platforms="linux/amd64,linux/arm64"``. The shown configuration is the default and may be omitted. + +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder! + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + +The upstream CI workflows publish images supporting AMD64 and ARM64 (see e.g. tag details on Docker Hub) + +.. _base-tunables: + +Tunables +++++++++ + +The base image provides a Payara domain suited for production use, but can also be used during development. +Many settings have been carefully selected for best performance and stability of the Dataverse application. + +As with any service, you should always monitor any metrics and make use of the tuning capabilities the base image +provides. These are mostly based on environment variables (very common with containers) and provide sane defaults. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 10 50 + :header-rows: 1 + + * - Env. variable + - Default + - Type + - Description + * - ``DEPLOY_PROPS`` + - (empty) + - String + - Set to add arguments to generated `asadmin deploy` commands. + * - ``PREBOOT_COMMANDS`` + - [preboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **before** boot of application server. + See also `Pre/postboot script docs`_. + * - ``POSTBOOT_COMMANDS`` + - [postboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **after** boot of application server. + See also `Pre/postboot script docs`_. + * - ``JVM_ARGS`` + - (empty) + - String + - Additional arguments to pass to application server's JVM on start. + * - ``MEM_MAX_RAM_PERCENTAGE`` + - ``70.0`` + - Percentage + - Maximum amount of container's allocated RAM to be used as heap space. + Make sure to leave some room for native memory, OS overhead etc! + * - ``MEM_XSS`` + - ``512k`` + - Size + - Tune the maximum JVM stack size. 
+ * - ``MEM_MIN_HEAP_FREE_RATIO`` + - ``20`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_HEAP_FREE_RATIO`` + - ``40`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_GC_PAUSE_MILLIS`` + - ``500`` + - Milliseconds + - Shorter pause times might result in lots of collections causing overhead without much gain. + This needs monitoring and tuning. It's a complex matter. + * - ``MEM_METASPACE_SIZE`` + - ``256m`` + - Size + - Initial size of memory reserved for class metadata, also used as trigger to run a garbage collection + once passing this size. + * - ``MEM_MAX_METASPACE_SIZE`` + - ``2g`` + - Size + - The metaspace's size will not outgrow this limit. + * - ``ENABLE_DUMPS`` + - ``0`` + - Bool, ``0|1`` + - If enabled, the argument(s) given in ``JVM_DUMP_ARG`` will be added to the JVM starting up. + This means it will enable dumping the heap to ``${DUMPS_DIR}`` (see below) in "out of memory" cases. + (You should back this location with disk space / ramdisk, so it does not write into an overlay filesystem!) + * - ``JVM_DUMPS_ARG`` + - [dump-option]_ + - String + - Can be fine tuned for more grained controls of dumping behaviour. + * - ``ENABLE_JMX`` + - ``0`` + - Bool, ``0|1`` + - Allow insecure JMX connections, enable AMX and tune all JMX monitoring levels to ``HIGH``. + See also `Payara Docs - Basic Monitoring `_. + A basic JMX service is enabled by default in Payara, exposing basic JVM MBeans, but especially no Payara MBeans. + * - ``ENABLE_JDWP`` + - ``0`` + - Bool, ``0|1`` + - Enable the "Java Debug Wire Protocol" to attach a remote debugger to the JVM in this container. + Listens on port 9009 when enabled. Search the internet for numerous tutorials to use it. + * - ``ENABLE_RELOAD`` + - ``0`` + - Bool, ``0|1`` + - Enable the dynamic "hot" reloads of files when changed in a deployment. Useful for development, + when new artifacts are copied into the running domain. + * - ``DATAVERSE_HTTP_TIMEOUT`` + - ``900`` + - Seconds + - See :ref:`:ApplicationServerSettings` ``http.request-timeout-seconds``. + + *Note:* can also be set using any other `MicroProfile Config Sources`_ available via ``dataverse.http.timeout``. + + +.. [preboot] ``${CONFIG_DIR}/pre-boot-commands.asadmin`` +.. [postboot] ``${CONFIG_DIR}/post-boot-commands.asadmin`` +.. [dump-option] ``-XX:+HeapDumpOnOutOfMemoryError`` + + +.. _base-locations: + +Locations ++++++++++ + +This environment variables represent certain locations and might be reused in your scripts etc. +All of these variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +**Writeable at build time:** + +The overlay filesystem of Docker and other container technologies is not meant to be used for any performance IO. +You should avoid *writing* data anywhere in the file tree at runtime, except for well known locations with mounted +volumes backing them (see below). + +The locations below are meant to be written to when you build a container image, either this base or anything +building upon it. You can also use these for references in scripts, etc. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. 
variable + - Value + - Description + * - ``HOME_DIR`` + - ``/opt/payara`` + - Home base to Payara and the application + * - ``PAYARA_DIR`` + - ``${HOME_DIR}/appserver`` + - Installation directory of Payara server + * - ``SCRIPT_DIR`` + - ``${HOME_DIR}/scripts`` + - Any scripts like the container entrypoint, init scripts, etc + * - ``CONFIG_DIR`` + - ``${HOME_DIR}/config`` + - Payara Server configurations like pre/postboot command files go here + (Might be reused for Dataverse one day) + * - ``DEPLOY_DIR`` + - ``${HOME_DIR}/deployments`` + - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + +**Writeable at runtime:** + +The locations below are defined as `Docker volumes `_ by the base image. +They will by default get backed by an "anonymous volume", but you can (and should) bind-mount a host directory or +named Docker volume in these places to avoid data loss, gain performance and/or use a network file system. + +**Notes:** +1. On Kubernetes you still need to provide volume definitions for these places in your deployment objects! +2. You should not write data into these locations at build time - it will be shadowed by the mounted volumes! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``STORAGE_DIR`` + - ``/dv`` + - This place is writeable by the Payara user, making it usable as a place to store research data, customizations + or other. Images inheriting the base image should create distinct folders here, backed by different + mounted volumes. + * - ``SECRETS_DIR`` + - ``/secrets`` + - Mount secrets or other here, being picked up automatically by + `Directory Config Source `_. + See also various :doc:`../installation/config` options involving secrets. + * - ``DUMPS_DIR`` + - ``/dumps`` + - Default location where heap dumps will be stored (see above). + You should mount some storage here (disk or ephemeral). + + +.. _base-exposed-ports: + +Exposed Ports ++++++++++++++ + +The default ports that are exposed by this image are: + +- 8080 - HTTP listener +- 4848 - Admin Service HTTPS listener +- 8686 - JMX listener +- 9009 - "Java Debug Wire Protocol" port (when ``ENABLE_JDWP=1``) + +The HTTPS listener (on port 8181) becomes deactivated during the build, as we will always need to reverse-proxy the +application server and handle SSL/TLS termination at this point. Save the memory and some CPU cycles! + + + +.. _base-entrypoint: + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint shell script provided by this base image will by default ensure to: + +- Run any scripts named ``${SCRIPT_DIR}/init_*`` or in ``${SCRIPT_DIR}/init.d/*`` directory for initialization + **before** the application server starts. +- Run an executable script ``${SCRIPT_DIR}/startInBackground.sh`` in the background - if present. +- Run the application server startup scripting in foreground (``${SCRIPT_DIR}/startInForeground.sh``). + +If you need to create some scripting that runs in parallel under supervision of `dumb-init `_, +e.g. to wait for the application to deploy before executing something, this is your point of extension: simply provide +the ``${SCRIPT_DIR}/startInBackground.sh`` executable script with your application image. 
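+
+For example, a minimal ``${SCRIPT_DIR}/startInBackground.sh`` might look like the sketch below. It uses the bundled ``wait-for`` tool to block until the HTTP listener on port 8080 answers and then runs a follow-up command; the timeout flag and the follow-up call are illustrative assumptions, not something the base image ships or requires:
+
+.. code-block:: bash
+
+  #!/bin/bash
+  # Sketch: wait (up to an assumed 5 minutes) for the application server to answer on port 8080 ...
+  wait-for localhost:8080 -t 300
+  # ... then run any post-deployment task, e.g. check that the application responds:
+  curl -sf http://localhost:8080/api/info/version && echo "Application is up"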
+ + + +Other Hints ++++++++++++ + +By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. + +For running a Java application within a Linux based container, the support for CGroups is essential. It has been +included and activated by default since Java 8u192, Java 11 LTS and later. If you are interested in more details, +you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, +https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired +from `run-java-sh recommendations`_. + + + +.. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html +.. _MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html +.. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations diff --git a/doc/sphinx-guides/source/container/configbaker-image.rst b/doc/sphinx-guides/source/container/configbaker-image.rst new file mode 100644 index 00000000000..7218e2d8d14 --- /dev/null +++ b/doc/sphinx-guides/source/container/configbaker-image.rst @@ -0,0 +1,231 @@ +Config Baker Image +================== + +The config baker container may be used to execute all sorts of tasks around setting up, preparing and finalizing +an instance of the Dataverse software. Its focus is bootstrapping non-initialized installations. + +.. contents:: |toctitle| + :local: + +Quickstart +++++++++++ + +To see the Config Baker help screen: + +``docker run -it --rm gdcc/configbaker:unstable`` + +Supported Image Tags +++++++++++++++++++++ + +This image is sourced from the main upstream code `repository of the Dataverse software `_. +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. + (`Dockerfile `__) + + + +Image Contents +++++++++++++++ + +This image contains some crucial parts to make a freshly baked Dataverse installation usable. + +Scripts +^^^^^^^ + +.. list-table:: + :align: left + :widths: 20 80 + :header-rows: 1 + + * - Script + - Description + * - ``bootstrap.sh`` + - Run an initialization script contained in a persona. See ``bootstrap.sh -h`` for usage details. + For development purposes, use ``bootstrap.sh dev`` or provide your own. + * - ``fix-fs-perms.sh`` + - Fixes filesystem permissions. App and Solr container run as non-privileged users and might need adjusted + filesystem permissions on mounted volumes to be able to write data. Run without parameters to see usage details. + * - ``help.sh`` + - Default script when running container without parameters. Lists available scripts and details about them. + * - ``update-fields.sh`` + - Update a Solr ``schema.xml`` with a given list of metadata fields. See ``update-fields.sh -h`` for usage details + and :ref:`update-solr-schema` for an example use case. 
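+
+For example, to look at the usage details of one of these scripts without extending the image, you can pass the script name as the container command (a sketch, assuming the ``unstable`` tag; the scripts are part of ``$PATH`` inside the image):
+
+.. code-block:: bash
+
+  # Print the help output of the Solr field updater and exit
+  docker run -it --rm gdcc/configbaker:unstable update-fields.sh -h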
+ +Solr Template +^^^^^^^^^^^^^ + +In addition, at ``/template`` a `Solr Configset `_ +is available, ready for Dataverse usage with a tuned core config and schema. + +Providing this template to a vanilla Solr image and using `solr-precreate `_ +with it will create the necessary Solr search index. + +The ``solrconfig.xml`` and ``schema.xml`` are included from the upstream project ``conf/solr/...`` folder. You are +obviously free to provide such a template in some other way, maybe tuned for your purposes. +As a start, the contained script ``update-fields.sh`` may be used to edit the field definitions. + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. +Note: You need to use Maven when building this image, as we collate selective files from different places of the upstream +repository. (Building with pure Docker Compose does not support this kind of selection.) + +By default, when building the application image, it will also create a new config baker image. Simply execute the +Maven modules packaging target with activated "container" profile from the projects Git root to build the image: + +``mvn -Pct package`` + +If you specifically want to build a config baker image *only*, try + +``mvn -Pct package -Ddocker.filter=dev_bootstrap`` + +The build of config baker involves copying Solr configset files. The Solr version used is inherited from Maven, +acting as the single source of truth. Also, the tag of the image should correspond the application image, as +their usage is intertwined. + +Some additional notes, using Maven parameters to change the build and use ...: + +- | ... a different tag only: add ``-Dconf.image.tag=tag``. + | *Note:* default is ``${app.image.tag}``, which defaults to ``unstable`` +- | ... a different image name and tag: add ``-Dconf.image=name:tag``. + | *Note:* default is ``gdcc/configbaker:${conf.image.tag}`` +- ... a different image registry than Docker Hub: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) +- ... a different Solr version: use ``-Dsolr.version=x.y.z`` + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is published as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using `Maven Docker Plugin's BuildX mode `_. + +Building the image via ``mvn -Pct package``, etc. will only build for the architecture of the Docker machine's CPU. + +Only ``mvn -Pct deploy -Ddocker.platforms=linux/amd64,linux/arm64`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + + + +Tunables +++++++++ + +This image has no tunable runtime parameters yet. + + + +Locations ++++++++++ + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Location + - Value + - Description + * - ``${SCRIPT_DIR}`` + - ``/scripts`` + - Place to store the scripts. Part of ``$PATH``. 
+ * - ``${SOLR_TEMPLATE}`` + - ``/template`` + - Place where the Solr Configset resides to create an index core from it. + * - ``${BOOTSTRAP_DIR}`` + - ``/scripts/bootstrap`` + - Stores the bootstrapping personas in sub-folders. + * - ``${BOOTSTRAP_DIR}/base`` + - ``/scripts/bootstrap/base`` + - Minimal set of scripts and data from upstream ``scripts/api`` folder, just enough for the most basic setup. + The idea is that other personas may reuse it within their own ``init.sh``, avoiding (some) code duplication. + See ``dev`` persona for an example. + + + +Exposed Ports ++++++++++++++ + +This image contains no runnable services yet, so no ports are exposed. + + + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint of this image is pinned to ``dumb-init`` to safeguard signal handling. You may feed any script or +executable to it as the command. + +Use our released images as a base image to add your own scripting, personas, Solr configset and so on; simply +adapt and alter any aspect you need changed. + + + +Examples +++++++++ + +Docker Compose snippet to wait for Dataverse deployment and execute bootstrapping using a custom persona you added +by bind mounting (as an alternative to extending the image): + +.. code-block:: yaml + + bootstrap: + image: gdcc/configbaker:unstable + restart: "no" + command: + - bootstrap.sh + - mypersona + volumes: + - ./mypersona:/scripts/bootstrap/mypersona + networks: + - dataverse + +Docker Compose snippet to prepare execution of Solr and copy your custom configset you added by bind mounting +(instead of an extension). Note that ``solr-precreate`` will not overwrite an already existing core! To update +the config of an existing core, you need to mount the right volume with the stateful data! + +.. code-block:: yaml + + solr_initializer: + container_name: solr_initializer + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" + volumes: + - ./volumes/solr/data:/var/solr + - ./volumes/solr/conf:/solr-template + - /tmp/my-generated-configset:/template + + solr: + container_name: solr + hostname: solr + image: solr:${SOLR_VERSION} + depends_on: + - solr_initializer + restart: on-failure + ports: + - "8983:8983" + networks: + - dataverse + command: + - "solr-precreate" + - "collection1" + - "/template" + volumes: + - ./volumes/solr/data:/var/solr + - ./volumes/solr/conf:/template diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst new file mode 100644 index 00000000000..04c7eba7913 --- /dev/null +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -0,0 +1,180 @@ +Development Usage +================= + +Please note! This Docker setup is not for production! + +.. contents:: |toctitle| + :local: + +Quickstart +---------- + +See :ref:`container-dev-quickstart`. + +Intro +----- + +From here on, we assume you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running. We also assume +you have Java and Maven installed, since you are about to develop code changes. + +To test drive these local changes to the Dataverse codebase in a containerized application server (and avoid the +setup described in :doc:`../developers/dev-environment`), you must a) build the application and b) run it in addition +to the necessary dependencies. (Which might involve building a new local version of the :doc:`configbaker-image`.) + +.. 
_dev-build: + +Building +-------- + +To build the :doc:`application ` and :doc:`config baker image `, run the following command: + +``mvn -Pct clean package`` + +Once this is done, you will see images ``gdcc/dataverse:unstable`` and ``gdcc/configbaker:unstable`` available in your +Docker cache. + +**Note:** This will skip any unit tests. If you have built the code before for testing, etc., you might omit the +``clean`` to avoid recompiling. + +**Note:** Although we have a ``docker-compose-dev.yml`` file, it is currently not possible to build the images without +invoking Maven. This might change in the future. + + +.. _dev-run: + +Running +------- + +After building the app and config baker image containing your local changes to the Dataverse application, you want to +run it together with all dependencies. There are four ways to do this (commands are executed at the root of the project directory): + +.. list-table:: Cheatsheet: Running Containers + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Using Maven + - Using Compose + * - In foreground + - ``mvn -Pct docker:run`` + - ``docker compose -f docker-compose-dev.yml up`` + * - In background + - ``mvn -Pct docker:start`` + - ``docker compose -f docker-compose-dev.yml up -d`` + +Both ways have their pros and cons: + +.. list-table:: Decision Helper: Fore- or Background? + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Pros + - Cons + * - Foreground + - | Logs scroll by when interacting with API / UI + | To stop all containers simply hit ``Ctrl+C`` + - | Lots and lots of logs scrolling by + | Must stop all containers to restart + * - Background + - | No logs scrolling by + | Easy to replace single containers + - | No logs scrolling by + | Stopping containers needs an extra command + +In case you want to combine building and running, here's a cheatsheet for you: + +.. list-table:: Cheatsheet: Building and Running Containers + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Using Maven + - Using Compose + * - In foreground + - ``mvn -Pct package docker:run`` + - ``mvn -Pct package && docker compose -f docker-compose-dev.yml up`` + * - In background + - ``mvn -Pct package docker:start`` + - ``mvn -Pct package && docker compose -f docker-compose-dev.yml up -d`` + +Once all containers have been started, you can check if the application was deployed correctly by checking the version +at http://localhost:8080/api/info/version or by watching the logs. + +**Note:** To stop all containers you started in the background, invoke ``mvn -Pct docker:stop`` or +``docker compose -f docker-compose-dev.yml down``. + +Check that you can log in to http://localhost:8080 using user ``dataverseAdmin`` and password ``admin1``. + +You can also access the Payara Admin Console if needed, which is available at http://localhost:4848. To log in, use +user ``admin`` and password ``admin``. As a reminder, the application container is for development use only, so we +are exposing the admin console for testing purposes. In a production environment, this console should not be exposed. + +Note that data is persisted in ``./docker-dev-volumes`` in the root of the Git repo. For a clean start, you should +remove this directory before running the ``mvn`` commands above. + + +.. _dev-logs: + +Viewing Logs +------------ + +In case you started containers in background mode (see :ref:`dev-run`), you can use the following commands to view and/or +watch logs from the containers.
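For example, a minimal sketch for following the main application service when you started everything via Compose (the ``dev_dataverse`` service name is taken from the Re-Deploying example below and may differ in your setup):

.. code-block:: shell

   # Follow the logs of the application container started via Compose
   docker compose -f docker-compose-dev.yml logs -f dev_dataverse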
+ +The safe bet for any running container's logs is to look up the container name via ``docker ps`` and use it in +``docker logs ``. You can tail logs by adding ``-n`` and follow them by adding ``-f`` (just like the ``tail`` command). +See ``docker logs --help`` for more. + +Alternatives: + +- In case you used Maven for running, you may use ``mvn -Pct docker:logs -Ddocker.filter=``. +- If you used Docker Compose for running, you may use ``docker compose -f docker-compose-dev.yml logs ``. + Options are the same. + + +Re-Deploying +------------ + +Currently, the only safe and tested way to re-deploy the Dataverse application after you have applied code changes is +by recreating the container(s). In the future, more options may be added here. + +If you started your containers in foreground, just stop them and follow the steps for building and running again. +The same goes for using Maven to start the containers in the background. + +In case of using Docker Compose and starting the containers in the background, you can use a workaround to only +restart the application container: + +.. code-block:: + + # First rebuild the container (it will complain about an image still in use; this is fine) + mvn -Pct package + # Then re-create the container (will automatically restart the container for you) + docker compose -f docker-compose-dev.yml create dev_dataverse + +Using ``docker container inspect dev_dataverse | grep Image``, you can verify that the image checksum has changed. + +Using a Debugger +---------------- + +The :doc:`base-image` enables usage of the `Java Debugging Wire Protocol `_ +for remote debugging if you set ``ENABLE_JDWP=1`` as an environment variable for the application container. +The default configuration when executing containers with the commands listed at :ref:`dev-run` already enables this. + +There are a lot of tutorials on how to connect your IDE's debugger to a remote endpoint. Please use ``localhost:9009`` +as the endpoint. Here are links to the most common IDEs' docs on remote debugging: +`Eclipse `_, +`IntelliJ `_ + +Building Your Own Base Image +---------------------------- + +If you find yourself tasked with upgrading Payara, you will need to create your own base image before running the :ref:`container-dev-quickstart`. For instructions, see :doc:`base-image`. diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst new file mode 100644 index 00000000000..4bbc87a4845 --- /dev/null +++ b/doc/sphinx-guides/source/container/index.rst @@ -0,0 +1,29 @@ +Container Guide +=============== + +Running the Dataverse software in containers is quite different from running it in a :doc:`standard installation <../installation/prep>`. + +Both approaches have pros and cons. These days, containers are very often used for development and testing, +but there is an ever-rising move toward running applications in the cloud using container technology. + +**NOTE:** +**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running a standard, non-containerized installation, +container support described in this guide is mostly created and maintained by the Dataverse community on a best-effort +basis.** + +This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other +solutions to run containers in production. There is the `Dataverse on K8s project `_ for this +purpose, as mentioned in the :doc:`/developers/containers` section of the Developer Guide.
+ +This guide focuses on describing the container images managed from the main Dataverse repository (again: by the +community, not IQSS), their features and limitations. Instructions on how to build the images yourself and how to +develop and extend them further are provided. + +**Contents:** + +.. toctree:: + + dev-usage + base-image + app-image + configbaker-image diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 0782fd239a1..04885571a01 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -36,10 +36,32 @@ At present, one potential drawback for direct-upload is that files are only part ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` +.. _s3-direct-upload-features-disabled: -**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. +Features that are Disabled if S3 Direct Upload is Enabled +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following features are disabled when S3 direct upload is enabled. + +- Unzipping of zip files. (See :ref:`compressed-files`.) +- Extraction of metadata from FITS files. (See :ref:`fits`.) +- Creation of NcML auxiliary files (See :ref:`netcdf-and-hdf5`.) +- Extraction of a geospatial bounding box from NetCDF and HDF5 files (see :ref:`netcdf-and-hdf5`) unless :ref:`dataverse.netcdf.geo-extract-s3-direct-upload` is set to true. + +.. _cors-s3-bucket: + +Allow CORS for S3 Buckets +~~~~~~~~~~~~~~~~~~~~~~~~~ + +**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with dvwebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/gdcc/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. +If you'd like to check the CORS configuration on your bucket before making changes: + +``aws s3api get-bucket-cors --bucket `` + +To proceed with making changes: + ``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` with the contents of the file cors.json as follows: @@ -52,7 +74,7 @@ with the contents of the file cors.json as follows: "AllowedOrigins": ["*"], "AllowedHeaders": ["*"], "AllowedMethods": ["PUT", "GET"], - "ExposeHeaders": ["ETag"] + "ExposeHeaders": ["ETag", "Accept-Ranges", "Content-Encoding", "Content-Range"] } ] } @@ -151,6 +173,8 @@ See also :ref:`Globus settings <:GlobusBasicToken>`. Data Capture Module (DCM) ------------------------- +Please note: The DCM feature is deprecated. + Data Capture Module (DCM) is an experimental component that allows users to upload large datasets via rsync over ssh. DCM was developed and tested using Glassfish but these docs have been updated with references to Payara. @@ -187,7 +211,7 @@ The JSON that a DCM sends to your Dataverse installation on successful checksum :language: json - ``status`` - The valid strings to send are ``validation passed`` and ``validation failed``. 
-- ``uploadFolder`` - This is the directory on disk where your Dataverse installation should attempt to find the files that a DCM has moved into place. There should always be a ``files.sha`` file and a least one data file. ``files.sha`` is a manifest of all the data files and their checksums. The ``uploadFolder`` directory is inside the directory where data is stored for the dataset and may have the same name as the "identifier" of the persistent id (DOI or Handle). For example, you would send ``"uploadFolder": "DNXV2H"`` in the JSON file when the absolute path to this directory is ``/usr/local/payara5/glassfish/domains/domain1/files/10.5072/FK2/DNXV2H/DNXV2H``. +- ``uploadFolder`` - This is the directory on disk where your Dataverse installation should attempt to find the files that a DCM has moved into place. There should always be a ``files.sha`` file and a least one data file. ``files.sha`` is a manifest of all the data files and their checksums. The ``uploadFolder`` directory is inside the directory where data is stored for the dataset and may have the same name as the "identifier" of the persistent id (DOI or Handle). For example, you would send ``"uploadFolder": "DNXV2H"`` in the JSON file when the absolute path to this directory is ``/usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/DNXV2H/DNXV2H``. - ``totalSize`` - Your Dataverse installation will use this value to represent the total size in bytes of all the files in the "package" that's created. If 360 data files and one ``files.sha`` manifest file are in the ``uploadFolder``, this value is the sum of the 360 data files. @@ -209,9 +233,9 @@ Add Dataverse Installation settings to use mock (same as using DCM, noted above) At this point you should be able to download a placeholder rsync script. Your Dataverse installation is then waiting for news from the DCM about if checksum validation has succeeded or not. First, you have to put files in place, which is usually the job of the DCM. You should substitute "X1METO" for the "identifier" of the dataset you create. You must also use the proper path for where you store files in your dev environment. -- ``mkdir /usr/local/payara5/glassfish/domains/domain1/files/10.5072/FK2/X1METO`` -- ``mkdir /usr/local/payara5/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` -- ``cd /usr/local/payara5/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` +- ``mkdir /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO`` +- ``mkdir /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` +- ``cd /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` - ``echo "hello" > file1.txt`` - ``shasum file1.txt > files.sha`` @@ -226,104 +250,11 @@ The following low level command should only be used when troubleshooting the "im ``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$DV_BASE_URL/api/batch/jobs/import/datasets/files/$DATASET_DB_ID?uploadFolder=$UPLOAD_FOLDER&totalSize=$TOTAL_SIZE"`` -Steps to set up a DCM via Docker for Development -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you need a fully operating DCM client for development purposes, these steps will guide you to setting one up. This includes steps to set up the DCM on S3 variant. 
- -Docker Image Set-up -^^^^^^^^^^^^^^^^^^^ - -See https://github.com/IQSS/dataverse/blob/develop/conf/docker-dcm/readme.md - -- Install docker if you do not have it - -Optional steps for setting up the S3 Docker DCM Variant -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Before: the default bucket for DCM to hold files in S3 is named test-dcm. It is coded into `post_upload_s3.bash` (line 30). Change to a different bucket if needed. -- Also Note: With the new support for multiple file store in the Dataverse Software, DCM requires a store with id="s3" and DCM will only work with this store. - - - Add AWS bucket info to dcmsrv - - Add AWS credentials to ``~/.aws/credentials`` - - - ``[default]`` - - ``aws_access_key_id =`` - - ``aws_secret_access_key =`` - -- Dataverse installation configuration (on dvsrv): - - - Set S3 as the storage driver - - - ``cd /opt/payara5/bin/`` - - ``./asadmin delete-jvm-options "\-Ddataverse.files.storage-driver-id=file"`` - - ``./asadmin create-jvm-options "\-Ddataverse.files.storage-driver-id=s3"`` - - ``./asadmin create-jvm-options "\-Ddataverse.files.s3.type=s3"`` - - ``./asadmin create-jvm-options "\-Ddataverse.files.s3.label=s3"`` - - - - Add AWS bucket info to your Dataverse installation - - Add AWS credentials to ``~/.aws/credentials`` - - - ``[default]`` - - ``aws_access_key_id =`` - - ``aws_secret_access_key =`` - - - Also: set region in ``~/.aws/config`` to create a region file. Add these contents: - - - ``[default]`` - - ``region = us-east-1`` - - - Add the S3 bucket names to your Dataverse installation - - - S3 bucket for your Dataverse installation - - - ``/usr/local/payara5/glassfish/bin/asadmin create-jvm-options "-Ddataverse.files.s3.bucket-name=iqsstestdcmbucket"`` - - - S3 bucket for DCM (as your Dataverse installation needs to do the copy over) - - - ``/usr/local/payara5/glassfish/bin/asadmin create-jvm-options "-Ddataverse.files.dcm-s3-bucket-name=test-dcm"`` - - - Set download method to be HTTP, as DCM downloads through S3 are over this protocol ``curl -X PUT "http://localhost:8080/api/admin/settings/:DownloadMethods" -d "native/http"`` - -Using the DCM Docker Containers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -For using these commands, you will need to connect to the shell prompt inside various containers (e.g. ``docker exec -it dvsrv /bin/bash``) - -- Create a dataset and download rsync upload script - - - connect to client container: ``docker exec -it dcm_client bash`` - - create dataset: ``cd /mnt ; ./create.bash`` ; this will echo the database ID to stdout - - download transfer script: ``./get_transfer.bash $database_id_from_create_script`` - - execute the transfer script: ``bash ./upload-${database_id_from-create_script}.bash`` , and follow instructions from script. - -- Run script - - - e.g. ``bash ./upload-3.bash`` (``3`` being the database id from earlier commands in this example). - -- Manually run post upload script on dcmsrv - - - for posix implementation: ``docker exec -it dcmsrv /opt/dcm/scn/post_upload.bash`` - - for S3 implementation: ``docker exec -it dcmsrv /opt/dcm/scn/post_upload_s3.bash`` - -Additional DCM docker development tips -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- You can completely blow away all the docker images with these commands (including non DCM ones!) 
- - ``docker-compose -f docmer-compose.yml down -v`` - -- There are a few logs to tail - - - dvsrv : ``tail -n 2000 -f /opt/payara5/glassfish/domains/domain1/logs/server.log`` - - dcmsrv : ``tail -n 2000 -f /var/log/lighttpd/breakage.log`` - - dcmsrv : ``tail -n 2000 -f /var/log/lighttpd/access.log`` - -- You may have to restart the app server domain occasionally to deal with memory filling up. If deployment is getting reallllllly slow, its a good time. - Repository Storage Abstraction Layer (RSAL) ------------------------------------------- +Please note: The RSAL feature is deprecated. + Steps to set up a DCM via Docker for Development ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/developers/classic-dev-env.rst b/doc/sphinx-guides/source/developers/classic-dev-env.rst new file mode 100755 index 00000000000..062a1bb36f3 --- /dev/null +++ b/doc/sphinx-guides/source/developers/classic-dev-env.rst @@ -0,0 +1,266 @@ +======================= +Classic Dev Environment +======================= + +These are the old instructions we used for Dataverse 4 and 5. They should still work but these days we favor running Dataverse in Docker as described in :doc:`dev-environment`. + +These instructions are purposefully opinionated and terse to help you get your development environment up and running as quickly as possible! Please note that familiarity with running commands from the terminal is assumed. + +.. contents:: |toctitle| + :local: + +Quick Start (Docker) +-------------------- + +The quickest way to get Dataverse running is in Docker as explained in :doc:`../container/dev-usage` section of the Container Guide. + + +Classic Dev Environment +----------------------- + +Since before Docker existed, we have encouraged installing Dataverse and all its dependencies directly on your development machine, as described below. This can be thought of as the "classic" development environment for Dataverse. + +However, in 2023 we decided that we'd like to encourage all developers to start using Docker instead and opened https://github.com/IQSS/dataverse/issues/9616 to indicate that we plan to rewrite this page to recommend the use of Docker. + +There's nothing wrong with the classic instructions below and we don't plan to simply delete them. They are a valid alternative to running Dataverse in Docker. We will likely move them to another page. + +Set Up Dependencies +------------------- + +Supported Operating Systems +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mac OS X or Linux is required because the setup scripts assume the presence of standard Unix utilities. + +Windows is gaining support through Docker as described in the :doc:`windows` section. + +Install Java +~~~~~~~~~~~~ + +The Dataverse Software requires Java 11. + +We suggest downloading OpenJDK from https://adoptopenjdk.net + +On Linux, you are welcome to use the OpenJDK available from package managers. + +Install Netbeans or Maven +~~~~~~~~~~~~~~~~~~~~~~~~~ + +NetBeans IDE is recommended, and can be downloaded from http://netbeans.org . Developers may use any editor or IDE. We recommend NetBeans because it is free, works cross platform, has good support for Jakarta EE projects, and includes a required build tool, Maven. + +Below we describe how to build the Dataverse Software war file with Netbeans but if you prefer to use only Maven, you can find installation instructions in the :doc:`tools` section. 
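Either way, a quick sanity check of the toolchain (a sketch; your exact version strings will differ) can save time later:

.. code-block:: shell

   # The classic setup expects Java 11
   java -version

   # Maven also prints which JDK it will use
   mvn -v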
+ +Install Homebrew (Mac Only) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On Mac, install Homebrew to simplify the steps below: https://brew.sh + +Clone the Dataverse Software Git Repo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fork https://github.com/IQSS/dataverse and then clone your fork like this: + +``git clone git@github.com:[YOUR GITHUB USERNAME]/dataverse.git`` + +Build the Dataverse Software War File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you installed Netbeans, follow these steps: + +- Launch Netbeans and click "File" and then "Open Project". Navigate to where you put the Dataverse Software code and double-click "Dataverse" to open the project. +- If you see "resolve project problems," go ahead and let Netbeans try to resolve them. This will probably include downloading dependencies, which can take a while. +- Allow Netbeans to install nb-javac (required for Java 8 and below). +- Select "Dataverse" under Projects and click "Run" in the menu and then "Build Project (Dataverse)". Check back for "BUILD SUCCESS" at the end. + +If you installed Maven instead of Netbeans, run ``mvn package``. Check for "BUILD SUCCESS" at the end. + +NOTE: Do you use a locale different than ``en_US.UTF-8`` on your development machine? Are you in a different timezone +than Harvard (Eastern Time)? You might experience issues while running tests that were written with these settings +in mind. The Maven ``pom.xml`` tries to handle this for you by setting the locale to ``en_US.UTF-8`` and timezone +``UTC``, but more, not yet discovered, building or testing problems might lurk in the shadows. + +Install jq +~~~~~~~~~~ + +On Mac, run this command: + +``brew install jq`` + +On Linux, install ``jq`` from your package manager or download a binary from http://stedolan.github.io/jq/ + +Install Payara +~~~~~~~~~~~~~~ + +Payara 6.2023.8 or higher is required. + +To install Payara, run the following commands: + +``cd /usr/local`` + +``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip`` + +``sudo unzip payara-6.2023.8.zip`` + +``sudo chown -R $USER /usr/local/payara6`` + +If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ + +Install Service Dependencies Directly on localhost +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Install PostgreSQL +^^^^^^^^^^^^^^^^^^ + +The Dataverse Software has been tested with PostgreSQL versions up to 13. PostgreSQL version 10+ is required. + +On Mac, go to https://www.postgresql.org/download/macosx/ and choose the "Interactive installer by EDB" option. Note that version 13.5 is used in the command line examples below, but the process should be similar for other versions. When prompted to set a password for the "database superuser (postgres)" just enter "password". + +After installation is complete, make a backup of the ``pg_hba.conf`` file like this: + +``sudo cp /Library/PostgreSQL/13/data/pg_hba.conf /Library/PostgreSQL/13/data/pg_hba.conf.orig`` + +Then edit ``pg_hba.conf`` with an editor such as vi: + +``sudo vi /Library/PostgreSQL/13/data/pg_hba.conf`` + +In the "METHOD" column, change all instances of "scram-sha-256" (or whatever is in that column) to "trust". This will make it so PostgreSQL doesn't require a password. + +In the Finder, click "Applications" then "PostgreSQL 13" and launch the "Reload Configuration" app. Click "OK" after you see "server signaled".
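If you prefer the command line over vi for that edit, a sketch of the same change (assuming the stock file location used above; BSD ``sed`` on macOS accepts the ``-i.bak`` form) is:

.. code-block:: shell

   # Switch every authentication METHOD to "trust" and keep a backup copy
   sudo sed -i.bak 's/scram-sha-256/trust/g' /Library/PostgreSQL/13/data/pg_hba.conf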
+ +Next, to confirm the edit worked, launch the "pgAdmin" application from the same folder. Under "Browser", expand "Servers" and double-click "PostgreSQL 13". When you are prompted for a password, leave it blank and click "OK". If you have successfully edited "pg_hba.conf", you can get in without a password. + +On Linux, you should just install PostgreSQL using your favorite package manager, such as ``yum``. (Consult the PostgreSQL section of :doc:`/installation/prerequisites` in the main Installation guide for more info and command line examples). Find ``pg_hba.conf`` and set the authentication method to "trust" and restart PostgreSQL. + +Install Solr +^^^^^^^^^^^^ + +`Solr `_ 9.3.0 is required. + +To install Solr, execute the following commands: + +``sudo mkdir /usr/local/solr`` + +``sudo chown $USER /usr/local/solr`` + +``cd /usr/local/solr`` + +``curl -O http://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz`` + +``tar xvfz solr-9.3.0.tgz`` + +``cd solr-9.3.0/server/solr`` + +``cp -r configsets/_default collection1`` + +``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/9.3.0/schema.xml`` + +``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/9.3.0/schema_dv_mdb_fields.xml`` + +``mv schema*.xml collection1/conf`` + +``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/9.3.0/solrconfig.xml`` + +``mv solrconfig.xml collection1/conf/solrconfig.xml`` + +``cd /usr/local/solr/solr-9.3.0`` + +(Please note that the extra jetty argument below is a security measure to limit connections to Solr to only your computer. For extra security, run a firewall.) + +``bin/solr start -j "-Djetty.host=127.0.0.1"`` + +``bin/solr create_core -c collection1 -d server/solr/collection1/conf`` + +Install Service Dependencies Using Docker Compose +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +To avoid having to install service dependencies like PostgreSQL or Solr directly on your localhost, there is the alternative of using the ``docker-compose-dev.yml`` file available in the repository root. For this option, you need to have Docker and Docker Compose installed on your machine. + +The ``docker-compose-dev.yml`` can be configured to only run the service dependencies necessary to support a Dataverse installation running directly on localhost. In addition to PostgreSQL and Solr, it also runs an SMTP server. + +Before running the Docker Compose file, you need to update the value of the ``DATAVERSE_DB_USER`` environment variable to ``postgres``. The variable can be found inside the ``.env`` file in the repository root. This step is required as the Dataverse installation script expects that database user. + +To run the Docker Compose file, go to the Dataverse repository root, then run: + +``docker-compose -f docker-compose-dev.yml up -d --scale dev_dataverse=0`` + +Note that this command omits the Dataverse container defined in the Docker Compose file, since Dataverse is going to be installed directly on localhost in the next section. + +The command runs the containers in detached mode, but if you want to run them attached and thus view container logs in real time, remove the ``-d`` option from the command. + +Data volumes of each dependency will be persisted inside the ``docker-dev-volumes`` folder, inside the repository root.
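To confirm the dependency containers came up as expected, you might check their status (a sketch):

.. code-block:: shell

   # List the services defined in the dev compose file and their state
   docker-compose -f docker-compose-dev.yml ps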
+ +If you want to stop the containers, then run (for detached mode only, otherwise use ``Ctrl + C``): + +``docker-compose -f docker-compose-dev.yml stop`` + +If you want to remove the containers, then run: + +``docker-compose -f docker-compose-dev.yml down`` + +If you want to run a single container (the mail server, for example), then run: + +``docker-compose -f docker-compose-dev.yml up dev_smtp`` + +For a fresh installation, and before running the Software Installer Script, it is recommended to delete the ``docker-dev-volumes`` folder to avoid installation problems due to existing data in the containers. + +Run the Dataverse Software Installer Script +------------------------------------------- + +Navigate to the directory where you cloned the Dataverse Software git repo and change directories to the ``scripts/installer`` directory like this: + +``cd scripts/installer`` + +Create a Python virtual environment, activate it, then install dependencies: + +``python3 -m venv venv`` + +``source venv/bin/activate`` + +``pip install psycopg2-binary`` + +The installer will try to connect to the SMTP server you tell it to use. If you haven't used the Docker Compose option for setting up the dependencies, or you don't have a mail server handy, you can run ``nc -l 25`` in another terminal and choose "localhost" (the default) to get past this check. + +Finally, run the installer (see also :download:`README_python.txt <../../../../scripts/installer/README_python.txt>` if necessary): + +``python3 install.py`` + +Verify the Dataverse Software is Running +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After the script has finished, you should be able to log into your Dataverse installation with the following credentials: + +- http://localhost:8080 +- username: dataverseAdmin +- password: admin + +Configure Your Development Environment for Publishing +----------------------------------------------------- + +Run the following command: + +``curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE`` + +This will disable DOI registration by using a fake (in-code) DOI provider. Please note that this feature is only available in Dataverse Software 4.10+ and that at present, the UI will give no indication that the DOIs thus minted are fake. + +Developers may also wish to consider using :ref:`PermaLinks `. + +Configure Your Development Environment for GUI Edits +---------------------------------------------------- + +Out of the box, a JSF setting is configured for production use and prevents edits to the GUI (xhtml files) from being visible unless you do a full deployment. + +It is recommended that you run the following command so that simply saving the xhtml file in Netbeans is enough for the change to show up. + +``asadmin create-system-properties "dataverse.jsf.refresh-period=1"`` + +For more on JSF settings like this, see :ref:`jsf-config`. + +Next Steps +---------- + +If you can log in to the Dataverse installation, great! If not, please see the :doc:`troubleshooting` section. For further assistance, please see "Getting Help" in the :doc:`intro` section. + +You're almost ready to start hacking on code. Now that the installer script has you up and running, you need to continue on to the :doc:`tips` section to get set up to deploy code from your IDE or the command line.
+ +---- + +Previous: :doc:`intro` | Next: :doc:`tips` diff --git a/doc/sphinx-guides/source/developers/configuration.rst b/doc/sphinx-guides/source/developers/configuration.rst index fb15fea7900..d342c28efc6 100644 --- a/doc/sphinx-guides/source/developers/configuration.rst +++ b/doc/sphinx-guides/source/developers/configuration.rst @@ -93,6 +93,7 @@ sub-scopes first. - All sub-scopes are below that. - Scopes are separated by dots (periods). - A scope may be a placeholder, filled with a variable during lookup. (Named object mapping.) +- The setting should be in kebab case (``signing-secret``) rather than camel case (``signingSecret``). Any consumer of the setting can choose to use one of the fluent ``lookup()`` methods, which hides away alias handling, conversion etc from consuming code. See also the detailed Javadoc for these methods. @@ -109,3 +110,17 @@ always like ``dataverse..newname...=old.property.name``. Note this d aliases. Details can be found in ``edu.harvard.iq.dataverse.settings.source.AliasConfigSource`` + +Adding a Feature Flag +^^^^^^^^^^^^^^^^^^^^^ + +Some parts of our codebase might be opt-in only. Experimental or optional feature previews can be switched on using our +usual configuration mechanism, a JVM setting. + +Feature flags are implemented in the enumeration ``edu.harvard.iq.dataverse.settings.FeatureFlags``, which allows for +convenient usage anywhere in the codebase. When adding a flag, please add it to the enum, think of a default +status, add some Javadocs about the flagged feature and add a ``@since`` tag to make it easier to identify when a flag +has been introduced. + +We want to maintain a list of all :ref:`feature flags ` in the :ref:`configuration guide `; +please add yours to the list. \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/containers.rst b/doc/sphinx-guides/source/developers/containers.rst index 64c7710f0f5..175b178b455 100755 --- a/doc/sphinx-guides/source/developers/containers.rst +++ b/doc/sphinx-guides/source/developers/containers.rst @@ -2,15 +2,33 @@ Docker, Kubernetes, and Containers ================================== -The Dataverse Community is exploring the use of Docker, Kubernetes, and other container-related technologies. The primary community-lead projects to watch are: +The Dataverse community is exploring the use of Docker, Kubernetes, and other container-related technologies. + +.. contents:: |toctitle| + :local: + +Container Guide +--------------- + +We recommend starting with the :doc:`/container/index`. The core Dataverse development team, with lots of help from the community, is iterating on containerizing the Dataverse software and its dependencies there. + +Help Containerize Dataverse +--------------------------- + +If you would like to contribute to the containerization effort, please consider joining the `Containerization Working Group `_. + +Community-Led Projects +----------------------- + +The primary community-led projects (which the core team is drawing inspiration from!) are: -- https://github.com/IQSS/dataverse-kubernetes - https://github.com/IQSS/dataverse-docker +- https://github.com/IQSS/dataverse-kubernetes (especially the https://github.com/EOSC-synergy/dataverse-kubernetes fork) -The :doc:`testing` section mentions using Docker for integration tests. +Using Containers for Reproducible Research +------------------------------------------ -.. contents:: |toctitle| - :local: +Please see :ref:`research-code` in the User Guide for this related topic.
---- diff --git a/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst index 0d16a299fce..ded62288eb2 100644 --- a/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst +++ b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst @@ -36,6 +36,8 @@ To get the json-ld formatted metadata for a Dataset, specify the Dataset ID (DAT You should expect a 200 ("OK") response and JSON-LD mirroring the OAI-ORE representation in the returned 'data' object. +.. _add-semantic-metadata: + Add Dataset Metadata -------------------- @@ -77,7 +79,7 @@ To delete metadata for a Dataset, send a json-ld representation of the fields to curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"https://dataverse.org/schema/core#restrictions":"No restrictions"}' "$SERVER_URL/api/datasets/:persistentId/metadata/delete?persistentId=$DATASET_PID" -Note, this example uses the term URI directly rather than adding an '@context' element. You can use either form in any of these API calls. +Note, this example uses the term URI directly rather than adding an ``@context`` element. You can use either form in any of these API calls. You should expect a 200 ("OK") response indicating whether a draft Dataset version was created or an existing draft was updated. diff --git a/doc/sphinx-guides/source/developers/debugging.rst b/doc/sphinx-guides/source/developers/debugging.rst index 2088afe5521..50e8901b1ff 100644 --- a/doc/sphinx-guides/source/developers/debugging.rst +++ b/doc/sphinx-guides/source/developers/debugging.rst @@ -20,8 +20,8 @@ during development without recompiling. Changing the options will require at lea how you get these options in. (Variable substitution only happens during deployment and when using system properties or environment variables, you'll need to pass these into the domain, which usually will require an app server restart.) -Please note that since Payara 5.2021.1 supporting MicroProfile Config 2.0, you can -`use profiles `_ +Please note you can use +`MicroProfile Config `_ to maintain your settings more easily for different environments. .. list-table:: diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index 65edfa3ffac..0208c49f90a 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -344,8 +344,7 @@ Repositories ------------ Maven receives all dependencies from *repositories*. These can be public like `Maven Central `_ -and others, but you can also use a private repository on premises or in the cloud. Last but not least, you can use -local repositories, which can live next to your application code (see ``local_lib`` dir within the Dataverse Software codebase). +and others, but you can also use a private repository on premises or in the cloud. Repositories are defined within the Dataverse Software POM like this: @@ -364,11 +363,6 @@ Repositories are defined within the Dataverse Software POM like this: http://repository.primefaces.org default - - dvn.private - Local repository for hosting jars not available from network repositories. - file://${project.basedir}/local_lib - You can also add repositories to your local Maven settings, see `docs `_. 
diff --git a/doc/sphinx-guides/source/developers/deployment.rst b/doc/sphinx-guides/source/developers/deployment.rst index 84b821360be..045b0d0abbc 100755 --- a/doc/sphinx-guides/source/developers/deployment.rst +++ b/doc/sphinx-guides/source/developers/deployment.rst @@ -40,10 +40,10 @@ After all this, you can try the "version" command again. Note that it's possible to add an ``export`` line like the one above to your ``~/.bash_profile`` file so you don't have to run it yourself when you open a new terminal. -Configure AWS CLI -~~~~~~~~~~~~~~~~~ +Configure AWS CLI with Stored Credentials +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Next you need to configure AWS CLI. +Dataverse can access S3 using credentials stored as described below, or using an IAM role described a little further below. Create a ``.aws`` directory in your home directory (which is called ``~``) like this: @@ -70,6 +70,11 @@ Then update the file and replace the values for "aws_access_key_id" and "aws_sec If you are having trouble configuring the files manually as described above, see https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html which documents the ``aws configure`` command. +Configure Role-Based S3 Access +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Amazon offers instructions on using an IAM role to grant permissions to applications running in EC2 at https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2.html + Configure Ansible File (Optional) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index e44a70a405f..1301994cc82 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -2,214 +2,81 @@ Development Environment ======================= -These instructions are purposefully opinionated and terse to help you get your development environment up and running as quickly as possible! Please note that familiarity with running commands from the terminal is assumed. +These instructions are oriented around Docker but the "classic" instructions we used for Dataverse 4 and 5 are still available at :doc:`classic-dev-env`. .. contents:: |toctitle| :local: -Quick Start ------------ +.. _container-dev-quickstart: -The quickest way to get the Dataverse Software running is to use Vagrant as described in the :doc:`tools` section, but for day to day development work, we recommended the following setup. +Quickstart +---------- + +First, install Java 17, Maven, and Docker. -Set Up Dependencies -------------------- +After cloning the `dataverse repo `_, run this: -Supported Operating Systems -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``mvn -Pct clean package docker:run`` -Mac OS X or Linux is required because the setup scripts assume the presence of standard Unix utilities. +After some time you should be able to log in: -Windows is not well supported, unfortunately, but Vagrant and Minishift environments are described in the :doc:`windows` section. +- url: http://localhost:8080 +- username: dataverseAdmin +- password: admin1 + +Detailed Steps +-------------- Install Java ~~~~~~~~~~~~ -The Dataverse Software requires Java 11. +The Dataverse Software requires Java 17. -We suggest downloading OpenJDK from https://adoptopenjdk.net +On Mac and Windows, we suggest downloading OpenJDK from https://adoptium.net (formerly `AdoptOpenJDK `_) or `SDKMAN `_. On Linux, you are welcome to use the OpenJDK available from package managers. 
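If you go the SDKMAN route mentioned above, installing a Java 17 distribution might look like the sketch below (the exact candidate identifier is only an example; ``sdk list java`` shows what is currently available):

.. code-block:: shell

   # Example only: pick a Java 17 candidate from "sdk list java"
   sdk install java 17.0.8-tem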
-Install Netbeans or Maven -~~~~~~~~~~~~~~~~~~~~~~~~~ +Install Maven +~~~~~~~~~~~~~ -NetBeans IDE is recommended, and can be downloaded from http://netbeans.org . Developers may use any editor or IDE. We recommend NetBeans because it is free, works cross platform, has good support for Jakarta EE projects, and includes a required build tool, Maven. +Follow instructions at https://maven.apache.org -Below we describe how to build the Dataverse Software war file with Netbeans but if you prefer to use only Maven, you can find installation instructions in the :doc:`tools` section. +Install and Start Docker +~~~~~~~~~~~~~~~~~~~~~~~~ -Install Homebrew (Mac Only) -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Follow instructions at https://www.docker.com -On Mac, install Homebrew to simplify the steps below: https://brew.sh +Be sure to start Docker. -Clone the Dataverse Software Git Repo -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Git Clone Repo +~~~~~~~~~~~~~~ Fork https://github.com/IQSS/dataverse and then clone your fork like this: ``git clone git@github.com:[YOUR GITHUB USERNAME]/dataverse.git`` -Build the Dataverse Software War File -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you installed Netbeans, follow these steps: - -- Launch Netbeans and click "File" and then "Open Project". Navigate to where you put the Dataverse Software code and double-click "Dataverse" to open the project. -- If you see "resolve project problems," go ahead and let Netbeans try to resolve them. This will probably including downloading dependencies, which can take a while. -- Allow Netbeans to install nb-javac (required for Java 8 and below). -- Select "Dataverse" under Projects and click "Run" in the menu and then "Build Project (Dataverse)". Check back for "BUILD SUCCESS" at the end. - -If you installed Maven instead of Netbeans, run ``mvn package``. Check for "BUILD SUCCESS" at the end. - -NOTE: Do you use a locale different than ``en_US.UTF-8`` on your development machine? Are you in a different timezone -than Harvard (Eastern Time)? You might experience issues while running tests that were written with these settings -in mind. The Maven ``pom.xml`` tries to handle this for you by setting the locale to ``en_US.UTF-8`` and timezone -``UTC``, but more, not yet discovered building or testing problems might lurk in the shadows. - -Install jq -~~~~~~~~~~ - -On Mac, run this command: - -``brew install jq`` - -On Linux, install ``jq`` from your package manager or download a binary from http://stedolan.github.io/jq/ - -Install Payara -~~~~~~~~~~~~~~ - -Payara 5.2022.3 or higher is required. - -To install Payara, run the following commands: - -``cd /usr/local`` - -``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip`` - -``sudo unzip payara-5.2022.3.zip`` - -``sudo chown -R $USER /usr/local/payara5`` - -Install PostgreSQL -~~~~~~~~~~~~~~~~~~ - -The Dataverse Software has been tested with PostgreSQL versions up to 13. PostgreSQL version 10+ is required. - -On Mac, go to https://www.postgresql.org/download/macosx/ and choose "Interactive installer by EDB" option. Note that version 13.5 is used in the command line examples below, but the process should be similar for other versions. When prompted to set a password for the "database superuser (postgres)" just enter "password". 
- -After installation is complete, make a backup of the ``pg_hba.conf`` file like this: - -``sudo cp /Library/PostgreSQL/13/data/pg_hba.conf /Library/PostgreSQL/13/data/pg_hba.conf.orig`` - -Then edit ``pg_hba.conf`` with an editor such as vi: - -``sudo vi /Library/PostgreSQL/13/data/pg_hba.conf`` - -In the "METHOD" column, change all instances of "scram-sha-256" (or whatever is in that column) to "trust". This will make it so PostgreSQL doesn't require a password. - -In the Finder, click "Applications" then "PostgreSQL 13" and launch the "Reload Configuration" app. Click "OK" after you see "server signaled". - -Next, to confirm the edit worked, launch the "pgAdmin" application from the same folder. Under "Browser", expand "Servers" and double click "PostgreSQL 13". When you are prompted for a password, leave it blank and click "OK". If you have successfully edited "pg_hba.conf", you can get in without a password. - -On Linux, you should just install PostgreSQL using your favorite package manager, such as ``yum``. (Consult the PostgreSQL section of :doc:`/installation/prerequisites` in the main Installation guide for more info and command line examples). Find ``pg_hba.conf`` and set the authentication method to "trust" and restart PostgreSQL. - -Install Solr -~~~~~~~~~~~~ - -`Solr `_ 8.11.1 is required. - -To install Solr, execute the following commands: +Build and Run +~~~~~~~~~~~~~ -``sudo mkdir /usr/local/solr`` +Change into the ``dataverse`` directory you just cloned and run the following command: -``sudo chown $USER /usr/local/solr`` +``mvn -Pct clean package docker:run`` -``cd /usr/local/solr`` +Verify +~~~~~~ -``curl -O http://archive.apache.org/dist/lucene/solr/8.11.1/solr-8.11.1.tgz`` +After some time you should be able to log in: -``tar xvfz solr-8.11.1.tgz`` - -``cd solr-8.11.1/server/solr`` - -``cp -r configsets/_default collection1`` - -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.11.1/schema.xml`` - -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.11.1/schema_dv_mdb_fields.xml`` - -``mv schema*.xml collection1/conf`` - -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.11.1/solrconfig.xml`` - -``mv solrconfig.xml collection1/conf/solrconfig.xml`` - -``cd /usr/local/solr/solr-8.11.1`` - -(Please note that the extra jetty argument below is a security measure to limit connections to Solr to only your computer. For extra security, run a firewall.) - -``bin/solr start -j "-Djetty.host=127.0.0.1"`` - -``bin/solr create_core -c collection1 -d server/solr/collection1/conf`` - -Run the Dataverse Software Installer Script -------------------------------------------- - -Navigate to the directory where you cloned the Dataverse Software git repo change directories to the ``scripts/installer`` directory like this: - -``cd scripts/installer`` - -Create a Python virtual environment, activate it, then install dependencies: - -``python3 -m venv venv`` - -``source venv/bin/activate`` - -``pip install psycopg2-binary`` - -The installer will try to connect to the SMTP server you tell it to use. If you don't have a mail server handy you can run ``nc -l 25`` in another terminal and choose "localhost" (the default) to get past this check. 
- -Finally, run the installer (see also :download:`README_python.txt <../../../../scripts/installer/README_python.txt>` if necessary): - -``python3 install.py`` - -Verify the Dataverse Software is Running -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -After the script has finished, you should be able to log into your Dataverse installation with the following credentials: - -- http://localhost:8080 +- url: http://localhost:8080 - username: dataverseAdmin -- password: admin - -Configure Your Development Environment for Publishing ------------------------------------------------------ - -Run the following command: - -``curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE`` - -This will disable DOI registration by using a fake (in-code) DOI provider. Please note that this feature is only available in Dataverse Software 4.10+ and that at present, the UI will give no indication that the DOIs thus minted are fake. - -Configure Your Development Environment for GUI Edits ----------------------------------------------------- - -Out of the box, a JSF setting is configured for production use and prevents edits to the GUI (xhtml files) from being visible unless you do a full deployment. - -It is recommended that you run the following command so that simply saving the xhtml file in Netbeans is enough for the change to show up. - -``asadmin create-system-properties "dataverse.jsf.refresh-period=1"`` - -For more on JSF settings like this, see :ref:`jsf-config`. - -Next Steps ----------- +- password: admin1 -If you can log in to the Dataverse installation, great! If not, please see the :doc:`troubleshooting` section. For further assistance, please see "Getting Help" in the :doc:`intro` section. +More Information +---------------- -You're almost ready to start hacking on code. Now that the installer script has you up and running, you need to continue on to the :doc:`tips` section to get set up to deploy code from your IDE or the command line. +See also the :doc:`/container/dev-usage` section of the Container Guide. ----- +Getting Help +------------ -Previous: :doc:`intro` | Next: :doc:`tips` +Please feel free to reach out at https://chat.dataverse.org or https://groups.google.com/g/dataverse-dev if you have any difficulty setting up a dev environment! diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index b20fd112533..f0729c59dcf 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -22,6 +22,8 @@ That's it! Thank you for your contribution! Your pull request will be added manu Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). +Preview your documentation changes which will be built automatically as part of your pull request in Github. It will show up as a check entitled: `docs/readthedocs.org:dataverse-guide — Read the Docs build succeeded!`. For example, this PR built to https://dataverse-guide--9249.org.readthedocs.build/en/9249/. + If you would like to read more about the Dataverse Project's use of GitHub, please see the :doc:`version-control` section. For bug fixes and features we request that you create an issue before making a pull request but this is not at all necessary for quick fixes to the documentation. .. 
_admin: https://github.com/IQSS/dataverse/tree/develop/doc/sphinx-guides/source/admin @@ -34,7 +36,9 @@ If you would like to read more about the Dataverse Project's use of GitHub, plea Building the Guides with Sphinx ------------------------------- -The Dataverse guides are written using Sphinx (http://sphinx-doc.org). We recommend installing Sphinx and building the guides locally so you can get an accurate preview of your changes. +The Dataverse guides are written using Sphinx (http://sphinx-doc.org). We recommend installing Sphinx on your localhost or using a Sphinx Docker container to build the guides locally so you can get an accurate preview of your changes. + +In case you decide to use a Sphinx Docker container to build the guides, you can skip the next two installation sections, but you will need to have Docker installed. Installing Sphinx ~~~~~~~~~~~~~~~~~ @@ -70,7 +74,12 @@ To edit the existing documentation: - In ``doc/sphinx-guides/source`` you will find the .rst files that correspond to http://guides.dataverse.org. - Using your preferred text editor, open and edit the necessary files, or create new ones. -Once you are done, open a terminal, change directories to ``doc/sphinx-guides``, activate (or reactivate) your Python virtual environment, and build the guides. +Once you are done, you can preview the changes by building the guides locally. As explained, you can build the guides with Sphinx locally installed, or with a Docker container. + +Building the Guides with Sphinx Locally Installed +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Open a terminal, change directories to ``doc/sphinx-guides``, activate (or reactivate) your Python virtual environment, and build the guides. ``cd doc/sphinx-guides`` @@ -80,6 +89,16 @@ Once you are done, open a terminal, change directories to ``doc/sphinx-guides``, ``make html`` +Building the Guides with a Sphinx Docker Container +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you want to build the guides using a Docker container, execute the following command in the repository root: + +``docker run -it --rm -v $(pwd):/docs sphinxdoc/sphinx:3.5.4 bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make html"`` + +Previewing the Guides +^^^^^^^^^^^^^^^^^^^^^ + After Sphinx is done processing the files you should notice that the ``html`` folder in ``doc/sphinx-guides/build`` directory has been updated. You can click on the files in the ``html`` folder to preview the changes. @@ -122,6 +141,25 @@ In order to make it clear to the crawlers that we only want the latest version d Allow: /en/latest/ Disallow: /en/ +PDF Version of the Guides +------------------------- + +The HTML version of the guides is the official one. Any other formats are maintained on a best effort basis. + +If you would like to build a PDF version of the guides and have Docker installed, please try the command below from the root of the git repo: + +``docker run -it --rm -v $(pwd):/docs sphinxdoc/sphinx-latexpdf:3.5.4 bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make latexpdf LATEXMKOPTS=\"-interaction=nonstopmode\"; cd ../.. && ls -1 doc/sphinx-guides/build/latex/Dataverse.pdf"`` + +A few notes about the command above: + +- Hopefully the PDF was created at ``doc/sphinx-guides/build/latex/Dataverse.pdf``. +- For now, we are using "nonstopmode" but this masks some errors. +- See requirements.txt for a note regarding the version of Sphinx we are using. 
+ +Also, as of this writing we have enabled PDF builds from the "develop" branch. You can download the PDF from http://preview.guides.gdcc.io/_/downloads/en/develop/pdf/ + +If you would like to help improve the PDF version of the guides, please get in touch! Please see :ref:`getting-help-developers` for ways to contact the developer community. + ---- Previous: :doc:`testing` | Next: :doc:`dependencies` diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index bf525422c84..3ac9e955ea2 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. Developer Guide -======================================================= +=============== **Contents:** @@ -19,6 +19,7 @@ Developer Guide sql-upgrade-scripts testing documentation + security dependencies debugging coding-style @@ -26,6 +27,8 @@ Developer Guide deployment containers making-releases + making-library-releases + metadataexport tools unf/index make-data-count @@ -39,4 +42,5 @@ Developer Guide dataset-migration-api workflows fontcustom + classic-dev-env diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index 7f4e8c1ba34..4a64c407fc1 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -52,7 +52,9 @@ Related Guides If you are a developer who wants to make use of the Dataverse Software APIs, please see the :doc:`/api/index`. If you have front-end UI questions, please see the :doc:`/style/index`. -If you are a sysadmin who likes to code, you may be interested in hacking on installation scripts mentioned in the :doc:`/installation/index`. We validate the installation scripts with :doc:`/developers/tools` such as `Vagrant `_ and Docker (see the :doc:`containers` section). +If you are a sysadmin who likes to code, you may be interested in hacking on installation scripts mentioned in the :doc:`/installation/index`. + +If you are a Docker enthusiast, please check out the :doc:`/container/index`. Related Projects ---------------- diff --git a/doc/sphinx-guides/source/developers/make-data-count.rst b/doc/sphinx-guides/source/developers/make-data-count.rst index a3c0d10dc5e..8eaa5c0d7f8 100644 --- a/doc/sphinx-guides/source/developers/make-data-count.rst +++ b/doc/sphinx-guides/source/developers/make-data-count.rst @@ -30,15 +30,13 @@ Full Setup The recommended way to work on the Make Data Count feature is to spin up an EC2 instance that has both the Dataverse Software and Counter Processor installed. Go to the :doc:`deployment` page for details on how to spin up an EC2 instance and make sure that your Ansible file is configured to install Counter Processor before running the "create" script. -(Alternatively, you can try installing Counter Processor in Vagrant. :download:`setup-counter-processor.sh <../../../../scripts/vagrant/setup-counter-processor.sh>` might help you get it installed.) - After you have spun up your EC2 instance, set ``:MDCLogPath`` so that the Dataverse installation creates a log for Counter Processor to operate on. For more on this database setting, see the :doc:`/installation/config` section of the Installation Guide. Next you need to have the Dataverse installation add some entries to the log that Counter Processor will operate on. To do this, click on some published datasets and download some files.
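If you would rather generate those log entries from the command line than by clicking around in the UI, downloading a published file through the Data Access API should work as well; the file ID below is just a placeholder:

.. code-block:: bash

    export SERVER_URL=http://localhost:8080
    export FILE_ID=42
    # each download of a published file should add an entry to the MDC log
    curl -O -J "$SERVER_URL/api/access/datafile/$FILE_ID"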
-Next you should run Counter Processor to convert the log into a SUSHI report, which is in JSON format. Before running Counter Processor, you need to put a configuration file into place. As a starting point use :download:`counter-processor-config.yaml <../../../../scripts/vagrant/counter-processor-config.yaml>` and edit the file, paying particular attention to the following settings: +Next you should run Counter Processor to convert the log into a SUSHI report, which is in JSON format. Before running Counter Processor, you need to put a configuration file into place. As a starting point use :download:`counter-processor-config.yaml <../_static/developers/counter-processor-config.yaml>` and edit the file, paying particular attention to the following settings: -- ``log_name_pattern`` You might want something like ``/usr/local/payara5/glassfish/domains/domain1/logs/counter_(yyyy-mm-dd).log`` +- ``log_name_pattern`` You might want something like ``/usr/local/payara6/glassfish/domains/domain1/logs/counter_(yyyy-mm-dd).log`` - ``year_month`` You should probably set this to the current month. - ``output_file`` This needs to be a directory that the "dataverse" Unix user can read but that the "counter" user can write to. In dev, you can probably get away with "/tmp" as the directory. - ``platform`` Out of the box from Counter Processor this is set to ``Dash`` but this should be changed to match the name of your Dataverse installation. Examples are "Harvard Dataverse Repository" for Harvard University or "LibraData" for the University of Virginia. diff --git a/doc/sphinx-guides/source/developers/making-library-releases.rst b/doc/sphinx-guides/source/developers/making-library-releases.rst new file mode 100755 index 00000000000..63b6eeb1c2a --- /dev/null +++ b/doc/sphinx-guides/source/developers/making-library-releases.rst @@ -0,0 +1,93 @@ +======================= +Making Library Releases +======================= + +.. contents:: |toctitle| + :local: + +Introduction +------------ + +Note: See :doc:`making-releases` for Dataverse itself. + +We release Java libraries to Maven Central that are used by Dataverse (and perhaps `other `_ `software `_!): + +- https://central.sonatype.com/namespace/org.dataverse +- https://central.sonatype.com/namespace/io.gdcc + +We release JavaScript/TypeScript libraries to npm: + +- https://www.npmjs.com/package/@iqss/dataverse-design-system + +Maven Central (Java) +-------------------- + +From the perspective of the Maven Central, we are both `producers `_ because we publish/release libraries there and `consumers `_ because we pull down those libraries (and many others) when we build Dataverse. + +Releasing Existing Libraries to Maven Central +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you need to release an existing library, all the setup should be done already. The steps below assume that GitHub Actions are in place to do the heavy lifting for you, such as signing artifacts with GPG. + +Releasing a Snapshot Version to Maven Central +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Snapshot `_ releases are published automatically through GitHub Actions (e.g. through a `snapshot workflow `_ for the SWORD library) every time a pull request is merged (or the default branch, typically ``main``, is otherwise updated). + +That is to say, to make a snapshot release, you only need to get one or more commits into the default branch. 
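If you want to double-check that a snapshot actually went out, one option is to look at the artifact's metadata in the snapshot/staging repository. The example below uses the SWORD library coordinates mentioned elsewhere on this page; the exact URL pattern is an assumption and may need adjusting:

.. code-block:: bash

    curl -s "https://s01.oss.sonatype.org/content/groups/staging/io/gdcc/sword2-server/maven-metadata.xml" | grep -i snapshot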
+ +Releasing a Release (Non-Snapshot) Version to Maven Central +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +From a pom.xml it may not be apparent that snapshots like ``6.0-SNAPSHOT`` might be changing under your feet. Browsing the snapshot repository (e.g. our `UNF 6.0-SNAPSHOT `_), may reveal versions changing over time. To finalize the code and stop it from changing, we publish/release what Maven calls a "`release version `_". This will remove ``-SNAPSHOT`` from the version (through an ``mvn`` command). + +Non-snapshot releases (`release `_ versions) are published automatically through GitHub Actions (e.g. through a `release workflow `_), kicked off locally by an ``mvn`` command that invokes the `Maven Release Plugin `_. + +First, run a clean: + +``mvn release:clean`` + +Then run a prepare: + +``mvn release:prepare`` + +The prepare step is interactive. You will be prompted for the following information: + +- the release version (e.g. `2.0.0 `_) +- the git tag to create and push (e.g. `sword2-server-2.0.0 `_) +- the next development (snapshot) version (e.g. `2.0.1-SNAPSHOT `_) + +These examples are from the SWORD library. Below is what to expect from the interactive session. In many cases, you can just hit enter to accept the defaults. + +.. code-block:: bash + + [INFO] 5/17 prepare:map-release-versions + What is the release version for "SWORD v2 Common Server Library (forked)"? (sword2-server) 2.0.0: : + [INFO] 6/17 prepare:input-variables + What is the SCM release tag or label for "SWORD v2 Common Server Library (forked)"? (sword2-server) sword2-server-2.0.0: : + [INFO] 7/17 prepare:map-development-versions + What is the new development version for "SWORD v2 Common Server Library (forked)"? (sword2-server) 2.0.1-SNAPSHOT: : + [INFO] 8/17 prepare:rewrite-poms-for-release + +It can take some time for the jar to be visible on Maven Central. You can start by looking on the repo1 server, like this: https://repo1.maven.org/maven2/io/gdcc/sword2-server/2.0.0/ + +Don't bother putting the new version in a pom.xml until you see it on repo1. + +Note that the next snapshot release should be available as well, like this: https://s01.oss.sonatype.org/content/groups/staging/io/gdcc/sword2-server/2.0.1-SNAPSHOT/ + +Releasing a New Library to Maven Central +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +At a high level: + +- Use an existing pom.xml as a starting point. +- Use existing GitHub Actions workflows as a starting point. +- Create secrets in the new library's GitHub repo used by the workflow. +- If you need an entire new namespace, look at previous issues such as https://issues.sonatype.org/browse/OSSRH-94575 and https://issues.sonatype.org/browse/OSSRH-94577 + +npm (JavaScript/TypeScript) +--------------------------- + +Currently, publishing `@iqss/dataverse-design-system `_ to npm is done manually. We plan to automate this as part of https://github.com/IQSS/dataverse-frontend/issues/140 + +https://www.npmjs.com/package/js-dataverse is the previous 1.0 version of js-dataverse. No 1.x releases are planned.
We plan to publish 2.0 (used by the new frontend) as discussed in https://github.com/IQSS/dataverse-frontend/issues/13 \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 53fc11a5915..23c4773a06e 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -8,9 +8,11 @@ Making Releases Introduction ------------ +Note: See :doc:`making-library-releases` for how to publish our libraries to Maven Central. + See :doc:`version-control` for background on our branching strategy. -The steps below describe making both normal releases and hotfix releases. +The steps below describe making both regular releases and hotfix releases. Write Release Notes ------------------- @@ -43,49 +45,121 @@ Increment the version number to the milestone (e.g. 5.10.1) in the following two - modules/dataverse-parent/pom.xml -> ```` -> ```` (e.g. `pom.xml commit `_) - doc/sphinx-guides/source/conf.py (two places, e.g. `conf.py commit `_) -Add the version being released to the lists in the following two files: +Add the version being released to the lists in the following file: - doc/sphinx-guides/source/versions.rst (e.g. `versions.rst commit `_) Check in the Changes Above into a Release Branch and Merge It ------------------------------------------------------------- -For any ordinary release, make the changes above in the release branch you created, make a pull request, and merge it into the "develop" branch. Like usual, you can safely delete the branch after the merge is complete. +For a regular release, make the changes above in the release branch you created, make a pull request, and merge it into the "develop" branch. Like usual, you can safely delete the branch after the merge is complete. If you are making a hotfix release, make the pull request against the "master" branch. Do not delete the branch after merging because we will later merge it into the "develop" branch to pick up the hotfix. More on this later. -Either way, as usual, you should ensure that all tests are passing. Please note that you might need to bump the version in `jenkins.yml `_ in dataverse-ansible to get the tests to run. +Either way, as usual, you should ensure that all tests are passing. Please note that you will need to bump the version in `jenkins.yml `_ in dataverse-ansible to get the tests to pass. Consider doing this before making the pull request. Alternatively, you can bump jenkins.yml after making the pull request and re-run the Jenkins job to make sure tests pass. Merge "develop" into "master" ----------------------------- -Note: If you are making a hotfix release, the "develop" branch is not involved so you can skip this step. +If this is a regular (non-hotfix) release, create a pull request to merge the "develop" branch into the "master" branch using this "compare" link: https://github.com/IQSS/dataverse/compare/master...develop + +Once important tests have passed (compile, unit tests, etc.), merge the pull request. Don't worry about style tests failing such as for shell scripts. + +If this is a hotfix release, skip this whole "merge develop to master" step (the "develop" branch is not involved until later). 
+ +Build the Guides for the Release +-------------------------------- + +Go to https://jenkins.dataverse.org/job/guides.dataverse.org/ and make the following adjustments to the config: + +- Repository URL: ``https://github.com/IQSS/dataverse.git`` +- Branch Specifier (blank for 'any'): ``*/master`` +- ``VERSION`` (under "Build Steps"): ``5.10.1`` (for example) + +Click "Save" then "Build Now". -The "develop" branch should be merged into "master" before tagging. +Make sure the guides directory appears in the expected location such as https://guides.dataverse.org/en/5.10.1/ + +As described below, we'll soon point the "latest" symlink to that new directory. Create a Draft Release on GitHub -------------------------------- -Create a draft release at https://github.com/IQSS/dataverse/releases/new +Go to https://github.com/IQSS/dataverse/releases/new to start creating a draft release. + +- Under "Choose a tag" you will be creating a new tag. Have it start with a "v" such as ``v5.10.1``. Click "Create new tag on publish". +- Under "Target" go to "Recent Commits" and select the merge commit from when you merged ``develop`` into ``master`` above. This commit will appear in ``/api/info/version`` from a running installation. +- Under "Release title" use the same name as the tag such as ``v5.10.1``. +- In the description, copy and paste the content from the release notes .md file created in the "Write Release Notes" steps above. +- Click "Save draft" because we do not want to publish the release yet. + +At this point you can send around the draft release for any final feedback. Links to the guides for this release should be working now, since you build them above. + +Make corrections to the draft, if necessary. It will be out of sync with the .md file, but that's ok (`#7988 `_ is tracking this). + +.. _run-build-create-war: + +Run a Build to Create the War File +---------------------------------- + +ssh into the dataverse-internal server and undeploy the current war file. -The "tag version" and "title" should be the number of the milestone with a "v" in front (i.e. v5.10.1). +Go to https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ and make the following adjustments to the config: -Copy in the content from the .md file created in the "Write Release Notes" steps above. +- Repository URL: ``https://github.com/IQSS/dataverse.git`` +- Branch Specifier (blank for 'any'): ``*/master`` +- Execute shell: Update version in filenames to ``dataverse-5.10.1.war`` (for example) + +Click "Save" then "Build Now". + +The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"5.10.1","build":"907-b844672``). + +Note that the build number comes from script in an early build step... + +.. code-block:: bash + + COMMIT_SHA1=`echo $GIT_COMMIT | cut -c-7` + echo "build.number=${BUILD_NUMBER}-${COMMIT_SHA1}" > $WORKSPACE/src/main/java/BuildNumber.properties + +... but we can explore alternative methods of specifying the build number, as described in :ref:`auto-custom-build-number`. + +Build Installer (dvinstall.zip) +------------------------------- + +ssh into the dataverse-internal server and do the following: + +- In a git checkout of the dataverse source switch to the master branch and pull the latest. +- Copy the war file from the previous step to the ``target`` directory in the root of the repo (create it, if necessary). +- ``cd scripts/installer`` +- ``make`` + +A zip file called ``dvinstall.zip`` should be produced. 
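As a quick sanity check before uploading it to the draft release, you can list the contents of the zip (the exact contents vary from release to release):

.. code-block:: bash

    unzip -l dvinstall.zip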
Make Artifacts Available for Download ------------------------------------- Upload the following artifacts to the draft release you created: -- war file (``mvn package`` from Jenkins) -- installer (``cd scripts/installer && make``) -- other files as needed, such as updated Solr schema and config files +- the war file (e.g. ``dataverse-5.10.1.war``, from above) +- the installer (``dvinstall.zip``, from above) +- other files as needed: + + - updated Solr schema + - metadata block tsv files + - config files Publish the Release ------------------- Click the "Publish release" button. +Update Guides Link +------------------ + +"latest" at https://guides.dataverse.org/en/latest/ is a symlink to the directory with the latest release. That directory (e.g. ``5.10.1``) was put into place by the Jenkins "guides" job described above. + +ssh into the guides server and update the symlink to point to the latest release. + Close Milestone on GitHub and Create a New One ---------------------------------------------- @@ -115,7 +189,7 @@ For Hotfixes, Merge Hotfix Branch into "develop" and Rename SQL Scripts Note: this only applies to hotfixes! -We've merged the hotfix into the "master" branch but now we need the fixes (and version bump) in the "develop" branch. Make a new branch off the hotfix branch and create a pull request against develop. Merge conflicts are possible and this pull request should go through review and QA like normal. Afterwards it's fine to delete this branch and the hotfix brach that was merged into master. +We've merged the hotfix into the "master" branch but now we need the fixes (and version bump) in the "develop" branch. Make a new branch off the hotfix branch and create a pull request against develop. Merge conflicts are possible and this pull request should go through review and QA like normal. Afterwards it's fine to delete this branch and the hotfix branch that was merged into master. Because of the hotfix version, any SQL scripts in "develop" should be renamed (from "5.11.0" to "5.11.1" for example). To read more about our naming conventions for SQL scripts, see :doc:`sql-upgrade-scripts`. diff --git a/doc/sphinx-guides/source/developers/metadataexport.rst b/doc/sphinx-guides/source/developers/metadataexport.rst new file mode 100644 index 00000000000..7f7536fb7f8 --- /dev/null +++ b/doc/sphinx-guides/source/developers/metadataexport.rst @@ -0,0 +1,88 @@ +======================= +Metadata Export Formats +======================= + +.. contents:: |toctitle| + :local: + +Introduction +------------ + +Dataverse ships with a number of metadata export formats available for published datasets. A given metadata export +format may be available for user download (via the UI and API) and/or be available for use in Harvesting between +Dataverse instances. + +As of v5.14, Dataverse provides a mechanism for third-party developers to create new metadata Exporters that implement +new metadata formats or that replace existing formats. All the necessary dependencies are packaged in an interface JAR file +available from Maven Central. Developers can distribute their new Exporters as JAR files which can be dynamically loaded +into Dataverse instances - see :ref:`external-exporters`. Developers are encouraged to make their Exporter code available +via https://github.com/gdcc/dataverse-exporters (or minimally, to list their existence in the README there). + +Exporter Basics +--------------- + +New Exporters must implement the ``io.gdcc.spi.export.Exporter`` interface.
The interface includes a few methods for the Exporter +to provide Dataverse with the format it produces, a display name, format mimetype, and whether the format is for download +and/or harvesting use, etc. It also includes a main ``exportDataset(ExportDataProvider dataProvider, OutputStream outputStream)`` +method through which the Exporter receives metadata about the given dataset (via the ``ExportDataProvider``, described further +below) and writes its output (as an OutputStream). + +Exporters that create an XML format must implement the ``io.gdcc.spi.export.XMLExporter`` interface (which extends the Exporter +interface). XMLExporter adds a few methods through which the XMLExporter provides information to Dataverse about the XML +namespace and version being used. + +Exporters also need to use the ``@AutoService(Exporter.class)`` annotation, which makes the class discoverable as an Exporter implementation. + +The ``ExportDataProvider`` interface provides several methods through which your Exporter can receive dataset and file metadata +in various formats. Your exporter would parse the information in one or more of these inputs to retrieve the values needed to +generate the Exporter's output format. + +The most important methods/input formats are: + +- ``getDatasetJson()`` - metadata in the internal Dataverse JSON format used in the native API and available via the built-in JSON metadata export. +- ``getDatasetORE()`` - metadata in the OAI_ORE format available as a built-in metadata format and as used in Dataverse's BagIT-based Archiving capability. +- ``getDatasetFileDetails`` - detailed file-level metadata for ingested tabular files. + +The first two of these provide ~complete metadata about the dataset along with the metadata common to all files. This includes all metadata +entries from all metadata blocks, PIDs, tags, Licenses and custom terms, etc. Almost all built-in exporters today use the JSON input. +The newer OAI_ORE export, which is JSON-LD-based, provides a flatter structure and references metadata terms by their external vocabulary ids +(e.g. http://purl.org/dc/terms/title) which may make it a preferable starting point in some cases. + +The last method above provides a new JSON-formatted serialization of the variable-level file metadata Dataverse generates during ingest of tabular files. +This information has only been included in the built-in DDI export, as the content of a ``dataDscr`` element. (Hence inspecting the edu.harvard.iq.dataverse.export.DDIExporter and related classes would be a good way to explore how the JSON is structured.) + +The interface also provides + +- ``getDatasetSchemaDotOrg();`` and +- ``getDataCiteXml();``. + +These provide subsets of metadata in the indicated formats. They may be useful starting points if your exporter will, for example, only add one or two additional fields to the given format. + +If an Exporter cannot create a requested metadata format for some reason, it should throw an ``io.gdcc.spi.export.ExportException``. + +Building an Exporter +-------------------- + +The example at https://github.com/gdcc/dataverse-exporters provides a Maven pom.xml file suitable for building an Exporter JAR file and that repository provides additional development guidance.
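Once you have a pom.xml based on that example (see also the dependency list just below), the local build-and-test loop might look roughly like this sketch. The jar name and the exporters directory are placeholders; the directory must match whatever location your Dataverse installation is configured to load external exporters from (see :ref:`external-exporters`):

.. code-block:: bash

    mvn package
    # copy the resulting jar to the directory Dataverse scans for external exporters (placeholder path)
    cp target/my-exporter-1.0.jar /path/to/exporters/
    # ask Dataverse to regenerate exports so the new format shows up for existing datasets
    curl http://localhost:8080/api/admin/metadata/reExportAll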
+ +There are four dependencies needed to build an Exporter: + +- ``io.gdcc dataverse-spi`` library containing the interfaces discussed above and the ExportException class +- ``com.google.auto.service auto-service``, which provides the @AutoService annotation +- ``jakarta.json jakarta.json-api`` for JSON classes +- ``jakarta.ws.rs jakarta.ws.rs-api``, which provides a MediaType enumeration for specifying mime types. + +Specifying a Prerequisite Export +-------------------------------- + +An advanced feature of the Exporter mechanism allows a new Exporter to specify that it requires, as input, +the output of another Exporter. An example of this is the built-in HTMLExporter which requires the output +of the DDI XML Exporter to produce an HTML document with the same DDI content. + +This is configured by providing the metadata format name via the ``Exporter.getPrerequisiteFormatName()`` method. +When this method returns a non-empty format name, Dataverse will provide the requested format to the Exporter via +the ``ExportDataProvider.getPrerequisiteInputStream()`` method. + +Developers and administrators deploying Exporters using this mechanism should be aware that, since metadata formats +can be changed by other Exporters, the InputStream received may not hold the expected metadata. Developers should clearly +document their compatibility with the built-in or third-party Exporters they support as prerequisites. diff --git a/doc/sphinx-guides/source/developers/remote-users.rst b/doc/sphinx-guides/source/developers/remote-users.rst index a5e51aa5e54..d8f90e9257f 100755 --- a/doc/sphinx-guides/source/developers/remote-users.rst +++ b/doc/sphinx-guides/source/developers/remote-users.rst @@ -1,6 +1,6 @@ -==================== -Shibboleth and OAuth -==================== +========================== +Shibboleth, OAuth and OIDC +========================== .. contents:: |toctitle| :local: @@ -30,4 +30,40 @@ Now when you go to http://localhost:8080/oauth2/firstLogin.xhtml you should be p ---- +.. _oidc-dev: + +OpenID Connect (OIDC) +--------------------- + +STOP! ``oidc-keycloak-auth-provider.json`` was changed from http://localhost:8090 to http://keycloak.mydomain.com:8090 to test :ref:`bearer-tokens`. In addition, ``docker-compose-dev.yml`` in the root of the repo was updated to start up Keycloak. To use these, you should add ``127.0.0.1 keycloak.mydomain.com`` to your ``/etc/hosts`` file. If you'd like to use the docker compose as described below (``conf/keycloak/docker-compose.yml``), you should revert the change to ``oidc-keycloak-auth-provider.json``. + +If you are working on the OpenID Connect (OIDC) user authentication flow, you do not need to connect to a remote provider (as explained in :doc:`/installation/oidc`) to test this feature. Instead, you can use the available configuration that allows you to run a test Keycloak OIDC identity management service locally through a Docker container. + +(Please note! The client secret (``ss6gE8mODCDfqesQaSG3gwUwZqZt547E``) is hard-coded in ``oidc-realm.json`` and ``oidc-keycloak-auth-provider.json``. Do not use this config in production! This is only for developers.) + +You can find this configuration in ``conf/keycloak``. There are two options available in this directory to run a Keycloak container: bash script or docker-compose.
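If you prefer the docker-compose option, something along these lines should work (assuming a recent Docker with the ``compose`` plugin; older setups would use ``docker-compose up`` instead):

.. code-block:: bash

    cd conf/keycloak
    docker compose up -d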
+ +To run the container via bash script, execute the following command (positioned in ``conf/keycloak``): + +``./run-keycloak.sh`` + +The script will create a Keycloak container or restart it if the container was already created and stopped. Once the script is executed, Keycloak should be accessible from http://localhost:8090/ + +Now load the configuration defined in ``oidc-keycloak-auth-provider.json`` into your Dataverse installation to enable Keycloak as an authentication provider. + +``curl -X POST -H 'Content-type: application/json' --upload-file oidc-keycloak-auth-provider.json http://localhost:8080/api/admin/authenticationProviders`` + +You should see the new provider, called "OIDC-Keycloak", under "Other options" on the Log In page. + +You should be able to log into Keycloak with the following credentials: + +- username: kcuser +- password: kcpassword + +In case you want to stop and remove the Keycloak container, just run the other available bash script: + +``./rm-keycloak.sh`` + +---- + Previous: :doc:`unf/index` | Next: :doc:`geospatial` diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 3dc73ce6a0c..4d323455d28 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -122,7 +122,7 @@ To add multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: * "description" - A description of the file * "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset @@ -154,7 +154,7 @@ Replacing an existing file in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must also include values for: +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. 
For direct uploads, the jsonData object must include values for: * "storageIdentifier" - String, as specified in prior calls * "fileName" - String @@ -172,9 +172,107 @@ Note that the API call does not validate that the file matches the hash value su export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export FILE_IDENTIFIER=5072 - export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'forceReplace':'true', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +..
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + +.. code-block:: + + { + "status": "OK", + "data": { + "Files": [ + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", + "fileDetails": { + "fileToReplaceId": 10, + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "fileName": "file1.Bin", + "mimeType": "application/octet-stream", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } + } + }, + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "successMessage": "Replaced successfully in the dataset", + "fileDetails": { + "description": "My description.", + "label": "file2.txt", + "restricted": false, + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "dataFile": { + "persistentId": "", + "pidURL": "", + "filename": "file2.txt", + "contentType": "text/plain", + "filesize": 2407, + "description": "My description.", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "rootDataFileId": 11, + "previousDataFileId": 11, + "checksum": { + "type": "SHA-1", + "value": "123789" + } + } + } + } + ], + "Result": { + "Total number of files": 2, + "Number of files successfully replaced": 1 + } + } + } + + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. diff --git a/doc/sphinx-guides/source/developers/security.rst b/doc/sphinx-guides/source/developers/security.rst new file mode 100755 index 00000000000..09b80a4c840 --- /dev/null +++ b/doc/sphinx-guides/source/developers/security.rst @@ -0,0 +1,34 @@ +======== +Security +======== + +This section describes security practices and procedures for the Dataverse team. + +.. 
contents:: |toctitle| + :local: + +Intake of Security Issues +------------------------- + +As described under :ref:`reporting-security-issues`, we encourage the community to email security@dataverse.org if they have any security concerns. These emails go into our private ticket tracker (RT_). + +.. _RT: https://help.hmdc.harvard.edu + +We use a private GitHub issue tracker at https://github.com/IQSS/dataverse-security/issues for security issues. + +Sending Security Notices +------------------------ + +When drafting the security notice, it might be helpful to look at `previous examples`_. + +.. _previous examples: https://drive.google.com/drive/folders/0B_qMYwdHFZghaDZIU2hWQnBDZVE?resourcekey=0-SYjuhCohAIM7_pmysVc3Xg&usp=sharing + +Gather email addresses from the following sources (these are also described under :ref:`ongoing-security` in the Installation Guide): + +- "contact_email" in the `public installation spreadsheet`_ +- "Other Security Contacts" in the `private installation spreadsheet`_ + +Once you have the emails, include them as bcc. + +.. _public installation spreadsheet: https://docs.google.com/spreadsheets/d/1bfsw7gnHlHerLXuk7YprUT68liHfcaMxs1rFciA-mEo/edit#gid=0 +.. _private installation spreadsheet: https://docs.google.com/spreadsheets/d/1EWDwsj6eptQ7nEr-loLvdU7I6Tm2ljAplfNSVWR42i0/edit?usp=sharing diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index 4b3d5fd0a55..acaeccf4f23 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -47,12 +47,14 @@ Writing Unit Tests with JUnit ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We are aware that there are newer testing tools such as TestNG, but we use `JUnit `_ because it's tried and true. -We support both (legacy) JUnit 4.x tests (forming the majority of our tests) and -newer JUnit 5 based testing. +We support JUnit 5 based testing and require new tests written with it. +(Since Dataverse 6.0, we migrated all of our tests formerly based on JUnit 4.) -NOTE: When adding new tests, you should give JUnit 5 a go instead of adding more dependencies to JUnit 4.x. - -If writing tests is new to you, poke around existing unit tests which all end in ``Test.java`` and live under ``src/test``. Each test is annotated with ``@Test`` and should have at least one assertion which specifies the expected result. In Netbeans, you can run all the tests in it by clicking "Run" -> "Test File". From the test file, you should be able to navigate to the code that's being tested by right-clicking on the file and clicking "Navigate" -> "Go to Test/Tested class". Likewise, from the code, you should be able to use the same "Navigate" menu to go to the tests. +If writing tests is new to you, poke around existing unit tests which all end in ``Test.java`` and live under ``src/test``. +Each test is annotated with ``@Test`` and should have at least one assertion which specifies the expected result. +In Netbeans, you can run all the tests in it by clicking "Run" -> "Test File". +From the test file, you should be able to navigate to the code that's being tested by right-clicking on the file and clicking "Navigate" -> "Go to Test/Tested class". +Likewise, from the code, you should be able to use the same "Navigate" menu to go to the tests. NOTE: Please remember when writing tests checking possibly localized outputs to check against ``en_US.UTF-8`` and ``UTC`` l10n strings! 
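If you prefer the command line to Netbeans, Maven's Surefire plugin can run a single test class for you; the class name below is only an example (any class under ``src/test`` ending in ``Test.java`` will do):

.. code-block:: bash

    mvn test -Dtest=PrivateUrlUtilTest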
@@ -62,22 +64,24 @@ Refactoring Code to Make It Unit-Testable Existing code is not necessarily written in a way that lends itself to easy testing. Generally speaking, it is difficult to write unit tests for both JSF "backing" beans (which end in ``Page.java``) and "service" beans (which end in ``Service.java``) because they require the database to be running in order to test them. If service beans can be exercised via API they can be tested with integration tests (described below) but a good technique for making the logic testable is to move code to "util beans" (which end in ``Util.java``) that operate on Plain Old Java Objects (POJOs). ``PrivateUrlUtil.java`` is a good example of moving logic from ``PrivateUrlServiceBean.java`` to a "util" bean to make the code testable. -Parameterized Tests and JUnit Theories -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Parameterized Tests +^^^^^^^^^^^^^^^^^^^ + Often times you will want to test a method multiple times with similar values. In order to avoid test bloat (writing a test for every data combination), JUnit offers Data-driven unit tests. This allows a test to be run for each set of defined data values. -JUnit 4 uses ``Parameterized.class`` and ``Theories.class``. For reference, take a look at issue https://github.com/IQSS/dataverse/issues/5619. - -JUnit 5 doesn't offer theories (see `jqwik `_ for this), but -greatly extended parameterized testing. Some guidance how to write those: +JUnit 5 offers great parameterized testing. Some guidance on how to write those: - https://junit.org/junit5/docs/current/user-guide/#writing-tests-parameterized-tests - https://www.baeldung.com/parameterized-tests-junit-5 - https://blog.codefx.org/libraries/junit-5-parameterized-tests/ -- See also some examples in our codebase. +- See also many examples in our codebase. + +Note that JUnit 5 also offers support for custom test parameter resolvers. This enables keeping tests cleaner, +as preparation might happen within some extension and the test code is more focused on the actual testing. +See https://junit.org/junit5/docs/current/user-guide/#extensions-parameter-resolution for more information. JUnit 5 Test Helper Extensions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -116,11 +120,14 @@ In addition, there is a writeup on "The Testable Command" at https://github.com/ Running Non-Essential (Excluded) Unit Tests ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You should be aware that some unit tests have been deemed "non-essential" and have been annotated with ``@Category(NonEssentialTests.class)`` and are excluded from the "dev" Maven profile, which is the default profile. All unit tests (that have not been annotated with ``@Ignore``), including these non-essential tests, are run from continuous integration systems such as Jenkins and GitHub Actions with the following ``mvn`` command that invokes a non-default profile: +You should be aware that some unit tests have been deemed "non-essential" and have been annotated with ``@Tag(Tags.NOT_ESSENTIAL_UNITTESTS)`` and are excluded from the "dev" Maven profile, which is the default profile. +All unit tests (that have not been annotated with ``@Disabled``), including these non-essential tests, are run from continuous integration systems such as Jenkins and GitHub Actions with the following ``mvn`` command that invokes a non-default profile: ``mvn test -P all-unit-tests`` -Generally speaking, unit tests have been flagged as non-essential because they are slow or because they require an Internet connection.
You should not feel obligated to run these tests continuously but you can use the ``mvn`` command above to run them. To iterate on the unit test in Netbeans and execute it with "Run -> Test File", you must temporarily comment out the annotation flagging the test as non-essential. +Generally speaking, unit tests have been flagged as non-essential because they are slow or because they require an Internet connection. +You should not feel obligated to run these tests continuously but you can use the ``mvn`` command above to run them. +To iterate on the unit test in Netbeans and execute it with "Run -> Test File", you must temporarily comment out the annotation flagging the test as non-essential. Integration Tests ----------------- @@ -173,7 +180,7 @@ Finally, run the script: Running the full API test suite using Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To run the full suite of integration tests on your laptop, we recommend using the "all in one" Docker configuration described in ``conf/docker-aio/readme.md`` in the root of the repo. +To run the full suite of integration tests on your laptop, we recommend running Dataverse and its dependencies in Docker, as explained in the :doc:`/container/dev-usage` section of the Container Guide. Alternatively, you can run tests against the app server running on your laptop by following the "getting set up" steps below. @@ -303,9 +310,9 @@ To run these tests, simply call out to Maven: Measuring Coverage of Integration Tests --------------------------------------- -Measuring the code coverage of integration tests with Jacoco requires several steps. In order to make these steps clear we'll use "/usr/local/payara5" as the Payara directory and "dataverse" as the Payara Unix user. +Measuring the code coverage of integration tests with Jacoco requires several steps. In order to make these steps clear we'll use "/usr/local/payara6" as the Payara directory and "dataverse" as the Payara Unix user. -Please note that this was tested under Glassfish 4 but it is hoped that the same steps will work with Payara 5. +Please note that this was tested under Glassfish 4 but it is hoped that the same steps will work with Payara. Add jacocoagent.jar to Payara ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -324,9 +331,9 @@ Note that we are running the following commands as the user "dataverse". In short, cd local/jacoco-0.8.1 wget https://github.com/jacoco/jacoco/releases/download/v0.8.1/jacoco-0.8.1.zip unzip jacoco-0.8.1.zip - /usr/local/payara5/bin/asadmin stop-domain - cp /home/dataverse/local/jacoco-0.8.1/lib/jacocoagent.jar /usr/local/payara5/glassfish/lib - /usr/local/payara5/bin/asadmin start-domain + /usr/local/payara6/bin/asadmin stop-domain + cp /home/dataverse/local/jacoco-0.8.1/lib/jacocoagent.jar /usr/local/payara6/glassfish/lib + /usr/local/payara6/bin/asadmin start-domain Add jacococli.jar to the WAR File ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -349,21 +356,21 @@ Run this as the "dataverse" user. .. code-block:: bash - /usr/local/payara5/bin/asadmin deploy dataverse-jacoco.war + /usr/local/payara6/bin/asadmin deploy dataverse-jacoco.war -Note that after deployment the file "/usr/local/payara5/glassfish/domains/domain1/config/jacoco.exec" exists and is empty. +Note that after deployment the file "/usr/local/payara6/glassfish/domains/domain1/config/jacoco.exec" exists and is empty. Run Integration Tests ~~~~~~~~~~~~~~~~~~~~~ Note that even though you see "docker-aio" in the command below, we assume you are not necessarily running the test suite within Docker.
(Some day we'll probably move this script to another directory.) For this reason, we pass the URL with the normal port (8080) that app servers run on to the ``run-test-suite.sh`` script. -Note that "/usr/local/payara5/glassfish/domains/domain1/config/jacoco.exec" will become non-empty after you stop and start Payara. You must stop and start Payara before every run of the integration test suite. +Note that "/usr/local/payara6/glassfish/domains/domain1/config/jacoco.exec" will become non-empty after you stop and start Payara. You must stop and start Payara before every run of the integration test suite. .. code-block:: bash - /usr/local/payara5/bin/asadmin stop-domain - /usr/local/payara5/bin/asadmin start-domain + /usr/local/payara6/bin/asadmin stop-domain + /usr/local/payara6/bin/asadmin start-domain git clone https://github.com/IQSS/dataverse.git cd dataverse conf/docker-aio/run-test-suite.sh http://localhost:8080 @@ -378,7 +385,7 @@ Run these commands as the "dataverse" user. The ``cd dataverse`` means that you .. code-block:: bash cd dataverse - java -jar /home/dataverse/local/jacoco-0.8.1/lib/jacococli.jar report --classfiles target/classes --sourcefiles src/main/java --html target/coverage-it/ /usr/local/payara5/glassfish/domains/domain1/config/jacoco.exec + java -jar /home/dataverse/local/jacoco-0.8.1/lib/jacococli.jar report --classfiles target/classes --sourcefiles src/main/java --html target/coverage-it/ /usr/local/payara6/glassfish/domains/domain1/config/jacoco.exec Read Code Coverage Report ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -499,7 +506,6 @@ Browser-Based Testing Installation Testing ~~~~~~~~~~~~~~~~~~~~ -- Run `vagrant up` on a server to test the installer - Work with @donsizemore to automate testing of https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible Future Work on Load/Performance Testing diff --git a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index 3fff3e76ea8..e1ee40cafa5 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -19,20 +19,20 @@ Undeploy the war File from the Dataverse Software Installation Script Because the initial deployment of the war file was done outside of Netbeans by the Dataverse Software installation script, it's a good idea to undeploy that war file to give Netbeans a clean slate to work with. -Assuming you installed Payara in ``/usr/local/payara5``, run the following ``asadmin`` command to see the version of the Dataverse Software that the Dataverse Software installation script deployed: +Assuming you installed Payara in ``/usr/local/payara6``, run the following ``asadmin`` command to see the version of the Dataverse Software that the Dataverse Software installation script deployed: -``/usr/local/payara5/bin/asadmin list-applications`` +``/usr/local/payara6/bin/asadmin list-applications`` You will probably see something like ``dataverse-5.0 `` as the output. To undeploy, use whichever version you see like this: -``/usr/local/payara5/bin/asadmin undeploy dataverse-5.0`` +``/usr/local/payara6/bin/asadmin undeploy dataverse-5.0`` Now that Payara doesn't have anything deployed, we can proceed with getting Netbeans set up to deploy the code. Add Payara as a Server in Netbeans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Launch Netbeans and click "Tools" and then "Servers". Click "Add Server" and select "Payara Server" and set the installation location to ``/usr/local/payara5``. The defaults are fine so you can click "Next" and "Finish". 
+Launch Netbeans and click "Tools" and then "Servers". Click "Add Server" and select "Payara Server" and set the installation location to ``/usr/local/payara6``. The defaults are fine so you can click "Next" and "Finish". Please note that if you are on a Mac, Netbeans may be unable to start Payara due to proxy settings in Netbeans. Go to the "General" tab in Netbeans preferences and click "Test connection" to see if you are affected. If you get a green checkmark, you're all set. If you get a red exclamation mark, change "Proxy Settings" to "No Proxy" and retest. A more complicated answer having to do with changing network settings is available at https://discussions.apple.com/thread/7680039?answerId=30715103022#30715103022 and the bug is also described at https://netbeans.org/bugzilla/show_bug.cgi?id=268076 @@ -58,6 +58,8 @@ From the root of the git repo, run the following command to set the build number This should update or place a file at ``src/main/java/BuildNumber.properties``. +(See also :ref:`auto-custom-build-number` for other ways of changing the build number.) + Then, from Netbeans, click "Run" and then "Clean and Build Project (dataverse)". After this completes successfully, click "Run" and then "Run Project (dataverse)" Confirm the Change Was Deployed @@ -115,7 +117,7 @@ Deploying With ``asadmin`` Sometimes you want to deploy code without using Netbeans or from the command line on a server you have ssh'ed into. -For the ``asadmin`` commands below, we assume you have already changed directories to ``/usr/local/payara5/glassfish/bin`` or wherever you have installed Payara. +For the ``asadmin`` commands below, we assume you have already changed directories to ``/usr/local/payara6/glassfish/bin`` or wherever you have installed Payara. There are four steps to this process: @@ -164,6 +166,8 @@ Git on Mac On a Mac, you won't have git installed unless you have "Command Line Developer Tools" installed but running ``git clone`` for the first time will prompt you to install them. +.. _auto-custom-build-number: + Automation of Custom Build Number on Webpage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -173,6 +177,15 @@ commit id in your test deployment webpages on the bottom right corner next to th When you prefer manual updates, there is another script, see above: :ref:`custom_build_num_script`. +An alternative to that is using *MicroProfile Config* and set the option ``dataverse.build`` via a system property, +environment variable (``DATAVERSE_BUILD``) or `one of the other config sources +`__. + +You could even override the version itself with the option ``dataverse.version`` in the same way, which is usually +picked up from a build time source. + +See also discussion of version numbers in :ref:`run-build-create-war`. + Sample Data ----------- diff --git a/doc/sphinx-guides/source/developers/tools.rst b/doc/sphinx-guides/source/developers/tools.rst index cbd27d6e8d2..a21becd14cf 100755 --- a/doc/sphinx-guides/source/developers/tools.rst +++ b/doc/sphinx-guides/source/developers/tools.rst @@ -25,21 +25,6 @@ Maven With Maven installed you can run ``mvn package`` and ``mvn test`` from the command line. It can be downloaded from https://maven.apache.org -.. _vagrant: - -Vagrant -+++++++ - -Vagrant allows you to spin up a virtual machine running the Dataverse Software on your development workstation. You'll need to install Vagrant from https://www.vagrantup.com and VirtualBox from https://www.virtualbox.org. 
- -We assume you have already cloned the repo from https://github.com/IQSS/dataverse as explained in the :doc:`/developers/dev-environment` section. - -From the root of the git repo (where the ``Vagrantfile`` is), run ``vagrant up`` and eventually you should be able to reach a Dataverse installation at http://localhost:8888 (the ``forwarded_port`` indicated in the ``Vagrantfile``). - -Please note that running ``vagrant up`` for the first time should run the ``downloads/download.sh`` script for you to download required software such as an app server, Solr, etc. However, these dependencies change over time so it's a place to look if ``vagrant up`` was working but later fails. - -On Windows if you see an error like ``/usr/bin/perl^M: bad interpreter`` you might need to run ``dos2unix`` on the installation scripts. - PlantUML ++++++++ diff --git a/doc/sphinx-guides/source/developers/troubleshooting.rst b/doc/sphinx-guides/source/developers/troubleshooting.rst index 0463a68d8c8..832785f9860 100755 --- a/doc/sphinx-guides/source/developers/troubleshooting.rst +++ b/doc/sphinx-guides/source/developers/troubleshooting.rst @@ -41,7 +41,7 @@ This command helps verify what host your domain is using to send mail. Even if i 2. From the left-side panel, select **JavaMail Sessions** 3. You should see one session named **mail/notifyMailSession** -- click on that. -From this window you can modify certain fields of your Dataverse installation's notifyMailSession, which is the JavaMail session for outgoing system email (such as on user signup or data publication). Two of the most important fields we need are: +From this window you can modify certain fields of your Dataverse installation's notifyMailSession, which is the JavaMail session for outgoing system email (such as on user sign up or data publication). Two of the most important fields we need are: - **Mail Host:** The DNS name of the default mail server (e.g. smtp.gmail.com) - **Default User:** The username provided to your Mail Host when you connect to it (e.g. johndoe@gmail.com) diff --git a/doc/sphinx-guides/source/developers/windows.rst b/doc/sphinx-guides/source/developers/windows.rst index 038f3497495..53578fe980c 100755 --- a/doc/sphinx-guides/source/developers/windows.rst +++ b/doc/sphinx-guides/source/developers/windows.rst @@ -2,84 +2,17 @@ Windows Development =================== -Development on Windows is not well supported, unfortunately. You will have a much easier time if you develop on Mac or Linux as described under :doc:`dev-environment` section. - -Vagrant commands appear below and were tested on Windows 10 but the Vagrant environment is currently broken. Please see https://github.com/IQSS/dataverse/issues/6849 +Historically, development on Windows is `not well supported `_ but as of 2023 a container-based approach is recommended. .. contents:: |toctitle| :local: -Running the Dataverse Software in Vagrant ------------------------------------------ - -Install Vagrant -~~~~~~~~~~~~~~~ - -Download and install Vagrant from https://www.vagrantup.com - -Vagrant advises you to reboot but let's install VirtualBox first. - -Install VirtualBox -~~~~~~~~~~~~~~~~~~ - -Download and install VirtualBox from https://www.virtualbox.org - -Note that we saw an error saying "Oracle VM VirtualBox 5.2.8 Setup Wizard ended prematurely" but then we re-ran the installer and it seemed to work. - -Reboot -~~~~~~ - -Again, Vagrant asks you to reboot, so go ahead. 
- -Install Git -~~~~~~~~~~~ - -Download and install Git from https://git-scm.com - -Configure Git to use Unix Line Endings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Launch Git Bash and run the following commands: - -``git config --global core.autocrlf input`` - -Pro tip: Use Shift-Insert to paste into Git Bash. - -See also https://help.github.com/articles/dealing-with-line-endings/ - -If you skip this step you are likely to see the following error when you run ``vagrant up``. - -``/tmp/vagrant-shell: ./install: /usr/bin/perl^M: bad interpreter: No such file or directory`` - -Clone Git Repo -~~~~~~~~~~~~~~ - -From Git Bash, run the following command: - -``git clone https://github.com/IQSS/dataverse.git`` - -vagrant up -~~~~~~~~~~ - -From Git Bash, run the following commands: - -``cd dataverse`` - -The ``dataverse`` directory you changed is the one you just cloned. Vagrant will operate on a file called ``Vagrantfile``. - -``vagrant up`` - -After a long while you hopefully will have a Dataverse installation available at http://localhost:8888 - -Improving Windows Support -------------------------- - -Windows Subsystem for Linux -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Running Dataverse in Docker on Windows +-------------------------------------- -We have been unable to get Windows Subsystem for Linux (WSL) to work. We tried following the steps at https://docs.microsoft.com/en-us/windows/wsl/install-win10 but the "Get" button was greyed out when we went to download Ubuntu. +See the `post `_ by Akio Sone for additional details, but please observe the following: -Discussion and Feedback -~~~~~~~~~~~~~~~~~~~~~~~ +- In git, the line-ending setting should be set to always LF (line feed, ``core.autocrlf=input``) +- You must have jq installed: https://jqlang.github.io/jq/download/ -For more discussion of Windows support for Dataverse Software development see our community list thread `"Do you want to develop on Windows?" `_ We would be happy to incorporate feedback from Windows developers into this page. The :doc:`documentation` section describes how. +Once the above is all set, you can move on to :doc:`/container/dev-usage` in the Container Guide (a short sketch of the prerequisite commands follows below). diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index f7e81756e5b..f6eda53d718 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -6,7 +6,7 @@ Dataverse Documentation v. |version| ==================================== -These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous versions, :ref:`guides_versions` has a list of all available versions. +These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous or future versions, :ref:`guides_versions` has a list of all available versions. .. toctree:: :glob: @@ -18,6 +18,7 @@ These documentation guides are for the |version| version of Dataverse. To find g api/index installation/index developers/index + container/index style/index How the Guides Are Organized @@ -25,11 +26,13 @@ How the Guides Are Organized The guides are documentation that explain how to use Dataverse, which are divided into the following sections: User Guide, -Installation Guide, Developer Guide, API Guide and Style Guide. The User Guide is further divided into primary activities: finding & using +Installation Guide, Developer Guide, API Guide, Style Guide and Container Guide.
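Returning to the Windows prerequisites listed above, here is a short sketch of the corresponding commands, shown only as an illustration (run them from Git Bash or a similar shell):

.. code-block:: shell

   # Have git check out files with LF line endings (core.autocrlf=input)
   git config --global core.autocrlf input

   # Confirm that jq is installed and on your PATH
   jq --version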
+The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Developer Guide contains instructions for +own Dataverse. The Container Guide gives information on how to deploy Dataverse with containers. +The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. Finally, the API Guide is for Developers that work on other applications and are interested in connecting with Dataverse through our APIs. @@ -67,12 +70,10 @@ The support email address is `support@dataverse.org `__ or use `GitHub pull requests `__, if you have some code, scripts or documentation that you'd like to share. -If you have a **security issue** to report, please email `security@dataverse.org `__. +If you have a **security issue** to report, please email `security@dataverse.org `__. See also :ref:`reporting-security-issues`. Indices and Tables ------------------ -* :ref:`genindex` -* :ref:`modindex` * :ref:`search` diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 4f06ed37d01..87f2a4fd0ab 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -13,8 +13,8 @@ Multiple App Servers You should be conscious of the following when running multiple app servers. - Only one app server can be the dedicated timer server, as explained in the :doc:`/admin/timers` section of the Admin Guide. -- When users upload a logo or footer for their Dataverse collection using the "theme" feature described in the :doc:`/user/dataverse-management` section of the User Guide, these logos are stored only on the app server the user happened to be on when uploading the logo. By default these logos and footers are written to the directory ``/usr/local/payara5/glassfish/domains/domain1/docroot/logos``. -- When a sitemap is created by an app server it is written to the filesystem of just that app server. By default the sitemap is written to the directory ``/usr/local/payara5/glassfish/domains/domain1/docroot/sitemap``. +- When users upload a logo or footer for their Dataverse collection using the "theme" feature described in the :doc:`/user/dataverse-management` section of the User Guide, these logos are stored only on the app server the user happened to be on when uploading the logo. By default these logos and footers are written to the directory ``/usr/local/payara6/glassfish/domains/domain1/docroot/logos``. +- When a sitemap is created by an app server it is written to the filesystem of just that app server. By default the sitemap is written to the directory ``/usr/local/payara6/glassfish/domains/domain1/docroot/sitemap``. - If Make Data Count is used, its raw logs must be copied from each app server to single instance of Counter Processor. See also :ref:`:MDCLogPath` section in the Configuration section of this guide and the :doc:`/admin/make-data-count` section of the Admin Guide. - Dataset draft version logging occurs separately on each app server. See :ref:`edit-draft-versions-logging` section in Monitoring of the Admin Guide for details. - Password aliases (``dataverse.db.password``, etc.) are stored per app server. 
@@ -115,3 +115,29 @@ To activate in your Dataverse installation:: curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl +.. _external-exporters: + +Installing External Metadata Exporters +++++++++++++++++++++++++++++++++++++++ + +As of Dataverse Software 5.14, Dataverse supports the use of external Exporters as a way to add additional metadata +export formats to Dataverse or replace the built-in formats. This should be considered an **experimental** capability +in that the mechanism is expected to evolve and using it may require additional effort when upgrading to new Dataverse +versions. + +This capability is enabled by specifying a directory in which Dataverse should look for third-party Exporters. See +:ref:`dataverse.spi.exporters.directory`. + +See :doc:`/developers/metadataexport` for details about how to develop new Exporters. + +A minimal example Exporter is available at https://github.com/gdcc/dataverse-exporters. The community is encouraged to +add additional exporters (and/or links to exporters elsewhere) in this repository. Once you have downloaded the +dataverse-spi-export-examples-1.0.0.jar (or other exporter jar), installed it in the directory specified above, and +restarted your Payara server, the new exporter should be available. + +The example dataverse-spi-export-examples-1.0.0.jar replaces the ``JSON`` export with a ``MyJSON in `` version +that just wraps the existing JSON export object in a new JSON object with the key ``inputJson`` containing the original +JSON. (Note that the ``MyJSON in `` label will appear in the dataset Metadata Export download menu immediately, +but the content for already published datasets will only be updated after you delete the cached exports and/or use a +reExport API call (see :ref:`batch-exports-through-the-api`).) + diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2de9d5702f..f9fe74afc7c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -34,6 +34,12 @@ It is very important to keep the block in place for the "admin" endpoint, and to It's also possible to prevent file uploads via API by adjusting the :ref:`:UploadMethods` database setting. +If you are using a load balancer or a reverse proxy, there are some additional considerations. If no additional configuration is made and the upstream is configured to redirect to localhost, the API will be accessible from the outside, as your installation will register localhost as the origin for any requests to the endpoints "admin" and "builtin-users". To prevent this, you have two options: + +- If your upstream is configured to redirect to localhost, you will need to set the :ref:`JVM option ` to one of the following values ``%client.name% %datetime% %request% %status% %response.length% %header.referer% %header.x-forwarded-for%`` and configure the chosen header on the load balancer side so that it is populated with the client IP address. + +- Another solution is to set the upstream to the client IP address. In this case no further configuration is needed. + Forcing HTTPS +++++++++++++ @@ -101,6 +107,31 @@ Password complexity rules for "builtin" accounts can be adjusted with a variety - :ref:`:PVGoodStrength` - :ref:`:PVCustomPasswordResetAlertMessage` +..
_ongoing-security: + +Ongoing Security of Your Installation ++++++++++++++++++++++++++++++++++++++ + +Like any application, you should keep up-to-date with patches to both the Dataverse software and the platform (usually Linux) it runs on. Dataverse releases are announced on the dataverse-community_ mailing list, the Dataverse blog_, and in chat.dataverse.org_. + +.. _dataverse-community: https://groups.google.com/g/dataverse-community +.. _blog: https://dataverse.org/blog +.. _chat.dataverse.org: https://chat.dataverse.org + +In addition to these public channels, you can subscribe to receive security notices via email from the Dataverse team. These notices are sent to the ``contact_email`` in the installation spreadsheet_ and you can open an issue in the dataverse-installations_ repo to add or change the contact email. Security notices are also sent to people and organizations that prefer to remain anonymous. To be added to this private list, please email support@dataverse.org. + +.. _spreadsheet: https://docs.google.com/spreadsheets/d/1bfsw7gnHlHerLXuk7YprUT68liHfcaMxs1rFciA-mEo/edit#gid=0 +.. _dataverse-installations: https://github.com/IQSS/dataverse-installations + +For additional details about security practices by the Dataverse team, see the :doc:`/developers/security` section of the Developer Guide. + +.. _reporting-security-issues: + +Reporting Security Issues ++++++++++++++++++++++++++ + +If you have a security issue to report, please email it to security@dataverse.org. + .. _network-ports: Network Ports @@ -141,39 +172,79 @@ In order for non-superusers to start creating Dataverse collections or datasets, As the person installing the Dataverse Software, you may or may not be a local metadata expert. You may want to have others sign up for accounts and grant them the "Admin" role at the root Dataverse collection to configure metadata fields, templates, browse/search facets, guestbooks, etc. For more on these topics, consult the :doc:`/user/dataverse-management` section of the User Guide. +.. _pids-configuration: + Persistent Identifiers and Publishing Datasets ---------------------------------------------- -Persistent identifiers are a required and integral part of the Dataverse Software. They provide a URL that is guaranteed to resolve to the datasets or files they represent. The Dataverse Software currently supports creating identifiers using DOI and Handle. +Persistent identifiers (PIDs) are a required and integral part of the Dataverse Software. They provide a URL that is +guaranteed to resolve to the datasets or files they represent. The Dataverse Software currently supports creating +identifiers using one of several PID providers. The most appropriate PIDs for public data are DOIs (provided by +DataCite or EZID) and Handles. Dataverse also supports PermaLinks which could be useful for intranet or catalog use +cases. A DOI provider called "FAKE" is recommended only for testing and development purposes. + +Testing PID Providers ++++++++++++++++++++++ -By default, the installer configures a default DOI namespace (10.5072) with DataCite as the registration provider. Please note that as of the release 4.9.3, we can no longer use EZID as the provider. Unlike EZID, DataCite requires that you register for a test account, configured with your own prefix (please contact support@datacite.org). 
Once you receive the login name, password, and prefix for the account, configure the credentials in your domain.xml, as the following two JVM options:: +By default, the installer configures the DataCite test service as the registration provider. DataCite requires that you +register for a test account, configured with your own prefix (please contact support@datacite.org). - -Ddoi.username=... - -Ddoi.password=... +Once you receive the login name, password, and prefix for the account, +configure the credentials via :ref:`dataverse.pid.datacite.username` and +:ref:`dataverse.pid.datacite.password`, then restart Payara. -and restart Payara. The prefix can be configured via the API (where it is referred to as "Authority"): +Configure the prefix via the API (where it is referred to as :ref:`:Authority`): ``curl -X PUT -d 10.xxxx http://localhost:8080/api/admin/settings/:Authority`` -Once this is done, you will be able to publish datasets and files, but the persistent identifiers will not be citable, and they will only resolve from the DataCite test environment (and then only if the Dataverse installation from which you published them is accessible - DOIs minted from your laptop will not resolve). Note that any datasets or files created using the test configuration cannot be directly migrated and would need to be created again once a valid DOI namespace is configured. +.. TIP:: + This testing section is oriented around DataCite but other PID Providers can be tested as well. + + - EZID is available to University of California scholars and researchers. Testing can be done using the authority 10.5072 and shoulder FK2 with the "apitest" account (contact EZID for credentials) or an institutional account. Configuration in Dataverse is then analogous to using DataCite. + + - The PermaLink and FAKE DOI providers do not involve an external account. See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. + +Once all is configured, you will be able to publish datasets and files, but **the persistent identifiers will not be citable**, +and they will only resolve from the DataCite test environment (and then only if the Dataverse installation from which +you published them is accessible - DOIs minted from your laptop will not resolve). Note that any datasets or files +created using the test configuration cannot be directly migrated and would need to be created again once a valid DOI +namespace is configured. -To properly configure persistent identifiers for a production installation, an account and associated namespace must be acquired for a fee from a DOI or HDL provider. **DataCite** (https://www.datacite.org) is the recommended DOI provider (see https://dataversecommunity.global for more on joining DataCite) but **EZID** (http://ezid.cdlib.org) is an option for the University of California according to https://www.cdlib.org/cdlinfo/2017/08/04/ezid-doi-service-is-evolving/ . **Handle.Net** (https://www.handle.net) is the HDL provider. +Once you are done testing, to properly configure persistent identifiers for a production installation, an account and associated namespace must be +acquired for a fee from a DOI or HDL provider.
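To recap the testing configuration described above before moving on to production setup, here is a minimal sketch using placeholder credentials and the ``10.xxxx`` placeholder prefix from the example above:

.. code-block:: shell

   # DataCite test account credentials (placeholders)
   ./asadmin create-jvm-options '-Ddataverse.pid.datacite.username=YOUR_USERNAME_HERE'
   ./asadmin create-jvm-options '-Ddataverse.pid.datacite.password=YOUR_PASSWORD_HERE'

   # Restart Payara, then register the prefix you were assigned as the authority
   curl -X PUT -d 10.xxxx http://localhost:8080/api/admin/settings/:Authority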
**DataCite** (https://www.datacite.org) is the recommended DOI provider +(see https://dataversecommunity.global for more on joining DataCite through the Global Dataverse Community Consortium) but **EZID** +(http://ezid.cdlib.org) is an option for the University of California according to +https://www.cdlib.org/cdlinfo/2017/08/04/ezid-doi-service-is-evolving/ . +**Handle.Net** (https://www.handle.net) is the HDL provider. -Once you have your DOI or Handle account credentials and a namespace, configure your Dataverse installation to use them using the JVM options and database settings below. +Once you have your DOI or Handle account credentials and a namespace, configure your Dataverse installation +using the JVM options and database settings below. + +.. _pids-doi-configuration: Configuring Your Dataverse Installation for DOIs ++++++++++++++++++++++++++++++++++++++++++++++++ -By default, your Dataverse installation attempts to register DOIs for each dataset and file under a test authority, though you must apply for your own credentials as explained above. +As explained above, by default your Dataverse installation attempts to register DOIs for each +dataset and file under a test authority. You must apply for your own credentials. Here are the configuration options for DOIs: -**JVM Options:** +**JVM Options for DataCite:** -- :ref:`doi.baseurlstring` -- :ref:`doi.username` -- :ref:`doi.password` -- :ref:`doi.dataciterestapiurlstring` +- :ref:`dataverse.pid.datacite.mds-api-url` +- :ref:`dataverse.pid.datacite.rest-api-url` +- :ref:`dataverse.pid.datacite.username` +- :ref:`dataverse.pid.datacite.password` + +**JVM Options for EZID:** + +As stated above, with very few exceptions (e.g. University of California), you will not be able to use +this provider. + +- :ref:`dataverse.pid.ezid.api-url` +- :ref:`dataverse.pid.ezid.username` +- :ref:`dataverse.pid.ezid.password` **Database Settings:** @@ -183,18 +254,21 @@ Here are the configuration options for DOIs: - :ref:`:Shoulder <:Shoulder>` - :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional) - :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional) -- :ref:`:FilePIDsEnabled <:FilePIDsEnabled>` (optional, defaults to true) +- :ref:`:FilePIDsEnabled <:FilePIDsEnabled>` (optional, defaults to false) + +.. _pids-handle-configuration: Configuring Your Dataverse Installation for Handles +++++++++++++++++++++++++++++++++++++++++++++++++++ -Here are the configuration options for handles: +Here are the configuration options for handles. Most notably, you need to +change the ``:Protocol`` setting, as it defaults to DOI usage. **JVM Options:** -- :ref:`dataverse.handlenet.admcredfile` -- :ref:`dataverse.handlenet.admprivphrase` -- :ref:`dataverse.handlenet.index` +- :ref:`dataverse.pid.handlenet.key.path` +- :ref:`dataverse.pid.handlenet.key.passphrase` +- :ref:`dataverse.pid.handlenet.index` **Database Settings:** @@ -207,6 +281,30 @@ Here are the configuration options for handles: Note: If you are **minting your own handles** and plan to set up your own handle service, please refer to `Handle.Net documentation `_. +.. _permalinks: + +Configuring Your Dataverse Installation for PermaLinks +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +PermaLinks are a simple mechanism to provide persistent URLs for datasets and datafiles (if configured) that does not involve an external service providing metadata-based search services. 
+They are potentially appropriate for Intranet use cases as well as in cases where Dataverse is being used as a catalog or holding duplicate copies of datasets where the authoritative copy already has a DOI or Handle. +PermaLinks use the protocol "perma" (versus "doi" or "handle") and do not use a "/" character as a separator between the authority and shoulder. It is recommended to choose an alphanumeric value for authority that does not resemble that of DOIs (which are primarily numeric and start with "10." as in "10.5072") to avoid PermaLinks being mistaken for DOIs. + +Here are the configuration options for PermaLinks: + +**JVM Options:** + +- :ref:`dataverse.pid.permalink.base-url` + +**Database Settings:** + +- :ref:`:Protocol <:Protocol>` +- :ref:`:Authority <:Authority>` +- :ref:`:Shoulder <:Shoulder>` +- :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional) +- :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional) +- :ref:`:FilePIDsEnabled <:FilePIDsEnabled>` (optional, defaults to false) + .. _auth-modes: Auth Modes: Local vs. Remote vs. Both @@ -238,12 +336,172 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. +.. _bearer-token-auth: + +Bearer Token Authentication +--------------------------- + +Bearer tokens are defined in `RFC 6750`_ and can be used as an alternative to API tokens. This is an experimental feature hidden behind a feature flag. + +.. _RFC 6750: https://tools.ietf.org/html/rfc6750 + +To enable bearer tokens, you must install and configure Keycloak (for now, see :ref:`oidc-dev` in the Developer Guide) and enable ``api-bearer-auth`` under :ref:`feature-flags`. + +You can test that bearer tokens are working by following the example under :ref:`bearer-tokens` in the API Guide. + +.. _database-persistence: + +Database Persistence +-------------------- + +The Dataverse software uses a PostgreSQL database to store objects users create. +You can configure basic and advanced settings for the PostgreSQL database connection with the help of +MicroProfile Config API. + +Basic Database Settings ++++++++++++++++++++++++ + +1. Any of these settings can be set via system properties (see :ref:`jvm-options` starting at :ref:`dataverse.db.name`), environment variables or other + MicroProfile Config mechanisms supported by the app server. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. 
For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the "dir config source" of Payara (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password + rm /tmp/p.txt + +3. Environment variables follow the key, replacing any dot, colon, dash, etc. with an underscore "_" and using all uppercase + letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.host + - The PostgreSQL server to connect to. + - ``localhost`` + * - dataverse.db.port + - The PostgreSQL server port to connect to. + - ``5432`` + * - dataverse.db.user + - The PostgreSQL user name to connect with. + - | ``dataverse`` + | (installer sets to ``dvnapp``) + * - dataverse.db.password + - The PostgreSQL user's password to connect with. + + **Please note the safety advisory above.** + - *No default* + * - dataverse.db.name + - The PostgreSQL database name to use for the Dataverse installation. + - | ``dataverse`` + | (installer sets to ``dvndb``) + * - dataverse.db.parameters + - Connection parameters, such as ``sslmode=require``. See `Postgres JDBC docs `_ + Note: you don't need to provide the initial "?". + - *Empty string* + +Advanced Database Settings +++++++++++++++++++++++++++ + +The following options are useful in many scenarios. You might be interested in debug output during development or +monitoring performance in production. + +You can find more details within the Payara docs: + +- `User Guide: Connection Pool Configuration `_ +- `Tech Doc: Advanced Connection Pool Configuration `_. + +Connection Validation +^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.is-connection-validation-required + - ``true``: Validate connections, allow server to reconnect in case of failure. + - false + * - dataverse.db.connection-validation-method + - | The method of connection validation: + | ``table|autocommit|meta-data|custom-validation``. + - *Empty string* + * - dataverse.db.validation-table-name + - The name of the table used for validation if the validation method is set to ``table``. + - *Empty string* + * - dataverse.db.validation-classname + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation``. + - *Empty string* + * - dataverse.db.validate-atmost-once-period-in-seconds + - Specifies the time interval in seconds between successive requests to validate a connection at most once. + - ``0`` (disabled) + +Connection & Statement Leaks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.connection-leak-timeout-in-seconds + - Specify timeout when connections count as "leaked". + - ``0`` (disabled) + * - dataverse.db.connection-leak-reclaim + - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. + - ``false`` + * - dataverse.db.statement-leak-timeout-in-seconds + - Specify timeout when statements should be considered to be "leaked".
+ - ``0`` (disabled) + * - dataverse.db.statement-leak-reclaim + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs. + - ``false`` + +Logging & Slow Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) + * - dataverse.db.slow-query-threshold-in-seconds + - SQL queries that exceed this time in seconds will be logged. + - ``-1`` (disabled) + * - dataverse.db.log-jdbc-calls + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL. + - ``false`` + + + .. _file-storage: File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ----------------------------------------------------------------------------------------------------- -By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. +By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. @@ -263,7 +521,9 @@ To support multiple stores, a Dataverse installation now requires an id, type, a Out of the box, a Dataverse installation is configured to use local file storage in the 'file' store by default. You can add additional stores and, as a superuser, configure specific Dataverse collections to use them (by editing the 'General Information' for the Dataverse collection as described in the :doc:`/admin/dataverses-datasets` section). -Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored +(in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +(See also the option reference: :ref:`dataverse.files.directory`) If you wish to change which store is used by default, you'll need to delete the existing default storage driver and set a new one using jvm options. @@ -274,6 +534,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. 
_storage-files-dir: + File Storage ++++++++++++ @@ -403,14 +665,14 @@ To **create a user** with full S3 access and nothing more for security reasons, (Identity and Access Management). See `IAM User Guide `_ for more info on this process. -**Generate the user keys** needed for a Dataverse installation afterwards by clicking on the created user. +To use programmatic access, **Generate the user keys** needed for a Dataverse installation afterwards by clicking on the created user. (You can skip this step when running on EC2, see below.) .. TIP:: If you are hosting your Dataverse installation on an AWS EC2 instance alongside storage in S3, it is possible to use IAM Roles instead of the credentials file (the file at ``~/.aws/credentials`` mentioned below). Please note that you will still need the ``~/.aws/config`` file to specify the region. For more information on this option, see - http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html + https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2.html Preparation When Using Custom S3-Compatible Service ################################################### @@ -478,7 +740,7 @@ named config in the same folder") Console Commands to Set Up Access Configuration ############################################### -Begin by installing the CLI tool `pip `_ to install the +Begin by installing the CLI tool `pip (package installer for Python) `_ to install the `AWS command line interface `_ if you don't have it. First, we'll get our access keys set up. If you already have your access keys configured, skip this step. @@ -580,8 +842,7 @@ Optionally, you may provide static credentials for each S3 storage using MicroPr - ``dataverse.files..access-key`` for this storage's "access key ID" - ``dataverse.files..secret-key`` for this storage's "secret access key" -You may provide the values for these via any of the -`supported config sources `_. +You may provide the values for these via any `supported MicroProfile Config API source`_. **WARNING:** @@ -700,6 +961,26 @@ Once you have configured a trusted remote store, you can point your users to the =========================================== ================== ========================================================================== =================== +.. _temporary-file-storage: + +Temporary Upload File Storage ++++++++++++++++++++++++++++++ + +When uploading files via the API or Web UI, you need to be aware that multiple steps are involved to enable +features like ingest processing, transfer to a permanent storage, checking for duplicates, unzipping etc. + +All of these processes are triggered after finishing transfers over the wire and moving the data into a temporary +(configurable) location on disk at :ref:`${dataverse.files.directory} `\ ``/temp``. + +Before being moved there, + +- JSF Web UI uploads are stored at :ref:`${dataverse.files.uploads} `, defaulting to + ``/usr/local/payara6/glassfish/domains/domain1/uploads`` folder in a standard installation. This place is + configurable and might be set to a separate disk volume where stale uploads are purged periodically. +- API uploads are stored at the system's temporary files location indicated by the Java system property + ``java.io.tmpdir``, defaulting to ``/tmp`` on Linux. If this location is backed by a `tmpfs `_ + on your machine, large file uploads via API will cause RAM and/or swap usage bursts. You might want to point this to + a different location, restrict maximum size of it, and monitor for stale uploads. .. 
_Branding Your Installation: @@ -772,7 +1053,7 @@ Custom Navbar Logo The Dataverse Software allows you to replace the default Dataverse Project icon and name branding in the navbar with your own custom logo. Note that this logo is separate from the logo used in the theme of the root Dataverse collection (see :ref:`theme`). -The custom logo image file is expected to be small enough to fit comfortably in the navbar, no more than 50 pixels in height and 160 pixels in width. Create a ``navbar`` directory in your Payara ``logos`` directory and place your custom logo there. By default, your logo image file will be located at ``/usr/local/payara5/glassfish/domains/domain1/docroot/logos/navbar/logo.png``. +The custom logo image file is expected to be small enough to fit comfortably in the navbar, no more than 50 pixels in height and 160 pixels in width. Create a ``navbar`` directory in your Payara ``logos`` directory and place your custom logo there. By default, your logo image file will be located at ``/usr/local/payara6/glassfish/domains/domain1/docroot/logos/navbar/logo.png``. Given this location for the custom logo image file, run this curl command to add it to your settings: @@ -800,7 +1081,7 @@ Refer to :ref:`:NavbarSupportUrl` for setting to a fully-qualified URL which wil Sign Up ####### -Refer to :ref:`:SignUpUrl` and :ref:`conf-allow-signup` for setting a relative path URL to which users will be sent for signup and for controlling the ability for creating local user accounts. +Refer to :ref:`:SignUpUrl` and :ref:`conf-allow-signup` for setting a relative path URL to which users will be sent for sign up and for controlling the ability for creating local user accounts. Custom Header ^^^^^^^^^^^^^ @@ -1044,6 +1325,14 @@ On a new Dataverse installation, users may select from the following licenses or (Note that existing Dataverse installations which are upgraded from 5.9 or previous will only offer CC0 1.0, added automatically during the upgrade to version 5.10.) +If the Dataverse Installation supports multiple languages, the license name/description translations should be added to the ``License`` properties files. (See :ref:`i18n` for more on properties files and internationalization in general.) +To create the key, the license name has to be converted to lowercase, replace space with underscore. + +Example:: + + license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Dedication. + license.cc0_1.0.name=CC0 1.0 + You have a lot of control over which licenses and terms are available. You can remove licenses and add new ones. You can decide which license is the default. You can remove "Custom Dataset Terms" as a option. You can remove all licenses and make "Custom Dataset Terms" the only option. Before making changes, you are encouraged to read the :ref:`license-terms` section of the User Guide about why CC0 is the default and what the "Custom Dataset Terms" option allows. @@ -1092,6 +1381,29 @@ Disabling Custom Dataset Terms See :ref:`:AllowCustomTermsOfUse` for how to disable the "Custom Dataset Terms" option. +.. _ChangeLicenseSortOrder: + +Sorting licenses +---------------- + +The default order of licenses in the dropdown in the user interface is as follows: + +* The default license is shown first +* Followed by the remaining installed licenses in the order of installation +* The custom license is at the end + +Only the order of the installed licenses can be changed with the API calls. The default license always remains first and the custom license last. 
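If you are unsure which license ``$ID`` values to use in the sorting call below, you can list the installed licenses first; here is a minimal sketch (piping through ``jq`` for readability is an optional convenience, not a requirement):

.. code-block:: shell

   # List installed licenses to find their database IDs
   curl -s $SERVER_URL/api/licenses | jq '.data[] | {id, name}'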
+ +The order of licenses can be changed by setting the ``sortOrder`` property of a license. For the purpose of making sorting easier and to allow grouping of the licenses, ``sortOrder`` property does not have to be unique. Licenses with the same ``sortOrder`` are sorted by their ID, i.e., first by the sortOrder, then by the ID. Nevertheless, you can set a unique ``sortOrder`` for every license in order to sort them fully manually. + +The ``sortOrder`` is an whole number and is used to sort licenses in ascending fashion. + +Changing the sorting order of a license specified by the license ``$ID`` is done by superusers using the following API call: + +.. code-block:: bash + + export SORT_ORDER=100 + curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:sortOrder/$SORT_ORDER .. _BagIt File Handler: BagIt File Handler @@ -1206,7 +1518,7 @@ The Google Cloud Archiver also requires a key file that must be renamed to 'goog For example: -``cp /usr/local/payara5/glassfish/domains/domain1/files/googlecloudkey.json`` +``cp /usr/local/payara6/glassfish/domains/domain1/files/googlecloudkey.json`` .. _S3 Archiver Configuration: @@ -1235,8 +1547,8 @@ The :S3ArchiverConfig setting is a JSON object that must include an "s3_bucket_n .. _Archiving API Call: -API Calls -+++++++++ +BagIt Export API Calls +++++++++++++++++++++++ Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the admin API call to manually submit a DatasetVersion for processing: @@ -1244,25 +1556,29 @@ Once this configuration is complete, you, as a user with the *PublishDataset* pe where: -``{id}`` is the DatasetId (or ``:persistentId`` with the ``?persistentId=""`` parameter), and +``{id}`` is the DatasetId (the database id of the dataset) and ``{version}`` is the friendly version number, e.g. "1.2". -The submitDatasetVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (it's DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) +or in place of the DatasetID, you can use the string ``:persistentId`` as the ``{id}`` and add the DOI/PID as a query parameter like this: ``?persistentId=""``. Here is how the full command would look: -In the Chronopolis case, since the transfer from the DuraCloud front-end to archival storage in Chronopolis can take significant time, it is currently up to the admin/curator to submit a 'snap-shot' of the space within DuraCloud and to monitor its successful transfer. Once transfer is complete the space should be deleted, at which point the Dataverse Software API call can be used to submit a Bag for other versions of the same Dataset. (The space is reused, so that archival copies of different Dataset versions correspond to different snapshots of the same DuraCloud space.). 
+``curl -X POST -H "X-Dataverse-key: " http://localhost:8080/api/admin/submitDatasetVersionToArchive/:persistentId/{version}?persistentId=""`` -A batch version of this admin api call is also available: +The submitDatasetVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (its DOI with ":" and "." replaced with "-", e.g. ``doi-10-5072-fk2-tgbhlb``) is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside, is intended to aid in discovery while the ORE map file is "complete", containing all user-entered metadata and is intended as an archival record.) + +In the Chronopolis case, since the transfer from the DuraCloud front-end to archival storage in Chronopolis can take significant time, it is currently up to the admin/curator to submit a 'snap-shot' of the space within DuraCloud and to monitor its successful transfer. Once transfer is complete the space should be deleted, at which point the Dataverse Software API call can be used to submit a Bag for other versions of the same dataset. (The space is reused, so that archival copies of different dataset versions correspond to different snapshots of the same DuraCloud space.). + +A batch version of this admin API call is also available: ``curl -X POST -H "X-Dataverse-key: " 'http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions?listonly=true&limit=10&latestonly=true'`` -The archiveAllUnarchivedDatasetVersions call takes 3 optional configuration parameters. +The archiveAllUnarchivedDatasetVersions call takes 3 optional configuration parameters. + * listonly=true will cause the API to list dataset versions that would be archived but will not take any action. -* limit= will limit the number of dataset versions archived in one api call to <= . +* limit= will limit the number of dataset versions archived in one API call to ``<=`` . * latestonly=true will limit archiving to only the latest published versions of datasets instead of archiving all unarchived versions. -Note that because archiving is done asynchronously, the calls above will return OK even if the user does not have the *PublishDataset* permission on the dataset(s) involved. Failures are indocated in the log and the archivalStatus calls in the native api can be used to check the status as well. - +Note that because archiving is done asynchronously, the calls above will return OK even if the user does not have the *PublishDataset* permission on the dataset(s) involved. Failures are indicated in the log and the archivalStatus calls in the native API can be used to check the status as well. PostPublication Workflow ++++++++++++++++++++++++ @@ -1318,7 +1634,7 @@ You have a couple of options for putting an updated robots.txt file into product For more of an explanation of ``ProxyPassMatch`` see the :doc:`shibboleth` section. -If you are not fronting Payara with Apache you'll need to prevent Payara from serving the robots.txt file embedded in the war file by overwriting robots.txt after the war file has been deployed. The downside of this technique is that you will have to remember to overwrite robots.txt in the "exploded" war file each time you deploy the war file, which probably means each time you upgrade to a new version of the Dataverse Software. 
Furthermore, since the version of the Dataverse Software is always incrementing and the version can be part of the file path, you will need to be conscious of where on disk you need to replace the file. For example, for Dataverse Software 4.6.1 the path to robots.txt may be ``/usr/local/payara5/glassfish/domains/domain1/applications/dataverse-4.6.1/robots.txt`` with the version number ``4.6.1`` as part of the path. +If you are not fronting Payara with Apache you'll need to prevent Payara from serving the robots.txt file embedded in the war file by overwriting robots.txt after the war file has been deployed. The downside of this technique is that you will have to remember to overwrite robots.txt in the "exploded" war file each time you deploy the war file, which probably means each time you upgrade to a new version of the Dataverse Software. Furthermore, since the version of the Dataverse Software is always incrementing and the version can be part of the file path, you will need to be conscious of where on disk you need to replace the file. For example, for Dataverse Software 4.6.1 the path to robots.txt may be ``/usr/local/payara6/glassfish/domains/domain1/applications/dataverse-4.6.1/robots.txt`` with the version number ``4.6.1`` as part of the path. Creating a Sitemap and Submitting it to Search Engines ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1331,7 +1647,7 @@ Create or update your sitemap by adding the following curl command to cron to ru This will create or update a file in the following location unless you have customized your installation directory for Payara: -``/usr/local/payara5/glassfish/domains/domain1/docroot/sitemap/sitemap.xml`` +``/usr/local/payara6/glassfish/domains/domain1/docroot/sitemap/sitemap.xml`` On Dataverse installation with many datasets, the creation or updating of the sitemap can take a while. You can check Payara's server.log file for "BEGIN updateSiteMap" and "END updateSiteMap" lines to know when the process started and stopped and any errors in between. @@ -1374,43 +1690,115 @@ When changing values these values with ``asadmin``, you'll need to delete the ol ``./asadmin create-jvm-options "-Ddataverse.fqdn=dataverse.example.com"`` -It's also possible to change these values by stopping Payara, editing ``payara5/glassfish/domains/domain1/config/domain.xml``, and restarting Payara. +It's also possible to change these values by stopping Payara, editing ``payara6/glassfish/domains/domain1/config/domain.xml``, and restarting Payara. + +.. _dataverse.fqdn: dataverse.fqdn ++++++++++++++ -If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" host name. For example, you may want to have dataverse.example.edu, and not the less appealing server-123.socsci.example.edu to appear exclusively in all the registered global identifiers, Data Deposit API records, etc. +If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" +hostname. For example, you may want to have ``dataverse.example.edu``, and not the less appealing +``server-123.example.edu`` to appear exclusively in all the registered global identifiers, etc. + +- Email confirmation links +- Password reset links +- Generating a Private URL +- PID minting +- Exporting to Schema.org format (and showing JSON-LD in HTML's tag) +- Exporting to DDI format +- Which Dataverse installation an "external tool" should return to +- URLs embedded in SWORD API responses +- ... 
-The password reset feature requires ``dataverse.fqdn`` to be configured. +Usually it will follow the pattern ``https:///``. +*Only* the FQDN part of your Dataverse installation URL can be determined by setting ``dataverse.fqdn``. -.. note:: +**Notes:** - Do note that whenever the system needs to form a service URL, by default, it will be formed with ``https://`` and port 443. I.e., - ``https://{dataverse.fqdn}/`` - If that does not suit your setup, you can define an additional option, ``dataverse.siteUrl``, explained below. +- The URL will default to using ``https://`` and no additional port information. If that does not suit your setup, you + can define an additional option, ``dataverse.siteUrl``, :ref:`explained below `, which always + takes precedence. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FQDN``. + Defaults to ``localhost`` when used with ``mp.config.profile=ct`` .. _dataverse.siteUrl: dataverse.siteUrl +++++++++++++++++ -.. note:: +``dataverse.siteUrl`` is used to configure the URL for your Dataverse installation that you plan to advertise to your +users. As explained in the :ref:`installation ` docs, this setting is critical for the correct +operation of your installation. For example, your site URL could be https://dataverse.example.edu . That is, even though +the server might also be available at uglier URLs such as https://server-123.example.edu, the site URL is the +"official" URL. + +That said, some environments may require using a different URL pattern to access your installation. You might need to +use HTTP without "S", a non-standard port and so on. This is especially useful in development or testing environments. + +You can provide any custom tailored site URL via ``dataverse.siteUrl``, which always takes precedence. +Example: ``dataverse.siteUrl=http://localhost:8080`` - and specify the protocol and port number you would prefer to be used to advertise the URL for your Dataverse installation. - For example, configured in domain.xml: - ``-Ddataverse.fqdn=dataverse.example.edu`` - ``-Ddataverse.siteUrl=http://${dataverse.fqdn}:8080`` +If you wish to change your site URL by changing the domain configuration, you should edit your ``domain.xml`` directly +to avoid problems with colons in commands. Find a line similar to +``-Ddataverse.siteUrl=https://dataverse.example.edu`` and change it. You can specify the +protocol, host, and port number and should not include a trailing slash. + +**Notes:** + +- This setting may be used in combination with variable replacement, referencing :ref:`dataverse.fqdn` with + ``./asadmin create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080"`` +- Remember to restart Payara after editing ``domain.xml``. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SITEURL``. + Defaults to ``http://${dataverse.fqdn}:8080`` when used with ``mp.config.profile=ct`` +- We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. + https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. + +.. _dataverse.files.directory: dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. 
This directory is used for a number +of purposes: + +1. ``/temp``: after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword``: a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``/googlecloudkey.json``: used with :ref:`Google Cloud Configuration` for BagIt exports. + This location is deprecated and might be refactored into a distinct setting in the future. +4. The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally + uploaded files in a directory tree at ``//`` + under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, + but this is controlled by other, store-specific settings. + +Defaults to ``/tmp/dataverse``. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable +``DATAVERSE_FILES_DIRECTORY``. Defaults to ``${STORAGE_DIR}`` for profile ``ct``, important for the +:ref:`Dataverse Application Image `. + +.. _dataverse.files.uploads: + +dataverse.files.uploads ++++++++++++++++++++++++ + +Configure a folder to store the incoming file stream during uploads (before transferring to `${dataverse.files.directory}/temp`). +Please also see :ref:`temporary-file-storage` for more details. +You can use an absolute path or a relative one, which is relative to the application server domain directory. + +Defaults to ``./uploads``, which resolves to ``/usr/local/payara6/glassfish/domains/domain1/uploads`` in a default +installation. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_UPLOADS``. +Defaults to ``${STORAGE_DIR}/uploads`` for profile ``ct``, important for the :ref:`Dataverse Application Image `. dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ Users have 60 minutes to change their passwords by default. You can adjust this value here. +.. _dataverse.db.name: + dataverse.db.name +++++++++++++++++ @@ -1420,6 +1808,8 @@ Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. +See also :ref:`database-persistence`. + dataverse.db.user +++++++++++++++++ @@ -1462,30 +1852,118 @@ Defaults to ``5432``, the default PostgreSQL port. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. +.. _dataverse.solr.host: + +dataverse.solr.host ++++++++++++++++++++ + +The hostname of a Solr server to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_HOST``. +Defaults to ``solr``, when used with ``mp.config.profile=ct`` (:ref:`see below <:ApplicationServerSettings>`). + +dataverse.solr.port ++++++++++++++++++++ + +The Solr server port to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``8983``, the default Solr port. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PORT``. + +dataverse.solr.core ++++++++++++++++++++ + +The name of the Solr core to use for this Dataverse installation. Might be used to switch to a different core quickly.
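Taken together, the database and Solr connection options above can also be supplied as environment variables; here is a minimal sketch with example values (hostnames, names, and ports are placeholders):

.. code-block:: shell

   # Each variable maps to the corresponding MPCONFIG key described above
   export DATAVERSE_DB_HOST=db.example.edu
   export DATAVERSE_DB_NAME=dvndb
   export DATAVERSE_SOLR_HOST=solr.example.edu
   export DATAVERSE_SOLR_PORT=8983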
+Remember to restart / redeploy Dataverse after changing the setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``collection1``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CORE``. + +dataverse.solr.protocol ++++++++++++++++++++++++ + +The Solr server URL protocol for the connection. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``http``, but might be set to ``https`` for extra secure Solr installations. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PROTOCOL``. + +dataverse.solr.path ++++++++++++++++++++ + +The path part of the Solr endpoint URL (e.g. ``/solr/collection1`` of ``http://localhost:8389/solr/collection1``). +Might be used to target a Solr API at non-default places. Remember to restart / redeploy Dataverse after changing the +setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``/solr/${dataverse.solr.core}``, interpolating the core name when used. Make sure to include the variable +when using it to configure your core name! + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``. + dataverse.rserve.host +++++++++++++++++++++ -Host name for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Host name for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_HOST``. dataverse.rserve.port +++++++++++++++++++++ -Port number for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Port number for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``6311`` when not configured or no valid integer. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PORT``. dataverse.rserve.user +++++++++++++++++++++ -Username for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Username for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_USER``. dataverse.rserve.password +++++++++++++++++++++++++ -Password for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Password for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PASSWORD``. dataverse.rserve.tempdir ++++++++++++++++++++++++ -Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this location is local to the host on which Rserv is running (specified in ``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to know this location in order to generate absolute path names of the files on the other end. +Temporary directory used by Rserve (defaults to ``/tmp/Rserv``). 
Note that this +location is local to the host on which Rserv is running (specified in +``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to +know this location in order to generate absolute path names of the files on the +other end. + +Defaults to ``/tmp/Rserv``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_TEMPDIR``. .. _dataverse.dropbox.key: @@ -1511,92 +1989,255 @@ dataverse.dataAccess.thumbnail.pdf.limit For limiting the size (in bytes) of thumbnail images generated from files. The default is 1000000 bytes (1 MB). -.. _doi.baseurlstring: -doi.baseurlstring -+++++++++++++++++ +.. _dataverse.pid.datacite.mds-api-url: -As of this writing, "https://mds.datacite.org" (DataCite) and "https://ezid.cdlib.org" (EZID) are the main valid values. +dataverse.pid.datacite.mds-api-url +++++++++++++++++++++++++++++++++++ -Out of the box, the Dataverse Software is configured to use a test MDS DataCite base URL string. You can delete it like this: +Configure the base URL of the `DataCite MDS API `_, +used to mint and manage DOIs. Valid values are "https://mds.datacite.org" and "https://mds.test.datacite.org" +(see also note below). -``./asadmin delete-jvm-options '-Ddoi.baseurlstring=https\://mds.test.datacite.org'`` +Out of the box, the installer configures your installation to use a DataCite REST Test API base URL (see DataCite's `testing guide `_). You can delete it like this: -Then, to switch to production DataCite, you can issue the following command: +``./asadmin delete-jvm-options '-Ddataverse.pid.datacite.mds-api-url=https\://mds.test.datacite.org'`` -``./asadmin create-jvm-options '-Ddoi.baseurlstring=https\://mds.datacite.org'`` +Then, to switch to the production DataCite base URL (see the `DataCite MDS API Guide `_), you can issue the following command: -See also these related database settings below: +``./asadmin create-jvm-options '-Ddataverse.pid.datacite.mds-api-url=https\://mds.datacite.org'`` -- :ref:`:DoiProvider` -- :ref:`:Protocol` -- :ref:`:Authority` -- :ref:`:Shoulder` +Without setting an option, always defaults to testing API endpoint. -.. _doi.dataciterestapiurlstring: +**Notes:** -doi.dataciterestapiurlstring -++++++++++++++++++++++++++++ +- See also these related database settings below: :ref:`:DoiProvider`, + :ref:`:Protocol`, :ref:`:Authority`, :ref:`:Shoulder`. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment + variable ``DATAVERSE_PID_DATACITE_MDS_API_URL``. +- This setting was formerly known as ``doi.baseurlstring`` and has been renamed. + You should delete and re-add it. +- While using DataCite directly is recommended because it is tested by the Dataverse + Project Team plus field tested with most installations, it is also possible + to use a DataCite Client API as a proxy to DataCite. `Since the launch of DataCite Fabrica in + 2019, the only example by Australian National Data Services (ANDS) has been decommissioned + `_. -This configuration option affects the ``updateCitationsForDataset`` API endpoint documented under :ref:`MDC-updateCitationsForDataset` in the Admin Guide as well as the /pids/* API. -As of this writing, "https://api.datacite.org" (DataCite) and "https://api.test.datacite.org" (DataCite Testing) are the main valid values. +.. _dataverse.pid.datacite.rest-api-url: -Out of the box, the Dataverse Software is configured to use a test DataCite REST API base URL string. 
You can delete it like this: - -``./asadmin delete-jvm-options '-Ddoi.dataciterestapiurlstring=https\://api.test.datacite.org'`` +dataverse.pid.datacite.rest-api-url ++++++++++++++++++++++++++++++++++++ -Then, to switch to production DataCite, you can issue the following command: +Configure the base URL endpoint of the `DataCite REST API `_, used for +:ref:`PIDs API ` information retrieval and :doc:`/admin/make-data-count`. -``./asadmin create-jvm-options '-Ddoi.dataciterestapiurlstring=https\://api.datacite.org'`` +Valid values are "https://api.datacite.org" and "https://api.test.datacite.org". When unset, the default is the testing API endpoint. -For backward compatibility, if this option is not defined, the value of '-Ddoi.mdcbaseurlstring' is used if set. If not the default used is "https\://api.datacite.org:. +Out of the box, the installer configures your installation to use a DataCite REST test base URL (see DataCite's `testing guide `_). You can delete it like this: -See also these related database settings below: +``./asadmin delete-jvm-options '-Ddataverse.pid.datacite.rest-api-url=https\://api.test.datacite.org'`` -- :ref:`:MDCLogPath` -- :ref:`:DisplayMDCMetrics` +Then, to switch to the production DataCite base URL (see the `DataCite REST API Guide `_), +you can issue the following command: -.. _doi.username: +``./asadmin create-jvm-options '-Ddataverse.pid.datacite.rest-api-url=https\://api.datacite.org'`` -doi.username -++++++++++++ +**Notes:** -Used in conjuction with ``doi.baseurlstring``. +- See also these related database settings below: :ref:`:MDCLogPath`, + :ref:`:DisplayMDCMetrics`. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment + variable ``DATAVERSE_PID_DATACITE_REST_API_URL``. +- This setting was formerly known as ``doi.dataciterestapiurlstring`` or + ``doi.mdcbaseurlstring`` and has been renamed. You should delete these and re-add it (once) under the new name. -Once you have a username from your provider, you can enter it like this: +.. _dataverse.pid.datacite.username: -``./asadmin create-jvm-options '-Ddoi.username=YOUR_USERNAME_HERE'`` +dataverse.pid.datacite.username ++++++++++++++++++++++++++++++++ -.. _doi.password: +DataCite uses `HTTP Basic authentication `_ +for `Fabrica `_ and their APIs. You need to provide +the same credentials to Dataverse software to mint and manage DOIs for you. -doi.password -++++++++++++ +Once you have a username from DataCite, you can enter it like this: -Used in conjuction with ``doi.baseurlstring``. +``./asadmin create-jvm-options '-Ddataverse.pid.datacite.username=YOUR_USERNAME_HERE'`` -Once you have a password from your provider, you can enter it like this: +**Notes:** -``./asadmin create-jvm-options '-Ddoi.password=YOUR_PASSWORD_HERE'`` +- Used in conjuction with :ref:`dataverse.pid.datacite.mds-api-url`, + :ref:`dataverse.pid.datacite.rest-api-url` and :ref:`dataverse.pid.datacite.password`. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment + variable ``DATAVERSE_PID_DATACITE_USERNAME``. +- This setting was formerly known as ``doi.username`` and has been renamed. + You should delete and re-add it. -.. _dataverse.handlenet.admcredfile: +.. _dataverse.pid.datacite.password: -dataverse.handlenet.admcredfile +dataverse.pid.datacite.password +++++++++++++++++++++++++++++++ -If you're using **handles**, this JVM setting configures access credentials so your Dataverse installation can talk to your Handle.Net server. 
This is the private key generated during Handle.Net server installation. Typically the full path is set to ``handle/svr_1/admpriv.bin``. Please refer to `Handle.Net's documentation `_ for more info. +Once you have a password from your provider, you should create a password alias. +This avoids storing it in clear text, although you could use a JVM option `to reference +a different place `__. -.. _dataverse.handlenet.admprivphrase: +``./asadmin create-password-alias dataverse.pid.datacite.password`` -dataverse.handlenet.admprivphrase -+++++++++++++++++++++++++++++++++ -This JVM setting is also part of **handles** configuration. The Handle.Net installer lets you choose whether to encrypt the admcredfile private key or not. If you do encrypt it, this is the pass phrase that it's encrypted with. +It will allow you to enter the password while not echoing the characters. +To manage these, read up on `Payara docs about password aliases `__. + +**Notes:** + +- Used in conjuction with :ref:`dataverse.pid.datacite.mds-api-url`, + :ref:`dataverse.pid.datacite.rest-api-url` and :ref:`dataverse.pid.datacite.username`. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment + variable ``DATAVERSE_PID_DATACITE_PASSWORD`` (although you shouldn't use + environment variables for passwords). +- This setting was formerly known as ``doi.password`` and has been renamed. + You should delete the old JVM option and the wrapped password alias, then recreate + with new alias name as above. -.. _dataverse.handlenet.index: -dataverse.handlenet.index -+++++++++++++++++++++++++ -If you want to use different index than the default 300 + +.. _dataverse.pid.handlenet.key.path: + +dataverse.pid.handlenet.key.path +++++++++++++++++++++++++++++++++ + +Related to :ref:`Handle.Net PID provider usage `. + +Provide an absolute path to a private key file authenticating requests to your +Handle.Net server. + +Handle.Net servers use a public key authentication method where the public key +is stored in a handle itself and the matching private key is provided from this +file. Typically, the absolute path ends like ``handle/svr_1/admpriv.bin``. See +also chapter 1.4 "Authentication" of the `Handle.Net Technical Documentation +`__ + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_HANDLENET_KEY_PATH``. This setting was formerly known +as ``dataverse.handlenet.admcredfile`` and has been renamed. You should delete +and re-add it. + + +.. _dataverse.pid.handlenet.key.passphrase: + +dataverse.pid.handlenet.key.passphrase +++++++++++++++++++++++++++++++++++++++ + +Related to :ref:`Handle.Net PID provider usage `. + +Provide a passphrase to decrypt the :ref:`private key file `. + +The key file may (and should) be encrypted with a passphrase (used for +encryption with AES-128). See also chapter 1.4 "Authentication" of the +`Handle.Net Technical Documentation `__ + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_HANDLENET_KEY_PASSPHRASE`` (although you shouldn't use +environment variables for passwords). This setting was formerly known as +``dataverse.handlenet.admprivphrase`` and has been renamed. You should delete +the old JVM option and the wrapped password alias, then recreate as shown for +:ref:`dataverse.pid.datacite.password` but with this option as alias name. + + +.. 
_dataverse.pid.handlenet.index: + +dataverse.pid.handlenet.index ++++++++++++++++++++++++++++++ + +Related to :ref:`Handle.Net PID provider usage `. + +Configure your *Handle.Net Index* to be used registering new persistent +identifiers. Defaults to ``300``. + +Indices are used to separate concerns within the Handle system. To add data to +an index, authentication is mandatory. See also chapter 1.4 "Authentication" of +the `Handle.Net Technical Documentation `__ + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_HANDLENET_INDEX``. This setting was formerly known as +``dataverse.handlenet.index`` and has been renamed. You should delete and +re-add it. + +.. _dataverse.pid.permalink.base-url: + +dataverse.pid.permalink.base-url +++++++++++++++++++++++++++++++++ + +When using :ref:`PermaLinks `, this setting can be used to configure an external resolver. Dataverse will associate a PermaLink PID with the URL: +``/citation?persistentId=perma:``. The default value is your Dataverse site URL, which will result in PermaLinks correctly resolving to the appropriate dataset page. + +To set this option, issue a command such as: + +``./asadmin create-jvm-options '-Ddataverse.pid.permalink.base-url=https\://localresolver.yourdataverse.org'`` + +See also these related database settings: + +- :ref:`:Protocol` +- :ref:`:Authority` +- :ref:`:Shoulder` + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_PERMALINK_BASE_URL``. This setting was formerly known as +``perma.baseurlstring`` and has been renamed. You should delete and re-add it. + +.. _dataverse.pid.ezid.api-url: + +dataverse.pid.ezid.api-url +++++++++++++++++++++++++++ + +The EZID DOI provider is likely not an option if you are `not associated with +California Digital Library (CDL) or Purdue University +`_. + +Defaults to ``https://ezid.cdlib.org``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_EZID_API_URL``. This setting was formerly known as +``doi.baseurlstring`` and has been renamed. You should delete and re-add it. + +.. _dataverse.pid.ezid.username: + +dataverse.pid.ezid.username ++++++++++++++++++++++++++++ + +The EZID DOI provider is likely not an option if you are `not associated with +California Digital Library (CDL) or Purdue University +`_. + +Works the same way as :ref:`dataverse.pid.datacite.username`, but for the EZID DOI +provider. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_EZID_USERNAME``. + +This setting was formerly known as ``doi.username`` and has been renamed. You +should delete and re-add it. + +.. _dataverse.pid.ezid.password: + +dataverse.pid.ezid.password ++++++++++++++++++++++++++++ + +The EZID DOI provider is likely not an option if you are `not associated with +California Digital Library (CDL) or Purdue University +`_. + +Works the same way as :ref:`dataverse.pid.datacite.password`, but for the EZID DOI +provider. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_PID_EZID_PASSWORD`` (although you shouldn't use +environment variables for passwords). + +This setting was formerly known as ``doi.password`` and has been renamed. You +should delete the old JVM option and the wrapped password alias, then recreate +as shown for :ref:`dataverse.pid.datacite.password` but with the EZID alias +name. .. 
_dataverse.timerServer: @@ -1627,8 +2268,6 @@ By default, download URLs to files will be included in Schema.org JSON-LD output ``./asadmin create-jvm-options '-Ddataverse.files.hide-schema-dot-org-download-urls=true'`` -Please note that there are other reasons why download URLs may not be included for certain files such as if a guestbook entry is required or if the file is restricted. - For more on Schema.org JSON-LD, see the :doc:`/admin/metadataexport` section of the Admin Guide. .. _useripaddresssourceheader: @@ -1658,6 +2297,180 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_FORWARDED", "HTTP_VIA", "REMOTE_ADDR" + +.. _dataverse.personOrOrg.assumeCommaInPersonName: + +dataverse.personOrOrg.assumeCommaInPersonName ++++++++++++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that +users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this to true to always assume entries without a comma are for Organizations. The default is false. + +.. _dataverse.personOrOrg.orgPhraseArray: + +dataverse.personOrOrg.orgPhraseArray +++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. +If you have examples where an organization name is being inferred to belong to a person, you can use this setting to force it to be recognized as an organization. +The value is expected to be a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. + + +.. _dataverse.api.signature-secret: + +dataverse.api.signature-secret +++++++++++++++++++++++++++++++ + +Context: Dataverse has the ability to create "Signed URLs" for its API calls. Using signed URLs is more secure than +providing API tokens, which are long-lived and give the holder all of the permissions of the user. In contrast, signed URLs +are time-limited and only allow the action of the API call in the URL. See :ref:`api-exttools-auth` and +:ref:`api-native-signed-url` for more details. + +The key used to sign a URL is created from the API token of the creating user plus a signature-secret provided by an administrator. +**Using a signature-secret is highly recommended.** This setting defaults to an empty string. Using a non-empty +signature-secret makes it impossible for someone who knows an API token to forge signed URLs and provides extra security by +making the overall signing key longer. + +Since the signature-secret is sensitive, you should treat it like a password. Here is an example of how to set your shared secret +with the secure method "password alias": + +.. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=change-me-super-secret" > /tmp/password.txt + asadmin create-password-alias --passwordfile /tmp/password.txt dataverse.api.signature-secret + rm /tmp/password.txt + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_API_SIGNATURE_SECRET``.
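+Whichever configuration source you use, any sufficiently long random string works as the signature-secret. As a sketch, one convenient way to generate such a value (``openssl`` is just an assumption here; any secure random generator will do) and then paste it in place of ``change-me-super-secret`` above:
+
+``openssl rand -hex 32``
+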
+ +**WARNING:** For security, do not use the sources "environment variable" or "system property" (JVM option) in a +production context! Rely on password alias, secrets directory or cloud based sources instead! + +.. _dataverse.api.allow-incomplete-metadata: + +dataverse.api.allow-incomplete-metadata ++++++++++++++++++++++++++++++++++++++++ + +When enabled, dataset with incomplete metadata can be submitted via API for later corrections. +See :ref:`create-dataset-command` for details. + +Defaults to ``false``. + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_API_ALLOW_INCOMPLETE_METADATA``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. + +.. _dataverse.signposting.level1-author-limit: + +dataverse.signposting.level1-author-limit ++++++++++++++++++++++++++++++++++++++++++ + +See :ref:`discovery-sign-posting` for details. + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_SIGNPOSTING_LEVEL1_AUTHOR_LIMIT``. + +.. _dataverse.signposting.level1-item-limit: + +dataverse.signposting.level1-item-limit ++++++++++++++++++++++++++++++++++++++++ + +See :ref:`discovery-sign-posting` for details. + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_SIGNPOSTING_LEVEL1_ITEM_LIMIT``. + +dataverse.mail.support-email +++++++++++++++++++++++++++++ + +This provides an email address distinct from the :ref:`systemEmail` that will be used as the email address for Contact Forms and Feedback API. This address is used as the To address when the Contact form is launched from the Support entry in the top navigation bar and, if configured via :ref:`dataverse.mail.cc-support-on-contact-email`, as a CC address when the form is launched from a Dataverse/Dataset Contact button. +This allows configuration of a no-reply email address for :ref:`systemEmail` while allowing feedback to go to/be cc'd to the support email address, which would normally accept replies. If not set, the :ref:`systemEmail` is used for the feedback API/contact form email. + +Note that only the email address is required, which you can supply without the ``<`` and ``>`` signs, but if you include the text, it's the way to customize the name of your support team, which appears in the "from" address in emails as well as in help text in the UI. If you don't include the text, the installation name (see :ref:`Branding Your Installation`) will appear in the "from" address. + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_MAIL_SUPPORT_EMAIL``. + +.. _dataverse.mail.cc-support-on-contact-email: + +dataverse.mail.cc-support-on-contact-email +++++++++++++++++++++++++++++++++++++++++++ + +If this setting is true, the contact forms and feedback API will cc the system (:SupportEmail if set, :SystemEmail if not) when sending email to the collection, dataset, or datafile contacts. +A CC line is added to the contact form when this setting is true so that users are aware that the cc will occur. +The default is false. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_MAIL_CC_SUPPORT_ON_CONTACT_EMAIL``. + +dataverse.ui.allow-review-for-incomplete +++++++++++++++++++++++++++++++++++++++++ + +Determines if dataset submitted via API with incomplete metadata (for later corrections) can be submitted for review +from the UI. + +Defaults to ``false``. 
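+For example, a sketch of turning it on as a JVM option, following the same ``asadmin`` pattern used for other ``dataverse.*`` options in this guide:
+
+``./asadmin create-jvm-options '-Ddataverse.ui.allow-review-for-incomplete=true'``
+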
+ +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_UI_ALLOW_REVIEW_FOR_INCOMPLETE``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. + +dataverse.ui.show-validity-filter ++++++++++++++++++++++++++++++++++ + +When enabled, the filter for validity of metadata is shown in "My Data" page. +**Note:** When you wish to use this filter, you must reindex the datasets first, otherwise datasets with valid metadata +will not be shown in the results. + +Defaults to ``false``. + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_UI_SHOW_VALIDITY_FILTER``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. + +.. _dataverse.spi.exporters.directory: + +dataverse.spi.exporters.directory ++++++++++++++++++++++++++++++++++ + +This JVM option is used to configure the file system path where external Exporter JARs can be placed. See :ref:`external-exporters` for more information. + +``./asadmin create-jvm-options '-Ddataverse.spi.exporters.directory=PATH_LOCATION_HERE'`` + +If this value is set, Dataverse will examine all JARs in the specified directory and will use them to add, or replace existing, metadata export formats. +If this value is not set (the default), Dataverse will not use external Exporters. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SPI_EXPORTERS_DIRECTORY``. + +.. _dataverse.netcdf.geo-extract-s3-direct-upload: + +dataverse.netcdf.geo-extract-s3-direct-upload ++++++++++++++++++++++++++++++++++++++++++++++ + +This setting was added to keep S3 direct upload lightweight. When that feature is enabled and you still want NetCDF and HDF5 files to go through metadata extraction of a Geospatial Bounding Box (see :ref:`netcdf-and-hdf5`), which requires the file to be downloaded from S3 in this scenario, make this setting true. + +See also :ref:`s3-direct-upload-features-disabled`. + +.. _feature-flags: + +Feature Flags +------------- + +Certain features might be deactivated because they are experimental and/or opt-in previews. If you want to enable these, +please find all known feature flags below. Any of these flags can be activated using a boolean value +(case-insensitive, one of "true", "1", "YES", "Y", "ON") for the setting. + +.. list-table:: + :widths: 35 50 15 + :header-rows: 1 + :align: left + + * - Flag Name + - Description + - Default status + * - api-session-auth + - Enables API authentication via session cookie (JSESSIONID). **Caution: Enabling this feature flag exposes the installation to CSRF risks!** We expect this feature flag to be temporary (only used by frontend developers, see `#9063 `_) and for the feature to be removed in the future. + - ``Off`` + +**Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. .. _:ApplicationServerSettings: @@ -1673,6 +2486,23 @@ To facilitate large file upload and download, the Dataverse Software installer b and restart Payara to apply your change. +mp.config.profile ++++++++++++++++++ + +MicroProfile Config 2.0 defines the `concept of "profiles" `_. +They can be used to change configuration values by context. 
This is used in Dataverse to change some configuration +defaults when used inside a container context rather than in classic installations. + +As per the spec, you will need to set the configuration value ``mp.config.profile`` to ``ct`` as early as possible. +This is best done with a system property: + +``./asadmin create-system-properties 'mp.config.profile=ct'`` + +*Note: the* :doc:`../container/app-image` *uses an (overrideable) environment variable to activate this.* + +You might also create your own profiles and use these; please refer to the upstream documentation linked above. + + .. _database-settings: Database Settings @@ -1805,22 +2635,28 @@ By default the footer says "Copyright © [YYYY]" but you can add text after the :DoiProvider ++++++++++++ -As of this writing "DataCite" and "EZID" are the only valid options for production installations. Developers using Dataverse Software 4.10+ are welcome to use the keyword "FAKE" to configure a non-production installation with an non-resolving, in-code provider, which will basically short-circuit the DOI publishing process. ``:DoiProvider`` is only needed if you are using DOI. +As of this writing "DataCite" and "EZID" are the only valid options for production installations. Developers using +Dataverse Software 4.10+ are welcome to use the keyword "FAKE" to configure a non-production installation with a +non-resolving, in-code provider, which will basically short-circuit the DOI publishing process. ``:DoiProvider`` +is only needed if you are using DOI. ``curl -X PUT -d DataCite http://localhost:8080/api/admin/settings/:DoiProvider`` -This setting relates to the ``:Protocol``, ``:Authority``, ``:Shoulder``, and ``:IdentifierGenerationStyle`` database settings below as well as the following JVM options: +This setting relates to the ``:Protocol``, ``:Authority``, ``:Shoulder``, and +``:IdentifierGenerationStyle`` database settings below as well as the following +JVM options: -- :ref:`doi.baseurlstring` -- :ref:`doi.username` -- :ref:`doi.password` +- :ref:`dataverse.pid.datacite.mds-api-url` +- :ref:`dataverse.pid.datacite.rest-api-url` +- :ref:`dataverse.pid.datacite.username` +- :ref:`dataverse.pid.datacite.password` .. _:Protocol: :Protocol +++++++++ -As of this writing "doi" and "hdl" are the only valid option for the protocol for a persistent ID. +As of this writing "doi", "hdl", and "perma" are the only valid options for the protocol for a persistent ID. ``curl -X PUT -d doi http://localhost:8080/api/admin/settings/:Protocol`` @@ -1829,9 +2665,9 @@ As of this writing "doi" and "hdl" are the only valid option for the protocol fo :Authority ++++++++++ -Use the authority assigned to you by your DoiProvider or HandleProvider. +Use the authority assigned to you by your DoiProvider or HandleProvider, or your choice if using PermaLinks. -Please note that the authority cannot have a slash ("/") in it. +Please note that a DOI or Handle authority cannot have a slash ("/") in it (slash is also not recommended for PermaLink authorities). ``curl -X PUT -d 10.xxxx http://localhost:8080/api/admin/settings/:Authority`` @@ -1840,7 +2676,7 @@ Please note that the authority cannot have a slash ("/") in it. :Shoulder +++++++++ -Out of the box, the DOI shoulder is set to "FK2/" but this is for testing only!
When you apply for your DOI authority/namespace, you may have been assigned a shoulder. The following is only an example and a trailing slash is optional. ``curl -X PUT -d "MyShoulder/" http://localhost:8080/api/admin/settings/:Shoulder`` @@ -1945,13 +2781,35 @@ timestamps. :FilePIDsEnabled ++++++++++++++++ -Toggles publishing of file-based PIDs for the entire installation. By default this setting is absent and Dataverse Software assumes it to be true. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset. +Toggles publishing of file-level PIDs for the entire installation. By default this setting is absent and Dataverse Software assumes it to be false. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset. + +It is possible to override the installation-wide setting for specific collections, see :ref:`:AllowEnablingFilePIDsPerCollection <:AllowEnablingFilePIDsPerCollection>`. For example, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See :ref:`collection-attributes-api` for details. + +To enable file-level PIDs for the entire installation:: + +``curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:FilePIDsEnabled`` + -If you don't want to register file-based PIDs for your installation, set: +If you don't want to register file-based PIDs for your entire installation:: ``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:FilePIDsEnabled`` -Note: File-level PID registration was added in Dataverse Software 4.9; it could not be disabled until Dataverse Software 4.9.3. +.. _:AllowEnablingFilePIDsPerCollection: + +:AllowEnablingFilePIDsPerCollection ++++++++++++++++++++++++++++++++++++ + +Toggles whether superusers can change the File PIDs policy per collection. By default this setting is absent and Dataverse Software assumes it to be false. + +For example, if this setting is true, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See :ref:`collection-attributes-api` for details. + +To enable setting file-level PIDs per collection:: + +``curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:AllowEnablingFilePIDsPerCollection`` + + +When :AllowEnablingFilePIDsPerCollection is true, setting File PIDs to be enabled/disabled for a given collection can be done via the Native API - see :ref:`collection-attributes-api` in the Native API Guide. + .. _:IndependentHandleService: @@ -2160,6 +3018,8 @@ Limit the number of files in a zip that your Dataverse installation will accept. ``curl -X PUT -d 2048 http://localhost:8080/api/admin/settings/:ZipUploadFilesLimit`` +.. _:SolrHostColonPort: + :SolrHostColonPort ++++++++++++++++++ @@ -2167,6 +3027,8 @@ By default your Dataverse installation will attempt to connect to Solr on port 8 ``curl -X PUT -d localhost:8983 http://localhost:8080/api/admin/settings/:SolrHostColonPort`` +**Note:** instead of using a database setting, you could alternatively use JVM settings like :ref:`dataverse.solr.host`. + :SolrFullTextIndexing +++++++++++++++++++++ @@ -2186,7 +3048,7 @@ If ``:SolrFullTextIndexing`` is set to true, the content of files of any size wi :SignUpUrl ++++++++++ -The relative path URL to which users will be sent for signup. 
The default setting is below. +The relative path URL to which users will be sent for sign up. The default setting is below. ``curl -X PUT -d '/dataverseuser.xhtml?editMode=CREATE' http://localhost:8080/api/admin/settings/:SignUpUrl`` @@ -2262,6 +3124,33 @@ Set to false to disallow local accounts from being created. See also the section ``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:AllowSignUp`` +.. _:AllowRemoteAuthSignUp: + +:AllowRemoteAuthSignUp +++++++++++++++++++++++ + +This is a **compound** setting that enables or disables sign up for new accounts for individual OAuth2 authentication methods (such as Orcid, Google and GitHub). This way it is possible to continue allowing logins via an OAuth2 provider for already existing accounts, without letting new users create accounts with this method. + +By default, if the setting is not present, all configured OAuth sign ups are allowed. If the setting is present, but the value for this specific method is not specified, it is assumed that the sign ups are allowed for it. + +Examples: + +This curl command... + +``curl -X PUT -d '{"default":"false"}' http://localhost:8080/api/admin/settings/:AllowRemoteAuthSignUp`` + +...disables all OAuth sign ups. + +This curl command... + +``curl -X PUT -d '{"default":"true","google":"false"}' http://localhost:8080/api/admin/settings/:AllowRemoteAuthSignUp`` + +...keeps sign ups open for all the OAuth login providers except google. (That said, note that the ``"default":"true"`` part in this example is redundant, since it would default to true anyway for all the methods other than google.) + +See also :doc:`oauth2`. + +.. _:FileFixityChecksumAlgorithm: + :FileFixityChecksumAlgorithm ++++++++++++++++++++++++++++ @@ -2271,12 +3160,9 @@ The default checksum algorithm used is MD5 and should be sufficient for establis ``curl -X PUT -d 'SHA-512' http://localhost:8080/api/admin/settings/:FileFixityChecksumAlgorithm`` -The fixity algorithm used on existing files can be changed by a superuser using the API. An optional query parameter (num) can be used to limit the number of updates attempted. -The API call will only update the algorithm and checksum for a file if the existing checksum can be validated against the file. -Statistics concerning the updates are returned in the response to the API call with details in the log. +To update the algorithm used for existing files, see :ref:`UpdateChecksums` -``curl http://localhost:8080/api/admin/updateHashValues/{alg}`` -``curl http://localhost:8080/api/admin/updateHashValues/{alg}?num=1`` +The fixity checksum algorithm in use can be discovered via API. See :ref:`get-fixity-algorithm` in the API Guide. .. _:PVMinLength: @@ -2531,6 +3417,7 @@ The URL for your Repository Storage Abstraction Layer (RSAL) installation. This This setting controls which upload methods are available to users of your Dataverse installation. The following upload methods are available: - ``native/http``: Corresponds to "Upload with HTTP via your browser" and APIs that use HTTP (SWORD and native). +- ``dvwebloader``: Corresponds to :ref:`folder-upload`. Note that ``dataverse.files..upload-redirect`` must be set to "true" on an S3 store for this method to show up in the UI. In addition, :ref:`:WebloaderUrl` must be set. CORS allowed on the S3 bucket. See :ref:`cors-s3-bucket`. - ``dcm/rsync+ssh``: Corresponds to "Upload with rsync+ssh via Data Capture Module (DCM)". A lot of setup is required, as explained in the :doc:`/developers/big-data-support` section of the Developer Guide. 
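+For example, a sketch of enabling browser uploads together with the folder uploader (illustrative values only; build the comma-separated list, as described below, from the methods your installation actually supports):
+
+``curl -X PUT -d 'native/http,dvwebloader' http://localhost:8080/api/admin/settings/:UploadMethods``
+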
Out of the box only ``native/http`` is enabled and will work without further configuration. To add multiple upload method, separate them using a comma like this: @@ -2555,6 +3442,8 @@ Limit on how many guestbook entries to display on the guestbook-responses page. ``curl -X PUT -d 10000 http://localhost:8080/api/admin/settings/:GuestbookResponsesPageDisplayLimit`` +.. _:CustomDatasetSummaryFields: + :CustomDatasetSummaryFields +++++++++++++++++++++++++++ @@ -2564,6 +3453,10 @@ You can replace the default dataset metadata fields that are displayed above fil You have to put the datasetFieldType name attribute in the :CustomDatasetSummaryFields setting for this to work. +The default fields are ``dsDescription,subject,keyword,publication,notesText``. + +This setting can be retrieved via API. See :ref:`get-dataset-summary-field-names` in the API Guide. + :AllowApiTokenLookupViaApi ++++++++++++++++++++++++++ @@ -2592,7 +3485,7 @@ Sets how long a cached metrics result is used before re-running the query for a Sets the path where the raw Make Data Count logs are stored before being processed. If not set, no logs will be created for Make Data Count. See also the :doc:`/admin/make-data-count` section of the Admin Guide. -``curl -X PUT -d '/usr/local/payara5/glassfish/domains/domain1/logs' http://localhost:8080/api/admin/settings/:MDCLogPath`` +``curl -X PUT -d '/usr/local/payara6/glassfish/domains/domain1/logs' http://localhost:8080/api/admin/settings/:MDCLogPath`` .. _:DisplayMDCMetrics: @@ -2603,6 +3496,20 @@ Sets the path where the raw Make Data Count logs are stored before being process ``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:DisplayMDCMetrics`` +.. _:MDCStartDate: + +:MDCStartDate ++++++++++++++ + +It is possible to display MDC metrics (as of the start date of MDC logging) along with legacy download counts, generated before MDC was enabled. +This is enabled via the new setting `:MDCStartDate` that specifies the cut-over date. If a dataset has any legacy access counts collected prior to that date, those numbers will be displayed in addition to the MDC views and downloads recorded since then. +(Nominally, this date should be when your installation started logging MDC metrics but it can be any date after that if desired.) + + +``curl -X PUT -d '2019-10-01' http://localhost:8080/api/admin/settings/:MDCStartDate`` + + + .. _:Languages: :Languages @@ -2714,9 +3621,9 @@ Part of the database settings to configure the BagIt file handler. This is the p ++++++++++++++++++ Your Dataverse installation can export archival "Bag" files to an extensible set of storage systems (see :ref:`BagIt Export` above for details about this and for further explanation of the other archiving related settings below). -This setting specifies which storage system to use by identifying the particular Java class that should be run. Current options include DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, and GoogleCloudSubmitToArchiveCommand. +This setting specifies which storage system to use by identifying the particular Java class that should be run. Current configuration options include DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchiveCommand, and S3SubmitToArchiveCommand. -``curl -X PUT -d 'LocalSubmitToArchiveCommand' http://localhost:8080/api/admin/settings/:ArchiverClassName`` +For examples, see the specific configuration above in :ref:`BagIt Export`. 
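+As a minimal illustration (assuming you only want local Bag exports; the other archiver classes listed above are set the same way):
+
+``curl -X PUT -d 'LocalSubmitToArchiveCommand' http://localhost:8080/api/admin/settings/:ArchiverClassName``
+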
:ArchiverSettings +++++++++++++++++ @@ -2936,12 +3843,14 @@ For example: ``curl -X PUT -d "This content needs to go through an additional review by the Curation Team before it can be published." http://localhost:8080/api/admin/settings/:DatasetMetadataValidationFailureMsg`` - + :ExternalValidationAdminOverride ++++++++++++++++++++++++++++++++ When set to ``true``, this setting allows a superuser to publish and/or update Dataverse collections and datasets bypassing the external validation checks (specified by the settings above). In an event where an external script is reporting validation failures that appear to be in error, this option gives an admin with superuser privileges a quick way to publish the dataset or update a collection for the user. +.. _:FileCategories: + :FileCategories +++++++++++++++ @@ -3031,3 +3940,34 @@ The interval in seconds between Dataverse calls to Globus to check on upload pro +++++++++++++++++++++++++ A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app. See :ref:`globus-support` for details. + +.. _:WebloaderUrl: + +:WebloaderUrl ++++++++++++++ + +The URL of `dvuploader `'s HTML file when dvuploader is enabled in :ref:`:UploadMethods`. + +To use the current GDCC version directly: + +``curl -X PUT -d 'https://gdcc.github.io/dvwebloader/src/dvwebloader.html' http://localhost:8080/api/admin/settings/:WebloaderUrl`` + +:CategoryOrder +++++++++++++++ + +A comma separated list of Category/Tag names defining the order in which files with those tags should be displayed. +The setting can include custom tag names along with the pre-defined tags(Documentation, Data, and Code are the defaults but the :ref:`:FileCategories` setting can be used to use a different set of tags). +The default is category ordering disabled. + +:OrderByFolder +++++++++++++++ + +A true(default)/false option determining whether datafiles listed on the dataset page should be grouped by folder. + +:AllowUserManagementOfOrder ++++++++++++++++++++++++++++ + +A true/false (default) option determining whether the dataset datafile table display includes checkboxes enabling users to turn folder ordering and/or category ordering (if an order is defined by :CategoryOrder) on and off dynamically. + +.. _supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html + diff --git a/doc/sphinx-guides/source/installation/installation-main.rst b/doc/sphinx-guides/source/installation/installation-main.rst index 4b000f1ef9e..021a97415e3 100755 --- a/doc/sphinx-guides/source/installation/installation-main.rst +++ b/doc/sphinx-guides/source/installation/installation-main.rst @@ -28,8 +28,8 @@ Unpack the zip file - this will create the directory ``dvinstall``. Just make sure the user running the installer has write permission to: -- /usr/local/payara5/glassfish/lib -- /usr/local/payara5/glassfish/domains/domain1 +- /usr/local/payara6/glassfish/lib +- /usr/local/payara6/glassfish/domains/domain1 - the current working directory of the installer (it currently writes its logfile there), and - your jvm-option specified files.dir @@ -47,7 +47,7 @@ Follow the instructions in the text file. The script will prompt you for some configuration values. 
If this is a test/evaluation installation, it may be possible to accept the default values provided for most of the settings: - Internet Address of your host: localhost -- Payara Directory: /usr/local/payara5 +- Payara Directory: /usr/local/payara6 - Payara User: current user running the installer script - Administrator email address for this Dataverse installation: (none) - SMTP (mail) server to relay notification messages: localhost @@ -82,6 +82,8 @@ While Postgres can accomodate usernames and database names containing hyphens, i For more information, please see https://docs.payara.fish/documentation/payara-server/password-aliases/password-alias-asadmin-commands.html +.. _importance-of-siteUrl: + **IMPORTANT:** The installer will also ask for an external site URL for the Dataverse installation. It is *imperative* that this value be supplied accurately, or a long list of functions will be inoperable, including: - email confirmation links @@ -96,7 +98,7 @@ The supplied site URL will be saved under the JVM option :ref:`dataverse.siteUrl **IMPORTANT:** Please note, that "out of the box" the installer will configure the Dataverse installation to leave unrestricted access to the administration APIs from (and only from) localhost. Please consider the security implications of this arrangement (anyone with shell access to the server can potentially mess with your Dataverse installation). An alternative solution would be to block open access to these sensitive API endpoints completely; and to only allow requests supplying a pre-defined "unblock token" (password). If you prefer that as a solution, please consult the supplied script ``post-install-api-block.sh`` for examples on how to set it up. See also "Securing Your Installation" under the :doc:`config` section. -The Dataverse Software uses JHOVE_ to help identify the file format (CSV, PNG, etc.) for files that users have uploaded. The installer places files called ``jhove.conf`` and ``jhoveConfig.xsd`` into the directory ``/usr/local/payara5/glassfish/domains/domain1/config`` by default and makes adjustments to the jhove.conf file based on the directory into which you chose to install Payara. +The Dataverse Software uses JHOVE_ to help identify the file format (CSV, PNG, etc.) for files that users have uploaded. The installer places files called ``jhove.conf`` and ``jhoveConfig.xsd`` into the directory ``/usr/local/payara6/glassfish/domains/domain1/config`` by default and makes adjustments to the jhove.conf file based on the directory into which you chose to install Payara. .. _JHOVE: http://jhove.openpreservation.org @@ -134,6 +136,11 @@ Dataset Cannot Be Published Check to make sure you used a fully qualified domain name when installing the Dataverse Software. You can change the ``dataverse.fqdn`` JVM option after the fact per the :doc:`config` section. +Got ERR_ADDRESS_UNREACHABLE While Navigating on Interface or API Calls +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you are receiving an ``ERR_ADDRESS_UNREACHABLE`` while navigating the GUI or making an API call, make sure the ``siteUrl`` JVM option is defined. For details on how to set ``siteUrl``, please refer to :ref:`dataverse.siteUrl` from the :doc:`config` section. For context on why setting this option is necessary, refer to :ref:`dataverse.fqdn` from the :doc:`config` section. 
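+For example, you can check whether the option is present and set it like this (the hostname below is only a placeholder; use your installation's real public URL):
+
+``./asadmin list-jvm-options | grep siteUrl``
+
+``./asadmin create-jvm-options '-Ddataverse.siteUrl=https\://dataverse.example.edu'``
+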
+ Problems Sending Email ^^^^^^^^^^^^^^^^^^^^^^ @@ -242,7 +249,7 @@ Deleting Uploaded Files The path below will depend on the value for ``dataverse.files.directory`` as described in the :doc:`config` section: -``rm -rf /usr/local/payara5/glassfish/domains/domain1/files`` +``rm -rf /usr/local/payara6/glassfish/domains/domain1/files`` Rerun Installer ^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/installation/intro.rst b/doc/sphinx-guides/source/installation/intro.rst index 2251af7b81b..67fc774bdbd 100644 --- a/doc/sphinx-guides/source/installation/intro.rst +++ b/doc/sphinx-guides/source/installation/intro.rst @@ -48,7 +48,7 @@ If you've encountered a problem installing Dataverse and are ready to ask for he - Operating system (usually a Linux distribution) and version. - Output from the installer (STDOUT, STDERR). - The ``scripts/api/setup-all.*.log`` files left behind by the installer. -- The ``server.log`` file from Payara (by default at ``/usr/local/payara5/glassfish/domains/domain1/logs/server.log``). +- The ``server.log`` file from Payara (by default at ``/usr/local/payara6/glassfish/domains/domain1/logs/server.log``). Improving this Guide -------------------- diff --git a/doc/sphinx-guides/source/installation/oauth2.rst b/doc/sphinx-guides/source/installation/oauth2.rst index 0dfdb0393e0..8dffde87cc2 100644 --- a/doc/sphinx-guides/source/installation/oauth2.rst +++ b/doc/sphinx-guides/source/installation/oauth2.rst @@ -78,6 +78,11 @@ This template can be used for configuring this setting (**this is not something - :download:`orcid-sandbox.json <../_static/installation/files/root/auth-providers/orcid-sandbox.json>` +Disabling Sign Up +~~~~~~~~~~~~~~~~~ + +See :ref:`:AllowRemoteAuthSignUp`. + Converting Local Users to OAuth ------------------------------- diff --git a/doc/sphinx-guides/source/installation/oidc.rst b/doc/sphinx-guides/source/installation/oidc.rst index a40ef758dc7..1fdfcce63b5 100644 --- a/doc/sphinx-guides/source/installation/oidc.rst +++ b/doc/sphinx-guides/source/installation/oidc.rst @@ -86,4 +86,3 @@ configuration option. For details, see :doc:`config`. .. hint:: In contrast to our :doc:`oauth2`, you can use multiple providers by creating distinct configurations enabled by the same technology and without modifying the Dataverse Software code base (standards for the win!). - diff --git a/doc/sphinx-guides/source/installation/prep.rst b/doc/sphinx-guides/source/installation/prep.rst index c491659cd56..abb4349d3ad 100644 --- a/doc/sphinx-guides/source/installation/prep.rst +++ b/doc/sphinx-guides/source/installation/prep.rst @@ -79,15 +79,24 @@ System Requirements Hardware Requirements +++++++++++++++++++++ -A basic Dataverse installation runs fine on modest hardware. For example, as of this writing the test installation at http://phoenix.dataverse.org is backed by a single virtual machine with two 2.8 GHz processors, 8 GB of RAM and 50 GB of disk. +A basic Dataverse installation runs fine on modest hardware. For example, in the recent past we had a test instance backed by a single virtual machine with two 2.8 GHz processors, 8 GB of RAM and 50 GB of disk. In contrast, before we moved it to the Amazon Cloud, the production installation at https://dataverse.harvard.edu was backed by six servers with two Intel Xeon 2.53 Ghz CPUs and either 48 or 64 GB of RAM. The three servers with 48 GB of RAM run were web frontends running Glassfish 4 and Apache and were load balanced by a hardware device. 
The remaining three servers with 64 GB of RAM were the primary and backup database servers and a server dedicated to running Rserve. Multiple TB of storage were mounted from a SAN via NFS. -Currently, the Harvard Dataverse Repository is served by four AWS server nodes: two "m4.4xlarge" instances (64GB/16 vCPU) as web frontends, one 32GB/8 vCPU ("m4.2xlarge") instance for the Solr search engine, and one 16GB/4 vCPU ("m4.xlarge") instance for R. The PostgreSQL database is served by Amazon RDS, and physical files are stored on Amazon S3. +Currently, the Harvard Dataverse Repository is served by four AWS server nodes -The Dataverse Software installation script will attempt to give your app server the right amount of RAM based on your system. +- two instances for web frontends running Payara fronted by Apache ("m4.4xlarge" with 64 GB RAM and 16 vCPUs) -Experimentation and testing with various hardware configurations is encouraged, or course, but do reach out as explained in the :doc:`intro` as needed for assistance. + - these are sitting behind an AWS ELB load balancer + +- one instance for the Solr search engine ("m4.2xlarge" with 32 GB RAM and 8 vCPUs) +- one instance for R ("m4.xlarge" instances with 16 GB RAM and 4 vCPUs) + +The PostgreSQL database is served by Amazon RDS. + +Physical files are stored on Amazon S3. The primary bucket is replicated in real-time to a secondary bucket, which is backed up to Glacier. Deleted files are kept around on the secondary bucket for a little while for convenient recovery. In addition, we use a backup script mentioned under :doc:`/admin/backups`. + +Experimentation and testing with various hardware configurations is encouraged, or course. Note that the installation script will attempt to give your app server (the web frontend) the right amount of RAM based on your system. Software Requirements +++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 3cf876a2251..1847f1b8f63 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -19,7 +19,7 @@ We assume you plan to run your Dataverse installation on Linux and we recommend Java ---- -The Dataverse Software requires Java SE 11 (or higher). +The Dataverse Software requires Java SE 17 (or higher). Installing Java =============== @@ -30,11 +30,11 @@ The Oracle JDK can be downloaded from http://www.oracle.com/technetwork/java/jav On a RHEL/derivative, install OpenJDK (devel version) using yum:: - # sudo yum install java-11-openjdk + # sudo yum install java-17-openjdk -If you have multiple versions of Java installed, Java 11 should be the default when ``java`` is invoked from the command line. You can test this by running ``java -version``. +If you have multiple versions of Java installed, Java 17 should be the default when ``java`` is invoked from the command line. You can test this by running ``java -version``. -On RHEL/derivative you can make Java 11 the default with the ``alternatives`` command, having it prompt you to select the version of Java from a list:: +On RHEL/derivative you can make Java 17 the default with the ``alternatives`` command, having it prompt you to select the version of Java from a list:: # alternatives --config java @@ -44,7 +44,7 @@ On RHEL/derivative you can make Java 11 the default with the ``alternatives`` co Payara ------ -Payara 5.2022.3 is recommended. 
Newer versions might work fine, regular updates are recommended. +Payara 6.2023.8 is recommended. Newer versions might work fine. Regular updates are recommended. Installing Payara ================= @@ -53,25 +53,27 @@ Installing Payara # useradd dataverse -- Download and install Payara (installed in ``/usr/local/payara5`` in the example commands below):: +- Download and install Payara (installed in ``/usr/local/payara6`` in the example commands below):: - # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip - # unzip payara-5.2022.3.zip - # mv payara5 /usr/local + # wget https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip + # unzip payara-6.2023.8.zip + # mv payara6 /usr/local + +If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ If you intend to install and run Payara under a service account (and we hope you do), chown -R the Payara hierarchy to root to protect it but give the service account access to the below directories: - Set service account permissions:: - # chown -R root:root /usr/local/payara5 - # chown dataverse /usr/local/payara5/glassfish/lib - # chown -R dataverse:dataverse /usr/local/payara5/glassfish/domains/domain1 + # chown -R root:root /usr/local/payara6 + # chown dataverse /usr/local/payara6/glassfish/lib + # chown -R dataverse:dataverse /usr/local/payara6/glassfish/domains/domain1 After installation, you may chown the lib/ directory back to root; the installer only needs write access to copy the JDBC driver into that directory. - Change from ``-client`` to ``-server`` under ``-client``:: - # vim /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + # vim /usr/local/payara6/glassfish/domains/domain1/config/domain.xml This recommendation comes from http://www.c2b2.co.uk/middleware-blog/glassfish-4-performance-tuning-monitoring-and-troubleshooting.php among other places. @@ -95,10 +97,14 @@ Also note that Payara may utilize more than the default number of file descripto PostgreSQL ---------- +PostgreSQL 13 is recommended because it's the version we test against. Version 10 or higher is required because that's what's `supported by Flyway `_, which we use for database migrations. + +You are welcome to experiment with newer versions of PostgreSQL, but please note that as of PostgreSQL 15, permissions have been restricted on the ``public`` schema (`release notes `_, `EDB blog post `_, `Crunchy Data blog post `_). The Dataverse installer has been updated to restore the old permissions, but this may not be a long term solution. + Installing PostgreSQL ===================== -The application has been tested with PostgreSQL versions up to 13 and version 10+ is required. We recommend installing the latest version that is available for your OS distribution. *For example*, to install PostgreSQL 13 under RHEL7/derivative:: +*For example*, to install PostgreSQL 13 under RHEL7/derivative:: # yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm # yum makecache fast @@ -152,12 +158,12 @@ Configuring Database Access for the Dataverse Installation (and the Dataverse So Solr ---- -The Dataverse Software search index is powered by Solr. +The Dataverse software search index is powered by Solr. Supported Versions ================== -The Dataverse Software has been tested with Solr version 8.11.1. 
Future releases in the 8.x series are likely to be compatible; however, this cannot be confirmed until they are officially tested. Major releases above 8.x (e.g. 9.x) are not supported. +The Dataverse software has been tested with Solr version 9.3.0. Future releases in the 9.x series are likely to be compatible. Please get in touch (:ref:`support`) if you are having trouble with a newer version. Installing Solr =============== @@ -172,19 +178,19 @@ Become the ``solr`` user and then download and configure Solr:: su - solr cd /usr/local/solr - wget https://archive.apache.org/dist/lucene/solr/8.11.1/solr-8.11.1.tgz - tar xvzf solr-8.11.1.tgz - cd solr-8.11.1 + wget https://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz + tar xvzf solr-9.3.0.tgz + cd solr-9.3.0 cp -r server/solr/configsets/_default server/solr/collection1 You should already have a "dvinstall.zip" file that you downloaded from https://github.com/IQSS/dataverse/releases . Unzip it into ``/tmp``. Then copy the files into place:: - cp /tmp/dvinstall/schema*.xml /usr/local/solr/solr-8.11.1/server/solr/collection1/conf - cp /tmp/dvinstall/solrconfig.xml /usr/local/solr/solr-8.11.1/server/solr/collection1/conf + cp /tmp/dvinstall/schema*.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf + cp /tmp/dvinstall/solrconfig.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf Note: The Dataverse Project team has customized Solr to boost results that come from certain indexed elements inside the Dataverse installation, for example prioritizing results from Dataverse collections over Datasets. If you would like to remove this, edit your ``solrconfig.xml`` and remove the ```` element and its contents. If you have ideas about how this boosting could be improved, feel free to contact us through our Google Group https://groups.google.com/forum/#!forum/dataverse-dev . -A Dataverse installation requires a change to the ``jetty.xml`` file that ships with Solr. Edit ``/usr/local/solr/solr-8.11.1/server/etc/jetty.xml`` , increasing ``requestHeaderSize`` from ``8192`` to ``102400`` +A Dataverse installation requires a change to the ``jetty.xml`` file that ships with Solr. Edit ``/usr/local/solr/solr-9.3.0/server/etc/jetty.xml`` , increasing ``requestHeaderSize`` from ``8192`` to ``102400`` Solr will warn about needing to increase the number of file descriptors and max processes in a production environment but will still run with defaults. We have increased these values to the recommended levels by adding ulimit -n 65000 to the init script, and the following to ``/etc/security/limits.conf``:: @@ -203,7 +209,7 @@ Solr launches asynchronously and attempts to use the ``lsof`` binary to watch fo Finally, you need to tell Solr to create the core "collection1" on startup:: - echo "name=collection1" > /usr/local/solr/solr-8.11.1/server/solr/collection1/core.properties + echo "name=collection1" > /usr/local/solr/solr-9.3.0/server/solr/collection1/core.properties Solr Init Script ================ @@ -291,8 +297,8 @@ If the installed location of the convert executable is different from ``/usr/bin R - -The Dataverse Software uses `R `_ to handle -tabular data files. The instructions below describe a **minimal** R +The Dataverse Software uses `R `_ to handle +tabular data files. The instructions below describe a **minimal** R Project installation. It will allow you to ingest R (.RData) files as tabular data and to export tabular data as .RData files. 
R can be considered an optional component, meaning that if you don't have R installed, you will still be able to run and diff --git a/doc/sphinx-guides/source/user/account.rst b/doc/sphinx-guides/source/user/account.rst index 12cc54c7fde..675bae90e5d 100755 --- a/doc/sphinx-guides/source/user/account.rst +++ b/doc/sphinx-guides/source/user/account.rst @@ -146,6 +146,8 @@ Microsoft Azure AD, GitHub, and Google Log In You can also convert your Dataverse installation account to use authentication provided by GitHub, Microsoft, or Google. These options may be found in the "Other options" section of the log in page, and function similarly to how ORCID is outlined above. If you would like to convert your account away from using one of these services for log in, then you can follow the same steps as listed above for converting away from the ORCID log in. +.. _my-data: + My Data ------- diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index b05459b6aaf..7d60054ae17 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -26,8 +26,8 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec - `Geospatial Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. - `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, and Dublin Core. - `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) - `VOResource Schema format `__ and is based on - `Virtual Observatory (VO) Discovery and Provenance Metadata `__. + `VOResource Schema format `__ and is based on + `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. @@ -36,8 +36,12 @@ Experimental Metadata Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. +Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need +to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. + See Also ~~~~~~~~ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 77a760ef838..3b5b4ec6ba8 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -93,6 +93,13 @@ Dropbox Upload Some Dataverse installations support the ability to upload files directly from Dropbox. 
To do so, click the "Upload from Dropbox" button, log in to Dropbox in the pop-up window, and select the files you'd like to transfer over. +.. _folder-upload: + +Folder Upload +------------- + +Some Dataverse installations support the ability to upload files from a local folder and subfolders. To do this, click the "Upload from Folder" button, select the folder you wish to upload, select/unselect specific files, and click "Start Uploads". More detailed instructions are available in the `DVWebloader wiki `_. + .. _rsync_upload: rsync + SSH Upload @@ -143,6 +150,11 @@ Additional command line arguments are available to make the DVUploader list what DVUploader is a community-developed tool, and its creation was primarily supported by the Texas Digital Library. Further information and support for DVUploader can be sought at `the project's GitHub repository `_ . +Integrations Dashboard Uploader +------------------------------- + +There is an experimental uploader described at :ref:`integrations-dashboard` that provides a graphical user interface (GUI) for uploading files from a local file system and various remote locations such as GitHub. + .. _duplicate-files: Duplicate Files @@ -177,11 +189,34 @@ File Handling Certain file types in the Dataverse installation are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation; and exploration through data visualization and analysis. See the sections below for information about special functionality for specific file types. +.. _file-previews: + File Previews ------------- Dataverse installations can add previewers for common file types uploaded by their research communities. The previews appear on the file page. If a preview tool for a specific file type is available, the preview will be created and will display automatically, after terms have been agreed to or a guestbook entry has been made, if necessary. File previews are not available for restricted files unless they are being accessed using a Private URL. See also :ref:`privateurl`. +Previewers are available for the following file types: + +- Text +- PDF +- Tabular (CSV, Excel, etc., see :doc:`tabulardataingest/index`) +- Code (R, etc.) +- Images (PNG, GIF, JPG) +- Audio (MP3, MPEG, WAV, OGG, M4A) +- Video (MP4, OGG, Quicktime) +- Zip (preview and extract/download) +- HTML +- GeoJSON +- GeoTIFF +- Shapefile +- NetCDF/HDF5 +- Hypothes.is + +Additional file types will be added to the `dataverse-previewers `_ repo before they are listed above, so please check there for the latest information or to request (or contribute!) an additional file previewer. + +Installation of previewers is explained in the :doc:`/admin/external-tools` section of the Admin Guide. + Tabular Data Files ------------------ @@ -193,11 +228,13 @@ Additional download options available for tabular data (found in the same drop-d - The original file uploaded by the user; - Saved as R data (if the original file was not in R format); - Variable Metadata (as a `DDI Codebook `_ XML file); -- Data File Citation (currently in either RIS, EndNote XML, or BibTeX format); -- All of the above, as a zipped bundle. +- Data File Citation (currently in either RIS, EndNote XML, or BibTeX format). + Differentially Private (DP) Metadata can also be accessed for restricted tabular files if the data depositor has created a DP Metadata Release. See :ref:`dp-release-create` for more information. +..
_research-code: + Research Code ------------- @@ -213,7 +250,7 @@ The following are general guidelines applicable to all programming languages. - Consider providing notes (in the README) on the expected code outputs or adding tests in the code, which would ensure that its functionality is intact. Capturing code dependencies will help other researchers recreate the necessary runtime environment. Without it, your code will not be able to run correctly (or at all). -One option is to use platforms such as `Whole Tale `_, `Jupyter Binder `_ or `Renku `_, which facilitate research reproducibility. Have a look at `Dataverse Integrations `_ for more information. +One option is to use platforms such as `Whole Tale `_, `Jupyter Binder `_ or `Renku `_, which facilitate research reproducibility. For more information, have a look at :doc:`/admin/integrations` in the Admin Guide, especially the sections on :ref:`wholetale`, :ref:`binder`, and :ref:`renku`. Another option is to use an automatic code dependency capture, which is often supported through the programming language. Here are a few examples: - If you are using the conda package manager, you can export your environment with the command ``conda env export > environment.yml``. For more information, see the `official documentation `__. @@ -268,7 +305,7 @@ After you :ref:`upload your files `, you can apply a "Workf |cw-image4| How to Describe Your Computational Workflow -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Dataverse installation you are using may have enabled Computational Workflow metadata fields for your use. If so, when :ref:`editing your dataset metadata `, you will see the fields described below. @@ -294,11 +331,81 @@ You can also search for files within datasets that have been tagged as "Workflow |cw-image6| +.. _fits: + Astronomy (FITS) ---------------- Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata is therefore searchable and browsable (facets) at the Dataset-level. +.. _geojson: + +GeoJSON +------- + +A map will be shown as a preview of GeoJSON files when the previewer has been enabled (see :ref:`file-previews`). See also a `video demo `_ of the GeoJSON previewer by its author, Kaitlin Newson. + +.. _geotiff: + +GeoTIFF +------- + +A map is also displayed as a preview of GeoTIFF image files, whose previewer must be enabled (see :ref:`file-previews`). Since GeoTIFFs do not have their own mimetype, it is advisable to use this previewer only when GeoTIFFs are used (and not "normal" TIFs). For performance reasons, this previewer has a file size limit of 15 MB and a row/column limit of 50,000 so that larger files are not loaded. + +.. _shapefile: + +Shapefile +--------- + +Another previewer can be enabled for shapefiles (see :ref:`file-previews`). This previewer only works with zipped shapefiles (see :doc:`/developers/geospatial`). A file size limit of 20 MB is set for this previewer (also for performance reasons). + +.. _netcdf-and-hdf5: + +NetCDF and HDF5 +--------------- + +H5Web Previewer +~~~~~~~~~~~~~~~ + +NetCDF and HDF5 files can be explored and visualized with H5Web_, which has been adapted into a file previewer tool (see :ref:`file-previews`) that can be enabled in your Dataverse installation. + +..
_H5Web: https://h5web.panosc.eu + +|h5web| + +NcML +~~~~ + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) A previewer for these NcML files is available (see :ref:`file-previews`). + +Please note that only modern versions of these formats, the ones based on HDF5 such as NetCDF 4+ and HDF5 itself (rather than HDF4), will yield an NcML auxiliary file. + +.. _NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + +Geospatial Bounding Box +~~~~~~~~~~~~~~~~~~~~~~~ + +An attempt will be made to extract a geospatial bounding box (west, south, east, north) from NetCDF and HDF5 files and then insert these values into the geospatial metadata block, if enabled. + +This is the mapping that is used: + +- geospatial_lon_min: West Longitude +- geospatial_lon_max: East Longitude +- geospatial_lat_max: North Latitude +- geospatial_lat_min: South Latitude + +Please note the following rules regarding these fields: + +- West Longitude and East Longitude are expected to be in the range of -180 to 180. (When using :ref:`geospatial-search`, you should use this range for longitude.) +- If West Longitude and East Longitude are both over 180 (outside the expected -180:180 range), 360 will be subtracted to shift the values from the 0:360 range to the expected -180:180 range. +- If either West Longitude or East Longitude is less than zero but the other longitude is greater than 180 (which would imply an indeterminate domain, a lack of clarity as to whether the domain is -180:180 or 0:360), metadata will not be extracted. +- If the bounding box was successfully populated, the subsequent removal of the NetCDF or HDF5 file from the dataset does not automatically remove the bounding box from the dataset metadata. You must remove the bounding box manually, if desired. +- This feature is disabled if S3 direct upload is enabled (see :ref:`s3-direct-upload-features-disabled`) unless :ref:`dataverse.netcdf.geo-extract-s3-direct-upload` has been set to true. + +If the bounding box was successfully populated, :ref:`geospatial-search` should be able to find it. + +.. _compressed-files: + Compressed Files ---------------- @@ -719,6 +826,8 @@ If you deaccession the most recently published version of the dataset but not al :class: img-responsive .. |bagit-image1| image:: ./img/bagit-handler-errors.png :class: img-responsive +.. |h5web| image:: ./img/h5web.png + :class: img-responsive .. _Make Data Count: https://makedatacount.org .. _Crossref: https://crossref.org diff --git a/doc/sphinx-guides/source/user/dataverse-management.rst b/doc/sphinx-guides/source/user/dataverse-management.rst index ed90497da8c..b5e8d8f4fc9 100755 --- a/doc/sphinx-guides/source/user/dataverse-management.rst +++ b/doc/sphinx-guides/source/user/dataverse-management.rst @@ -216,7 +216,7 @@ In order to link a dataset, you will need your account to have the "Add Dataset" To link a dataset to your Dataverse collection, you must navigate to that dataset and click the white "Link" button in the upper-right corner of the dataset page. This will open up a window where you can type in the name of the Dataverse collection that you would like to link the dataset to. Select your Dataverse collection and click the save button. This will establish the link, and the dataset will now appear under your Dataverse collection. -There is currently no way to remove established links in the UI.
If you need to remove a link between a Dataverse collection and a dataset, please contact the support team for the Dataverse installation you are using. +There is currently no way to remove established links in the UI. If you need to remove a link between a Dataverse collection and a dataset, please contact the support team for the Dataverse installation you are using (see the :ref:`unlink-a-dataset` section of the Admin Guide for more information). .. _dataverse-linking: diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index 42e1a2b23d4..2e82a1482b4 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -39,6 +39,13 @@ enter search terms for Dataverse collections, dataset metadata (citation and dom metadata. If you are searching for tabular data files you can also search at the variable level for name and label. To find out more about what each field searches, hover over the field name for a detailed description of the field. +.. _geospatial-search: + +Geospatial Search +----------------- + +Geospatial search is available from the :doc:`/api/search` (look for "geo" parameters). The metadata fields that are geospatially indexed are "West Longitude", "East Longitude", "North Latitude", and "South Latitude" from the "Geographic Bounding Box" field in the "Geospatial Metadata" block. + Browsing a Dataverse Installation --------------------------------- diff --git a/doc/sphinx-guides/source/user/img/h5web.png b/doc/sphinx-guides/source/user/img/h5web.png new file mode 100644 index 00000000000..176aa775114 Binary files /dev/null and b/doc/sphinx-guides/source/user/img/h5web.png differ diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 1cbd785b5dd..2000a2097f0 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -4,9 +4,14 @@ Dataverse Software Documentation Versions ========================================= -This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. +This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. -- 5.12 +- pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` +- 6.0 +- `5.14 `__ +- `5.13 `__ +- `5.12.1 `__ +- `5.12 `__ - `5.11.1 `__ - `5.11 `__ - `5.10.1 `__ diff --git a/doc/sphinx_bootstrap_theme/bootstrap/layout.html b/doc/sphinx_bootstrap_theme/bootstrap/layout.html index 9d17996292b..d3ccd463814 100755 --- a/doc/sphinx_bootstrap_theme/bootstrap/layout.html +++ b/doc/sphinx_bootstrap_theme/bootstrap/layout.html @@ -106,7 +106,7 @@ {%- if hasdoc('copyright') %} {% trans path=pathto('copyright'), copyright=copyright|e %}

Copyright © {{ copyright }}.

{% endtrans %} {%- else %} - {% trans copyright=copyright|e %}

Developed at the Institute for Quantitative Social Science  |  Code available at  |  Created using Sphinx {{ sphinx_version }}
Last updated on {{ last_updated }}  |  Dataverse v. {{ version }}  |  View the latest version of Dataverse Guides

+ {% trans copyright=copyright|e %}

Developed at the Institute for Quantitative Social Science  |  Code available at  |  Created using Sphinx {{ sphinx_version }}
Last updated on {{ last_updated }}  |  Dataverse v. {{ version }}  |  View the latest version of Dataverse Guides

Copyright © {{ copyright }}

{% endtrans %} {%- endif %} {%- endif %} diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml new file mode 100644 index 00000000000..ab44dbc1806 --- /dev/null +++ b/docker-compose-dev.yml @@ -0,0 +1,132 @@ +version: "2.4" + +services: + + dev_dataverse: + container_name: "dev_dataverse" + hostname: dataverse + image: ${APP_IMAGE} + restart: on-failure + user: payara + environment: + - DATAVERSE_DB_HOST=postgres + - DATAVERSE_DB_PASSWORD=secret + - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} + - ENABLE_JDWP=1 + - DATAVERSE_FEATURE_API_BEARER_AUTH=1 + ports: + - "8080:8080" # HTTP (Dataverse Application) + - "4848:4848" # HTTP (Payara Admin Console) + - "9009:9009" # JDWP + - "8686:8686" # JMX + networks: + - dataverse + depends_on: + - dev_postgres + - dev_solr + volumes: + - ./docker-dev-volumes/app/data:/dv + - ./docker-dev-volumes/app/secrets:/secrets + tmpfs: + - /dumps:mode=770,size=2052M,uid=1000,gid=1000 + - /tmp:mode=770,size=2052M,uid=1000,gid=1000 + mem_limit: 2147483648 # 2 GiB + mem_reservation: 1024m + privileged: false + + dev_bootstrap: + container_name: "dev_bootstrap" + image: gdcc/configbaker:unstable + restart: "no" + command: + - bootstrap.sh + - dev + networks: + - dataverse + + dev_postgres: + container_name: "dev_postgres" + hostname: postgres + image: postgres:${POSTGRES_VERSION} + restart: on-failure + environment: + - POSTGRES_USER=${DATAVERSE_DB_USER} + - POSTGRES_PASSWORD=secret + ports: + - "5432:5432" + networks: + - dataverse + volumes: + - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data + + dev_solr_initializer: + container_name: "dev_solr_initializer" + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./docker-dev-volumes/solr/conf:/solr-template + + dev_solr: + container_name: "dev_solr" + hostname: "solr" + image: solr:${SOLR_VERSION} + depends_on: + - dev_solr_initializer + restart: on-failure + ports: + - "8983:8983" + networks: + - dataverse + command: + - "solr-precreate" + - "collection1" + - "/template" + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./docker-dev-volumes/solr/conf:/template + + dev_smtp: + container_name: "dev_smtp" + hostname: "smtp" + image: maildev/maildev:2.0.5 + restart: on-failure + ports: + - "25:25" # smtp server + - "1080:1080" # web ui + environment: + - MAILDEV_SMTP_PORT=25 + - MAILDEV_MAIL_DIRECTORY=/mail + networks: + - dataverse + #volumes: + # - ./docker-dev-volumes/smtp/data:/mail + tmpfs: + - /mail:mode=770,size=128M,uid=1000,gid=1000 + + dev_keycloak: + container_name: "dev_keycloack" + image: 'quay.io/keycloak/keycloak:19.0' + hostname: keycloak + environment: + - KEYCLOAK_ADMIN=kcadmin + - KEYCLOAK_ADMIN_PASSWORD=kcpassword + - KEYCLOAK_LOGLEVEL=DEBUG + - KC_HOSTNAME_STRICT=false + networks: + dataverse: + aliases: + - keycloak.mydomain.com #create a DNS alias within the network (add the same alias to your /etc/hosts to get a working OIDC flow) + command: start-dev --import-realm --http-port=8090 # change port to 8090, so within the network and external the same port is used + ports: + - "8090:8090" + volumes: + - './conf/keycloak/oidc-realm.json:/opt/keycloak/data/import/oidc-realm.json' + +networks: + dataverse: + driver: bridge diff --git a/downloads/.gitignore b/downloads/.gitignore deleted file mode 100644 index 1b51bf4def7..00000000000 --- a/downloads/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -payara-5.201.zip -solr-7.3.0.tgz 
-weld-osgi-bundle-2.2.10.Final-glassfish4.jar -schemaSpy_5.0.0.jar diff --git a/downloads/download.sh b/downloads/download.sh deleted file mode 100755 index 7b9de0397cb..00000000000 --- a/downloads/download.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip -curl -L -O https://archive.apache.org/dist/lucene/solr/8.11.1/solr-8.11.1.tgz -curl -L -O https://search.maven.org/remotecontent?filepath=org/jboss/weld/weld-osgi-bundle/2.2.10.Final/weld-osgi-bundle-2.2.10.Final-glassfish4.jar -curl -s -L http://sourceforge.net/projects/schemaspy/files/schemaspy/SchemaSpy%205.0.0/schemaSpy_5.0.0.jar/download > schemaSpy_5.0.0.jar diff --git a/downloads/stata-13-test-files/Stata14TestFile.dta b/downloads/stata-13-test-files/Stata14TestFile.dta deleted file mode 100644 index 6f1c31dc798..00000000000 Binary files a/downloads/stata-13-test-files/Stata14TestFile.dta and /dev/null differ diff --git a/local_lib/com/apicatalog/titanium-json-ld/1.3.0-SNAPSHOT/titanium-json-ld-1.3.0-SNAPSHOT.jar b/local_lib/com/apicatalog/titanium-json-ld/1.3.0-SNAPSHOT/titanium-json-ld-1.3.0-SNAPSHOT.jar deleted file mode 100644 index ee499ae4b76..00000000000 Binary files a/local_lib/com/apicatalog/titanium-json-ld/1.3.0-SNAPSHOT/titanium-json-ld-1.3.0-SNAPSHOT.jar and /dev/null differ diff --git a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar b/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar deleted file mode 100644 index dc41f94046f..00000000000 Binary files a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar and /dev/null differ diff --git a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar.md5 b/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar.md5 deleted file mode 100644 index 7018ea4e822..00000000000 --- a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar.md5 +++ /dev/null @@ -1 +0,0 @@ -eeef5c0dc201d1105b9529a51fa8cdab diff --git a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar.sha1 b/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar.sha1 deleted file mode 100644 index 97f192f3732..00000000000 --- a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1fa716d318920fd59fc63f77965d113decf97355 diff --git a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom b/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom deleted file mode 100644 index ea2e4c03f9f..00000000000 --- a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom +++ /dev/null @@ -1,8 +0,0 @@ - - - 4.0.0 - edu.harvard.iq.dvn - unf5 - 5.0 - diff --git a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom.md5 b/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom.md5 deleted file mode 100644 index a88cf2a1c02..00000000000 --- a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom.md5 +++ /dev/null @@ -1 +0,0 @@ -2df5dac09375e1e7fcd66c705d9ca2ef diff --git a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom.sha1 b/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom.sha1 deleted file mode 100644 index 967b977b79e..00000000000 --- a/local_lib/edu/harvard/iq/dvn/unf5/5.0/unf5-5.0.pom.sha1 +++ /dev/null @@ -1 +0,0 @@ -431cd55e2e9379677d14e402dd3c474bb7be4ac9 diff --git a/local_lib/net/handle/handle/8.1.1/handle-8.1.1.jar b/local_lib/net/handle/handle/8.1.1/handle-8.1.1.jar deleted file mode 100644 index 1f8e1c3eb12..00000000000 Binary files a/local_lib/net/handle/handle/8.1.1/handle-8.1.1.jar and /dev/null differ diff --git a/local_lib/net/handle/handle/8.1.1/handle-8.1.1.pom b/local_lib/net/handle/handle/8.1.1/handle-8.1.1.pom 
deleted file mode 100644 index e3c09349172..00000000000 --- a/local_lib/net/handle/handle/8.1.1/handle-8.1.1.pom +++ /dev/null @@ -1,9 +0,0 @@ - - - 4.0.0 - net.handle - handle - 8.1.1 - POM was created from install:install-file - diff --git a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar b/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar deleted file mode 100644 index b3bddd62c24..00000000000 Binary files a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar and /dev/null differ diff --git a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar.md5 b/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar.md5 deleted file mode 100644 index 576062f55a1..00000000000 --- a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar.md5 +++ /dev/null @@ -1 +0,0 @@ -b0abb2fee242c479f305f47352600bbf diff --git a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar.sha1 b/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar.sha1 deleted file mode 100644 index e81e8450ef0..00000000000 --- a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9643e138cb5ed2684838e4b4faa118adfb2ecb4b diff --git a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom b/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom deleted file mode 100644 index b57cd67278b..00000000000 --- a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom +++ /dev/null @@ -1,8 +0,0 @@ - - - 4.0.0 - nom.tam.fits - fits - 2012-10-25-generated - diff --git a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom.md5 b/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom.md5 deleted file mode 100644 index 777b4df3325..00000000000 --- a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom.md5 +++ /dev/null @@ -1 +0,0 @@ -23ca47c46df791f220a87cfef3b2190c diff --git a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom.sha1 b/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom.sha1 deleted file mode 100644 index b5f41fd1a69..00000000000 --- a/local_lib/nom/tam/fits/fits/2012-10-25-generated/fits-2012-10-25-generated.pom.sha1 +++ /dev/null @@ -1 +0,0 @@ -c1ec9dfbbc72dc4623d309d772b804e47284ee27 diff --git a/local_lib/nom/tam/fits/fits/maven-metadata.xml b/local_lib/nom/tam/fits/fits/maven-metadata.xml deleted file mode 100644 index 4fc3254df3f..00000000000 --- a/local_lib/nom/tam/fits/fits/maven-metadata.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - nom.tam.fits - fits - - 2012-10-25-generated - - 2012-10-25-generated - - 20130925190525 - - diff --git a/local_lib/nom/tam/fits/fits/maven-metadata.xml.md5 b/local_lib/nom/tam/fits/fits/maven-metadata.xml.md5 deleted file mode 100644 index b6d7e4a726f..00000000000 --- a/local_lib/nom/tam/fits/fits/maven-metadata.xml.md5 +++ /dev/null @@ -1 +0,0 @@ -545c78160393b4c80e40377f2a7cf406 \ No newline at end of file diff --git a/local_lib/nom/tam/fits/fits/maven-metadata.xml.sha1 b/local_lib/nom/tam/fits/fits/maven-metadata.xml.sha1 deleted file mode 100644 index 188cf8ae044..00000000000 --- a/local_lib/nom/tam/fits/fits/maven-metadata.xml.sha1 +++ /dev/null @@ -1 +0,0 @@ -9cf56b8ef3f2bacdc669c2c7cdcd7cd50ed38dbb \ No newline at end of file 
diff --git a/local_lib/org/dataverse/unf/6.0/unf-6.0.jar b/local_lib/org/dataverse/unf/6.0/unf-6.0.jar deleted file mode 100644 index d2738e2dadd..00000000000 Binary files a/local_lib/org/dataverse/unf/6.0/unf-6.0.jar and /dev/null differ diff --git a/local_lib/org/dataverse/unf/6.0/unf-6.0.jar.md5 b/local_lib/org/dataverse/unf/6.0/unf-6.0.jar.md5 deleted file mode 100644 index 04ca3e73ce8..00000000000 --- a/local_lib/org/dataverse/unf/6.0/unf-6.0.jar.md5 +++ /dev/null @@ -1 +0,0 @@ -bd9b84a9ad737a81a2699ab81541a901 diff --git a/local_lib/org/dataverse/unf/6.0/unf-6.0.jar.sha1 b/local_lib/org/dataverse/unf/6.0/unf-6.0.jar.sha1 deleted file mode 100644 index a48cef32570..00000000000 --- a/local_lib/org/dataverse/unf/6.0/unf-6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4cad279c362e4c5c17a2058dc2c8f2fc97c76bf8 diff --git a/local_lib/org/dataverse/unf/6.0/unf-6.0.pom b/local_lib/org/dataverse/unf/6.0/unf-6.0.pom deleted file mode 100644 index 06f1508723f..00000000000 --- a/local_lib/org/dataverse/unf/6.0/unf-6.0.pom +++ /dev/null @@ -1,8 +0,0 @@ - - - 4.0.0 - org.dataverse - unf - 6.0 - diff --git a/local_lib/org/dataverse/unf/6.0/unf-6.0.pom.md5 b/local_lib/org/dataverse/unf/6.0/unf-6.0.pom.md5 deleted file mode 100644 index 138bc9c95f6..00000000000 --- a/local_lib/org/dataverse/unf/6.0/unf-6.0.pom.md5 +++ /dev/null @@ -1 +0,0 @@ -230c5b1f5ae71bb2fe80ef9e7209f681 diff --git a/local_lib/org/dataverse/unf/6.0/unf-6.0.pom.sha1 b/local_lib/org/dataverse/unf/6.0/unf-6.0.pom.sha1 deleted file mode 100644 index 689e8045418..00000000000 --- a/local_lib/org/dataverse/unf/6.0/unf-6.0.pom.sha1 +++ /dev/null @@ -1 +0,0 @@ -286b819f2fc7432a94b5940c6171be1589f66a37 diff --git a/modules/container-base/.gitignore b/modules/container-base/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-base/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-base/README.md b/modules/container-base/README.md new file mode 100644 index 00000000000..dc4d185bbb5 --- /dev/null +++ b/modules/container-base/README.md @@ -0,0 +1,63 @@ +# Dataverse Base Container Image + +The Dataverse Base Container Image contains primarily a pre-installed and pre-tuned application server with the +necessary software dependencies for deploying and launching a Dataverse repository installation. + +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks, etc., is all done +at this layer. Application images building from this very base focus on adding deployable Dataverse code and +actual scripts. + +There is a community based [application image](https://hub.docker.com/r/gdcc/dataverse) +([docs](https://guides.dataverse.org/en/latest/container/app-image.html)), but you may create your own or even reuse +this image for other purposes than the Dataverse application. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) +provides in-depth information about content, building, tuning and so on for this image. + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. 
+You can join the Community Chat on Matrix at https://chat.dataverse.org and https://groups.google.com/g/dataverse-community +to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-base) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) +- The `alpha` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-base`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml new file mode 100644 index 00000000000..fc672696df4 --- /dev/null +++ b/modules/container-base/pom.xml @@ -0,0 +1,177 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-base + ${packaging.type} + Container Base Image + This module provides an application server base image to be decorated with the Dataverse app. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/base:${base.image.tag} + unstable + eclipse-temurin:${target.java.version}-jre + 1000 + 1000 + linux/amd64,linux/arm64 + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + unpack + initialize + + unpack + + + + + fish.payara.distributions + payara + ${payara.version} + zip + false + ${project.build.directory} + + + ^payara\d + payara + + + + + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + + ${docker.platforms} + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile new file mode 100644 index 00000000000..97aa4cd2792 --- /dev/null +++ b/modules/container-base/src/main/docker/Dockerfile @@ -0,0 +1,236 @@ +# Copyright 2022 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +################################################################################################################ +# +# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD: +# mvn -Pct clean package docker:build +# +################################################################################################################ +# +# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images. +# Most parts origin from older versions of https://github.com/gdcc/dataverse-kubernetes. +# +# We are not using upstream Payara images because: +# - Using same base image as Solr (https://hub.docker.com/_/solr) is reducing pulls +# - Their image is less optimised for production usage and Dataverse by design choices +# - We provide multi-arch images +# - We provide some tweaks for development and monitoring +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG JAVA_IMAGE="eclipse-temurin:11-jre" +FROM $JAVA_IMAGE + +# Default payara ports to expose +# 4848: admin console +# 9009: debug port (JDWP) +# 8080: http +# 8181: https - but http-listener-2 is disabled here! 
+# 8686: JMX +EXPOSE 4848 9009 8080 8686 + +ENV HOME_DIR="/opt/payara" +ENV PAYARA_DIR="${HOME_DIR}/appserver" \ + SCRIPT_DIR="${HOME_DIR}/scripts" \ + CONFIG_DIR="${HOME_DIR}/config" \ + DEPLOY_DIR="${HOME_DIR}/deployments" \ + STORAGE_DIR="/dv" \ + SECRETS_DIR="/secrets" \ + DUMPS_DIR="/dumps" \ + PASSWORD_FILE="${HOME_DIR}/passwordFile" \ + ADMIN_USER="admin" \ + ADMIN_PASSWORD="admin" \ + DOMAIN_NAME="domain1" \ + PAYARA_ARGS="" +ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ + DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ + DEPLOY_PROPS="" \ + PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ + POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ + JVM_ARGS="" \ + MEM_MAX_RAM_PERCENTAGE="70.0" \ + MEM_XSS="512k" \ + # Source: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations + MEM_MIN_HEAP_FREE_RATIO="20" \ + MEM_MAX_HEAP_FREE_RATIO="40" \ + MEM_MAX_GC_PAUSE_MILLIS="500" \ + MEM_METASPACE_SIZE="256m" \ + MEM_MAX_METASPACE_SIZE="2g" \ + # Make heap dumps on OOM appear in DUMPS_DIR + ENABLE_DUMPS=0 \ + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ + ENABLE_JMX=0 \ + ENABLE_JDWP=0 \ + ENABLE_RELOAD=0 + +### PART 1: SYSTEM ### +ARG UID=1000 +ARG GID=1000 +USER root +WORKDIR / +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +# Mark these directories as mutuable data containers to avoid cluttering the images overlayfs at runtime. +VOLUME ${STORAGE_DIR} ${SECRETS_DIR} ${DUMPS_DIR} +RUN <> /tmp/password-change-file.txt + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} + # Start domain for configuration + ${ASADMIN} start-domain ${DOMAIN_NAME} + # Allow access to admin with password only + ${ASADMIN} enable-secure-admin + + ### CONTAINER USAGE ENABLEMENT + # List & delete memory settings from domain + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); + do + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); + done + # Tweak memory settings for containers + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" + # Set logging to console only for containers + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false \ + + ### PRODUCTION READINESS + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' + ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' + # Workaround for FISH-7722: Failed to deploy war with @Stateless https://github.com/payara/Payara/issues/6337 + ${ASADMIN} create-jvm-options --add-opens=java.base/java.io=ALL-UNNAMED + # Disable autodeploy and hot reload + ${ASADMIN} set 
configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" + ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" + # Enlarge thread pools + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" + # Enable file caching + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + # Disable the HTTPS listener (we are always fronting our appservers with a reverse proxy handling SSL) + ${ASADMIN} set configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" + # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" + ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" + # Misc settings + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" \ + + ### DATAVERSE APPLICATION SPECIFICS + # Configure the MicroProfile directory config source to point to /secrets + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" + # Password alias store = 105, default = 100 - lets sort between those to enable overriding from all of the others + # except alias config source and microprofile-config.properties + ${ASADMIN} set-config-ordinal --ordinal=104 --source=secrets + # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' + # TODO: what of the below 3 items can be deleted for container usage? + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl + # Always disable phoning home... 
+ ${ASADMIN} disable-phone-home \ + + ### CLEANUP + # Stop domain + ${ASADMIN} stop-domain "${DOMAIN_NAME}" + # Disable JSP servlet dynamic reloads + sed -i 's#org.glassfish.wasp.servlet.JspServlet#org.glassfish.wasp.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" + # Cleanup old CA certificates to avoid unnecessary log clutter during startup + ${SCRIPT_DIR}/removeExpiredCaCerts.sh + # Delete generated files + rm -rf \ + "/tmp/password-change-file.txt" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" +EOF + +# Set the entrypoint to tini (as a process supervisor) +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +# This works because we add ${SCRIPT_DIR} to $PATH above! +CMD ["entrypoint.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-base" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Base Image" \ + org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml new file mode 100644 index 00000000000..9fc62d49fa1 --- /dev/null +++ b/modules/container-base/src/main/docker/assembly.xml @@ -0,0 +1,17 @@ + + + + + ${project.basedir}/target/payara + appserver + + + + ${project.basedir}/src/main/docker/scripts + scripts + 0755 + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/entrypoint.sh b/modules/container-base/src/main/docker/scripts/entrypoint.sh new file mode 100644 index 00000000000..47933bd42e2 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/entrypoint.sh @@ -0,0 +1,33 @@ +#!/usr/bin/dumb-init /bin/bash +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/entrypoint.sh and licensed under CDDL 1.1 by the Payara Foundation. +# +########################################################################################################## + +# This shellscript is supposed to be executed by https://github.com/Yelp/dumb-init to keep subprocesses +# and zombies under control. If the ENTRYPOINT command is changed, it will still use dumb-init because shebang. +# dumb-init takes care to send any signals to subshells, too! (Which might run in the background...) + + +# Execute any scripts BEFORE the appserver starts +for f in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do + # shellcheck disable=SC1090 + case "$f" in + *.sh) echo "[Entrypoint] running $f"; . "$f" ;; + *) echo "[Entrypoint] ignoring $f" ;; + esac + echo +done + +# If present, run a startInBackground.sh in the background (e.g. 
to run tasks AFTER the application server starts) +if [ -x "${SCRIPT_DIR}/startInBackground.sh" ]; then + echo "[Entrypoint] running ${SCRIPT_DIR}/startInBackground.sh in background" + "${SCRIPT_DIR}"/startInBackground.sh & +fi + +# Start the application server and make it REPLACE this shell, so init system and Java directly interact +# Remember - this means no code below this statement will be run! +echo "[Entrypoint] running ${SCRIPT_DIR}/startInForeground.sh in foreground" +exec "${SCRIPT_DIR}"/startInForeground.sh "${PAYARA_ARGS}" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh new file mode 100644 index 00000000000..8729f78e466 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh @@ -0,0 +1,66 @@ +#!/bin/bash +########################################################################################################## +# +# A script to append deploy commands to the post boot command file at +# $PAYARA_HOME/scripts/post-boot-commands.asadmin file. All applications in the +# $DEPLOY_DIR (either files or folders) will be deployed. +# The $POSTBOOT_COMMANDS file can then be used with the start-domain using the +# --postbootcommandfile parameter to deploy applications on startup. +# +# Usage: +# ./generate_deploy_commands.sh +# +# Optionally, any number of parameters of the asadmin deploy command can be +# specified as parameters to this script. +# E.g., to deploy applications with implicit CDI scanning disabled: +# +# ./generate_deploy_commands.sh --properties=implicitCdiEnabled=false +# +# Environment variables used: +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# +# Note that many parameters to the deploy command can be safely used only when +# a single application exists in the $DEPLOY_DIR directory. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/init_1_generate_deploy_commands.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$DEPLOY_DIR" ]; then echo "Variable DEPLOY_DIR is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +deploy() { + + if [ -z "$1" ]; then + echo "No deployment specified"; + exit 1; + fi + + DEPLOY_STATEMENT="deploy $DEPLOY_PROPS $1" + if grep -q "$1" "$POSTBOOT_COMMANDS"; then + echo "post boot commands already deploys $1"; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi +} + +# RAR files first +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 -name "*.rar" -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done + +# Then every other WAR, EAR, JAR or directory +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 \ + \( ! 
-name "*.rar" -a -name "*.war" -o -name "*.ear" -o -name "*.jar" -o -type d \) \ + -print0 | while IFS= read -r -d '' file; do deploy "$file"; done diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh new file mode 100644 index 00000000000..bb0984332f7 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +set -euo pipefail + +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### +# This script enables different development options, like a JMX connector +# usable with VisualVM, JRebel hot-reload support and JDWP debugger service. +# Enable it by adding env vars on startup (e.g. via ConfigMap) +# +# As this script is "sourced" from entrypoint.sh, we can manipulate env vars +# for the parent shell before executing Payara. +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### + +# 0. Init variables +ENABLE_JMX=${ENABLE_JMX:-0} +ENABLE_JDWP=${ENABLE_JDWP:-0} +ENABLE_RELOAD=${ENABLE_RELOAD:-0} + +DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" + +# 1. Configure JMX (enabled by default on port 8686, but requires SSL) +# See also https://blog.payara.fish/monitoring-payara-server-with-jconsole +# To still use it, you can use a sidecar container proxying or using JMX via localhost without SSL. +if [ "${ENABLE_JMX}" = "1" ]; then + echo "Enabling unsecured JMX on 0.0.0.0:8686, enabling AMX and tuning monitoring levels to HIGH. You'll need a sidecar for this, as access is allowed from same machine only (without SSL)." + { \ + echo "set configs.config.server-config.amx-configuration.enabled=true" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jvm=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jdbc-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-services-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.ejb-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.thread-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.http-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.security=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jms-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jersey=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.transaction-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jpa=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.orb=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.deployment=HIGH" + echo "set 
configs.config.server-config.admin-service.jmx-connector.system.security-enabled=false" + } >> "${DV_PREBOOT}" +fi + +# 2. Enable JDWP via debugging switch +if [ "${ENABLE_JDWP}" = "1" ]; then + echo "Enabling JDWP remote debugging support via asadmin debugging switch." + export PAYARA_ARGS="${PAYARA_ARGS} --debug=true" +fi + +# 3. Enable hot reload +if [ "${ENABLE_RELOAD}" = "1" ]; then + echo "Enabling hot reload of deployments." + echo "set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled=true" >> "${DV_PREBOOT}" +fi + +# 4. Add the commands to the existing preboot file, but insert BEFORE deployment +TMP_PREBOOT=$(mktemp) +cat "${DV_PREBOOT}" "${PREBOOT_COMMANDS}" > "${TMP_PREBOOT}" +mv "${TMP_PREBOOT}" "${PREBOOT_COMMANDS}" +echo "DEBUG: preboot contains the following commands:" +echo "--------------------------------------------------" +cat "${PREBOOT_COMMANDS}" +echo "--------------------------------------------------" \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh b/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh new file mode 100644 index 00000000000..c019c09130e --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Remove expired certs from a keystore +# ------------------------------------ +# This script was copied from https://gist.github.com/damkh/a4a0d74891f92b0285a3853418357c1e (thanks @damkh) +# and slightly modified to be used within our scenario and comply with shellcheck good practices. + +set -euo pipefail + +KEYSTORE="${DOMAIN_DIR}/config/cacerts.jks" +if [ ! -r "${KEYSTORE}" ]; then + KEYSTORE="${DOMAIN_DIR}/config/cacerts.p12" + if [ ! -r "${KEYSTORE}" ]; then + echo "Could not find CA certs keystore" + exit 1 + fi +fi + +keytool -list -v -keystore "${KEYSTORE}" -storepass changeit 2>/dev/null | \ + grep -i 'alias\|until' > aliases.txt + +i=1 +# Split dates and aliases to different arrays +while read -r p; do + # uneven lines are dates, evens are aliases + if ! ((i % 2)); then + arr_date+=("$p") + else + arr_cn+=("$p") + fi + i=$((i+1)) +done < aliases.txt +i=0 + +# Parse until-dates -> +# convert until-dates to "seconds from 01-01-1970"-format -> +# compare until-dates with today-date -> +# delete expired aliases +for date_idx in $(seq 0 $((${#arr_date[*]}-1))); +do + a_date=$(echo "${arr_date[$date_idx]}" | awk -F"until: " '{print $2}') + if [ "$(date +%s --date="$a_date")" -lt "$(date +%s)" ]; + then + echo "removing ${arr_cn[$i]} expired: $a_date" + alias_name=$(echo "${arr_cn[$i]}" | awk -F"name: " '{print $2}') + keytool -delete -alias "$alias_name" -keystore "${KEYSTORE}" -storepass changeit + fi + i=$((i+1)) +done +echo "Done." \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/startInForeground.sh b/modules/container-base/src/main/docker/scripts/startInForeground.sh new file mode 100644 index 00000000000..4843f6ae055 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/startInForeground.sh @@ -0,0 +1,89 @@ +#!/bin/bash +########################################################################################################## +# +# This script is to execute Payara Server in foreground, mainly in a docker environment. +# It allows to avoid running 2 instances of JVM, which happens with the start-domain --verbose command. 
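+# In short: rather than letting asadmin spawn and supervise a second server JVM, the script below asks
+# asadmin for the exact java launch command via "start-domain --dry-run" and then exec's that command,
+# so a single JVM runs in the foreground and receives signals directly.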
+# +# Usage: +# Running +# startInForeground.sh +# is equivalent to running +# asadmin start-domain +# +# It's possible to use any arguments of the start-domain command as arguments to startInForeground.sh +# +# Environment variables used: +# - $ADMIN_USER - the username to use for the asadmin utility. +# - $PASSWORD_FILE - the password file to use for the asadmin utility. +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# - $DOMAIN_NAME - the name of the domain to start. +# - $JVM_ARGS - extra JVM options to pass to the Payara Server instance. +# - $AS_ADMIN_MASTERPASSWORD - the master password for the Payara Server instance. +# +# This script executes the asadmin tool which is expected at ~/appserver/bin/asadmin. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/ +# extras/docker-images/server-full/src/main/docker/bin/startInForeground.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$ADMIN_USER" ]; then echo "Variable ADMIN_USER is not set."; exit 1; fi +if [ -z "$PASSWORD_FILE" ]; then echo "Variable PASSWORD_FILE is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$DOMAIN_NAME" ]; then echo "Variable DOMAIN_NAME is not set."; exit 1; fi + +# Check if dumps are enabled - add arg to JVM_ARGS in this case +if [ -n "${ENABLE_DUMPS}" ] && [ "${ENABLE_DUMPS}" = "1" ]; then + JVM_ARGS="${JVM_DUMPS_ARG} ${JVM_ARGS}" +fi + +# The following command gets the command line to be executed by start-domain +# - print the command line to the server with --dry-run, each argument on a separate line +# - remove -read-string argument +# - surround each line except with parenthesis to allow spaces in paths +# - remove lines before and after the command line and squash commands on a single line + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +# shellcheck disable=SC2068 +# -- Using $@ is necessary here as asadmin cannot deal with options enclosed in ""! +OUTPUT=$("${PAYARA_DIR}"/bin/asadmin --user="${ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" start-domain --dry-run --prebootcommandfile="${PREBOOT_COMMANDS}" --postbootcommandfile="${POSTBOOT_COMMANDS}" $@ "$DOMAIN_NAME") +STATUS=$? 
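For reference on the variables checked above: PASSWORD_FILE is handed to asadmin via --passwordfile and, further down in this script, the same file may be sourced to pick up AS_ADMIN_MASTERPASSWORD. A sketch of what such a file could look like, following the usual AS_ADMIN_* key convention of asadmin password files (path and values are placeholders, not taken from this changeset):
    # Hypothetical contents of the file referenced by $PASSWORD_FILE
    AS_ADMIN_PASSWORD=changeme-admin-password
    AS_ADMIN_MASTERPASSWORD=changeit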
+if [ "$STATUS" -ne 0 ] + then + echo ERROR: "$OUTPUT" >&2 + exit 1 +fi + +COMMAND=$(echo "$OUTPUT"\ + | sed -n -e '2,/^$/p'\ + | sed "s|glassfish.jar|glassfish.jar $JVM_ARGS |g") + +echo Executing Payara Server with the following command line: +echo "$COMMAND" | tr ' ' '\n' +echo + +# Run the server in foreground - read master password from variable or file or use the default "changeit" password + +set +x +if test "$AS_ADMIN_MASTERPASSWORD"x = x -a -f "$PASSWORD_FILE" + then + # shellcheck disable=SC1090 + source "$PASSWORD_FILE" +fi +if test "$AS_ADMIN_MASTERPASSWORD"x = x + then + AS_ADMIN_MASTERPASSWORD=changeit +fi +echo "AS_ADMIN_MASTERPASSWORD=$AS_ADMIN_MASTERPASSWORD" > /tmp/masterpwdfile +# shellcheck disable=SC2086 +# -- Unquoted exec var is necessary, as otherwise things get escaped that may not be escaped (parameters for Java) +exec ${COMMAND} < /tmp/masterpwdfile diff --git a/modules/container-configbaker/Dockerfile b/modules/container-configbaker/Dockerfile new file mode 100644 index 00000000000..564216b3572 --- /dev/null +++ b/modules/container-configbaker/Dockerfile @@ -0,0 +1,57 @@ +# Copyright 2023 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 + +# This build arg must be given or build will fail +ARG SOLR_VERSION +# We simply have this intermediate stage here without any activity to copy the default configset over +FROM solr:${SOLR_VERSION} AS solr + +# Let's build us a baker +FROM alpine:3 + +ENV SCRIPT_DIR="/scripts" \ + SECRETS_DIR="/secrets" \ + SOLR_TEMPLATE="/template" +ENV PATH="${PATH}:${SCRIPT_DIR}" \ + BOOTSTRAP_DIR="${SCRIPT_DIR}/bootstrap" + +ARG APK_PACKAGES="curl bind-tools netcat-openbsd jq bash dumb-init wait4x ed" + +RUN true && \ + # Install necessary software and tools + apk add --no-cache ${APK_PACKAGES} && \ + # Make our working directories + mkdir -p ${SCRIPT_DIR} ${SECRETS_DIR} ${SOLR_TEMPLATE} + +# Get in the scripts and make them executable (just in case...) +COPY maven/scripts maven/solr/update-fields.sh ${SCRIPT_DIR}/ +RUN chmod +x ${SCRIPT_DIR}/*.sh ${BOOTSTRAP_DIR}/*/*.sh + +# Copy the Solr config bits +COPY --from=solr /opt/solr/server/solr/configsets/_default ${SOLR_TEMPLATE}/ +COPY maven/solr/*.xml ${SOLR_TEMPLATE}/conf/ +RUN rm ${SOLR_TEMPLATE}/conf/managed-schema.xml + +# Copy the data from scripts/api that provide the common base setup you'd get from the installer. 
+# ".dockerignore" will take care of taking only the bare necessities +COPY maven/setup ${SCRIPT_DIR}/bootstrap/base/ + +# Set the entrypoint to tini (as a process supervisor) +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +# By default run a script that will print a help message and terminate +CMD ["help.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-configbaker" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Config Baker Image" \ + org.opencontainers.image.description="This container image configures Dataverse and provides other tooling" diff --git a/modules/container-configbaker/README.md b/modules/container-configbaker/README.md new file mode 100644 index 00000000000..17b6f985798 --- /dev/null +++ b/modules/container-configbaker/README.md @@ -0,0 +1,46 @@ +# Config Baker + +The Config Baker container may be used to execute all sorts of tasks around setting up, preparing and finalizing +an instance of the Dataverse software. Its focus is bootstrapping non-initialized installations. + +You may use this image as is, base your own derivative image on it or use bind mounts to change behavior. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Config Baker Image](https://guides.dataverse.org/en/latest/container/configbaker-image.html) +provides information about this image. + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org and https://groups.google.com/g/dataverse-community +to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-configbaker) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-configbaker/src/main/docker/Dockerfile)) +- The `alpha` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-configbaker/src/main/docker/Dockerfile)) + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). 
+ +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-configbaker/assembly.xml b/modules/container-configbaker/assembly.xml new file mode 100644 index 00000000000..3285eef510a --- /dev/null +++ b/modules/container-configbaker/assembly.xml @@ -0,0 +1,46 @@ + + + + + modules/container-configbaker/scripts + scripts + + + + conf/solr/9.3.0 + solr + + + + scripts/api + setup + + setup-all.sh + setup-builtin-roles.sh + setup-datasetfields.sh + setup-identity-providers.sh + + data/licenses/*.json + data/authentication-providers/builtin.json + data/metadatablocks/*.tsv + + data/dv-root.json + + data/role-admin.json + data/role-curator.json + data/role-dsContributor.json + data/role-dvContributor.json + data/role-editor.json + data/role-filedownloader.json + data/role-fullContributor.json + data/role-member.json + + data/user-admin.json + + + data/metadatablocks/custom* + + + + diff --git a/modules/container-configbaker/scripts/bootstrap.sh b/modules/container-configbaker/scripts/bootstrap.sh new file mode 100644 index 00000000000..1aa9e232953 --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# [INFO]: Execute bootstrapping configuration of a freshly baked instance + +set -euo pipefail + +function usage() { + echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] []" + echo "" + echo "Execute initial configuration (bootstrapping) of an empty Dataverse instance." + echo -n "Known personas: " + find "${BOOTSTRAP_DIR}" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | paste -sd ' ' + echo "" + echo "Parameters:" + echo "instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'" + echo " timeout - Provide how long to wait for the instance to become available (using wait4x). Default: '2m'" + echo " persona - Configure persona to execute. Calls ${BOOTSTRAP_DIR}//init.sh. Default: 'base'" + echo "" + echo "Note: This script will wait for the Dataverse instance to be available before executing the bootstrapping." + echo " It also checks if already bootstrapped before (availability of metadata blocks) and skip if true." + echo "" + exit 1 +} + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +TIMEOUT=${TIMEOUT:-"2m"} + +while getopts "u:t:h" OPTION +do + case "$OPTION" in + u) DATAVERSE_URL="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + h) usage;; + \?) usage;; + esac +done +shift $((OPTIND-1)) + +# Assign persona if present or go default +PERSONA=${1:-"base"} + +# Export the URL to be reused in the actual setup scripts +export DATAVERSE_URL + +# Wait for the instance to become available +echo "Waiting for ${DATAVERSE_URL} to become ready in max ${TIMEOUT}." 
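The bootstrap.sh added here is the config baker's main entry point: it waits for the instance, checks whether metadata blocks already exist, and then hands off to a persona's init.sh. A hedged sketch of invoking it once the image is built; the image tag, the container network name and the persona are assumptions:
    # Show the image's help screen (help.sh is the default command)
    docker run --rm gdcc/configbaker:unstable
    # Bootstrap an instance reachable on a shared Docker network, using the "dev" persona
    docker run --rm --network dataverse gdcc/configbaker:unstable bootstrap.sh -u http://dataverse:8080 -t 5m dev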
+wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.version + +# Avoid bootstrapping again by checking if a metadata block has been loaded +BLOCK_COUNT=$(curl -sSf "${DATAVERSE_URL}/api/metadatablocks" | jq ".data | length") +if [[ $BLOCK_COUNT -gt 0 ]]; then + echo "Your instance has been bootstrapped already, skipping." + exit 0 +fi + +# Now execute the bootstrapping script +echo "Now executing bootstrapping script at ${BOOTSTRAP_DIR}/${PERSONA}/init.sh." +exec "${BOOTSTRAP_DIR}/${PERSONA}/init.sh" diff --git a/modules/container-configbaker/scripts/bootstrap/base/init.sh b/modules/container-configbaker/scripts/bootstrap/base/init.sh new file mode 100644 index 00000000000..81c2b59f347 --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/base/init.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +./setup-all.sh diff --git a/modules/container-configbaker/scripts/bootstrap/dev/init.sh b/modules/container-configbaker/scripts/bootstrap/dev/init.sh new file mode 100644 index 00000000000..1042478963d --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/dev/init.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +echo "Running base setup-all.sh (INSECURE MODE)..." +"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out + +echo "Setting system mail address..." +curl -X PUT -d "dataverse@localhost" "${DATAVERSE_URL}/api/admin/settings/:SystemEmail" + +echo "Setting DOI provider to \"FAKE\"..." +curl "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" -X PUT -d FAKE + +API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \") +export API_TOKEN + +echo "Publishing root dataverse..." +curl -H "X-Dataverse-key:$API_TOKEN" -X POST "${DATAVERSE_URL}/api/dataverses/:root/actions/:publish" + +echo "Allowing users to create dataverses and datasets in root..." +curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "{\"assignee\": \":authenticated-users\",\"role\": \"fullContributor\"}" "${DATAVERSE_URL}/api/dataverses/:root/assignments" + +echo "Checking Dataverse version..." +curl "${DATAVERSE_URL}/api/info/version" + +echo "" +echo "Done, your instance has been configured for development. Have a nice day!" diff --git a/modules/container-configbaker/scripts/fix-fs-perms.sh b/modules/container-configbaker/scripts/fix-fs-perms.sh new file mode 100644 index 00000000000..9ce8f475d70 --- /dev/null +++ b/modules/container-configbaker/scripts/fix-fs-perms.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# [INFO]: Fix folder permissions using 'chown' to be writeable by containers not running as root. + +set -euo pipefail + +if [[ "$(id -un)" != "root" ]]; then + echo "This script must be run as user root (not $(id -un)), otherwise no fix is possible." 
+fi + +DEF_DV_PATH="/dv" +DEF_SOLR_PATH="/var/solr" +DEF_DV_UID="1000" +DEF_SOLR_UID="8983" + +function usage() { + echo "Usage: $(basename "$0") (dv|solr|[1-9][0-9]{3,4}) [PATH [PATH [...]]]" + echo "" + echo "You may omit a path when using 'dv' or 'solr' as first argument:" + echo " - 'dv' will default to user $DEF_DV_UID and $DEF_DV_PATH" + echo " - 'solr' will default to user $DEF_SOLR_UID and $DEF_SOLR_PATH" + exit 1 +} + +# Get a target name or id +TARGET=${1:-help} +# Get the rest of the arguments as paths to apply the fix to +PATHS=( "${@:2}" ) + +ID=0 +case "$TARGET" in + dv) + ID="$DEF_DV_UID" + # If there is no path, add the default for our app image + if [[ ${#PATHS[@]} -eq 0 ]]; then + PATHS=( "$DEF_DV_PATH" ) + fi + ;; + solr) + ID="$DEF_SOLR_UID" + # In case there is no path, add the default path for Solr images + if [[ ${#PATHS[@]} -eq 0 ]]; then + PATHS=( "$DEF_SOLR_PATH" ) + fi + ;; + # If there is a digit in the argument, check if this is a valid UID (>= 1000, ...) + *[[:digit:]]* ) + echo "$TARGET" | grep -q "^[1-9][0-9]\{3,4\}$" || usage + ID="$TARGET" + ;; + *) + usage + ;; +esac + +# Check that we actually have at least 1 path +if [[ ${#PATHS[@]} -eq 0 ]]; then + usage +fi + +# Do what we came for +chown -R "$ID:$ID" "${PATHS[@]}" diff --git a/modules/container-configbaker/scripts/help.sh b/modules/container-configbaker/scripts/help.sh new file mode 100644 index 00000000000..744ec8c8b4c --- /dev/null +++ b/modules/container-configbaker/scripts/help.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -euo pipefail + +# [INFO]: This script. + +# This is the Dataverse logo in ASCII +# shellcheck disable=SC2016 +echo -e ' â•“mαo\n â•« jh\n `%╥æ╨\n ╫µ\n â•“@M%â•—,\n â–“` â•«U\n ▓² â•«â•›\n â–“M#Mâ•"\n ڑMâ•â•%φ╫┘\n┌╫" "â•«â”\nâ–“ â–“\nâ–“ â–“\n`╫µ ¿╫"\n "â•œ%%MMâ•œ`' +echo "" +echo "Hello!" +echo "" +echo "My name is Config Baker. I'm a container image with lots of tooling to 'bake' a containerized Dataverse instance!" +echo "I can cook up an instance (initial config), put icing on your Solr search index configuration, and more!" +echo "" +echo "Here's a list of things I can do for you:" + +# Get the longest name length +LENGTH=1 +for SCRIPT in "${SCRIPT_DIR}"/*.sh; do + L="$(basename "$SCRIPT" | wc -m)" + if [ "$L" -gt "$LENGTH" ]; then + LENGTH="$L" + fi +done + +# Print script names and info, but formatted +for SCRIPT in "${SCRIPT_DIR}"/*.sh; do + printf "%${LENGTH}s - " "$(basename "$SCRIPT")" + grep "# \[INFO\]: " "$SCRIPT" | sed -e "s|# \[INFO\]: ||" +done + +echo "" +echo "Simply execute this container with the script name (and potentially arguments) as 'command'." diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index ccc0a9a7f60..c45d59e4f5f 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -13,6 +13,8 @@ ../../pom.xml ../../scripts/zipdownload + ../container-base + ../dataverse-spi - 5.12 + 6.0 - 11 + 17 UTF-8 -Xdoclint:none @@ -146,9 +148,9 @@ -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} - 5.2022.3 - 42.5.0 - 8.11.1 + 6.2023.8 + 42.6.0 + 9.3.0 1.12.290 0.177.0 @@ -163,16 +165,14 @@ 4.4.14 - 5.0.0-RC1 + 5.1.0 1.15.0 2.10.1 - - 4.13.1 - 5.7.0 - ${junit.jupiter.version} - 2.28.2 + + 5.10.0 + 5.4.0 9.3 @@ -181,10 +181,24 @@ 3.2.2 3.3.2 3.2.0 + 3.0.0-M1 3.0.0-M5 3.0.0-M5 3.3.0 + 3.0.0-M7 + 3.0.1 + 4.0.0-M4 + 3.2.1 + 3.4.1 + 1.3.0 + 3.3.0 + 3.1.2 + 1.6.13 + 1.7.0 + + + 0.43.0
@@ -225,6 +239,11 @@ maven-dependency-plugin ${maven-dependency-plugin.version} + + org.apache.maven.plugins + maven-install-plugin + ${maven-install-plugin.version} + org.apache.maven.plugins maven-surefire-plugin @@ -235,6 +254,11 @@ maven-failsafe-plugin ${maven-failsafe-plugin.version} + + org.apache.maven.plugins + maven-enforcer-plugin + ${maven-enforcer-plugin.version} + org.apache.maven.plugins maven-checkstyle-plugin @@ -247,8 +271,94 @@ + + io.fabric8 + docker-maven-plugin + ${fabric8-dmp.version} + + + org.apache.maven.plugins + maven-site-plugin + ${maven-site-plugin.version} + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + org.apache.maven.plugins + maven-javadoc-plugin + ${maven-javadoc-plugin.version} + + + org.apache.maven.plugins + maven-gpg-plugin + ${maven-gpg-plugin.version} + + + org.codehaus.mojo + flatten-maven-plugin + ${maven-flatten-plugin.version} + + + org.kordamp.maven + pomchecker-maven-plugin + ${pomchecker-maven-plugin.version} + + + org.sonatype.plugins + nexus-staging-maven-plugin + ${nexus-staging-plugin.version} + + + org.apache.maven.plugins + maven-release-plugin + ${maven-release-plugin.version} + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + no-junit4 + generate-test-resources + + enforce + + + + + + junit:junit:*:*:test + org.junit:junit:*:*:test + org.junit.vintage:*:*:*:test + + + + + + + general-reqs + + enforce + + initialize + + + + + [${target.java.version}.0,) + + + + + + + oss-sonatype oss-sonatype + + https://oss.sonatype.org/content/repositories/snapshots/ + + + true + + + + s01-oss-sonatype + s01-oss-sonatype https://s01.oss.sonatype.org/content/repositories/snapshots/ @@ -315,7 +435,46 @@ true - --> + + + + ct + + + + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + 5.0.0 + + + retrieve-git-details + + revision + + initialize + + + + ${project.basedir}/../../.git + UTC + 8 + false + + + + + + + + diff --git a/modules/dataverse-spi/.gitignore b/modules/dataverse-spi/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/dataverse-spi/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/dataverse-spi/pom.xml b/modules/dataverse-spi/pom.xml new file mode 100644 index 00000000000..b00053fe5e0 --- /dev/null +++ b/modules/dataverse-spi/pom.xml @@ -0,0 +1,238 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + dataverse-spi + 2.0.0${project.version.suffix} + jar + + Dataverse SPI Plugin API + https://dataverse.org + + A package to create out-of-tree Java code for Dataverse Software. Plugin projects can use this package + as an API dependency just like Jakarta EE APIs if they want to create external plugins. These will be loaded + at runtime of a Dataverse installation using SPI. See also https://guides.dataverse.org/en/latest/developers + for more information. + + + + + Apache-2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + + Dataverse Core Team + support@dataverse.org + + + + + https://github.com/IQSS/dataverse/issues + GitHub Issues + + + + scm:git:git@github.com:IQSS/dataverse.git + scm:git:git@github.com:IQSS/dataverse.git + git@github.com:IQSS/dataverse.git + HEAD + + + + https://github.com/IQSS/dataverse/actions + github + + +
dataversebot@gdcc.io
+
+
+
+ + + + ossrh + https://s01.oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + + none + false + + + + + jakarta.json + jakarta.json-api + provided + + + + jakarta.ws.rs + jakarta.ws.rs-api + provided + + + + + + + + maven-compiler-plugin + + ${target.java.version} + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + true + + ossrh + https://s01.oss.sonatype.org + true + + + + org.apache.maven.plugins + maven-release-plugin + + false + release + true + deploy + + + + org.codehaus.mojo + flatten-maven-plugin + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + ${skipDeploy} + + + + + + + + release + + + + org.apache.maven.plugins + maven-gpg-plugin + + + sign-artifacts + verify + + sign + + + + + + org.kordamp.maven + pomchecker-maven-plugin + + + process-resources + + check-maven-central + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + ${target.java.version} + false + ${javadoc.lint} + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar + + + + + + + + + ct + + true + + + +
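With the module metadata above, the SPI is set up for publication through the OSSRH staging pipeline under the coordinates io.gdcc:dataverse-spi:2.0.0, so exporter plugin projects can declare it as a provided-scope dependency just like the Jakarta APIs. A quick sketch, assuming the artifact has been released or staged somewhere resolvable, for checking that it can be pulled into the local Maven repository:
    # Fetch the SPI artifact (coordinates taken from this pom)
    mvn -q dependency:get -Dartifact=io.gdcc:dataverse-spi:2.0.0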
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java new file mode 100644 index 00000000000..d039ac39e8f --- /dev/null +++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java @@ -0,0 +1,96 @@ +package io.gdcc.spi.export; + +import java.io.InputStream; +import java.util.Optional; + +import jakarta.json.JsonArray; +import jakarta.json.JsonObject; + +/** + * Provides all the metadata Dataverse has about a given dataset that can then + * be used by an @see Exporter to create a new metadata export format. + * + */ +public interface ExportDataProvider { + + /** + * @return - dataset metadata in the standard Dataverse JSON format used in the + * API and available as the JSON metadata export via the user interface. + * @apiNote - there is no JSON schema defining this output, but the format is + * well documented in the Dataverse online guides. This, and the + * OAI_ORE export are the only two that provide 'complete' + * dataset-level metadata along with basic file metadata for each file + * in the dataset. + */ + JsonObject getDatasetJson(); + + /** + * + * @return - dataset metadata in the JSON-LD based OAI_ORE format used in + * Dataverse's archival bag export mechanism and as available in the + * user interface and by API. + * @apiNote - This, and the JSON format are the only two that provide complete + * dataset-level metadata along with basic file metadata for each file + * in the dataset. + */ + JsonObject getDatasetORE(); + + /** + * Dataverse is capable of extracting DDI-centric metadata from tabular + * datafiles. This detailed metadata, which is only available for successfully + * "ingested" tabular files, is not included in the output of any other methods + * in this interface. + * + * @return - a JSONArray with one entry per ingested tabular dataset file. + * @apiNote - there is no JSON schema available for this output and the format + * is not well documented. Implementers may wish to explore the @see + * edu.harvard.iq.dataverse.export.DDIExporter and the @see + * edu.harvard.iq.dataverse.util.json.JSONPrinter classes where this + * output is used/generated (respectively). + */ + JsonArray getDatasetFileDetails(); + + /** + * + * @return - the subset of metadata conforming to the schema.org standard as + * available in the user interface and as included as header metadata in + * dataset pages (for use by search engines) + * @apiNote - as this metadata export is not complete, it should only be used as + * a starting point for an Exporter if it simplifies your exporter + * relative to using the JSON or OAI_ORE exports. + */ + JsonObject getDatasetSchemaDotOrg(); + + /** + * + * @return - the subset of metadata conforming to the DataCite standard as + * available in the Dataverse user interface and as sent to DataCite when DataCite DOIs are used. + * @apiNote - as this metadata export is not complete, it should only be used as + * a starting point for an Exporter if it simplifies your exporter + * relative to using the JSON or OAI_ORE exports. + */ + String getDataCiteXml(); + + /** + * If an Exporter has specified a prerequisite format name via the + * getPrerequisiteFormatName() method, it can call this method to retrieve + * metadata in that format.
+ * + * @return - metadata in the specified prerequisite format (if available from + * another internal or added Exporter) as an Optional + * @apiNote - This functionality is intended as a way to easily generate alternate + * formats of the ~same metadata, e.g. to support download as XML, + * HTML, PDF for a specific metadata standard (e.g. DDI). It can be + * particularly useful, relative to starting from the output of one of + * the getDataset* methods above, if there are existing libraries that + * can convert between these formats. Note that, since Exporters can be + * replaced, relying on this method could cause your Exporter to + * malfunction, e.g. if you depend on format "ddi" and a third party + * Exporter is configured to replace the internal ddi Exporter in + * Dataverse. + */ + default Optional<InputStream> getPrerequisiteInputStream() { + return Optional.empty(); + } + +} diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportException.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportException.java new file mode 100644 index 00000000000..c816a605860 --- /dev/null +++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportException.java @@ -0,0 +1,13 @@ +package io.gdcc.spi.export; + +import java.io.IOException; + +public class ExportException extends IOException { + public ExportException(String message) { + super(message); + } + + public ExportException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/Exporter.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/Exporter.java new file mode 100644 index 00000000000..1338a3c9734 --- /dev/null +++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/Exporter.java @@ -0,0 +1,110 @@ +package io.gdcc.spi.export; + +import java.io.OutputStream; +import java.util.Locale; +import java.util.Optional; + + +/** + * Dataverse allows new metadata export formats to be dynamically added to a running instance. This is done by + * deploying new classes that implement this Exporter interface. + */ + +public interface Exporter { + + + /** + * When this method is called, the Exporter should write the metadata to the given OutputStream. + * + * @apiNote When implementing exportDataset, when done writing content, please make sure + * to flush() the outputStream, but NOT close() it! This way an exporter can be + * used to insert the produced metadata into the body of an HTTP response, etc. + * (for example, to insert it into the body of an OAI response, where more XML + * needs to be written, for the outer OAI-PMH record). -- L.A. 4.5 + * + * @param dataProvider - the @see ExportDataProvider interface includes several methods that can be used to retrieve the dataset metadata in different formats. An Exporter should use one or more of these to obtain the values needed to generate metadata in the format it supports. + * @param outputStream - the OutputStream to write the metadata to + * @throws ExportException - if there is an error writing the metadata + */ + void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException; + + /** + * This method should return the name of the metadata format this Exporter + * provides. + * + * @apiNote Format names are unique identifiers for the formats supported in + * Dataverse. Reusing the same format name as another Exporter will + * result in only one implementation being available.
Exporters packaged + * as an external Jar file have precedence over the default + * implementations in Dataverse. Hence re-using one of the existing + * format names will result in the Exporter replacing the internal one + * with the same name. The precedence between two external Exporters + * using the same format name is not defined. + * Current format names used internally by Dataverse are: + * Datacite + * dcterms + * ddi + * oai_dc + * html + * dataverse_json + * oai_ddi + * OAI_ORE + * oai_datacite + * schema.org + * + * @return - the unique name of the metadata format this Exporter + */ + String getFormatName(); + + /** + * This method should return the display name of the metadata format this + * Exporter provides. Display names are used in the UI, specifically in the menu + * of available Metadata Exports on the dataset page/metadata tab to identify the + * format. + */ + String getDisplayName(Locale locale); + + /** + * Exporters can specify that they require, as input, the output of another + * exporter. This is done by providing the name of that format in response to a + * call to this method. + * + * @implNote The one current example where this is done is with the html (display + * name "DDI html codebook") exporter which starts from the XML-based + * ddi format produced by that exporter. + * @apiNote - The Exporter can expect that the metadata produced by its + * prerequisite exporter (as defined with this method) will be + * available via the ExportDataProvider.getPrerequisiteInputStream() + * method. The default implementation of this method returns an empty + * value which means the getPrerequisiteInputStream() method of the + * ExportDataProvider sent in the exportDataset method will return an + * empty Optional. + * + */ + default Optional<String> getPrerequisiteFormatName() { + return Optional.empty(); + } + + + /** + * Harvestable Exporters will be available as options in Dataverse's Harvesting mechanism. + * @return true to make this exporter available as a harvesting option. + */ + Boolean isHarvestable(); + + /** + * If an Exporter is available to users, its format will be generated for every + * published dataset and made available via the dataset page/metadata + * tab/Metadata Exports menu item and via the API. + * @return true to make this exporter available to users. + */ + Boolean isAvailableToUsers(); + + /** + * To support effective downloads of metadata in this Exporter's format, the Exporter should specify an appropriate mime type. + * @apiNote - It is recommended to use the @see jakarta.ws.rs.core.MediaType enum to specify the mime type. + * @return The mime type, e.g. "application/json", "text/plain", etc. + */ + String getMediaType(); + +} diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/XMLExporter.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/XMLExporter.java new file mode 100644 index 00000000000..3c3fa35c69d --- /dev/null +++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/XMLExporter.java @@ -0,0 +1,37 @@ +package io.gdcc.spi.export; + +import jakarta.ws.rs.core.MediaType; + +/** + * XML Exporter is an extension of the base Exporter interface that adds the + * additional methods needed for generating XML metadata export formats.
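Worth noting about getFormatName() above: the returned identifier is also what users pass to Dataverse's metadata export API, so a newly deployed Exporter surfaces there under its format name. A hedged example using one of the internal format names listed above (host and DOI are placeholders):
    # Retrieve the "dataverse_json" export of a published dataset by persistent identifier
    curl "http://localhost:8080/api/datasets/export?exporter=dataverse_json&persistentId=doi:10.5072/FK2/EXAMPLE"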
+ */ +public interface XMLExporter extends Exporter { + + /** + * @implNote for the ddi exporter, this method returns "ddi:codebook:2_5" + * @return - the name space of the XML schema + */ + String getXMLNameSpace(); + + /** + * @apiNote According to the XML specification, the value must be a URI + * @implNote for the ddi exporter, this method returns + * "https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd" + * @return - the location of the XML schema as a String (must be a valid URI) + */ + String getXMLSchemaLocation(); + + /** + * @implNote for the ddi exporter, this method returns "2.5" + * @return - the version of the XML schema + */ + String getXMLSchemaVersion(); + + /** + * @return - should always be MediaType.APPLICATION_XML + */ + public default String getMediaType() { + return MediaType.APPLICATION_XML; + }; +} diff --git a/pom.xml b/pom.xml index c6459cfc55c..7ba22d2a076 100644 --- a/pom.xml +++ b/pom.xml @@ -15,16 +15,23 @@ doc/sphinx-guides/source/developers/dependencies.rst --> dataverse - war + ${packaging.type} dataverse false + false + + + + war + 1.2.18.4 - 8.5.10 + 9.21.2 1.20.1 0.8.7 5.2.1 2.4.1 + 5.5.3 @@ -112,7 +119,7 @@ com.apicatalog titanium-json-ld - 1.3.0-SNAPSHOT + 1.3.2 com.google.code.gson @@ -153,12 +160,20 @@ flyway-core ${flyway.version} + + + org.eclipse.persistence + org.eclipse.persistence.jpa + provided + com.google.guava guava - 29.0-jre + 32.1.2-jre jar + + org.eclipse.microprofile.config microprofile-config-api @@ -167,21 +182,35 @@ jakarta.platform jakarta.jakartaee-api - ${jakartaee-api.version} provided - + + + - org.glassfish - jakarta.json + org.eclipse.angus + angus-activation provided - com.sun.mail - jakarta.mail + fish.payara.api + payara-api + provided + + ${payara.version} + + + + + + org.eclipse.parsson + jakarta.json provided + + org.glassfish jakarta.faces @@ -191,6 +220,7 @@ org.primefaces primefaces 11.0.0 + jakarta org.primefaces.themes @@ -200,9 +230,10 @@ org.omnifaces omnifaces - 3.8 + 4.0-M13 + jakarta.validation jakarta.validation-api @@ -213,9 +244,12 @@ hibernate-validator provided + + + - org.glassfish - jakarta.el + org.glassfish.expressly + expressly provided @@ -249,29 +283,23 @@ org.apache.solr solr-solrj - 8.11.1 + 9.3.0 colt colt 1.2.0 - + - nom.tam.fits - fits - 2012-10-25-generated + gov.nasa.gsfc.heasarc + nom-tam-fits + 1.12.0 net.handle - handle - 8.1.1 - - - - edu.harvard.iq.dvn - unf5 - 5.0 + handle-client + 9.3.1 @@ -331,18 +359,24 @@ org.ocpsoft.rewrite rewrite-servlet - 3.5.0.Final + 6.0.0-SNAPSHOT org.ocpsoft.rewrite rewrite-config-prettyfaces - 3.5.0.Final + 6.0.0-SNAPSHOT edu.ucsb.nceas ezid 1.0.0 jar + + + junit + junit + + org.jsoup @@ -380,9 +414,9 @@ com.nimbusds oauth2-oidc-sdk - 9.41.1 + 10.7.1 - + io.gdcc xoai-data-provider @@ -403,15 +437,13 @@ - org.glassfish.jersey.containers - jersey-container-servlet - 2.23.2 + org.glassfish.jersey.core + jersey-server org.glassfish.jersey.media jersey-media-multipart - 2.23.2 com.mashape.unirest @@ -493,24 +525,21 @@ java-json-canonicalization 1.1 - - - org.junit.jupiter - junit-jupiter - ${junit.jupiter.version} - test + edu.ucar + cdm-core + ${netcdf.version} - junit - junit - ${junit.version} - test + io.gdcc + dataverse-spi + 2.0.0 + - org.junit.vintage - junit-vintage-engine - ${junit.vintage.version} + org.junit.jupiter + junit-jupiter + ${junit.jupiter.version} test @@ -532,9 +561,9 @@ test - com.jayway.restassured + io.rest-assured rest-assured - 2.4.0 + 5.3.1 test @@ -553,6 +582,12 @@ org.testcontainers testcontainers test + 
+ + junit + junit + + org.testcontainers @@ -740,7 +775,7 @@ true - edu.harvard.iq.dataverse.NonEssentialTests + not-essential-unittests @@ -748,22 +783,128 @@ - tc + ct + true - 9.6 + true + + docker-build + 13 + + gdcc/dataverse:${app.image.tag} + unstable + gdcc/base:${base.image.tag} + unstable + gdcc/configbaker:${conf.image.tag} + ${app.image.tag} + + + + + ${app.image} + ${postgresql.server.version} + ${solr.version} + dataverse + + + + org.apache.maven.plugins + maven-war-plugin + + + prepare-package + + exploded + + + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + + dev_dataverse + ${app.image} + + + + ${docker.platforms} + + + Dockerfile + + ${base.image} + + @ + + assembly.xml + + + + + + + + compose + ${project.basedir} + docker-compose-dev.yml + + + + + dev_bootstrap + ${conf.image} + + + + ${docker.platforms} + + + ${project.basedir}/modules/container-configbaker/Dockerfile + + ${SOLR_VERSION} + + @ + + ${project.basedir}/modules/container-configbaker/assembly.xml + + + + + + true + + + + true + + org.apache.maven.plugins maven-failsafe-plugin ${maven-failsafe-plugin.version} - testcontainers + end2end ${postgresql.server.version} + ${skipIntegrationTests} diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index d7ae8cefbf7..4af128955c9 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -466,9 +466,9 @@ }, { "typeName": "productionPlace", - "multiple": false, + "multiple": true, "typeClass": "primitive", - "value": "ProductionPlace" + "value": ["ProductionPlace"] }, { "typeName": "contributor", @@ -710,9 +710,9 @@ }, { "typeName": "series", - "multiple": false, + "multiple": true, "typeClass": "compound", - "value": { + "value": [{ "seriesName": { "typeName": "seriesName", "multiple": false, @@ -725,7 +725,7 @@ "typeClass": "primitive", "value": "SeriesInformation" } - } + }] }, { "typeName": "software", @@ -899,25 +899,25 @@ "typeName": "westLongitude", "multiple": false, "typeClass": "primitive", - "value": "10" + "value": "-72" }, "eastLongitude": { "typeName": "eastLongitude", "multiple": false, "typeClass": "primitive", - "value": "20" + "value": "-70" }, "northLongitude": { "typeName": "northLongitude", "multiple": false, "typeClass": "primitive", - "value": "30" + "value": "43" }, "southLongitude": { "typeName": "southLongitude", "multiple": false, "typeClass": "primitive", - "value": "40" + "value": "42" } }, { @@ -925,25 +925,25 @@ "typeName": "westLongitude", "multiple": false, "typeClass": "primitive", - "value": "50" + "value": "-18" }, "eastLongitude": { "typeName": "eastLongitude", "multiple": false, "typeClass": "primitive", - "value": "60" + "value": "-13" }, "northLongitude": { "typeName": "northLongitude", "multiple": false, "typeClass": "primitive", - "value": "70" + "value": "29" }, "southLongitude": { "typeName": "southLongitude", "multiple": false, "typeClass": "primitive", - "value": "80" + "value": "28" } } ] @@ -1404,7 +1404,7 @@ "multiple": true, "typeClass": "controlledVocabulary", "value": [ - "cell counting", + "genome sequencing", "cell sorting", "clinical chemistry analysis", "DNA methylation profiling" diff --git a/scripts/api/data/dataset-create-new.json b/scripts/api/data/dataset-create-new.json index 0017da15974..5831e0b17e6 100644 --- a/scripts/api/data/dataset-create-new.json +++ b/scripts/api/data/dataset-create-new.json @@ -4,6 +4,10 @@ 
"persistentUrl": "http://dx.doi.org/10.5072/FK2/9", "protocol": "chadham-house-rule", "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, "metadataBlocks": { "citation": { "displayName": "Citation Metadata", @@ -121,4 +125,4 @@ } } } -} \ No newline at end of file +} diff --git a/scripts/api/data/dataset-finch1_fr.json b/scripts/api/data/dataset-finch1_fr.json index ce9616fdef5..848e5e3587e 100644 --- a/scripts/api/data/dataset-finch1_fr.json +++ b/scripts/api/data/dataset-finch1_fr.json @@ -1,6 +1,10 @@ { "metadataLanguage": "fr", "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, "metadataBlocks": { "citation": { "fields": [ diff --git a/scripts/api/data/licenses/licenseCC-BY-4.0.json b/scripts/api/data/licenses/licenseCC-BY-4.0.json index 5596e65e947..59201b8d08e 100644 --- a/scripts/api/data/licenses/licenseCC-BY-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by/4.0", "shortDescription": "Creative Commons Attribution 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 2 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json index 8154c9ec5df..c19087664db 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 4 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json index 247ce52f6ea..2e374917d28 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc-nd/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc-nd/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 7 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json index e9726fb6374..5018884f65e 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc-sa/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 3 } diff --git a/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json b/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json index 7ae81bacc10..317d459a7ae 100644 --- a/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nd/4.0", "shortDescription": "Creative Commons Attribution-NoDerivatives 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nd/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 
6 } diff --git a/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json b/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json index e9a02880885..0d28c9423aa 100644 --- a/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-sa/4.0", "shortDescription": "Creative Commons Attribution-ShareAlike 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-sa/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 5 } diff --git a/scripts/api/data/licenses/licenseCC0-1.0.json b/scripts/api/data/licenses/licenseCC0-1.0.json index 396ba133327..216260a5de8 100644 --- a/scripts/api/data/licenses/licenseCC0-1.0.json +++ b/scripts/api/data/licenses/licenseCC0-1.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/publicdomain/zero/1.0", "shortDescription": "Creative Commons CC0 1.0 Universal Public Domain Dedication.", "iconUrl": "https://licensebuttons.net/p/zero/1.0/88x31.png", - "active": true + "active": true, + "sortOrder": 1 } diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 29d121aae16..18bc31c2dd6 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -43,7 +43,7 @@ producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation @@ -66,7 +66,7 @@ dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE FALSE FALSE FALSE FALSE citation + series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE TRUE FALSE FALSE FALSE citation seriesName Name The name of the dataset series text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation software Software Information about the software used to generate the Dataset none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy @@ -96,22 +96,23 @@ subject Other D12 13 publicationIDType ark 0 publicationIDType arXiv 1 - publicationIDType bibcode 2 - publicationIDType doi 3 - publicationIDType ean13 4 - publicationIDType eissn 5 - publicationIDType handle 6 - publicationIDType isbn 7 - publicationIDType issn 8 - publicationIDType istc 9 - publicationIDType lissn 10 - publicationIDType lsid 11 - publicationIDType pmid 12 - publicationIDType purl 13 - publicationIDType upc 14 - publicationIDType url 15 - publicationIDType urn 16 - publicationIDType DASH-NRS 17 + publicationIDType bibcode 2 + publicationIDType cstr 3 + publicationIDType doi 4 + publicationIDType ean13 5 + publicationIDType eissn 6 + publicationIDType handle 7 + publicationIDType isbn 8 + publicationIDType issn 9 + publicationIDType istc 10 + publicationIDType lissn 11 + publicationIDType lsid 12 + publicationIDType pmid 13 + publicationIDType purl 14 + publicationIDType upc 15 + publicationIDType url 16 + publicationIDType urn 17 + publicationIDType DASH-NRS 18 contributorType 
Data Collector 0 contributorType Data Curator 1 contributorType Data Manager 2 diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv new file mode 100644 index 00000000000..a5c50368b75 --- /dev/null +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -0,0 +1,37 @@ +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + codeVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org + codeRepository Code Repository Link to the repository where the un-compiled, human-readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text 4 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 5 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). e.g. Python 3.10 text 6 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). text 7 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text 8 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + buildInstructions Build Instructions Link to installation instructions/documentation e.g. https://github.com/user/project/blob/main/BUILD.md url 9 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/buildInstructions + softwareRequirementsItem Software Requirements Required software dependencies none 10 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 + softwareRequirements Name & Version Name and version of the required software/library dependency e.g. 
Pandas 1.4.3 text 0 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsInfoUrl Info URL Link to required software/library homepage or documentation (ideally also versioned) e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 url 1 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareRequirementsInfoUrl + softwareSuggestionsItem Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none 11 FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions + softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl + memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture or other CPU requirements to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Minimum storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme + releaseNotes Release Notes Link to release notes e.g. https://github.com/user/project/blob/main/docs/release-0.1.md url 18 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continuous Integration Link to continuous integration service e.g. https://github.com/user/project/actions url 19 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/contIntegration + issueTracker Issue Tracker Link to software bug reporting or issue tracking system e.g. 
https://github.com/user/project/issues url 20 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/issueTracker +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index c4bd6c2c9c5..e247caa72b5 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -3,7 +3,14 @@ SECURESETUP=1 DV_SU_PASSWORD="admin" -for opt in $* +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +# Make sure scripts we call from this one also get this env var! +export DATAVERSE_URL + +# scripts/api when called from the root of the source tree +SCRIPT_PATH="$(dirname "$0")" + +for opt in "$@" do case $opt in "--insecure") @@ -24,13 +31,9 @@ do esac done +# shellcheck disable=SC2016 command -v jq >/dev/null 2>&1 || { echo >&2 '`jq` ("sed for JSON") is required, but not installed. Download the binary for your platform from http://stedolan.github.io/jq/ and make sure it is in your $PATH (/usr/bin/jq is fine) and executable with `sudo chmod +x /usr/bin/jq`. On Mac, you can install it with `brew install jq` if you use homebrew: http://brew.sh . Aborting.'; exit 1; } -echo "deleting all data from Solr" -curl http://localhost:8983/solr/collection1/update/json?commit=true -H "Content-type: application/json" -X POST -d "{\"delete\": { \"query\":\"*:*\"}}" - -SERVER=http://localhost:8080/api - # Everything + the kitchen sink, in a single script # - Setup the metadata blocks and controlled vocabulary # - Setup the builtin roles @@ -41,49 +44,49 @@ SERVER=http://localhost:8080/api echo "Setup the metadata blocks" -./setup-datasetfields.sh +"$SCRIPT_PATH"/setup-datasetfields.sh echo "Setup the builtin roles" -./setup-builtin-roles.sh +"$SCRIPT_PATH"/setup-builtin-roles.sh echo "Setup the authentication providers" -./setup-identity-providers.sh +"$SCRIPT_PATH"/setup-identity-providers.sh echo "Setting up the settings" echo "- Allow internal signup" -curl -X PUT -d yes "$SERVER/admin/settings/:AllowSignUp" -curl -X PUT -d /dataverseuser.xhtml?editMode=CREATE "$SERVER/admin/settings/:SignUpUrl" - -curl -X PUT -d doi "$SERVER/admin/settings/:Protocol" -curl -X PUT -d 10.5072 "$SERVER/admin/settings/:Authority" -curl -X PUT -d "FK2/" "$SERVER/admin/settings/:Shoulder" -curl -X PUT -d DataCite "$SERVER/admin/settings/:DoiProvider" -curl -X PUT -d burrito $SERVER/admin/settings/BuiltinUsers.KEY -curl -X PUT -d localhost-only $SERVER/admin/settings/:BlockedApiPolicy -curl -X PUT -d 'native/http' $SERVER/admin/settings/:UploadMethods +curl -X PUT -d yes "${DATAVERSE_URL}/api/admin/settings/:AllowSignUp" +curl -X PUT -d "/dataverseuser.xhtml?editMode=CREATE" "${DATAVERSE_URL}/api/admin/settings/:SignUpUrl" + +curl -X PUT -d doi "${DATAVERSE_URL}/api/admin/settings/:Protocol" +curl -X PUT -d 10.5072 "${DATAVERSE_URL}/api/admin/settings/:Authority" +curl -X PUT -d "FK2/" "${DATAVERSE_URL}/api/admin/settings/:Shoulder" +curl -X PUT -d DataCite "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" +curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" +curl -X PUT -d localhost-only "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" +curl -X PUT -d 'native/http' 
"${DATAVERSE_URL}/api/admin/settings/:UploadMethods" echo echo "Setting up the admin user (and as superuser)" -adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @data/user-admin.json "$SERVER/builtin-users?password=$DV_SU_PASSWORD&key=burrito") -echo $adminResp -curl -X POST "$SERVER/admin/superuser/dataverseAdmin" +adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/user-admin.json "${DATAVERSE_URL}/api/builtin-users?password=$DV_SU_PASSWORD&key=burrito") +echo "$adminResp" +curl -X POST "${DATAVERSE_URL}/api/admin/superuser/dataverseAdmin" echo echo "Setting up the root dataverse" -adminKey=$(echo $adminResp | jq .data.apiToken | tr -d \") -curl -s -H "Content-type:application/json" -X POST -d @data/dv-root.json "$SERVER/dataverses/?key=$adminKey" +adminKey=$(echo "$adminResp" | jq .data.apiToken | tr -d \") +curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/dv-root.json "${DATAVERSE_URL}/api/dataverses/?key=$adminKey" echo echo "Set the metadata block for Root" -curl -s -X POST -H "Content-type:application/json" -d "[\"citation\"]" $SERVER/dataverses/:root/metadatablocks/?key=$adminKey +curl -s -X POST -H "Content-type:application/json" -d "[\"citation\"]" "${DATAVERSE_URL}/api/dataverses/:root/metadatablocks/?key=$adminKey" echo echo "Set the default facets for Root" -curl -s -X POST -H "Content-type:application/json" -d "[\"authorName\",\"subject\",\"keywordValue\",\"dateOfDeposit\"]" $SERVER/dataverses/:root/facets/?key=$adminKey +curl -s -X POST -H "Content-type:application/json" -d "[\"authorName\",\"subject\",\"keywordValue\",\"dateOfDeposit\"]" "${DATAVERSE_URL}/api/dataverses/:root/facets/?key=$adminKey" echo echo "Set up licenses" # Note: CC0 has been added and set as the default license through # Flyway script V5.9.0.1__7440-configurable-license-list.sql -curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$adminKey" $SERVER/licenses --upload-file data/licenses/licenseCC-BY-4.0.json +curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$adminKey" "${DATAVERSE_URL}/api/licenses" --upload-file "$SCRIPT_PATH"/data/licenses/licenseCC-BY-4.0.json # OPTIONAL USERS AND DATAVERSES #./setup-optional.sh @@ -92,8 +95,8 @@ if [ $SECURESETUP = 1 ] then # Revoke the "burrito" super-key; # Block sensitive API endpoints; - curl -X DELETE $SERVER/admin/settings/BuiltinUsers.KEY - curl -X PUT -d 'admin,builtin-users' $SERVER/admin/settings/:BlockedApiEndpoints + curl -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" + curl -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints" echo "Access to the /api/admin and /api/test is now disabled, except for connections from localhost." else echo "IMPORTANT!!!" 
diff --git a/scripts/api/setup-builtin-roles.sh b/scripts/api/setup-builtin-roles.sh index 0f3c1c150cd..f1f268debbc 100755 --- a/scripts/api/setup-builtin-roles.sh +++ b/scripts/api/setup-builtin-roles.sh @@ -1,34 +1,37 @@ -SERVER=http://localhost:8080/api +#!/bin/bash + +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +SCRIPT_PATH="$(dirname "$0")" # Setup the builtin roles echo "Setting up admin role" -curl -H "Content-type:application/json" -d @data/role-admin.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-admin.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up file downloader role" -curl -H "Content-type:application/json" -d @data/role-filedownloader.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-filedownloader.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up full contributor role" -curl -H "Content-type:application/json" -d @data/role-fullContributor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-fullContributor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up dv contributor role" -curl -H "Content-type:application/json" -d @data/role-dvContributor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-dvContributor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up ds contributor role" -curl -H "Content-type:application/json" -d @data/role-dsContributor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-dsContributor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up editor role" -curl -H "Content-type:application/json" -d @data/role-editor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-editor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up curator role" -curl -H "Content-type:application/json" -d @data/role-curator.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-curator.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up member role" -curl -H "Content-type:application/json" -d @data/role-member.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-member.json "${DATAVERSE_URL}/api/admin/roles/" echo diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d79176c099..51da677ceb8 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -1,10 +1,13 @@ -#!/bin/sh -curl http://localhost:8080/api/admin/datasetfield/loadNAControlledVocabularyValue -# TODO: The "@" is confusing. 
Consider switching to --upload-file citation.tsv -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/citation.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/geospatial.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/social_science.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" +#!/bin/bash + +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +SCRIPT_PATH="$(dirname "$0")" +curl "${DATAVERSE_URL}/api/admin/datasetfield/loadNAControlledVocabularyValue" +# TODO: The "@" is confusing. Consider switching to --upload-file citation.tsv +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/citation.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/geospatial.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/social_science.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" diff --git a/scripts/api/setup-identity-providers.sh b/scripts/api/setup-identity-providers.sh index 89ac59de32f..e877f71c6b0 100755 --- a/scripts/api/setup-identity-providers.sh +++ b/scripts/api/setup-identity-providers.sh @@ -1,8 +1,11 @@ -SERVER=http://localhost:8080/api +#!/bin/bash + +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +SCRIPT_PATH="$(dirname "$0")" # Setup the authentication providers echo "Setting up internal user provider" -curl -H "Content-type:application/json" -d @data/authentication-providers/builtin.json http://localhost:8080/api/admin/authenticationProviders/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/authentication-providers/builtin.json "${DATAVERSE_URL}/api/admin/authenticationProviders/" #echo "Setting up Echo providers" #curl -H "Content-type:application/json" -d @data/authentication-providers/echo.json http://localhost:8080/api/admin/authenticationProviders/ diff --git a/scripts/dev/dev-rebuild.sh b/scripts/dev/dev-rebuild.sh index 71857b14068..9eae195b135 100755 --- a/scripts/dev/dev-rebuild.sh +++ b/scripts/dev/dev-rebuild.sh @@ -1,8 +1,9 @@ #!/bin/sh -PAYARA_DIR=/usr/local/payara5 +PAYARA_DIR=/usr/local/payara6 ASADMIN=$PAYARA_DIR/glassfish/bin/asadmin DB_NAME=dvndb 
DB_USER=dvnapp +export PGPASSWORD=secret echo "Checking if there is a war file to undeploy..." LIST_APP=$($ASADMIN list-applications -t) @@ -23,7 +24,7 @@ echo "Deleting ALL DATA FILES uploaded to Dataverse..." rm -rf $PAYARA_DIR/glassfish/domains/domain1/files echo "Terminating database sessions so we can drop the database..." -psql -U postgres -c " +psql -h localhost -U postgres -c " SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE pg_stat_activity.datname = '$DB_NAME' @@ -31,14 +32,14 @@ WHERE pg_stat_activity.datname = '$DB_NAME' " template1 echo "Dropping the database..." -psql -U $DB_USER -c "DROP DATABASE \"$DB_NAME\"" template1 +psql -h localhost -U $DB_USER -c "DROP DATABASE \"$DB_NAME\"" template1 echo $? echo "Clearing out data from Solr..." -curl http://localhost:8983/solr/collection1/update/json?commit=true -H "Content-type: application/json" -X POST -d "{\"delete\": { \"query\":\"*:*\"}}" +curl "http://localhost:8983/solr/collection1/update/json?commit=true" -H "Content-type: application/json" -X POST -d "{\"delete\": { \"query\":\"*:*\"}}" echo "Creating a new database..." -psql -U $DB_USER -c "CREATE DATABASE \"$DB_NAME\" WITH OWNER = \"$DB_USER\"" template1 +psql -h localhost -U $DB_USER -c "CREATE DATABASE \"$DB_NAME\" WITH OWNER = \"$DB_USER\"" template1 echo $? echo "Starting app server..." @@ -53,7 +54,7 @@ cd scripts/api cd ../.. echo "Creating SQL sequence..." -psql -U $DB_USER $DB_NAME -f doc/sphinx-guides/source/_static/util/createsequence.sql +psql -h localhost -U $DB_USER $DB_NAME -f doc/sphinx-guides/source/_static/util/createsequence.sql echo "Setting DOI provider to \"FAKE\"..." curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE diff --git a/scripts/dev/docker-final-setup.sh b/scripts/dev/docker-final-setup.sh new file mode 100755 index 00000000000..d2453619ec2 --- /dev/null +++ b/scripts/dev/docker-final-setup.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +set -euo pipefail + +echo "Running setup-all.sh (INSECURE MODE)..." +cd scripts/api || exit +./setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out +cd ../.. + +echo "Setting system mail address..." +curl -X PUT -d "dataverse@localhost" "http://localhost:8080/api/admin/settings/:SystemEmail" + +echo "Setting DOI provider to \"FAKE\"..." +curl "http://localhost:8080/api/admin/settings/:DoiProvider" -X PUT -d FAKE + +API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \") +export API_TOKEN + +echo "Publishing root dataverse..." +curl -H "X-Dataverse-key:$API_TOKEN" -X POST "http://localhost:8080/api/dataverses/:root/actions/:publish" + +echo "Allowing users to create dataverses and datasets in root..." +curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "{\"assignee\": \":authenticated-users\",\"role\": \"fullContributor\"}" "http://localhost:8080/api/dataverses/:root/assignments" + +echo "Checking Dataverse version..." 
+curl "http://localhost:8080/api/info/version" \ No newline at end of file diff --git a/scripts/installer/Makefile b/scripts/installer/Makefile index d40d4d792ea..399bc65168a 100644 --- a/scripts/installer/Makefile +++ b/scripts/installer/Makefile @@ -7,7 +7,6 @@ JHOVE_SCHEMA=${INSTALLER_ZIP_DIR}/jhoveConfig.xsd SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/update-fields.sh SOLR_CONFIG=${INSTALLER_ZIP_DIR}/solrconfig.xml PYTHON_FILES=${INSTALLER_ZIP_DIR}/README_python.txt ${INSTALLER_ZIP_DIR}/installConfig.py ${INSTALLER_ZIP_DIR}/installUtils.py ${INSTALLER_ZIP_DIR}/install.py ${INSTALLER_ZIP_DIR}/installAppServer.py ${INSTALLER_ZIP_DIR}/requirements.txt ${INSTALLER_ZIP_DIR}/default.config ${INSTALLER_ZIP_DIR}/interactive.config -INSTALL_SCRIPT=${INSTALLER_ZIP_DIR}/install installer: dvinstall.zip @@ -56,13 +55,13 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} @echo copying jhove schema file /bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} -${SOLR_SCHEMA}: ../../conf/solr/8.11.1/schema.xml ../../conf/solr/8.11.1/update-fields.sh ${INSTALLER_ZIP_DIR} +${SOLR_SCHEMA}: ../../conf/solr/9.3.0/schema.xml ../../conf/solr/9.3.0/update-fields.sh ${INSTALLER_ZIP_DIR} @echo copying Solr schema file - /bin/cp ../../conf/solr/8.11.1/schema.xml ../../conf/solr/8.11.1/update-fields.sh ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/9.3.0/schema.xml ../../conf/solr/9.3.0/update-fields.sh ${INSTALLER_ZIP_DIR} -${SOLR_CONFIG}: ../../conf/solr/8.11.1/solrconfig.xml ${INSTALLER_ZIP_DIR} +${SOLR_CONFIG}: ../../conf/solr/9.3.0/solrconfig.xml ${INSTALLER_ZIP_DIR} @echo copying Solr config file - /bin/cp ../../conf/solr/8.11.1/solrconfig.xml ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/9.3.0/solrconfig.xml ${INSTALLER_ZIP_DIR} ${PYTHON_FILES}: README_python.txt install.py installConfig.py installAppServer.py installUtils.py requirements.txt default.config interactive.config ${INSTALLER_ZIP_DIR} @echo copying Python installer files diff --git a/scripts/installer/README.txt b/scripts/installer/README.txt index 350a17fc00c..c3ed8211082 100644 --- a/scripts/installer/README.txt +++ b/scripts/installer/README.txt @@ -1,42 +1 @@ -The installer script (install) can be run either by a developer (inside the source tree), or by an end-user installing the Dataverse. The latter will obtain the script as part of the distribution bundle; and they will be running it inside the unzipped bundle directory. - -In the former (developer) case, the installer will be looking for the files it needs in the other directories in the source tree. -For example, the war file (once built) can be found in ../../target/. The name of the war file will be dataverse-{VERSION}.war, where -{VERSION} is the version number of the Dataverse, obtained from the pom file (../../pom.xml). For example, as of writing this README.txt (July 2015) the war file is ../../target/dataverse-4.1.war/ - -When building a distribution archive, the Makefile will pile all the files that the installer needs in one directory (./dvinstall here) and then zip it up. We upload the resulting zip bundle on github as the actual software release. This way the end user only gets the files they actually need to install the Dataverse app. So they can do so without pulling the entire source tree. 
- - -The installer script itself (the perl script ./install) knows to look for all these files in 2 places (for example, it will look for the war file in ../../target/; if it's not there, it'll assume this is a distribution bundle and look for it as ./dataverse.war) - -Here's the list of the files that the installer needs: - -the war file: -target/dataverse-{VERSION}.war - -and also: - -from scripts/installer (this directory): - -install -glassfish-setup.sh - -from scripts/api: - -setup-all.sh -setup-builtin-roles.sh -setup-datasetfields.sh -setup-dvs.sh -setup-identity-providers.sh -setup-users.sh -data (the entire directory with all its contents) - -from conf/jhove: - -jhove.conf - -SOLR schema and config files, from conf/solr/8.11.1: - -schema.xml -schema_dv_mdb_fields.xml -solrconfig.xml +See README_python.txt diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index 853db77f471..fc5b378cff5 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -56,15 +56,15 @@ function preliminary_setup() # avoid OutOfMemoryError: PermGen per http://eugenedvorkin.com/java-lang-outofmemoryerror-permgen-space-error-during-deployment-to-glassfish/ #./asadmin $ASADMIN_OPTS list-jvm-options - # Note that these JVM options are different for Payara5 and Glassfish4: + # Note that these JVM options are different for Payara and Glassfish4: # old Glassfish4 options: (commented out) #./asadmin $ASADMIN_OPTS delete-jvm-options "-XX\:MaxPermSize=192m" #./asadmin $ASADMIN_OPTS create-jvm-options "-XX\:MaxPermSize=512m" #./asadmin $ASADMIN_OPTS create-jvm-options "-XX\:PermSize=256m" - # payara5 ships with the "-server" option already in domain.xml, so no need: + # Payara ships with the "-server" option already in domain.xml, so no need: #./asadmin $ASADMIN_OPTS delete-jvm-options -client - # new Payara5 options: (thanks to donsizemore@unc.edu) + # new Payara options: (thanks to donsizemore@unc.edu) ./asadmin $ASADMIN_OPTS create-jvm-options "-XX\:MaxMetaspaceSize=512m" ./asadmin $ASADMIN_OPTS create-jvm-options "-XX\:MetaspaceSize=256m" ./asadmin $ASADMIN_OPTS create-jvm-options "-Dfish.payara.classloading.delegate=false" @@ -106,16 +106,19 @@ function preliminary_setup() # (we can no longer offer EZID with their shared test account) # jvm-options use colons as separators, escape as literal DOI_BASEURL_ESC=`echo $DOI_BASEURL | sed -e 's/:/\\\:/'` - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddoi.username=${DOI_USERNAME}" - ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddoi.password=${ALIAS=doi_password_alias}' - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddoi.baseurlstring=$DOI_BASEURL_ESC" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.username=${DOI_USERNAME}" + ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddataverse.pid.datacite.password=${ALIAS=doi_password_alias}' + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.mds-api-url=$DOI_BASEURL_ESC" # jvm-options use colons as separators, escape as literal DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'` - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddoi.dataciterestapiurlstring=$DOI_DATACITERESTAPIURL_ESC" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC" ./asadmin $ASADMIN_OPTS create-jvm-options "-Ddataverse.timerServer=true" + # Workaround for FISH-7722: Failed to deploy war with @Stateless https://github.com/payara/Payara/issues/6337 + ./asadmin $ASADMIN_OPTS 
create-jvm-options --add-opens=java.base/java.io=ALL-UNNAMED + # enable comet support ./asadmin $ASADMIN_OPTS set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled="true" @@ -155,18 +158,18 @@ function final_setup(){ if [ "$DOCKER_BUILD" = "true" ] then - FILES_DIR="/usr/local/payara5/glassfish/domains/domain1/files" + FILES_DIR="/usr/local/payara6/glassfish/domains/domain1/files" RSERVE_HOST="localhost" RSERVE_PORT="6311" RSERVE_USER="rserve" RSERVE_PASS="rserve" HOST_ADDRESS="localhost\:8080" - pushd /usr/local/payara5/glassfish/bin/ + pushd /usr/local/payara6/glassfish/bin/ ./asadmin start-domain domain1 preliminary_setup - chmod -R 777 /usr/local/payara5/ - rm -rf /usr/local/payara5/glassfish/domains/domain1/generated - rm -rf /usr/local/payara5/glassfish/domains/domain1/applications + chmod -R 777 /usr/local/payara6/ + rm -rf /usr/local/payara6/glassfish/domains/domain1/generated + rm -rf /usr/local/payara6/glassfish/domains/domain1/applications popd exit 0 fi @@ -276,7 +279,7 @@ if [ ! -d "$DOMAIN_DIR" ] exit 2 fi -echo "Setting up your app. server (Payara5) to support Dataverse" +echo "Setting up your app. server (Payara) to support Dataverse" echo "Payara directory: "$GLASSFISH_ROOT echo "Domain directory: "$DOMAIN_DIR diff --git a/scripts/installer/default.config b/scripts/installer/default.config index 312dd2cb2d8..8647cd02416 100644 --- a/scripts/installer/default.config +++ b/scripts/installer/default.config @@ -1,7 +1,7 @@ [glassfish] HOST_DNS_ADDRESS = localhost GLASSFISH_USER = dataverse -GLASSFISH_DIRECTORY = /usr/local/payara5 +GLASSFISH_DIRECTORY = /usr/local/payara6 GLASSFISH_ADMIN_USER = admin GLASSFISH_ADMIN_PASSWORD = secret GLASSFISH_HEAP = 2048 diff --git a/scripts/installer/install b/scripts/installer/install deleted file mode 100755 index 2208f014606..00000000000 --- a/scripts/installer/install +++ /dev/null @@ -1,1538 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; -use Getopt::Long; -use Socket; -use File::Copy; - -# command line options: - -my $verbose; -my $postgresonly; -my $hostname; -my $gfuser; -my $gfdir; -my $mailserver; -my $noninteractive; -my $skipdatabasesetup; -my $force; -my $nogfpasswd; -my $admin_email; - -my ($rez) = GetOptions( - #"length=i" => \$length, # numeric - #"file=s" => \$data, # string - "verbose" => \$verbose, - "pg_only" => \$postgresonly, - "skip_db_setup" => \$skipdatabasesetup, - "hostname=s" => \$hostname, - "gfuser=s" => \$gfuser, - "gfdir=s" => \$gfdir, - "mailserver=s" => \$mailserver, - "y|yes" => \$noninteractive, - "f|force" => \$force, - "nogfpasswd" => \$nogfpasswd, - "admin_email=s" => \$admin_email, -); - -# openshift/docker-specific - name of the "pod" executing the installer: -my $pod_name = ""; -if (exists($ENV{'MY_POD_NAME'})) -{ - $pod_name = $ENV{'MY_POD_NAME'}; -} - -my $jq_exec_path = ""; -my $psql_exec_path = ""; -my $cwd; -my $WARFILE_LOCATION = "dataverse.war"; - - -my @CONFIG_VARIABLES; - -if ($postgresonly) -{ - @CONFIG_VARIABLES = - ( 'POSTGRES_SERVER', 'POSTGRES_PORT', 'POSTGRES_DATABASE', 'POSTGRES_USER', 'POSTGRES_PASSWORD', 'POSTGRES_ADMIN_PASSWORD' ); - -} -else -{ - - @CONFIG_VARIABLES = ( - 'HOST_DNS_ADDRESS', - 'GLASSFISH_USER', - 'GLASSFISH_DIRECTORY', - 'ADMIN_EMAIL', - 'MAIL_SERVER', - - 'POSTGRES_SERVER', - 'POSTGRES_PORT', - 'POSTGRES_ADMIN_PASSWORD', - 'POSTGRES_DATABASE', - 'POSTGRES_USER', - 'POSTGRES_PASSWORD', - - 'SOLR_LOCATION', - - 'RSERVE_HOST', - 'RSERVE_PORT', - 'RSERVE_USER', - 'RSERVE_PASSWORD', - - 'DOI_USERNAME', - 'DOI_PASSWORD', 
- 'DOI_BASEURL', - 'DOI_DATACITERESTAPIURL' - - ); -} - -my %CONFIG_DEFAULTS; - -&read_config_defaults("default.config"); - -my %CONFIG_PROMPTS; -my %CONFIG_COMMENTS; - -&read_interactive_config_values("interactive.config"); - -my $API_URL = "http://localhost:8080/api"; - -# jodbc.postgresql.org recommends 4.2 for Java 8. -# updated drivers may be obtained from -# https://jdbc.postgresql.org/download.html -my $postgres_jdbc = "postgresql-42.2.12.jar"; - -# 0. A few preliminary checks: - -# 0a. OS: - -my $uname_out = `uname -a`; - -my @uname_tokens = split( " ", $uname_out ); - -my $WORKING_OS; -if ( $uname_tokens[0] eq "Darwin" ) { - print "\nThis appears to be a MacOS X system; good.\n"; - # TODO: check the OS version - - $WORKING_OS = "MacOSX"; -} -elsif ( $uname_tokens[0] eq "Linux" ) { - if ( -f "/etc/redhat-release" ) { - print "\nThis appears to be a RedHat system; good.\n"; - $WORKING_OS = "RedHat"; - # TODO: check the distro version - } - else { - print "\nThis appears to be a non-RedHat Linux system;\n"; - print "this installation *may* succeed; but we're not making any promises!\n"; - $WORKING_OS = "Linux"; - } -} else { - print "\nWARNING: This appears to be neither a Linux or MacOS X system!\n"; - print "This installer script will most likely fail. Please refer to the\n"; - print "DVN Installers Guide for more information.\n\n"; - - $WORKING_OS = "Unknown"; - - unless ($noninteractive) { - exit 0; - } - - print "(Normally we would stop right there; but since the \"--yes\" option was specified, we'll attempt to continue)\n\n"; - -} - - -# 0b. host name: - -if ($hostname) { - $CONFIG_DEFAULTS{'HOST_DNS_ADDRESS'} = $hostname; -} else { - my $hostname_from_cmdline = `hostname`; - chop $hostname_from_cmdline; - - $CONFIG_DEFAULTS{'HOST_DNS_ADDRESS'} = $hostname_from_cmdline; -} - -# 0c. check if there is the default.config file with the pre-set configuration values: - -# read default configuration values from tab separated file "default.config" if it exists -# moved after the $hostname_from_cmdline section to avoid excessively complicating the logic -# of command line argument, automatic selection, or config file. -# -# NOTE: if the file contain any Postgres configuration (for example: "POSTGRES_USER dvnApp") -# but an environmental variable with the same name exists - THE ENV. VAR WILL WIN! (don't ask) -# (actually this is to accommodate the openshift docker deployment scenario) - -sub trim { my $s = shift; $s =~ s/^\s+|\s+$//g; return $s }; - -#my $config_default_file = "default.config"; -# -#if ( -e $config_default_file ) -#{ -# print("loading default configuration values from $config_default_file\n"); -# open( my $inp_cfg, $config_default_file ); -# while( my $ln = <$inp_cfg> ) -# { -# my @xs = split('\t', $ln ); -# if ( 2 == @xs ) -# { -# my $k = $xs[0]; -# my $v = trim($xs[1]); -## if (defined $ENV{$k} && ($k eq "POSTGRES_USER" || $k eq "POSTGRES_PASSWORD")) { -## $v = $ENV{$k}; -## } -## if (defined $ENV{'POSTGRESQL_ADMIN_PASSWORD'} && $k eq "POSTGRES_ADMIN_PASSWORD") { -## $v = $ENV{'POSTGRESQL_ADMIN_PASSWORD'}; -## } -# $CONFIG_DEFAULTS{$k}=$v; -# } -# } -#} -#else -#{ -# print("using hard-coded default configuration values (no $config_default_file available)\n"); -#} - -# 0d. current OS user. 
(the first one we find wins) - -my $current_user = $ENV{LOGNAME} || $ENV{USER} || getpwuid($<); - -# if the username was specified on the command-line, it takes precendence: -if ($gfuser) { - print "Using CLI-specified user $gfuser.\n"; - $CONFIG_DEFAULTS{'GLASSFISH_USER'} = $gfuser; -} - - -if (!$CONFIG_DEFAULTS{'GLASSFISH_USER'} || !$noninteractive) { - $CONFIG_DEFAULTS{'GLASSFISH_USER'} = $current_user; - print "using $current_user.\n"; -} - - -# prefer that we not install as root. -unless ( $< != 0 ) { - print "####################################################################\n"; - print " It is recommended that this script not be run as root.\n"; - print " Consider creating the service account \"dataverse\", giving it ownership\n"; - print " on the glassfish/domains/domain1/ and glassfish/lib/ directories,\n"; - print " along with the JVM-specified files.dir location, and designate\n"; - print " that account to launch and run the Application Server (Payara),\n"; - print " AND use that user account to run this installer.\n"; - print "####################################################################\n"; - - unless ($noninteractive) - { - print "\nPress any key to continue, or ctrl-C to exit the installer...\n\n"; - system "stty cbreak /dev/tty 2>&1"; - unless ($noninteractive) { - my $key = getc(STDIN); - } - system "stty -cbreak /dev/tty 2>&1"; - print "\n"; - } -} - -# ensure $gfuser exists or bail -my $gfidcmd="id $CONFIG_DEFAULTS{'GLASSFISH_USER'} > /dev/null"; -my $gfreturncode=system($gfidcmd); -if ($gfreturncode != 0) { - die "Couldn't find user $gfuser. Please ensure the account exists and is readable by the user running this installer.\n"; -} - -# 0e. the following 2 options can also be specified on the command line, and -# also take precedence over the default values that are hard-coded and/or -# provided in the default.config file: - -if ($mailserver) { - $CONFIG_DEFAULTS{'MAIL_SERVER'} = $mailserver; -} - -if ($gfdir) { - $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'} = $gfdir; -} - -# 1. CHECK FOR SOME MANDATORY COMPONENTS (WAR FILE, ETC.) -# since we can't do anything without these things in place, better check for -# them before we go into the interactive config mode. -# (skip if this is a database-only setup) - -unless ($postgresonly) -{ -# 1a. war file: - print "\nChecking if the application .war file is available... "; - -# if this installer section is running out of the installer zip bundle directory, -# the war file will be sitting right here, named "dataverse.war": - - $WARFILE_LOCATION = "dataverse.war"; - -# but if it's not here, this is probably a personal development -# setup, so their build should be up in their source tree: - - unless ( -f $WARFILE_LOCATION ) { - my $DATAVERSE_VERSION = ""; - my $DATAVERSE_POM_FILE = "../../modules/dataverse-parent/pom.xml"; - if ( -f $DATAVERSE_POM_FILE ) - { - open DPF, $DATAVERSE_POM_FILE; - my $pom_line; - while ($pom_line=<DPF>) - { - chop $pom_line; - if ($pom_line =~/^[ \t]*<revision>([0-9\.]+)<\/revision>/) - { - $DATAVERSE_VERSION=$1; - last; - } - } - close DPF; - - if ($DATAVERSE_VERSION ne "") { - $WARFILE_LOCATION = "../../target/dataverse-" . $DATAVERSE_VERSION .
".war"; - } - } - } - -# But, if the war file cannot be found in either of the 2 -# places - we'll just have to give up: - - unless ( -f $WARFILE_LOCATION ) { - print "\nWARNING: Can't find the project .war file!\n"; - print "\tAre you running the installer in the right directory?\n"; - print "\tHave you built the war file?\n"; - print "\t(if not, build the project and run the installer again)\n"; - - exit 0; - } - print " Yes, it is!\n"; - - -# 1b. check and remember the working dir: - chomp( $cwd = `pwd` ); - -# 1d. jq executable: - - my $sys_path = $ENV{'PATH'}; - my @sys_path_dirs = split( ":", $sys_path ); - - if ( $pod_name ne "start-glassfish") # Why is that again? - { - for my $sys_path_dir (@sys_path_dirs) { - if ( -x $sys_path_dir . "/jq" ) { - $jq_exec_path = $sys_path_dir; - last; - } - } - if ( $jq_exec_path eq "" ) { - print STDERR "\nERROR: I haven't been able to find the jq command in your PATH! Please install it from http://stedolan.github.io/jq/\n"; - exit 1; - - } - } - -} - - -# 2. INTERACTIVE CONFIG SECTION: - -print "\nWelcome to the Dataverse installer.\n"; -unless ($postgresonly) { - print "You will be guided through the process of setting up a NEW\n"; - print "instance of the dataverse application\n"; -} -else { - print "You will be guided through the process of configuring your\n"; - print "PostgreSQL database for use by the Dataverse application.\n"; -} - -my $yesno; - -unless ($noninteractive) -{ - print "\nATTENTION: As of Dataverse v.4.19, we are offering a new, experimental \n"; - print "version of the installer script, implemented in Python. It will eventually \n"; - print "replace this script (implemented in Perl). Consult the file README_python.txt \n"; - print "for more information on how to run it. \n"; - - print "\nWould you like to exit and use the new installer instead? [y/n] "; - $yesno = <>; - chop $yesno; - - while ( $yesno ne "y" && $yesno ne "n" ) { - print "Please enter 'y' or 'n'!\n"; - print "(or ctrl-C to exit the installer)\n"; - $yesno = <>; - chop $yesno; - } - - exit 0 if $yesno eq "y"; -} - -ENTERCONFIG: - -print "\n"; -print "Please enter the following configuration values:\n"; -print "(hit [RETURN] to accept the default value)\n"; -print "\n"; - -for my $ENTRY (@CONFIG_VARIABLES) -{ - my $config_prompt = $CONFIG_PROMPTS{$ENTRY}; - my $config_comment = $CONFIG_COMMENTS{$ENTRY}; - - if ( $config_comment eq '' ) - { - print $config_prompt . ": "; - print "[" . $CONFIG_DEFAULTS{$ENTRY} . "] "; - } - else - { - print $config_prompt . $config_comment; - print "[" . $CONFIG_DEFAULTS{$ENTRY} . 
"] "; - } - - my $user_entry = ""; - - # ($noninteractive means the installer is being run in the non-interactive mode; it will use - # the default values specified so far, without prompting the user for alternative values)\ - unless ($noninteractive) - { - $user_entry = <>; - chop $user_entry; - - if ( $user_entry ne "" ) { - $CONFIG_DEFAULTS{$ENTRY} = $user_entry; - } - - # for some values, we'll try to do some validation right here, in real time: - - if ($ENTRY eq 'ADMIN_EMAIL') - { - $user_entry = $CONFIG_DEFAULTS{$ENTRY}; - my $attempts = 0; - while ($user_entry !~/[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}/) - { - $attempts++; - print "Please enter a valid email address: "; - $user_entry = <>; - chop $user_entry; - } - - if ($attempts) - { - print "OK, looks legit.\n"; - $CONFIG_DEFAULTS{$ENTRY} = $user_entry; - } - } - elsif ($ENTRY eq 'GLASSFISH_DIRECTORY') - { - # CHECK IF GLASSFISH DIR LOOKS OK: - print "\nChecking your Glassfish installation..."; - - my $g_dir = $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'}; - - - unless ( -d $g_dir . "/glassfish/domains/domain1" ) - { - while ( !( -d $g_dir . "/glassfish/domains/domain1" ) ) - { - print "\nInvalid Glassfish directory " . $g_dir . "!\n"; - print "Enter the root directory of your Glassfish installation:\n"; - print "(Or ctrl-C to exit the installer): "; - - $g_dir = <>; - chop $g_dir; - } - } - - # verify that we can write in the Glassfish directory - # (now that we are no longer requiring to run the installer as root) - - my @g_testdirs = ( "/glassfish/domains/domain1", - "/glassfish/domains/domain1/config", - "/glassfish/lib"); - - for my $test_dir (@g_testdirs) - { - if (!(-w ($g_dir . $test_dir))) - { - print "\n"; - die("ERROR: " . $g_dir . $test_dir . " not writable to the user running the installer! Check permissions on Payara5 hierarchy.\n"); - } - } - - - - print "$g_dir looks OK!\n"; - $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'} = $g_dir; - - } - elsif ($ENTRY eq 'MAIL_SERVER') - { - my $smtp_server = ""; - while (! &validate_smtp_server() ) - { - print "Enter a valid SMTP (mail) server:\n"; - print "(Or ctrl-C to exit the installer): "; - - $smtp_server = <>; - chop $smtp_server; - - $CONFIG_DEFAULTS{'MAIL_SERVER'} = $smtp_server unless $smtp_server eq ''; - } - - print "\nOK, we were able to establish connection to the SMTP server you have specified.\n"; - print "Please note that you *may* need to configure some extra settings before your \n"; - print "Dataverse can send email. Please consult the \"Mail Host Configuration & Authentication\"\n"; - print "section of the installation guide (http://guides.dataverse.org/en/latest/installation/installation-main.html)\n"; - print "for more information.\n"; - } - } - - print "\n"; -} - -# 2b. CONFIRM VALUES ENTERED: - -print "\nOK, please confirm what you've entered:\n\n"; - -for my $ENTRY (@CONFIG_VARIABLES) { - print $CONFIG_PROMPTS{$ENTRY} . ": " . $CONFIG_DEFAULTS{$ENTRY} . "\n"; -} - -if ($noninteractive) { - $yesno = "y"; -} -else { - print "\nIs this correct? [y/n] "; - $yesno = <>; - chop $yesno; -} - -while ( $yesno ne "y" && $yesno ne "n" ) { - print "Please enter 'y' or 'n'!\n"; - print "(or ctrl-C to exit the installer)\n"; - $yesno = <>; - chop $yesno; -} - -if ( $yesno eq "n" ) { - goto ENTERCONFIG; -} - -# 3. 
SET UP POSTGRES USER AND DATABASE - -unless($pod_name eq "start-glassfish" || $pod_name eq "dataverse-glassfish-0" || $skipdatabasesetup) { - &setup_postgres(); -# (there's no return code - if anything goes wrong, the method will exit the script, with some diagnostic messages for the user) - print "\nOK, done.\n"; - - if ($postgresonly) - { - exit 0; - } -} - -# 5. CONFIGURE PAYARA - -my $glassfish_dir = $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'}; - -my $done = &setup_appserver(); - -# Check if the App is running: - -unless (( - my $exit_code = - system( $glassfish_dir . "/bin/asadmin list-applications | grep -q '^dataverse'" ) - ) == 0 ) -{ - # If the "asadmin list-applications" has failed, it may only mean that an earlier - # "asadmin login" had failed, and asadmin is now failing to run without the user - # supplying the username and password. (And the fact that we are trying to pile the - # output to grep prevents it from providing the prompts). - # So before we give up, we'll try an alternative: - - unless (( - my $exit_code_2 = - system( "curl http://localhost:8080/robots.txt | grep -q '^User-agent'" ) - ) == 0 ) - { - print STDERR "It appears that the Dataverse application is not running...\n"; - print STDERR "Even though the \"asadmin deploy\" command had succeeded earlier.\n\n"; - print STDERR "Aborting - sorry...\n\n"; - - exit 1; - } -} - - -print "\nOK, the Dataverse application appears to be running...\n\n"; - -# Run the additional setup scripts, that populate the metadata block field values, create users -# and dataverses, etc. - -unless ( -d "data" && -f "setup-datasetfields.sh" && -f "setup-users.sh" && -f "setup-dvs.sh" && -f "setup-all.sh" ) { - chdir("../api"); -} - -unless ( -d "data" && -f "setup-datasetfields.sh" && -f "setup-users.sh" && -f "setup-dvs.sh" && -f "setup-builtin-roles.sh" && -f "setup-all.sh" ) { - print "\nERROR: Can't find the metadata and user/dataverse setup scripts!\n"; - print "\tAre you running the installer in the right directory?\n"; - exit 1; -} - -# if there's an admin_email set from arguments, replace the value in `dv-root.json` (called by `setup-all.sh`) -if ($admin_email) -{ - print "setting contact email for root dataverse to: $admin_email\n"; - set_root_contact_email( $admin_email ); -} -else -{ - print "using default contact email for root dataverse\n"; -} - -for my $script ( "setup-all.sh" ) { - # (there's only 1 setup script to run now - it runs all the other required scripts) - print "Executing post-deployment setup script " . $script . "... "; - - my $my_hostname = $CONFIG_DEFAULTS{'HOST_DNS_ADDRESS'}; - - # We used to filter the supplied scripts, replacing "localhost" and the port, in - # case they are running Dataverse on a different port... Now we are simply requiring - # that the port 8080 is still configured in domain.xml when they are running the - # installer: - my $run_script; - #if ( $my_hostname ne "localhost" ) { - # system( "sed 's/localhost:8080/$my_hostname/g' < " . $script . " > tmpscript.sh; chmod +x tmpscript.sh" ); - # $run_script = "tmpscript.sh"; - #} - #else { - $run_script = $script; - #} - - unless ( my $exit_code = system( "./" . $run_script . " > $run_script.$$.log 2>&1") == 0 ) - { - print "\nERROR executing script " . $script . "!\n"; - exit 1; - } - print "done!\n"; -} - -# SOME ADDITIONAL SETTINGS THAT ARE NOT TAKEN CARE OF BY THE setup-all SCRIPT -# NEED TO BE CONFIGURED HERE: - -print "Making additional configuration changes...\n\n"; - - -# a. 
Configure the Admin email in the Dataverse settings: - -print "Executing " . "curl -X PUT -d " . $CONFIG_DEFAULTS{'ADMIN_EMAIL'} . " " . $API_URL . "/admin/settings/:SystemEmail" . "\n"; - -my $exit_code = system("curl -X PUT -d " . $CONFIG_DEFAULTS{'ADMIN_EMAIL'} . " " . $API_URL . "/admin/settings/:SystemEmail"); -if ( $exit_code ) -{ - print "WARNING: failed to configure the admin email in the Dataverse settings!\n\n"; -} -else -{ - print "OK.\n\n"; -} - -# b. If this installation is going to be using a remote SOLR search engine service, configure its location in the settings: - -if ($CONFIG_DEFAULTS{'SOLR_LOCATION'} ne 'LOCAL') -{ - print "Executing " . "curl -X PUT -d " . $CONFIG_DEFAULTS{'SOLR_LOCATION'} . " " . $API_URL . "/admin/settings/:SolrHostColonPort" . "\n"; - my $exit_code = system("curl -X PUT -d " . $CONFIG_DEFAULTS{'SOLR_LOCATION'} . " " . $API_URL . "/admin/settings/:SolrHostColonPort"); - if ( $exit_code ) - { - print "WARNING: failed to configure the location of the remote SOLR service!\n\n"; - } - else - { - print "OK.\n\n"; - } -} - - - -chdir($cwd); - -print "\n\nYou should now have a running Dataverse instance at\n"; -print " http://" . $CONFIG_DEFAULTS{'HOST_DNS_ADDRESS'} . ":8080\n\n\n"; - -if ($WARFILE_LOCATION =~/([0-9]\.[0-9]\.[0-9])\.war$/) -{ - my $version = $1; - print "If this is a personal development installation, we recommend that you undeploy the currently-running copy \n"; - print "of the application, with the following asadmin command:\n\n"; - print "\t" . $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'} . '/bin/asadmin undeploy dataverse-' . $version . "\n\n"; - print "before attempting to deploy from your development environment in NetBeans.\n\n"; -} - - -print "\nYour Dataverse has been configured to use DataCite, to register DOI global identifiers in the \n"; -print "test name space \"10.5072\" with the \"shoulder\" \"FK2\"\n"; -print "However, you have to contact DataCite (support\@datacite.org) and request a test account, before you \n"; -print "can publish datasets. Once you receive the account name and password, add them to your domain.xml,\n"; -print "as the following two JVM options:\n"; -print "\t-Ddoi.username=...\n"; -print "\t-Ddoi.password=...\n"; -print "and restart payara5\n"; -print "If this is a production Dataverse and you are planning to register datasets as \n"; -print "\"real\", non-test DOIs or Handles, consult the \"Persistent Identifiers and Publishing Datasets\"\n"; -print "section of the Installataion guide, on how to configure your Dataverse with the proper registration\n"; -print "credentials.\n\n"; - - - -# (going to skip the Rserve check; it's no longer a required, or even a recommended component) - -exit 0; - -# 9. FINALLY, CHECK IF RSERVE IS RUNNING: -print "\n\nFinally, checking if Rserve is running and accessible...\n"; - -unless ( $CONFIG_DEFAULTS{'RSERVE_PORT'} =~ /^[0-9][0-9]*$/ ) { - print $CONFIG_DEFAULTS{'RSERVE_HOST'} . 
" does not look like a valid port number,\n"; - print "defaulting to 6311.\n\n"; - - $CONFIG_DEFAULTS{'RSERVE_PORT'} = 6311; -} - -my ( $rserve_iaddr, $rserve_paddr, $rserve_proto ); - -unless ( $rserve_iaddr = inet_aton( $CONFIG_DEFAULTS{'RSERVE_HOST'} ) ) { - print STDERR "Could not look up $CONFIG_DEFAULTS{'RSERVE_HOST'},\n"; - print STDERR "the host you specified as your R server.\n"; - print STDERR "\nDVN can function without a working R server, but\n"; - print STDERR "much of the functionality concerning running statistics\n"; - print STDERR "and analysis on quantitative data will not be available.\n"; - print STDERR "Please consult the Installers guide for more info.\n"; - - exit 0; -} - -$rserve_paddr = sockaddr_in( $CONFIG_DEFAULTS{'RSERVE_PORT'}, $rserve_iaddr ); -$rserve_proto = getprotobyname('tcp'); - -unless ( socket( SOCK, PF_INET, SOCK_STREAM, $rserve_proto ) - && connect( SOCK, $rserve_paddr ) ) -{ - print STDERR "Could not establish connection to $CONFIG_DEFAULTS{'RSERVE_HOST'}\n"; - print STDERR "on port $CONFIG_DEFAULTS{'RSERVE_PORT'}, the address you provided\n"; - print STDERR "for your R server.\n"; - print STDERR "DVN can function without a working R server, but\n"; - print STDERR "much of the functionality concerning running statistics\n"; - print STDERR "and analysis on quantitative data will not be available.\n"; - print STDERR "Please consult the \"Installing R\" section in the Installers guide\n"; - print STDERR "for more info.\n"; - - exit 0; - -} - -close(SOCK); -print "\nOK!\n"; - -# 5. CONFIGURE PAYARA -sub setup_appserver { - my $success = 1; - my $failure = 0; - - my $glassfish_dir = $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'}; - - print "\nProceeding with the app. server (Payara5) setup.\n"; - -# 5a. DETERMINE HOW MUCH MEMORY TO GIVE TO GLASSFISH AS HEAP: - - my $gf_heap_default = "2048m"; - my $sys_mem_total = 0; - - if ( -e "/proc/meminfo" && open MEMINFO, "/proc/meminfo" ) { - # Linux - - while ( my $mline = ) { - if ( $mline =~ /MemTotal:[ \t]*([0-9]*) kB/ ) { - $sys_mem_total = $1; - } - } - - close MEMINFO; - -# TODO: Figure out how to determine the amount of memory when running in Docker -# because we're wondering if Dataverse can run in the free OpenShift Online -# offering that only gives you 1 GB of memory. Obviously, if this is someone's -# first impression of Dataverse, we want to to run well! What if you try to -# ingest a large file or perform other memory-intensive operations? For more -# context, see https://github.com/IQSS/dataverse/issues/4040#issuecomment-331282286 - if ( -e "/sys/fs/cgroup/memory/memory.limit_in_bytes" && open CGROUPMEM, "/sys/fs/cgroup/memory/memory.limit_in_bytes" ) { - print "INFO: This system has the CGROUP file /sys/fs/cgroup/memory/memory.limit_in_bytes\n"; - while ( my $limitline = ) { - ### TODO: NO, WE ARE NOT NECESSARILY IN DOCKER! - ###print "We must be running in Docker! Fancy!\n"; - # The goal of this cgroup check is for - # "Setting the heap limit for Glassfish/Payara to 750MB" - # to change to some other value, based on memory available. - print "INFO: /sys/fs/cgroup/memory/memory.limit_in_bytes: $limitline\n"; - my $limit_in_kb = $limitline / 1024; - print "INFO: CGROUP limit_in_kb = $limit_in_kb [ignoring]\n"; - # In openshift.json, notice how PostgreSQL and Solr have - # resources.limits.memory set to "256Mi". 
- # If you try to give the Dataverse/Glassfish container twice - # as much memory (512 MB) and allow $sys_mem_total to - # be set below, you should see the following: - # "Setting the heap limit for Glassfish to 192MB." - # FIXME: dataverse.war will not deploy with only 512 MB of memory. - # Again, the goal is 1 GB total (512MB + 256MB + 256MB) for - # Glassfish, PostgreSQL, and Solr to fit in the free OpenShift tier. - #print "setting sys_mem_total to: $limit_in_kb\n"; - #$sys_mem_total = $limit_in_kb; - } - close CGROUPMEM; - } - } - elsif ( -x "/usr/sbin/sysctl" ) - { - # MacOS X, probably... - - $sys_mem_total = `/usr/sbin/sysctl -n hw.memsize`; - chop $sys_mem_total; - if ( $sys_mem_total > 0 ) { - $sys_mem_total = int( $sys_mem_total / 1024 ); - # size in kb - } - } - - if ( $sys_mem_total > 0 ) { - # setting the default heap size limit to 3/8 of the available - # amount of memory: - $gf_heap_default = ( int( $sys_mem_total / ( 8 / 3 * 1024 ) ) ); - - print "\nSetting the heap limit for Payara5 to " . $gf_heap_default . "MB. \n"; - print "You may need to adjust this setting to better suit \n"; - print "your system.\n\n"; - - #$gf_heap_default .= "m"; - - } - else - { - print "\nCould not determine the amount of memory on your system.\n"; - print "Setting the heap limit for Payara5 to 2GB. You may need \n"; - print "to adjust the value to better suit your system.\n\n"; - } - - push @CONFIG_VARIABLES, "DEF_MEM_SIZE"; - $CONFIG_DEFAULTS{"DEF_MEM_SIZE"} = $gf_heap_default; - -# TODO: -# is the below still the case with Payara5? -# if the system has more than 4GB of memory (I believe), glassfish must -# be run with the 64 bit flag set explicitly (at least that was the case -# with the MacOS glassfish build...). Verify, and if still the case, -# add a check. - - print "\n*********************\n"; - print "PLEASE NOTE, SOME OF THE ASADMIN COMMANDS ARE GOING TO FAIL,\n"; - print "FOR EXAMPLE, IF A CONFIGURATION SETTING THAT WE ARE TRYING\n"; - print "TO CREATE ALREADY EXISTS; OR IF A JVM OPTION THAT WE ARE\n"; - print "DELETING DOESN'T. THESE \"FAILURES\" ARE NORMAL!\n"; - print "*********************\n\n"; - print "When/if asadmin asks you to \"Enter admin user name\",\n"; - print "it should be safe to hit return and accept the default\n"; - print "(which is \"admin\").\n"; - - print "\nPress any key to continue...\n\n"; - - unless ($noninteractive) - { - system "stty cbreak /dev/tty 2>&1"; - unless ($noninteractive) { - my $key = getc(STDIN); - } - system "stty -cbreak /dev/tty 2>&1"; - } - - print "\n"; - -# 5b. start domain, if not running: - - my $javacheck = `java -version`; - my $exitcode = $?; - unless ( $exitcode == 0 ) { - print STDERR "$javacheck\n" if $javacheck; - print STDERR "Do you have java installed?\n"; - exit 1; - } - my $DOMAIN = "domain1"; - my $DOMAIN_DOWN = - `$CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'}/bin/asadmin list-domains | grep "$DOMAIN " | grep "not running"`; - print STDERR $DOMAIN_DOWN . "\n"; - if ($DOMAIN_DOWN) { - print "Trying to start domain up...\n"; - if ( $current_user eq $CONFIG_DEFAULTS{'GLASSFISH_USER'} ){ - system( $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'} . "/bin/asadmin start-domain domain1" ); - } - else - { - system( "sudo -u $CONFIG_DEFAULTS{'GLASSFISH_USER'} " . $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'} . "/bin/asadmin start-domain domain1" ); - } - # TODO: (?) - retest that the domain is running now? - } - else - { - print "domain appears to be up...\n"; - } - -# 5c. 
create asadmin login, so that the user doesn't have to enter -# the username and password for every asadmin command, if -# access to :4848 is password-protected: - - system( $glassfish_dir. "/bin/asadmin login" ); - -# 5d. configure glassfish using ASADMIN commands: - - $success = &run_asadmin_script(); - -# CHECK EXIT STATUS, BARF IF SETUP SCRIPT FAILED: - - unless ($success) { - print "\nERROR! Failed to configure Payara5 domain!\n"; - print "(see the error messages above - if any)\n"; - print "Aborting...\n"; - - exit 1; - } - -# 5e. Additional config files: - - my $JHOVE_CONFIG = "jhove.conf"; - my $JHOVE_CONF_SCHEMA = "jhoveConfig.xsd"; - - - my $JHOVE_CONFIG_DIST = $JHOVE_CONFIG; - my $JHOVE_CONF_SCHEMA_DIST = $JHOVE_CONF_SCHEMA; - -# (if the installer is being run NOT as part of a distribution zipped bundle, but -# from inside the source tree - adjust the locations of the jhove config files: - - unless ( -f $JHOVE_CONFIG ) { - $JHOVE_CONFIG_DIST = "../../conf/jhove/jhove.conf"; - $JHOVE_CONF_SCHEMA_DIST = "../../conf/jhove/jhoveConfig.xsd"; - } - -# but if we can't find the files in either location, it must mean -# that they are not running the script in the correct directory - so -# nothing else left for us to do but give up: - - unless ( -f $JHOVE_CONFIG_DIST && -f $JHOVE_CONF_SCHEMA_DIST ) { - print "\nERROR! JHOVE configuration files not found in the config dir!\n"; - print "(are you running the installer in the right directory?\n"; - print "Aborting...\n"; - exit 1; - } - - print "\nCopying additional configuration files... "; - - #system( "/bin/cp -f " . $JHOVE_CONF_SCHEMA_DIST . " " . $glassfish_dir . "/glassfish/domains/domain1/config" ); - my $jhove_success = copy ($JHOVE_CONF_SCHEMA_DIST, $glassfish_dir . "/glassfish/domains/domain1/config"); - unless ($jhove_success) - { - print "\n*********************\n"; - print "ERROR: failed to copy jhove config file into " . $glassfish_dir . "/glassfish/domains/domain1/config - do you have write permission in that directory?"; - exit 1; - } - -# The JHOVE conf file has an absolute PATH of the JHOVE config schema file (uh, yeah...) -# - so it may need to be readjusted here: - - if ( $glassfish_dir ne "/usr/local/payara5" ) - { - system( "sed 's:/usr/local/payara5:$glassfish_dir:g' < " . $JHOVE_CONFIG_DIST . " > " . $glassfish_dir . "/glassfish/domains/domain1/config/" . $JHOVE_CONFIG); - } - else - { - system( "/bin/cp -f " . $JHOVE_CONFIG_DIST . " " . $glassfish_dir . "/glassfish/domains/domain1/config" ); - } - - print "done!\n"; - -# 5f. check if payara is running: -# TODO. - -# 5g. DEPLOY THE APPLICATION: - - print "\nAttempting to deploy the application.\n"; - print "Command line: " . $glassfish_dir . "/bin/asadmin deploy " . $WARFILE_LOCATION . "\n"; - unless (( - my $exit_code = - system( $glassfish_dir . "/bin/asadmin deploy " . $WARFILE_LOCATION ) - ) == 0 ) - { - print STDERR "Failed to deploy the application! WAR file: " . $WARFILE_LOCATION . ".\n"; - print STDERR "(exit code: " . $exit_code . ")\n"; - print STDERR "Aborting.\n"; - exit 1; - } - - - print "Finished configuring Payara and deploying the dataverse application. \n"; - - - return $success; -} - -sub run_asadmin_script { - my $success = 1; - my $failure = 0; - - # We are going to run a standalone shell script with a bunch of asadmin - # commands to set up all the Payara components for the application. 
- # All the parameters must be passed to that script as environmental - # variables: - - $ENV{'GLASSFISH_ROOT'} = $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'}; - $ENV{'GLASSFISH_DOMAIN'} = "domain1"; - $ENV{'ASADMIN_OPTS'} = ""; - $ENV{'MEM_HEAP_SIZE'} = $CONFIG_DEFAULTS{'DEF_MEM_SIZE'}; - - $ENV{'DB_PORT'} = $CONFIG_DEFAULTS{'POSTGRES_PORT'}; - $ENV{'DB_HOST'} = $CONFIG_DEFAULTS{'POSTGRES_SERVER'}; - $ENV{'DB_NAME'} = $CONFIG_DEFAULTS{'POSTGRES_DATABASE'}; - $ENV{'DB_USER'} = $CONFIG_DEFAULTS{'POSTGRES_USER'}; - $ENV{'DB_PASS'} = $CONFIG_DEFAULTS{'POSTGRES_PASSWORD'}; - - $ENV{'RSERVE_HOST'} = $CONFIG_DEFAULTS{'RSERVE_HOST'}; - $ENV{'RSERVE_PORT'} = $CONFIG_DEFAULTS{'RSERVE_PORT'}; - $ENV{'RSERVE_USER'} = $CONFIG_DEFAULTS{'RSERVE_USER'}; - $ENV{'RSERVE_PASS'} = $CONFIG_DEFAULTS{'RSERVE_PASSWORD'}; - $ENV{'DOI_BASEURL'} = $CONFIG_DEFAULTS{'DOI_BASEURL'}; - $ENV{'DOI_USERNAME'} = $CONFIG_DEFAULTS{'DOI_USERNAME'}; - $ENV{'DOI_PASSWORD'} = $CONFIG_DEFAULTS{'DOI_PASSWORD'}; - $ENV{'DOI_DATACITERESTAPIURL'} = $CONFIG_DEFAULTS{'DOI_DATACITERESTAPIURL'}; - - $ENV{'HOST_ADDRESS'} = $CONFIG_DEFAULTS{'HOST_DNS_ADDRESS'}; - - my ($mail_server_host, $mail_server_port) = split (":", $CONFIG_DEFAULTS{'MAIL_SERVER'}); - - $ENV{'SMTP_SERVER'} = $mail_server_host; - - if ($mail_server_port) - { - $ENV{'SMTP_SERVER_PORT'} = $mail_server_port; - } - - $ENV{'FILES_DIR'} = - $CONFIG_DEFAULTS{'GLASSFISH_DIRECTORY'} . "/glassfish/domains/" . $ENV{'GLASSFISH_DOMAIN'} . "/files"; - - system("./as-setup.sh"); - - if ($?) { - return $failure; - } - return $success; -} - -sub create_pg_hash { - my $pg_username = shift @_; - my $pg_password = shift @_; - - my $encode_line = $pg_password . $pg_username; - - # for Redhat: - - ##print STDERR "executing /bin/echo -n $encode_line | md5sum\n"; - - my $hash; - if ( $WORKING_OS eq "MacOSX" ) { - $hash = `/bin/echo -n $encode_line | md5`; - } - else { - $hash = `/bin/echo -n $encode_line | md5sum`; - } - - chop $hash; - - $hash =~ s/ \-$//; - - if ( ( length($hash) != 32 ) || ( $hash !~ /^[0-9a-f]*$/ ) ) { - print STDERR "Failed to generate a MD5-encrypted password hash for the Postgres database.\n"; - exit 1; - } - - return $hash; -} - -sub validate_smtp_server { - my ( $mail_server_iaddr, $mail_server__paddr, $mail_server_proto, $mail_server_status ); - - $mail_server_status = 1; - - my $userentry = $CONFIG_DEFAULTS{'MAIL_SERVER'}; - my ($testserver, $testport) = split (":", $userentry); - - unless ( $mail_server_iaddr = inet_aton( $testserver ) ) { - print STDERR "Could not look up $testserver,\n"; - print STDERR "the host you specified as your mail server\n"; - $mail_server_status = 0; - } - - if ($mail_server_status) { - $testport = 25 unless $testport; - my $mail_server_paddr = sockaddr_in( $testport, $mail_server_iaddr ); - $mail_server_proto = getprotobyname('tcp'); - - unless ( socket( SOCK, PF_INET, SOCK_STREAM, $mail_server_proto ) - && connect( SOCK, $mail_server_paddr ) ) - { - print STDERR "Could not establish connection to $CONFIG_DEFAULTS{'MAIL_SERVER'},\n"; - print STDERR "the address you provided for your Mail server.\n"; - print STDERR "Please select a valid mail server, and try again.\n\n"; - - $mail_server_status = 0; - } - - close(SOCK); - } - - return $mail_server_status; -} - -# support function for set_root_contact_email -sub search_replace_file -{ - my ($infile, $pattern, $replacement, $outfile) = @_; - open (my $inp, $infile); - local $/ = undef; - my $txt = <$inp>; - close $inp; - $txt =~s/$pattern/$replacement/g; - open (my $opf, '>:encoding(UTF-8)', 
$outfile); - print $opf $txt; - close $opf; - return; -} -# set the email address for the default `dataverseAdmin` account -sub set_root_contact_email -{ - my ($contact_email) = @_; - my $config_json = "data/user-admin.json"; - search_replace_file($config_json,"\"email\":\"dataverse\@mailinator.com\"","\"email\":\"$contact_email\"",$config_json); - return; -} - - -sub setup_postgres { - my $pg_local_connection = 0; - my $pg_major_version = 0; - my $pg_minor_version = 0; - - -# We'll need to do a few things as the Postgres admin user; -# We'll assume the name of the admin user is "postgres". - my $POSTGRES_ADMIN_USER = "postgres"; - - - -##Handling container env - - if ($pod_name eq "start-glassfish") - { - # When we are in this openshift "start-glassfish" pod, we get all the - # Postgres configuration from the environmental variables. - print "Init container starting \n"; - $CONFIG_DEFAULTS{'POSTGRES_SERVER'} = $ENV{"POSTGRES_SERVER"} . "." . $ENV{"POSTGRES_SERVICE_HOST"}; - $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} = $ENV{"POSTGRES_DATABASE"}; - $CONFIG_DEFAULTS{'POSTGRES_USER'} = $ENV{"POSTGRES_USER"}; - $CONFIG_DEFAULTS{'POSTGRES_ADMIN_PASSWORD'} = $ENV{"POSTGRES_ADMIN_PASSWORD"}; - # there was a weird case of the postgres admin password option spelled differently in openshift.json - # - as "POSTGRESQL_ADMIN_PASSWORD"; I'm going to change it in openshift.json - but I'm leaving this - # next line here, just in case: (L.A. -- Sept. 2018) - $CONFIG_DEFAULTS{'POSTGRES_ADMIN_PASSWORD'} = $ENV{'POSTGRESQL_ADMIN_PASSWORD'}; - $CONFIG_DEFAULTS{'POSTGRES_PASSWORD'} = $ENV{"POSTGRES_PASSWORD"}; - } - - if ( $CONFIG_DEFAULTS{'POSTGRES_SERVER'} eq 'localhost' || $CONFIG_DEFAULTS{'POSTGRES_SERVER'} eq '127.0.0.1' ) - { - $pg_local_connection = 1; - } -# elsif ($postgresonly) -# { -# print "In the --pg_only mode the script can only be run LOCALLY,\n"; -# print "i.e., on the server where PostgresQL is running, with the\n"; -# print "Postgres server address as localhost - \"127.0.0.1\".\n"; -# exit 1; -# } - -#If it is executing in a container, proceed easy with this all-in-one block - - - - -# 3b. LOCATE THE psql EXECUTABLE: - - if ( $pod_name eq "start-glassfish"){ - $psql_exec_path = "/usr/bin" - } - else - { - my $sys_path = $ENV{'PATH'}; - my @sys_path_dirs = split( ":", $sys_path ); - - for my $sys_path_dir (@sys_path_dirs) { - - if ( -x $sys_path_dir . "/psql" ) { - $psql_exec_path = $sys_path_dir; - - last; - } - } - } - - my $psql_major_version = 0; - my $psql_minor_version = 0; - -# 3c. IF PSQL WAS FOUND IN THE PATH, CHECK ITS VERSION: - - unless ( $psql_exec_path eq "" ) { - open( PSQLOUT, $psql_exec_path . 
"/psql --version|" ); - - my $psql_version_line = ; - chop $psql_version_line; - close PSQLOUT; - - my ( $postgresName, $postgresNameLong, $postgresVersion ) = split( " ", $psql_version_line ); - - unless ( $postgresName eq "psql" && $postgresVersion =~ /^[0-9][0-9\.]*$/ ) { - print STDERR "\nWARNING: Unexpected output from psql command!\n"; - } - else - { - my (@psql_version_tokens) = split( '\.', $postgresVersion ); - - print "\n\nFound Postgres psql command, version $postgresVersion.\n\n"; - - $psql_major_version = $psql_version_tokens[0]; - $psql_minor_version = $psql_version_tokens[1]; - - $pg_major_version = $psql_major_version; - $pg_minor_version = $psql_minor_version; - - } - } - -# a frequent problem with MacOSX is that the copy of psql found in the PATH -# belongs to the older version of PostgresQL supplied with the OS, which happens -# to be incompatible with the newer builds from the Postgres project; which are -# recommended to be used with Dataverse. So if this is a MacOSX box, we'll -# check what other versions of PG are available, and select the highest version -# we can find: - - if ( $WORKING_OS eq "MacOSX" ) { - my $macos_pg_major_version = 0; - my $macos_pg_minor_version = 0; - - for $macos_pg_minor_version ( "9", "8", "7", "6", "5", "4", "3", "2", "1", "0" ) { - if ( -x "/Library/PostgreSQL/9." . $macos_pg_minor_version . "/bin/psql" ) { - $macos_pg_major_version = 9; - if ( ( $macos_pg_major_version > $psql_major_version ) - || ( $macos_pg_minor_version >= $psql_minor_version ) ) - { - $psql_exec_path = "/Library/PostgreSQL/9." . $macos_pg_minor_version . "/bin"; - $pg_major_version = $macos_pg_major_version; - $pg_minor_version = $macos_pg_minor_version; - } - last; - } - } - } - - my $psql_admin_exec = ""; - - if ( $psql_exec_path eq "" ) - { - if ( $pg_local_connection || $noninteractive) - { - print STDERR "\nERROR: I haven't been able to find the psql command in your PATH!\n"; - print STDERR "Please make sure PostgresQL is properly installed; if necessary, add\n"; - print STDERR "the location of psql to the PATH, then try again.\n\n"; - - exit 1; - } - else - { - print "WARNING: I haven't been able to find the psql command in your PATH!\n"; - print "But since we are configuring a Dataverse instance to use a remote Postgres server,\n"; - print "we can still set up the database by running a setup script on that remote server\n"; - print "(see below for instructions).\n"; - - } - } else { - - print "(Using psql version " . $pg_major_version . "." . $pg_minor_version . ": " . $psql_exec_path . "/psql)\n"; - - - $psql_admin_exec = "PGPASSWORD=" . $CONFIG_DEFAULTS{'POSTGRES_ADMIN_PASSWORD'} . "; export PGPASSWORD; " . $psql_exec_path; - $psql_exec_path = "PGPASSWORD=" . $CONFIG_DEFAULTS{'POSTGRES_PASSWORD'} . "; export PGPASSWORD; " . $psql_exec_path; - - print "Checking if we can talk to Postgres as the admin user...\n"; - } - -# 3d. CHECK IF WE CAN TALK TO POSTGRES AS THE ADMIN: - - if ($psql_exec_path eq "" || system( $psql_admin_exec . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . $CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $POSTGRES_ADMIN_USER . " -d postgres -c 'SELECT * FROM pg_roles' > /dev/null 2>&1" ) ) - { - # No, we can't. :( - if ($pg_local_connection || $noninteractive) - { - # If Postgres is running locally, this is a fatal condition. - # We'll give them some (potentially) helpful pointers and exit. - - print "(Tried executing: " . $psql_admin_exec . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . 
$CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $POSTGRES_ADMIN_USER . " -d postgres -c 'SELECT * FROM pg_roles' > /dev/null 2>&1) \n"; - print "Nope, I haven't been able to connect to the local instance of PostgresQL as the admin user.\n"; - print "\nIs postgresql running? \n"; - print " On a RedHat-like system, you can check the status of the daemon with\n\n"; - print " service postgresql start\n\n"; - print " On MacOSX, use Applications -> PostgresQL -> Start Server.\n"; - print " (or, if there's no \"Start Server\" item in your PostgresQL folder, \n"; - print " simply restart your MacOSX system!)\n"; - print "\nAlso, please make sure that the daemon is listening to network connections!\n"; - print " - at least on the localhost interface. (See \"Installing Postgres\" section\n"; - print " of the installation manual).\n"; - print "\nFinally, did you supply the correct admin password?\n"; - print " Don't know the admin password for your Postgres installation?\n"; - print " - then simply set the access level to \"trust\" temporarily (for localhost only!)\n"; - print " in your pg_hba.conf file. Again, please consult the \n"; - print " installation manual).\n"; - exit 1; - } - else - { - # If we are configuring the Dataverse instance to use a Postgres server - # running on a remote host, it is possible to configure the database - # without opening remote access for the admin user. They will simply - # have to run this script in the "postgres-only" mode on that server, locally, - # then resume the installation here: - print "(Tried executing: " . $psql_admin_exec . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . $CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $POSTGRES_ADMIN_USER . " -d postgres -c 'SELECT * FROM pg_roles' > /dev/null 2>&1)\n\n"; - print "Haven't been able to connect to the remote Postgres server as the admin user.\n"; - print "(Or you simply don't have psql installed on this server)\n"; - print "It IS possible to configure a database for your Dataverse on a remote server,\n"; - print "without having admin access to that remote Postgres installation.\n\n"; - print "In order to do that, please copy the installer (the entire package) to the server\n"; - print "where PostgresQL is running and run the installer with the \"--pg_only\" option:\n\n"; - print " ./install --pg_only\n\n"; - - print "Press any key to continue the installation process once that has been\n"; - print "done. Or press ctrl-C to exit the installer.\n\n"; - - system "stty cbreak /dev/tty 2>&1"; - my $key = getc(STDIN); - system "stty -cbreak /dev/tty 2>&1"; - print "\n"; - } - } - else - { - print "Yes, we can!\n"; - - # ok, we can proceed with configuring things... - - print "\nConfiguring Postgres Database:\n"; - - # 4c. CHECK IF THIS DB ALREADY EXISTS: - - my $psql_command_dbcheck = - $psql_admin_exec . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . $CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $POSTGRES_ADMIN_USER . " -c '' -d " . $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} . ">/dev/null 2>&1"; - - if ( ( my $exitcode = system($psql_command_dbcheck) ) == 0 ) - { - if ($force) - { - print "WARNING! Database " - . $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} - . " already exists but --force given... continuing.\n"; - } - else - { - print "WARNING! Database " . $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} . 
" already exists!\n"; - - if ($noninteractive) - { - exit 1; - } - else - { - print "\nPress any key to continue, or ctrl-C to exit the installer...\n\n"; - - system "stty cbreak /dev/tty 2>&1"; - my $key = getc(STDIN); - system "stty -cbreak /dev/tty 2>&1"; - print "\n"; - - } - } - } - - # 3e. CHECK IF THIS USER ALREADY EXISTS: - - my $psql_command_rolecheck = - $psql_exec_path . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -c '' -d postgres " . $CONFIG_DEFAULTS{'POSTGRES_USER'} . " >/dev/null 2>&1"; - my $exitcode; - - if ( ( $exitcode = system($psql_command_rolecheck) ) == 0 ) - { - print "User (role) " . $CONFIG_DEFAULTS{'POSTGRES_USER'} . " already exists;\n"; - print "Proceeding."; - } - else - { - # 3f. CREATE DVN DB USER: - - print "\nCreating Postgres user (role) for the DVN:\n"; - - open TMPCMD, ">/tmp/pgcmd.$$.tmp"; - - # with md5-encrypted password: - my $pg_password_md5 = - &create_pg_hash( $CONFIG_DEFAULTS{'POSTGRES_USER'}, $CONFIG_DEFAULTS{'POSTGRES_PASSWORD'} ); - my $sql_command = - "CREATE ROLE \"" - . $CONFIG_DEFAULTS{'POSTGRES_USER'} - . "\" PASSWORD 'md5" - . $pg_password_md5 - . "' NOSUPERUSER CREATEDB CREATEROLE INHERIT LOGIN"; - - print TMPCMD $sql_command; - close TMPCMD; - - my $psql_commandline = $psql_admin_exec . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . $CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $POSTGRES_ADMIN_USER . " -d postgres -f /tmp/pgcmd.$$.tmp >/dev/null 2>&1"; - - my $out = qx($psql_commandline 2>&1); - $exitcode = $?; - unless ( $exitcode == 0 ) - { - print STDERR "Could not create the DVN Postgres user role!\n"; - print STDERR "(SQL: " . $sql_command . ")\n"; - print STDERR "(psql exit code: " . $exitcode . ")\n"; - print STDERR "(STDERR and STDOUT was: " . $out . ")\n"; - exit 1; - } - - unlink "/tmp/pgcmd.$$.tmp"; - print "done.\n"; - } - - # 3g. CREATE DVN DB: - - print "\nCreating Postgres database:\n"; - - my $psql_command = - $psql_admin_exec - . "/createdb -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . $CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $POSTGRES_ADMIN_USER ." " - . $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} . " --owner=" - . $CONFIG_DEFAULTS{'POSTGRES_USER'}; - - my $out = qx($psql_command 2>&1); - $exitcode = $?; - unless ( $exitcode == 0 ) - { - print STDERR "Could not create Postgres database for the Dataverse app!\n"; - print STDERR "(command: " . $psql_command . ")\n"; - print STDERR "(psql exit code: " . $exitcode . ")\n"; - print STDERR "(STDOUT and STDERR: " . $out . ")\n"; - if ($force) - { - print STDERR "\ncalled with --force, continuing\n"; - } - else - { - print STDERR "\naborting the installation (sorry!)\n\n"; - exit 1; - } - } - } - -# Whether the user and the database were created locally or remotely, we'll now -# verify that we can talk to that database, with the credentials of the database -# user that we want the Dataverse application to be using: - - if ( $psql_exec_path ne "" && system( $psql_exec_path . "/psql -h " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . " -p " . $CONFIG_DEFAULTS{'POSTGRES_PORT'} . " -U " . $CONFIG_DEFAULTS{'POSTGRES_USER'} . " -d " . $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} . " -c 'SELECT * FROM pg_roles' > /dev/null 2>&1" ) ) - { - print STDERR "Oops, haven't been able to connect to the database " . $CONFIG_DEFAULTS{'POSTGRES_DATABASE'} . ",\n"; - print STDERR "running on " . $CONFIG_DEFAULTS{'POSTGRES_SERVER'} . ", as user " . $CONFIG_DEFAULTS{'POSTGRES_USER'} . 
".\n\n"; - print STDERR "Aborting the installation (sorry!)\n"; - exit 1; - } -} - -sub read_config_defaults { - my $config_file = shift @_; - - unless ( -f $config_file ) - { - print STDERR "Can't find the config file " . $config_file . "!\n"; - exit 1; - } - - open CF, $config_file || die "Can't open config file " . $config_file . " for reading.\n"; - - while () - { - chop; - - if ( $_ =~/^[A-Z]/ && $_ =~/ *= */ ) - { - my ($name, $value) = split(/ *= */, $_, 2); - $CONFIG_DEFAULTS{$name} = $value; - } - } - close CF; -} - -sub read_interactive_config_values { - my $config_file = shift @_; - - unless ( -f $config_file ) - { - print STDERR "Can't find the config file " . $config_file . "!\n"; - exit 1; - } - - open CF, $config_file || die "Can't open config file " . $config_file . " for reading.\n"; - - my $mode = ""; - - while () - { - chop; - - if ( $_ eq "[prompts]" || $_ eq "[comments]" ) - { - $mode = $_; - } - - if ( $_ =~/^[A-Z]/ && $_ =~/ *= */ ) - { - my ($name, $value) = split(/ *= */, $_, 2); - - if ( $mode eq "[prompts]" ) - { - $CONFIG_PROMPTS{$name} = $value; - } - elsif ( $mode eq "[comments]" ) - { - $value =~s/\\n/\n/g; - $CONFIG_COMMENTS{$name} = $value; - } - } - } - close CF; -} diff --git a/scripts/installer/install.py b/scripts/installer/install.py index ea1a69db6a7..3aedbd8c6ad 100644 --- a/scripts/installer/install.py +++ b/scripts/installer/install.py @@ -252,8 +252,8 @@ # 1d. check java version java_version = subprocess.check_output(["java", "-version"], stderr=subprocess.STDOUT).decode() print("Found java version "+java_version) - if not re.search('(1.8|11)', java_version): - sys.exit("Dataverse requires OpenJDK 1.8 or 11. Please make sure it's in your PATH, and try again.") + if not re.search('(17)', java_version): + sys.exit("Dataverse requires OpenJDK 17. Please make sure it's in your PATH, and try again.") # 1e. check if the setup scripts - setup-all.sh, are available as well, maybe? # @todo (?) @@ -314,7 +314,7 @@ gfDir = config.get('glassfish', 'GLASSFISH_DIRECTORY') while not test_appserver_directory(gfDir): print("\nInvalid Payara directory!") - gfDir = read_user_input("Enter the root directory of your Payara5 installation:\n(Or ctrl-C to exit the installer): ") + gfDir = read_user_input("Enter the root directory of your Payara installation:\n(Or ctrl-C to exit the installer): ") config.set('glassfish', 'GLASSFISH_DIRECTORY', gfDir) elif option == "mail_server": mailServer = config.get('system', 'MAIL_SERVER') @@ -380,12 +380,13 @@ print("Can't connect to PostgresQL as the admin user.\n") sys.exit("Is the server running, have you adjusted pg_hba.conf, etc?") - # 3b. get the Postgres version (do we need it still?) + # 3b. get the Postgres version for new permissions model in versions 15+ try: - pg_full_version = conn.server_version - print("PostgresQL version: "+str(pg_full_version)) + pg_full_version = str(conn.server_version) + pg_major_version = pg_full_version[0:2] + print("PostgreSQL version: "+pg_major_version) except: - print("Warning: Couldn't determine PostgresQL version.") + print("Warning: Couldn't determine PostgreSQL version.") conn.close() # 3c. create role: @@ -410,7 +411,9 @@ else: sys.exit("Couldn't create database or database already exists.\n") - conn_cmd = "GRANT ALL PRIVILEGES on DATABASE "+pgDb+" to "+pgUser+";" + # 3e. 
set permissions: + + conn_cmd = "GRANT CREATE PRIVILEGES on DATABASE "+pgDb+" to "+pgUser+";" try: cur.execute(conn_cmd) except: @@ -418,6 +421,19 @@ cur.close() conn.close() + if int(pg_major_version) >= 15: + conn_cmd = "GRANT ALL ON SCHEMA public TO "+pgUser+";" + print("PostgreSQL 15 or higher detected. Running " + conn_cmd) + try: + cur.execute(conn_cmd) + except: + if force: + print("WARNING: failed to grant permissions on schema public - continuing, since the --force option was specified") + else: + sys.exit("Couldn't grant privileges on schema public to "+pgUser) + cur.close() + conn.close() + print("Database and role created!") if pgOnly: print("postgres-only setup complete.") @@ -511,12 +527,12 @@ try: copy2(jhoveConfigSchemaDist, gfConfigDir) # The JHOVE conf file has an absolute PATH of the JHOVE config schema file (uh, yeah...) - # and may need to be adjusted, if Payara is installed anywhere other than /usr/local/payara5: - if gfDir == "/usr/local/payara5": + # and may need to be adjusted, if Payara is installed anywhere other than /usr/local/payara6: + if gfDir == "/usr/local/payara6": copy2(jhoveConfigDist, gfConfigDir) else: - # use sed to replace /usr/local/payara5 in the distribution copy with the real gfDir: - sedCommand = "sed 's:/usr/local/payara5:"+gfDir+":g' < " + jhoveConfigDist + " > " + gfConfigDir + "/" + jhoveConfig + # use sed to replace /usr/local/payara6 in the distribution copy with the real gfDir: + sedCommand = "sed 's:/usr/local/payara6:"+gfDir+":g' < " + jhoveConfigDist + " > " + gfConfigDir + "/" + jhoveConfig subprocess.call(sedCommand, shell=True) print("done.") @@ -578,8 +594,8 @@ print("However, you have to contact DataCite (support\@datacite.org) and request a test account, before you ") print("can publish datasets. Once you receive the account name and password, add them to your domain.xml,") print("as the following two JVM options:") -print("\t-Ddoi.username=...") -print("\t-Ddoi.password=...") +print("\t-Ddataverse.pid.datacite.username=...") +print("\t-Ddataverse.pid.datacite.password=...") print("and restart payara") print("If this is a production Dataverse and you are planning to register datasets as ") print("\"real\", non-test DOIs or Handles, consult the \"Persistent Identifiers and Publishing Datasets\"") diff --git a/scripts/installer/installAppServer.py b/scripts/installer/installAppServer.py index 8b719ac09d1..698f5ba9a58 100644 --- a/scripts/installer/installAppServer.py +++ b/scripts/installer/installAppServer.py @@ -3,7 +3,7 @@ def runAsadminScript(config): # We are going to run a standalone shell script with a bunch of asadmin - # commands to set up all the app. server (payara5) components for the application. + # commands to set up all the app. server (payara6) components for the application. 
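Aside: the install.py hunk above tightens the database grants to CREATE on the database and, when PostgreSQL 15 or newer is detected, adds an explicit grant on the public schema (PostgreSQL 15 dropped the default CREATE privilege there for ordinary roles). Below is a minimal sketch of that flow, assuming psycopg2 and a "postgres" admin account; the function and variable names are illustrative and not taken from the installer.

    # Sketch of the version check and grants from the install.py hunk above.
    # Assumes psycopg2 is installed and the identifiers come from a trusted config file.
    import psycopg2

    def grant_app_role_privileges(host, port, db, app_role, admin_password):
        conn = psycopg2.connect(host=host, port=port, dbname=db,
                                user="postgres", password=admin_password)
        conn.autocommit = True  # let each GRANT take effect immediately
        cur = conn.cursor()
        # conn.server_version is e.g. 150004 for 15.4; like the installer, keep the first two digits
        pg_major = int(str(conn.server_version)[0:2])
        cur.execute('GRANT CREATE ON DATABASE "%s" TO "%s";' % (db, app_role))
        if pg_major >= 15:
            # PostgreSQL 15+ no longer lets ordinary roles create objects in schema public by default
            cur.execute('GRANT ALL ON SCHEMA public TO "%s";' % app_role)
        cur.close()
        conn.close()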
# All the parameters must be passed to that script as environmental # variables: os.environ['GLASSFISH_DOMAIN'] = "domain1"; diff --git a/scripts/installer/installUtils.py b/scripts/installer/installUtils.py index 7cc368de5f8..ff5e6eb708d 100644 --- a/scripts/installer/installUtils.py +++ b/scripts/installer/installUtils.py @@ -57,7 +57,7 @@ def test_appserver_directory(directory): #print("version: major: "+str(major_version)+", minor: "+str(minor_version)) - if major_version != 5 or minor_version < 201: + if major_version != 6 or minor_version < 2023: return False return True diff --git a/scripts/installer/interactive.config b/scripts/installer/interactive.config index 86ea926fe5d..ef8110c554f 100644 --- a/scripts/installer/interactive.config +++ b/scripts/installer/interactive.config @@ -26,7 +26,7 @@ DOI_BASEURL = Datacite URL DOI_DATACITERESTAPIURL = Datacite REST API URL [comments] HOST_DNS_ADDRESS = :(enter numeric IP address, if FQDN is unavailable) -GLASSFISH_USER = :This user will be running the App. Server (Payara5) service on your system.\n - If this is a dev. environment, this should be your own username; \n - In production, we suggest you create the account "dataverse", or use any other unprivileged user account\n: +GLASSFISH_USER = :This user will be running the App. Server (Payara) service on your system.\n - If this is a dev. environment, this should be your own username; \n - In production, we suggest you create the account "dataverse", or use any other unprivileged user account\n: GLASSFISH_DIRECTORY = GLASSFISH_REQUEST_TIMEOUT = :\n Defaults to 1800 seconds (30 minutes) ADMIN_EMAIL = :\n(please enter a valid email address!) diff --git a/scripts/search/tests/data/dataset-finch1-nolicense.json b/scripts/search/tests/data/dataset-finch1-nolicense.json new file mode 100644 index 00000000000..ec0856a2aa3 --- /dev/null +++ b/scripts/search/tests/data/dataset-finch1-nolicense.json @@ -0,0 +1,77 @@ +{ + "datasetVersion": { + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { "datasetContactEmail" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value" : "finch@mailinator.com" + }, + "datasetContactName" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Finch, Fiona" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ { + "dsDescriptionValue":{ + "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + "multiple":false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }}], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + } + ], + "displayName": "Citation Metadata" + } + } + } +} diff --git a/scripts/search/tests/data/dataset-finch1.json 
b/scripts/search/tests/data/dataset-finch1.json index ec0856a2aa3..433ea758711 100644 --- a/scripts/search/tests/data/dataset-finch1.json +++ b/scripts/search/tests/data/dataset-finch1.json @@ -1,5 +1,9 @@ { "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, "metadataBlocks": { "citation": { "fields": [ diff --git a/scripts/search/tests/data/dataset-finch2.json b/scripts/search/tests/data/dataset-finch2.json index d20f835b629..446df54676a 100644 --- a/scripts/search/tests/data/dataset-finch2.json +++ b/scripts/search/tests/data/dataset-finch2.json @@ -1,5 +1,9 @@ { "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, "metadataBlocks": { "citation": { "fields": [ diff --git a/scripts/tests/ec2-memory-benchmark/ec2-memory-benchmark-remote.sh b/scripts/tests/ec2-memory-benchmark/ec2-memory-benchmark-remote.sh index 0cfdd20c272..367aa214563 100755 --- a/scripts/tests/ec2-memory-benchmark/ec2-memory-benchmark-remote.sh +++ b/scripts/tests/ec2-memory-benchmark/ec2-memory-benchmark-remote.sh @@ -5,7 +5,7 @@ then EC2_HTTP_LOCATION="" fi -DATAVERSE_APP_DIR=/usr/local/payara5/glassfish/domains/domain1/applications/dataverse; export DATAVERSE_APP_DIR +DATAVERSE_APP_DIR=/usr/local/payara6/glassfish/domains/domain1/applications/dataverse; export DATAVERSE_APP_DIR # restart app server diff --git a/scripts/vagrant/install-dataverse.sh b/scripts/vagrant/install-dataverse.sh deleted file mode 100644 index c9873f7d3ec..00000000000 --- a/scripts/vagrant/install-dataverse.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -if [ ! -z "$1" ]; then - MAILSERVER=$1 - MAILSERVER_ARG="--mailserver $MAILSERVER" -fi -WAR=/dataverse/target/dataverse*.war -if [ ! -f $WAR ]; then - echo "no war file found... building" - #echo "Installing nss on CentOS 6 to avoid java.security.KeyException while building war file: https://github.com/IQSS/dataverse/issues/2744" - #yum install -y nss - su $SUDO_USER -s /bin/sh -c "cd /dataverse && source /etc/profile.d/maven.sh && mvn -q package" -fi -cd /dataverse/scripts/installer - -# move any pre-existing `default.config` file out of the way to avoid overwriting -pid=$$ -if [ -e default.config ]; then - cp default.config tmp-${pid}-default.config -fi - -# Switch to newer Python-based installer -python3 ./install.py --noninteractive --config_file="default.config" - -if [ -e tmp-${pid}-default.config ]; then # if we moved it out, move it back - mv -f tmp-${pid}-default.config default.config -fi - -echo "If "vagrant up" was successful (check output above) Dataverse is running on port 8080 of the Linux machine running within Vagrant, but this port has been forwarded to port 8088 of the computer you ran "vagrant up" on. For this reason you should go to http://localhost:8088 to see the Dataverse app running." - -echo "Please also note that the installation script has now started Payara, but has not set up an autostart mechanism for it.\nTherefore, the next time this VM is started, Payara must be started manually.\nSee https://guides.dataverse.org/en/latest/installation/prerequisites.html#launching-payara-on-system-boot for details." 
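The finch test datasets above now carry an explicit license object rather than relying on an implicit default. If you maintain similar test JSON, the same block can be added programmatically; a small sketch follows (file paths are only examples).

    # Sketch: inject the explicit CC0 1.0 license block (as in dataset-finch1.json above)
    # into a dataset-version JSON document that lacks one.
    import json

    CC0 = {
        "name": "CC0 1.0",
        "uri": "http://creativecommons.org/publicdomain/zero/1.0",
    }

    def add_cc0_license(path_in, path_out):
        with open(path_in) as fh:
            doc = json.load(fh)
        doc["datasetVersion"].setdefault("license", CC0)
        with open(path_out, "w") as fh:
            json.dump(doc, fh, indent=2)

    # e.g. add_cc0_license("dataset-finch1-nolicense.json", "dataset-finch1-cc0.json")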
diff --git a/scripts/vagrant/rpmbuild.sh b/scripts/vagrant/rpmbuild.sh deleted file mode 100755 index f10830afb5b..00000000000 --- a/scripts/vagrant/rpmbuild.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -rpm -Uvh http://dl.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-7.noarch.rpm -yum install -y rpm-build httpd-devel libapreq2-devel R-devel diff --git a/scripts/vagrant/setup-counter-processor.sh b/scripts/vagrant/setup-counter-processor.sh deleted file mode 100755 index a418e8d6251..00000000000 --- a/scripts/vagrant/setup-counter-processor.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -echo "Setting up counter-processor" -echo "Installing dependencies" -yum -y install unzip vim-enhanced -yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -# EPEL provides Python 3.6.6, new enough (3.6.4 in .python-version) -yum -y install python36 jq -# "ensurepip" tip from https://stackoverflow.com/questions/50408941/recommended-way-to-install-pip3-on-centos7/52518512#52518512 -python3.6 -m ensurepip -# FIXME: actually use this dedicated "counter" user. -COUNTER_USER=counter -echo "Ensuring Unix user '$COUNTER_USER' exists" -useradd $COUNTER_USER || : -COMMIT='7974dad259465ba196ef639f48dea007cae8f9ac' -UNZIPPED_DIR="counter-processor-$COMMIT" -if [ ! -e $UNZIPPED_DIR ]; then - ZIP_FILE="${COMMIT}.zip" - echo "Downloading and unzipping $ZIP_FILE" - wget https://github.com/CDLUC3/counter-processor/archive/$ZIP_FILE - unzip $ZIP_FILE -fi -cd $UNZIPPED_DIR -echo "Installation of the GeoLite2 country database for counter-processor can no longer be automated. See the Installation Guide for the manual installation process." -pip3 install -r requirements.txt -# For now, parsing sample_logs/counter_2018-05-08.log -for i in `echo {00..31}`; do - # avoid errors like: No such file or directory: 'sample_logs/counter_2018-05-01.log' - touch sample_logs/counter_2018-05-$i.log -done -#LOG_GLOB="sample_logs/counter_2018-05-*.log" -#START_DATE="2018-05-08" -#END_DATE="2018-05-09" -CONFIG_FILE=/dataverse/scripts/vagrant/counter-processor-config.yaml python3.6 main.py diff --git a/scripts/vagrant/setup-solr.sh b/scripts/vagrant/setup-solr.sh deleted file mode 100755 index 70d3fc632a7..00000000000 --- a/scripts/vagrant/setup-solr.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -echo "Setting up Solr" -dnf install -qy lsof -SOLR_USER=solr -SOLR_HOME=/usr/local/solr -mkdir $SOLR_HOME -chown $SOLR_USER:$SOLR_USER $SOLR_HOME -su $SOLR_USER -s /bin/sh -c "cp /dataverse/downloads/solr-8.11.1.tgz $SOLR_HOME" -su $SOLR_USER -s /bin/sh -c "cd $SOLR_HOME && tar xfz solr-8.11.1.tgz" -su $SOLR_USER -s /bin/sh -c "cd $SOLR_HOME/solr-8.11.1/server/solr && cp -r configsets/_default . 
&& mv _default collection1" -su $SOLR_USER -s /bin/sh -c "cp /dataverse/conf/solr/8.11.1/schema*.xml $SOLR_HOME/solr-8.11.1/server/solr/collection1/conf/" -su $SOLR_USER -s /bin/sh -c "cp /dataverse/conf/solr/8.11.1/solrconfig.xml $SOLR_HOME/solr-8.11.1/server/solr/collection1/conf/solrconfig.xml" -su $SOLR_USER -s /bin/sh -c "cd $SOLR_HOME/solr-8.11.1 && bin/solr start && bin/solr create_core -c collection1 -d server/solr/collection1/conf/" -cp /dataverse/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr /etc/init.d/solr -chmod 755 /etc/init.d/solr -/etc/init.d/solr stop -/etc/init.d/solr start -chkconfig solr on diff --git a/scripts/vagrant/setup.sh b/scripts/vagrant/setup.sh deleted file mode 100644 index 0af4afb22af..00000000000 --- a/scripts/vagrant/setup.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -echo "Installing dependencies for Dataverse" - -# wget seems to be missing in box 'bento/centos-8.2' -dnf install -qy wget - -# python3 and psycopg2 for the Dataverse installer -dnf install -qy python3 python3-psycopg2 - -# JQ -echo "Installing jq for the setup scripts" -dnf install -qy epel-release -dnf install -qy jq - -echo "Adding Shibboleth yum repo" -cp /dataverse/conf/vagrant/etc/yum.repos.d/shibboleth.repo /etc/yum.repos.d -# Uncomment this (and other shib stuff below) if you want -# to use Vagrant (and maybe PageKite) to test Shibboleth. -#yum install -y shibboleth shibboleth-embedded-ds - -# java configuration et alia -dnf install -qy java-11-openjdk-devel httpd mod_ssl unzip -alternatives --set java /usr/lib/jvm/jre-11-openjdk/bin/java -java -version - -# maven included in centos8 requires 1.8.0 - download binary instead -wget -q https://archive.apache.org/dist/maven/maven-3/3.8.2/binaries/apache-maven-3.8.2-bin.tar.gz -tar xfz apache-maven-3.8.2-bin.tar.gz -mkdir /opt/maven -mv apache-maven-3.8.2/* /opt/maven/ -echo "export JAVA_HOME=/usr/lib/jvm/jre-openjdk" > /etc/profile.d/maven.sh -echo "export M2_HOME=/opt/maven" >> /etc/profile.d/maven.sh -echo "export MAVEN_HOME=/opt/maven" >> /etc/profile.d/maven.sh -echo "export PATH=/opt/maven/bin:${PATH}" >> /etc/profile.d/maven.sh -chmod 0755 /etc/profile.d/maven.sh - -# disable centos8 postgresql module and install postgresql13-server -dnf -qy module disable postgresql -dnf install -qy https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm -dnf install -qy postgresql13-server -/usr/pgsql-13/bin/postgresql-13-setup initdb -/usr/bin/systemctl stop postgresql-13 -cp /dataverse/conf/vagrant/var/lib/pgsql/data/pg_hba.conf /var/lib/pgsql/13/data/pg_hba.conf -/usr/bin/systemctl start postgresql-13 -/usr/bin/systemctl enable postgresql-13 - -PAYARA_USER=dataverse -echo "Ensuring Unix user '$PAYARA_USER' exists" -useradd $PAYARA_USER || : -SOLR_USER=solr -echo "Ensuring Unix user '$SOLR_USER' exists" -useradd $SOLR_USER || : -DOWNLOAD_DIR='/dataverse/downloads' -PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2022.3.zip" -SOLR_TGZ="$DOWNLOAD_DIR/solr-8.11.1.tgz" -if [ ! -f $PAYARA_ZIP ] || [ ! -f $SOLR_TGZ ]; then - echo "Couldn't find $PAYARA_ZIP or $SOLR_TGZ! Running download script...." - cd $DOWNLOAD_DIR && ./download.sh && cd - echo "Done running download script." -fi -PAYARA_USER_HOME=~dataverse -PAYARA_ROOT=/usr/local/payara5 -if [ ! 
-d $PAYARA_ROOT ]; then - echo "Copying $PAYARA_ZIP to $PAYARA_USER_HOME and unzipping" - su $PAYARA_USER -s /bin/sh -c "cp $PAYARA_ZIP $PAYARA_USER_HOME" - su $PAYARA_USER -s /bin/sh -c "cd $PAYARA_USER_HOME && unzip -q $PAYARA_ZIP" - # default.config defaults to /usr/local/payara5 so let's go with that - rsync -a $PAYARA_USER_HOME/payara5/ $PAYARA_ROOT/ -else - echo "$PAYARA_ROOT already exists" -fi - -#service shibd start -/usr/bin/systemctl stop httpd -cp /dataverse/conf/httpd/conf.d/dataverse.conf /etc/httpd/conf.d/dataverse.conf -mkdir -p /var/www/dataverse/error-documents -cp /dataverse/conf/vagrant/var/www/dataverse/error-documents/503.html /var/www/dataverse/error-documents -/usr/bin/systemctl start httpd -#curl -k --sslv3 https://pdurbin.pagekite.me/Shibboleth.sso/Metadata > /tmp/pdurbin.pagekite.me -#cp -a /etc/shibboleth/shibboleth2.xml /etc/shibboleth/shibboleth2.xml.orig -#cp -a /etc/shibboleth/attribute-map.xml /etc/shibboleth/attribute-map.xml.orig -# need more attributes, such as sn, givenName, mail -#cp /dataverse/conf/vagrant/etc/shibboleth/attribute-map.xml /etc/shibboleth/attribute-map.xml -# FIXME: automate this? -#curl 'https://www.testshib.org/cgi-bin/sp2config.cgi?dist=Others&hostname=pdurbin.pagekite.me' > /etc/shibboleth/shibboleth2.xml -#cp /dataverse/conf/vagrant/etc/shibboleth/shibboleth2.xml /etc/shibboleth/shibboleth2.xml -#service shibd restart -#curl -k --sslv3 https://pdurbin.pagekite.me/Shibboleth.sso/Metadata > /downloads/pdurbin.pagekite.me -#service httpd restart - -echo "#########################################################################################" -echo "# This is a Vagrant test box, so we're disabling firewalld. # -echo "# Re-enable it with $ sudo systemctl enable firewalld && sudo systemctl start firewalld #" -echo "#########################################################################################" -systemctl disable firewalld -systemctl stop firewalld diff --git a/src/main/docker/Dockerfile b/src/main/docker/Dockerfile new file mode 100644 index 00000000000..88020a118b5 --- /dev/null +++ b/src/main/docker/Dockerfile @@ -0,0 +1,54 @@ +# Copyright 2023 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +################################################################################################################ +# +# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD: +# mvn -Pct clean package +# +################################################################################################################ +# +# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images. +# Most parts origin from older versions of https://github.com/gdcc/dataverse-kubernetes. +# +# We are not using upstream Payara images because: +# - Their image is less optimised for production usage and Dataverse by design choices +# - We provide multi-arch images +# - We provide some tweaks for development and monitoring +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG BASE_IMAGE="gdcc/base:unstable" +FROM $BASE_IMAGE + +# Make Payara use the "ct" profile for MicroProfile Config. Will switch various defaults for the application +# setup in META-INF/microprofile-config.properties. 
+# See also https://download.eclipse.org/microprofile/microprofile-config-3.0/microprofile-config-spec-3.0.html#configprofile +ENV MP_CONFIG_PROFILE=ct + +# Copy app and deps from assembly in proper layers +COPY --chown=payara:payara maven/deps ${DEPLOY_DIR}/dataverse/WEB-INF/lib/ +COPY --chown=payara:payara maven/app ${DEPLOY_DIR}/dataverse/ +COPY --chown=payara:payara maven/supplements ${DEPLOY_DIR}/dataverse/supplements/ +COPY --chown=payara:payara maven/scripts ${SCRIPT_DIR}/ +RUN chmod +x "${SCRIPT_DIR}"/* + +# Create symlinks for jHove +RUN ln -s "${DEPLOY_DIR}/dataverse/supplements/jhove.conf" "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/jhove.conf" && \ + ln -s "${DEPLOY_DIR}/dataverse/supplements/jhoveConfig.xsd" "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/jhoveConfig.xsd" && \ + sed -i "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/jhove.conf" -e "s:/usr/local/payara./glassfish/domains/domain1:${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}:g" + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Application Image" \ + org.opencontainers.image.description="This container image provides the research data repository software Dataverse in a box." \ No newline at end of file diff --git a/src/main/docker/README.md b/src/main/docker/README.md new file mode 100644 index 00000000000..06e2769ed6e --- /dev/null +++ b/src/main/docker/README.md @@ -0,0 +1,62 @@ +# Dataverse Application Container Image + +The "application image" offers you a deployment-ready Dataverse application running on the underlying +application server, which is provided by the [base image](https://hub.docker.com/r/gdcc/base). +Its sole purpose is to bundle the application and any additional material necessary to successfully jumpstart +the application. + +Note: Until all :ref:`jvm-options` are *MicroProfile Config* enabled, it also adds the necessary scripting glue to +configure the applications domain during booting the application server. See :ref:`app-tunables`. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Application Image](https://guides.dataverse.org/en/latest/container/app-image.html) +provides in-depth information about content, building, tuning and so on for this image. You should also consult +the [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) page +for more details on tunable settings, locations, etc. + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org and https://groups.google.com/g/dataverse-community +to ask for help and guidance. 
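As a side note on the MP_CONFIG_PROFILE=ct setting in the Dockerfile above: with a MicroProfile Config profile active, a property prefixed with "%ct." is preferred over the unprefixed default. A toy illustration of that lookup follows (this is not Dataverse or Payara code, and the values are invented).

    # Toy sketch of MicroProfile Config profile resolution: with profile "ct" active,
    # "%ct.<name>" overrides "<name>". The key exists in Dataverse; the values are made up.
    def resolve(props, name, active_profile=None):
        if active_profile:
            profiled = "%" + active_profile + "." + name
            if profiled in props:
                return props[profiled]
        return props.get(name)

    props = {
        "dataverse.fqdn": "dataverse.example.edu",
        "%ct.dataverse.fqdn": "localhost",
    }
    assert resolve(props, "dataverse.fqdn", active_profile="ct") == "localhost"
    assert resolve(props, "dataverse.fqdn") == "dataverse.example.edu"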
+ +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/src/main/docker) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/src/main/docker/Dockerfile)) +- The `alpha` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/src/main/docker/Dockerfile)) + +Within the main repository, you may find the application image files at `/src/main/docker`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/src/main/docker/assembly.xml b/src/main/docker/assembly.xml new file mode 100644 index 00000000000..9f9b39617a3 --- /dev/null +++ b/src/main/docker/assembly.xml @@ -0,0 +1,28 @@ + + + + + target/${project.artifactId}-${project.version} + app + + WEB-INF/lib/**/* + + + + + target/${project.artifactId}-${project.version}/WEB-INF/lib + deps + + + + conf/jhove + supplements + + + + src/main/docker/scripts + scripts + + + \ No newline at end of file diff --git a/src/main/docker/scripts/init_2_configure.sh b/src/main/docker/scripts/init_2_configure.sh new file mode 100755 index 00000000000..a98f08088c1 --- /dev/null +++ b/src/main/docker/scripts/init_2_configure.sh @@ -0,0 +1,64 @@ +#!/bin/bash +################################################################################ +# Configure Payara +# +# BEWARE: As this is done for Kubernetes, we will ALWAYS start with a fresh container! +# When moving to Payara 5+ the option commands are idempotent. +# The resources are to be created by the application on deployment, +# once Dataverse has proper refactoring, etc. 
+################################################################################ + +# Fail on any error +set -euo pipefail + +# Include some sane defaults (which are currently not settable via MicroProfile Config). +# This is an ugly hack and shall be removed once #7000 is resolved. +export dataverse_auth_password__reset__timeout__in__minutes="${dataverse_auth_password__reset__timeout__in__minutes:-60}" +export dataverse_timerServer="${dataverse_timerServer:-true}" +export dataverse_files_storage__driver__id="${dataverse_files_storage__driver__id:-local}" +if [ "${dataverse_files_storage__driver__id}" = "local" ]; then + export dataverse_files_local_type="${dataverse_files_local_type:-file}" + export dataverse_files_local_label="${dataverse_files_local_label:-Local}" + export dataverse_files_local_directory="${dataverse_files_local_directory:-${STORAGE_DIR}/store}" +fi + +# 0. Define postboot commands file to be read by Payara and clear it +DV_POSTBOOT=${PAYARA_DIR}/dataverse_postboot +echo "# Dataverse postboot configuration for Payara" > "${DV_POSTBOOT}" + +# 2. Domain-spaced resources (JDBC, JMS, ...) +# TODO: This is ugly and dirty. It should be replaced with resources from +# EE 8 code annotations or at least glassfish-resources.xml +# NOTE: postboot commands is not multi-line capable, thus spaghetti needed. + +# JavaMail +echo "INFO: Defining JavaMail." +echo "create-javamail-resource --mailhost=${DATAVERSE_MAIL_HOST:-smtp} --mailuser=${DATAVERSE_MAIL_USER:-dataversenotify} --fromaddress=${DATAVERSE_MAIL_FROM:-dataverse@localhost} mail/notifyMailSession" >> "${DV_POSTBOOT}" + +# 3. Domain based configuration options +# Set Dataverse environment variables +echo "INFO: Defining system properties for Dataverse configuration options." +#env | grep -Ee "^(dataverse|doi)_" | sort -fd +env -0 | grep -z -Ee "^(dataverse|doi)_" | while IFS='=' read -r -d '' k v; do + # transform __ to - + # shellcheck disable=SC2001 + KEY=$(echo "${k}" | sed -e "s#__#-#g") + # transform remaining single _ to . + KEY=$(echo "${KEY}" | tr '_' '.') + + # escape colons in values + # shellcheck disable=SC2001 + v=$(echo "${v}" | sed -e 's/:/\\\:/g') + + echo "DEBUG: Handling ${KEY}=${v}." + echo "create-system-properties ${KEY}=${v}" >> "${DV_POSTBOOT}" +done + +# 4. 
Add the commands to the existing postboot file, but insert BEFORE deployment +TMPFILE=$(mktemp) +cat "${DV_POSTBOOT}" "${POSTBOOT_COMMANDS}" > "${TMPFILE}" && mv "${TMPFILE}" "${POSTBOOT_COMMANDS}" +echo "DEBUG: postboot contains the following commands:" +echo "--------------------------------------------------" +cat "${POSTBOOT_COMMANDS}" +echo "--------------------------------------------------" + diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java index f6cbd01ece0..f1bfc3e290b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java @@ -3,11 +3,13 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.InputStream; - -import javax.ejb.EJB; +import jakarta.ejb.EJB; +import jakarta.inject.Inject; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; + +import org.apache.commons.lang3.RandomStringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -17,27 +19,21 @@ public abstract class AbstractGlobalIdServiceBean implements GlobalIdServiceBean private static final Logger logger = Logger.getLogger(AbstractGlobalIdServiceBean.class.getCanonicalName()); - @EJB + @Inject DataverseServiceBean dataverseService; @EJB + protected SettingsServiceBean settingsService; - @EJB - EjbDataverseEngine commandEngine; - @EJB - DatasetServiceBean datasetService; - @EJB - DataFileServiceBean datafileService; - @EJB + @Inject + protected + DvObjectServiceBean dvObjectService; + @Inject SystemConfig systemConfig; + + protected Boolean configured = null; public static String UNAVAILABLE = ":unav"; - @Override - public String getIdentifierForLookup(String protocol, String authority, String identifier) { - logger.log(Level.FINE,"getIdentifierForLookup"); - return protocol + ":" + authority + "/" + identifier; - } - @Override public Map getMetadataForCreateIndicator(DvObject dvObjectIn) { logger.log(Level.FINE,"getMetadataForCreateIndicator(DvObject)"); @@ -101,14 +97,10 @@ protected String getTargetUrl(DvObject dvObjectIn) { @Override public String getIdentifier(DvObject dvObject) { - return dvObject.getGlobalId().asString(); + GlobalId gid = dvObject.getGlobalId(); + return gid != null ? gid.asString() : null; } - protected String getTargetUrl(Dataset datasetIn) { - logger.log(Level.FINE,"getTargetUrl"); - return systemConfig.getDataverseSiteUrl() + Dataset.TARGET_URL + datasetIn.getGlobalIdString(); - } - protected String generateYear (DvObject dvObjectIn){ return dvObjectIn.getYearPublishedCreated(); } @@ -120,16 +112,41 @@ public Map getMetadataForTargetURL(DvObject dvObject) { return metadata; } + @Override + public boolean alreadyRegistered(DvObject dvo) throws Exception { + if(dvo==null) { + logger.severe("Null DvObject sent to alreadyRegistered()."); + return false; + } + GlobalId globalId = dvo.getGlobalId(); + if(globalId == null) { + return false; + } + return alreadyRegistered(globalId, false); + } + + public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; + + /* + * ToDo: the DvObject being sent in provides partial support for the case where + * it has a different authority/protocol than what is configured (i.e. a legacy + * Pid that can actually be updated by the Pid account being used.) 
Removing + * this now would potentially break/make it harder to handle that case prior to + * support for configuring multiple Pid providers. Once that exists, it would be + * cleaner to always find the PidProvider associated with the + * protocol/authority/shoulder of the current dataset and then not pass the + * DvObject as a param. (This would also remove calls to get the settings since + * that would be done at construction.) + */ @Override public DvObject generateIdentifier(DvObject dvObject) { String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol(); String authority = dvObject.getAuthority() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, commandEngine.getContext()); if (dvObject.isInstanceofDataset()) { - dvObject.setIdentifier(datasetService.generateDatasetIdentifier((Dataset) dvObject, idServiceBean)); + dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject)); } else { - dvObject.setIdentifier(datafileService.generateDataFileIdentifier((DataFile) dvObject, idServiceBean)); + dvObject.setIdentifier(generateDataFileIdentifier((DataFile) dvObject)); } if (dvObject.getProtocol() == null) { dvObject.setProtocol(protocol); @@ -140,6 +157,232 @@ public DvObject generateIdentifier(DvObject dvObject) { return dvObject; } + //ToDo just send the DvObject.DType + public String generateDatasetIdentifier(Dataset dataset) { + //ToDo - track these in the bean + String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); + + switch (identifierType) { + case "randomString": + return generateIdentifierAsRandomString(dataset, shoulder); + case "storedProcGenerated": + return generateIdentifierFromStoredProcedureIndependent(dataset, shoulder); + default: + /* Should we throw an exception instead?? -- L.A. 4.6.2 */ + return generateIdentifierAsRandomString(dataset, shoulder); + } + } + + + /** + * Check that a identifier entered by the user is unique (not currently used + * for any other study in this Dataverse Network) also check for duplicate + * in EZID if needed + * @param userIdentifier + * @param dataset + * @return {@code true} if the identifier is unique, {@code false} otherwise. + */ + public boolean isGlobalIdUnique(GlobalId globalId) { + if ( ! dvObjectService.isGlobalIdLocallyUnique(globalId) ) { + return false; // duplication found in local database + } + + // not in local DB, look in the persistent identifier service + try { + return ! alreadyRegistered(globalId, false); + } catch (Exception e){ + //we can live with failure - means identifier not found remotely + } + + return true; + } + + /** + * Parse a Persistent Id and set the protocol, authority, and identifier + * + * Example 1: doi:10.5072/FK2/BYM3IW + * protocol: doi + * authority: 10.5072 + * identifier: FK2/BYM3IW + * + * Example 2: hdl:1902.1/111012 + * protocol: hdl + * authority: 1902.1 + * identifier: 111012 + * + * @param identifierString + * @param separator the string that separates the authority from the identifier. + * @param destination the global id that will contain the parsed data. + * @return {@code destination}, after its fields have been updated, or + * {@code null} if parsing failed. 
+ */ + @Override + public GlobalId parsePersistentId(String fullIdentifierString) { + if(!isConfigured()) { + return null; + } + // Occasionally, the protocol separator character ':' comes in still + // URL-encoded as %3A (usually as a result of the URL having been + // encoded twice): + fullIdentifierString = fullIdentifierString.replace("%3A", ":"); + + int index1 = fullIdentifierString.indexOf(':'); + if (index1 > 0) { // ':' found with one or more characters before it + String protocol = fullIdentifierString.substring(0, index1); + GlobalId globalId = parsePersistentId(protocol, fullIdentifierString.substring(index1+1)); + return globalId; + } + logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", fullIdentifierString); + return null; + } + + protected GlobalId parsePersistentId(String protocol, String identifierString) { + if(!isConfigured()) { + return null; + } + String authority; + String identifier; + if (identifierString == null) { + return null; + } + int index = identifierString.indexOf('/'); + if (index > 0 && (index + 1) < identifierString.length()) { + // '/' found with one or more characters + // before and after it + // Strip any whitespace, ; and ' from authority (should finding them cause a + // failure instead?) + authority = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(0, index)); + if (GlobalIdServiceBean.testforNullTerminator(authority)) { + return null; + } + identifier = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(index + 1)); + if (GlobalIdServiceBean.testforNullTerminator(identifier)) { + return null; + } + } else { + logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string", + identifierString); + return null; + } + return parsePersistentId(protocol, authority, identifier); + } + + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + if(!isConfigured()) { + return null; + } + logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getProviderInformation().get(0)); + if(!GlobalIdServiceBean.isValidGlobalId(protocol, authority, identifier)) { + return null; + } + return new GlobalId(protocol, authority, identifier, getSeparator(), getUrlPrefix(), + getProviderInformation().get(0)); + } + + + public String getSeparator() { + //The standard default + return "/"; + } + + @Override + public String generateDataFileIdentifier(DataFile datafile) { + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + + String prepend = ""; + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){ + //If format is dependent then pre-pend the dataset identifier + prepend = datafile.getOwner().getIdentifier() + "/"; + datafile.setProtocol(datafile.getOwner().getProtocol()); + datafile.setAuthority(datafile.getOwner().getAuthority()); + } else { + //If there's a shoulder prepend independent identifiers with it + prepend = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); + datafile.setProtocol(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol)); + datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority)); + } + + switch (doiIdentifierType) { + case "randomString": + return 
generateIdentifierAsRandomString(datafile, prepend); + case "storedProcGenerated": + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ + return generateIdentifierFromStoredProcedureIndependent(datafile, prepend); + } else { + return generateIdentifierFromStoredProcedureDependent(datafile, prepend); + } + default: + /* Should we throw an exception instead?? -- L.A. 4.6.2 */ + return generateIdentifierAsRandomString(datafile, prepend); + } + } + + + /* + * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service. + * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object) + * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier + */ + private String generateIdentifierAsRandomString(DvObject dvo, String prepend) { + String identifier = null; + do { + identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); + + return identifier; + } + + /* + * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service. + * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object) + * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier + */ + + private String generateIdentifierFromStoredProcedureIndependent(DvObject dvo, String prepend) { + String identifier; + do { + String identifierFromStoredProcedure = dvObjectService.generateNewIdentifierByStoredProcedure(); + // some diagnostics here maybe - is it possible to determine that it's failing + // because the stored procedure hasn't been created in the database? + if (identifierFromStoredProcedure == null) { + return null; + } + identifier = prepend + identifierFromStoredProcedure; + } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); + + return identifier; + } + + /*This method is only used for DataFiles with DEPENDENT Pids. It is not for Datasets + * + */ + private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, String prepend) { + String identifier; + Long retVal; + retVal = Long.valueOf(0L); + //ToDo - replace loops with one lookup for largest entry? (the do loop runs ~n**2/2 calls). The check for existingIdentifiers means this is mostly a local loop now, versus involving db or PidProvider calls, but still...) + + // This will catch identifiers already assigned in the current transaction (e.g. 
+ // in FinalizeDatasetPublicationCommand) that haven't been committed to the db + // without having to make a call to the PIDProvider + Set existingIdentifiers = new HashSet(); + List files = datafile.getOwner().getFiles(); + for(DataFile f:files) { + existingIdentifiers.add(f.getIdentifier()); + } + + do { + retVal++; + identifier = prepend + retVal.toString(); + + } while (existingIdentifiers.contains(identifier) || !isGlobalIdUnique(new GlobalId(datafile.getProtocol(), datafile.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); + + return identifier; + } + + class GlobalIdMetadataTemplate { @@ -159,7 +402,6 @@ public GlobalIdMetadataTemplate(){ private String xmlMetadata; private String identifier; - private String datasetIdentifier; private List datafileIdentifiers; private List creators; private String title; @@ -245,7 +487,7 @@ public String generateXML(DvObject dvObject) { // Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID. publisherYearFinal = this.publisherYear; } - xmlMetadata = template.replace("${identifier}", this.identifier.trim()) + xmlMetadata = template.replace("${identifier}", getIdentifier().trim()) .replace("${title}", this.title) .replace("${publisher}", this.publisher) .replace("${publisherYear}", publisherYearFinal) @@ -371,10 +613,6 @@ public void setIdentifier(String identifier) { this.identifier = identifier; } - public void setDatasetIdentifier(String datasetIdentifier) { - this.datasetIdentifier = datasetIdentifier; - } - public List getCreators() { return creators; } @@ -428,10 +666,6 @@ public String getMetadataFromDvObject(String identifier, Map met DataFile df = (DataFile) dvObject; String fileDescription = df.getDescription(); metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); - String datasetPid = df.getOwner().getGlobalId().asString(); - metadataTemplate.setDatasetIdentifier(datasetPid); - } else { - metadataTemplate.setDatasetIdentifier(""); } metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); @@ -448,5 +682,19 @@ public String getMetadataFromDvObject(String identifier, Map met logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } + + @Override + public boolean canManagePID() { + //The default expectation is that PID providers are configured to manage some set (i.e. 
based on protocol/authority/shoulder) of PIDs + return true; + } + @Override + public boolean isConfigured() { + if(configured==null) { + return false; + } else { + return configured.booleanValue(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/AlternativePersistentIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/AlternativePersistentIdentifier.java index 6fc7262925a..db3c6029a78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AlternativePersistentIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/AlternativePersistentIdentifier.java @@ -3,14 +3,14 @@ import java.io.Serializable; import java.util.Date; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ApiTokenPage.java b/src/main/java/edu/harvard/iq/dataverse/ApiTokenPage.java index 4838847e400..16ff4d266d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ApiTokenPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ApiTokenPage.java @@ -5,14 +5,14 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.api.Util; -import java.sql.Timestamp; + import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; /** * @todo Rename this to ApiTokenFragment? 
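Most of the import hunks in this patch are the mechanical move from the javax.* (Java EE) namespace to the jakarta.* (Jakarta EE) namespace; only the package prefix changes. A minimal before/after sketch, using an illustrative bean that is not part of the patch:

    // Before the migration (javax namespace):
    //   import javax.ejb.Stateless;
    //   import javax.inject.Named;

    // After the migration (jakarta namespace); the annotations and APIs themselves are unchanged:
    import jakarta.ejb.Stateless;
    import jakarta.inject.Named;

    @Stateless
    @Named
    public class ExampleServiceBean {
        // Business logic is untouched; only the package prefix of the EE imports moves.
    }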
The separate page is being taken out diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index a7a89934f47..d03ebbc6f7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -4,16 +4,16 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.Serializable; import java.util.MissingResourceException; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedNativeQueries; -import javax.persistence.NamedNativeQuery; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedNativeQueries; +import jakarta.persistence.NamedNativeQuery; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; /** * @@ -55,7 +55,10 @@ public class AuxiliaryFile implements Serializable { private String formatTag; private String formatVersion; - + + /** + * The application/entity that created the auxiliary file. + */ private String origin; private boolean isPublic; diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 76c91382868..8c96f98ce39 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -14,19 +14,19 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.TypedQuery; -import javax.ws.rs.ClientErrorException; -import javax.ws.rs.InternalServerErrorException; -import javax.ws.rs.ServerErrorException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.TypedQuery; +import jakarta.ws.rs.ClientErrorException; +import jakarta.ws.rs.InternalServerErrorException; +import jakarta.ws.rs.ServerErrorException; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import org.apache.tika.Tika; @@ -70,9 +70,13 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param type how to group the files such as "DP" for "Differentially * @param mediaType user supplied content type (MIME type) * Private Statistics". - * @return success boolean - returns whether the save was successful + * @param save boolean - true to save immediately, false to let the cascade + * do persist to the database. 
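A hedged usage sketch for the new save flag (not part of the patch; the injected bean and local variables shown are hypothetical): save=true persists the AuxiliaryFile immediately as before, while save=false only attaches it to the DataFile so the JPA cascade persists it later.

    // Hypothetical caller; auxiliaryFileService, inputStream, dataFile and mediaType are assumed to exist.
    // Persist immediately (the existing 8-argument overload now simply delegates with save=true):
    AuxiliaryFile savedNow = auxiliaryFileService.processAuxiliaryFile(
            inputStream, dataFile, "dpJson", "v1", "exampleTool", false, "DP", mediaType, true);

    // Defer persistence: the AuxiliaryFile is added to dataFile.getAuxiliaryFiles() and is written
    // to the database only when dataFile itself is persisted via the cascade, so it has no id yet.
    AuxiliaryFile savedLater = auxiliaryFileService.processAuxiliaryFile(
            inputStream, dataFile, "dpJson", "v1", "exampleTool", false, "DP", mediaType, false);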
+ * @return an AuxiliaryFile with an id when save=true (assuming no + * exceptions) or an AuxiliaryFile without an id that will be persisted + * later through the cascade. */ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType, boolean save) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -114,7 +118,14 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile auxFile.setType(type); auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); - auxFile = save(auxFile); + if (save) { + auxFile = save(auxFile); + } else { + if (dataFile.getAuxiliaryFiles() == null) { + dataFile.setAuxiliaryFiles(new ArrayList<>()); + } + dataFile.getAuxiliaryFiles().add(auxFile); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -129,7 +140,11 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } return auxFile; } - + + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + return processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, true); + } + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { Query query = em.createNamedQuery("AuxiliaryFile.lookupAuxiliaryFile"); diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java index 4f465168580..214e26965fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java @@ -4,13 +4,13 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.Serializable; import java.util.Collection; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.OneToMany; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.OneToMany; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java index 91b4128c545..0e757998d58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java @@ -10,10 +10,10 @@ import java.util.Date; import java.util.List; import java.util.logging.Logger; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import 
jakarta.persistence.PersistenceContext; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessageText.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessageText.java index dbae9a6dc27..ea2dd1b41fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessageText.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessageText.java @@ -6,13 +6,13 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java b/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java index 2b342b09610..68c8d49ad7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java @@ -5,14 +5,15 @@ */ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.io.IOException; -import java.io.PrintWriter; -import javax.ejb.EJB; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; + +import jakarta.ejb.EJB; +import jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServlet; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; /** * @@ -21,7 +22,7 @@ public class CitationServlet extends HttpServlet { @EJB - DatasetServiceBean datasetService; + DvObjectServiceBean dvObjectService; /** * Processes requests for both HTTP GET and POST @@ -37,10 +38,14 @@ protected void processRequest(HttpServletRequest request, HttpServletResponse re String persistentId = request.getParameter("persistentId"); if (persistentId != null) { - Dataset ds = datasetService.findByGlobalId(persistentId); - if (ds != null) { - response.sendRedirect("dataset.xhtml?persistentId=" + persistentId); - return; + DvObject dob = dvObjectService.findByGlobalId(PidUtil.parseAsGlobalID(persistentId)); + if (dob != null) { + if (dob instanceof Dataset) { + response.sendRedirect("dataset.xhtml?persistentId=" + persistentId); + } else if (dob instanceof DataFile) { + response.sendRedirect("file.xhtml?persistentId=" + persistentId); + } + return; } } response.sendError(HttpServletResponse.SC_NOT_FOUND); diff --git a/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java b/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java index d51a73fd2dc..bf509c33995 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java @@ -16,10 +16,10 @@ import java.sql.Timestamp; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import java.util.Date; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabAlternate.java b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabAlternate.java index 5d5d9597746..9542cfe3f71 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabAlternate.java +++ b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabAlternate.java @@ -7,15 +7,15 @@ import java.io.Serializable; import java.util.Objects; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java index 181d939f4a1..5dcce98a90f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java @@ -17,16 +17,16 @@ import java.util.Objects; import java.util.logging.Logger; import java.util.MissingResourceException; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; -import javax.persistence.Table; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OneToMany; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueConverter.java b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueConverter.java index 1d530e136ba..eadc13721b3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueConverter.java @@ -5,13 +5,13 @@ */ package edu.harvard.iq.dataverse; -import javax.ejb.EJB; -import javax.enterprise.inject.spi.CDI; +import jakarta.ejb.EJB; +import jakarta.enterprise.inject.spi.CDI; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.convert.Converter; -import javax.faces.convert.FacesConverter; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.convert.Converter; +import jakarta.faces.convert.FacesConverter; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueServiceBean.java index 0e9501414d0..4255c3b2dbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValueServiceBean.java @@ -6,11 +6,11 @@ package edu.harvard.iq.dataverse; import java.util.List; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.TypedQuery; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.TypedQuery; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java index 64723fff79a..2cb6f27c3e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java @@ -1,7 +1,7 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; import java.util.List; -import javax.persistence.*; +import jakarta.persistence.*; import org.hibernate.validator.constraints.NotBlank; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomQuestionResponse.java b/src/main/java/edu/harvard/iq/dataverse/CustomQuestionResponse.java index 32af06014a7..f19ee3c3fc7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomQuestionResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomQuestionResponse.java @@ -7,8 +7,8 @@ import java.io.Serializable; import java.util.List; -import javax.faces.model.SelectItem; -import javax.persistence.*; +import jakarta.faces.model.SelectItem; +import jakarta.persistence.*; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomQuestionValue.java b/src/main/java/edu/harvard/iq/dataverse/CustomQuestionValue.java index a5329c8b96d..f3a6b83b53f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomQuestionValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomQuestionValue.java @@ -1,7 +1,7 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.*; +import jakarta.persistence.*; import org.hibernate.validator.constraints.NotBlank; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomizationFilesServlet.java b/src/main/java/edu/harvard/iq/dataverse/CustomizationFilesServlet.java index 713d365ba0f..9dd524127d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomizationFilesServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomizationFilesServlet.java @@ -14,13 +14,13 @@ import java.io.PrintWriter; import java.nio.file.Path; import java.nio.file.Paths; -import javax.servlet.ServletException; -import javax.servlet.annotation.WebServlet; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; +import jakarta.servlet.ServletException; +import jakarta.servlet.annotation.WebServlet; +import jakarta.servlet.http.HttpServlet; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import javax.ejb.EJB; +import jakarta.ejb.EJB; import org.apache.commons.io.IOUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java index 7ccd4adb78f..7c75b1a4da6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java @@ -7,14 +7,14 @@ import 
java.io.Serializable; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Lob; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Lob; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; import org.hibernate.validator.constraints.NotBlank; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index 218e4c85474..9ecc4a3ecc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -18,11 +18,13 @@ import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.TypedQuery; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.TypedQuery; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -53,7 +55,11 @@ public class DOIDataCiteRegisterService { private DataCiteRESTfullClient getClient() throws IOException { if (client == null) { - client = new DataCiteRESTfullClient(System.getProperty("doi.baseurlstring"), System.getProperty("doi.username"), System.getProperty("doi.password")); + client = new DataCiteRESTfullClient( + JvmSettings.DATACITE_MDS_API_URL.lookup(), + JvmSettings.DATACITE_USERNAME.lookup(), + JvmSettings.DATACITE_PASSWORD.lookup() + ); } return client; } @@ -546,7 +552,7 @@ private String generateRelatedIdentifiers(DvObject dvObject) { datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { - if (!dataFile.getGlobalId().asString().isEmpty()) { + if (dataFile.getGlobalId() != null) { if (sb.toString().isEmpty()) { sb.append(""); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java index e7dd49a6926..48786b41824 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java @@ -3,16 +3,17 @@ import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; -import java.util.ArrayList; import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; @@ -22,7 +23,7 @@ * @author luopc */ @Stateless -public class DOIDataCiteServiceBean extends AbstractGlobalIdServiceBean { +public class DOIDataCiteServiceBean extends DOIServiceBean { private static 
final Logger logger = Logger.getLogger(DOIDataCiteServiceBean.class.getCanonicalName()); @@ -34,41 +35,30 @@ public class DOIDataCiteServiceBean extends AbstractGlobalIdServiceBean { @EJB DOIDataCiteRegisterService doiDataCiteRegisterService; - public DOIDataCiteServiceBean() { - } - @Override public boolean registerWhenPublished() { return false; } - @Override - public boolean alreadyExists(DvObject dvObject) { - if(dvObject==null) { - logger.severe("Null DvObject sent to alreadyExists()."); - return false; - } - return alreadyExists(dvObject.getGlobalId()); - } + @Override - public boolean alreadyExists(GlobalId pid) { - logger.log(Level.FINE,"alreadyExists"); + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { + logger.log(Level.FINE,"alreadyRegistered"); if(pid==null || pid.asString().isEmpty()) { logger.fine("No identifier sent."); return false; } - boolean alreadyExists; + boolean alreadyRegistered; String identifier = pid.asString(); try{ - alreadyExists = doiDataCiteRegisterService.testDOIExists(identifier); + alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier); } catch (Exception e){ - logger.log(Level.WARNING, "alreadyExists failed"); + logger.log(Level.WARNING, "alreadyRegistered failed"); return false; } - return alreadyExists; + return alreadyRegistered; } - @Override public String createIdentifier(DvObject dvObject) throws Exception { @@ -90,10 +80,10 @@ public String createIdentifier(DvObject dvObject) throws Exception { } @Override - public HashMap getIdentifierMetadata(DvObject dvObject) { + public Map getIdentifierMetadata(DvObject dvObject) { logger.log(Level.FINE,"getIdentifierMetadata"); String identifier = getIdentifier(dvObject); - HashMap metadata = new HashMap<>(); + Map metadata = new HashMap<>(); try { metadata = doiDataCiteRegisterService.getMetadata(identifier); } catch (Exception e) { @@ -103,29 +93,6 @@ public HashMap getIdentifierMetadata(DvObject dvObject) { } - /** - * Looks up the metadata for a Global Identifier - * @param protocol the identifier system, e.g. "doi" - * @param authority the namespace that the authority manages in the identifier system - * @param identifier the local identifier part - * @return a Map of metadata. It is empty when the lookup failed, e.g. when - * the identifier does not exist. 
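For orientation only (not code from the patch), a rough sketch of how minting code is expected to consult the renamed alreadyRegistered check; the provider and logger variables are hypothetical, and checked exceptions thrown by some providers are omitted for brevity:

    // Hypothetical caller: only mint and register when the PID is not already known to the provider.
    // GlobalId pid = dvObject.getGlobalId();
    if (!provider.alreadyRegistered(pid, /* noProviderDefault */ false)) {
        provider.createIdentifier(dvObject);   // mint and push metadata to the provider
    } else {
        logger.fine("PID " + pid.asString() + " is already registered; skipping.");
    }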
- */ - @Override - public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) { - logger.log(Level.FINE,"lookupMetadataFromIdentifier"); - String identifierOut = getIdentifierForLookup(protocol, authority, identifier); - HashMap metadata = new HashMap<>(); - try { - metadata = doiDataCiteRegisterService.getMetadata(identifierOut); - } catch (Exception e) { - logger.log(Level.WARNING, "None existing so we can use this identifier"); - logger.log(Level.WARNING, "identifier: {0}", identifierOut); - } - return metadata; - } - - /** * Modifies the DOI metadata for a Dataset * @param dvObject the dvObject whose metadata needs to be modified @@ -219,9 +186,9 @@ public void deleteIdentifier(DvObject dvObject) throws IOException, HttpExceptio private void deleteDraftIdentifier(DvObject dvObject) throws IOException { //ToDo - incorporate into DataCiteRESTfulClient - String baseUrl = systemConfig.getDataCiteRestApiUrlString(); - String username = System.getProperty("doi.username"); - String password = System.getProperty("doi.password"); + String baseUrl = JvmSettings.DATACITE_REST_API_URL.lookup(); + String username = JvmSettings.DATACITE_USERNAME.lookup(); + String password = JvmSettings.DATACITE_PASSWORD.lookup(); GlobalId doi = dvObject.getGlobalId(); /** * Deletes the DOI from DataCite if it can. Returns 204 if PID was deleted @@ -269,13 +236,13 @@ public boolean publicizeIdentifier(DvObject dvObject) { @Override public List getProviderInformation(){ - ArrayList providerInfo = new ArrayList<>(); - String providerName = "DataCite"; - String providerLink = "http://status.datacite.org"; - providerInfo.add(providerName); - providerInfo.add(providerLink); - return providerInfo; + return List.of("DataCite", "https://status.datacite.org"); } + + @Override + protected String getProviderKeyName() { + return "DataCite"; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java index d21caf32411..86b74b72f30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java @@ -1,44 +1,57 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.ucsb.nceas.ezid.EZIDException; import edu.ucsb.nceas.ezid.EZIDService; -import edu.ucsb.nceas.ezid.EZIDServiceRequest; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.Stateless; + +import jakarta.ejb.Stateless; /** * * @author skraffmiller */ @Stateless -public class DOIEZIdServiceBean extends AbstractGlobalIdServiceBean { - +public class DOIEZIdServiceBean extends DOIServiceBean { + + private static final Logger logger = Logger.getLogger(DOIEZIdServiceBean.class.getCanonicalName()); + EZIDService ezidService; - EZIDServiceRequest ezidServiceRequest; - String baseURLString = "https://ezid.cdlib.org"; - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dvn.core.index.DOIEZIdServiceBean"); - - // get username and password from system properties - private String USERNAME = ""; - private String PASSWORD = ""; - + + // This has a sane default in microprofile-config.properties + private final String baseUrl = JvmSettings.EZID_API_URL.lookup(); + public DOIEZIdServiceBean() { - logger.log(Level.FINE,"Constructor"); - baseURLString = System.getProperty("doi.baseurlstring"); - ezidService = new EZIDService(baseURLString); - USERNAME = 
System.getProperty("doi.username"); - PASSWORD = System.getProperty("doi.password"); - logger.log(Level.FINE, "Using baseURLString {0}", baseURLString); + // Creating the service doesn't do any harm, just initializing some object data here. + // Makes sure we don't run into NPEs from the other methods, but will obviously fail if the + // login below does not work. + this.ezidService = new EZIDService(this.baseUrl); + try { - ezidService.login(USERNAME, PASSWORD); + // These have (obviously) no default, but still are optional to make the provider optional + String username = JvmSettings.EZID_USERNAME.lookupOptional().orElse(null); + String password = JvmSettings.EZID_PASSWORD.lookupOptional().orElse(null); + + if (username != null ^ password != null) { + logger.log(Level.WARNING, "You must give both username and password. Will not try to login."); + } + + if (username != null && password != null) { + this.ezidService.login(username, password); + this.configured = true; + } } catch (EZIDException e) { - logger.log(Level.WARNING, "login failed "); + // We only do the warnings here, but the object still needs to be created. + // The EJB stateless thing expects this to go through, and it is requested on any + // global id parsing. + logger.log(Level.WARNING, "Login failed to {0}", this.baseUrl); logger.log(Level.WARNING, "Exception String: {0}", e.toString()); - logger.log(Level.WARNING, "localized message: {0}", e.getLocalizedMessage()); - logger.log(Level.WARNING, "cause: ", e.getCause()); - logger.log(Level.WARNING, "message {0}", e.getMessage()); + logger.log(Level.WARNING, "Localized message: {0}", e.getLocalizedMessage()); + logger.log(Level.WARNING, "Cause:", e.getCause()); + logger.log(Level.WARNING, "Message {0}", e.getMessage()); + // TODO: is this antipattern really necessary? } catch (Exception e) { logger.log(Level.SEVERE, "Other Error on ezidService.login(USERNAME, PASSWORD) - not EZIDException ", e.getMessage()); } @@ -50,19 +63,10 @@ public boolean registerWhenPublished() { } @Override - public boolean alreadyExists(DvObject dvObject) throws Exception { - if(dvObject==null) { - logger.severe("Null DvObject sent to alreadyExists()."); - return false; - } - return alreadyExists(dvObject.getGlobalId()); - } - - @Override - public boolean alreadyExists(GlobalId pid) throws Exception { - logger.log(Level.FINE,"alreadyExists"); + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { + logger.log(Level.FINE,"alreadyRegistered"); try { - HashMap result = ezidService.getMetadata(pid.asString()); + HashMap result = ezidService.getMetadata(pid.asString()); return result != null && !result.isEmpty(); // TODO just check for HTTP status code 200/404, sadly the status code is swept under the carpet } catch (EZIDException e ){ @@ -74,7 +78,7 @@ public boolean alreadyExists(GlobalId pid) throws Exception { if (e.getLocalizedMessage().contains("no such identifier")){ return false; } - logger.log(Level.WARNING, "alreadyExists failed"); + logger.log(Level.WARNING, "alreadyRegistered failed"); logger.log(Level.WARNING, "getIdentifier(dvObject) {0}", pid.asString()); logger.log(Level.WARNING, "String {0}", e.toString()); logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage()); @@ -102,32 +106,6 @@ public Map getIdentifierMetadata(DvObject dvObject) { return metadata; } - /** - * Looks up the metadata for a Global Identifier - * - * @param protocol the identifier system, e.g. 
"doi" - * @param authority the namespace that the authority manages in the - * identifier system - * identifier part - * @param identifier the local identifier part - * @return a Map of metadata. It is empty when the lookup failed, e.g. when - * the identifier does not exist. - */ - @Override - public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) { - logger.log(Level.FINE,"lookupMetadataFromIdentifier"); - String identifierOut = getIdentifierForLookup(protocol, authority, identifier); - HashMap metadata = new HashMap<>(); - try { - metadata = ezidService.getMetadata(identifierOut); - } catch (EZIDException e) { - logger.log(Level.FINE, "None existing so we can use this identifier"); - logger.log(Level.FINE, "identifier: {0}", identifierOut); - return metadata; - } - return metadata; - } - /** * Modifies the EZID metadata for a Dataset * @@ -249,12 +227,7 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) { @Override public List getProviderInformation(){ - ArrayList providerInfo = new ArrayList<>(); - String providerName = "EZID"; - String providerLink = baseURLString; - providerInfo.add(providerName); - providerInfo.add(providerLink); - return providerInfo; + return List.of("EZID", this.baseUrl); } @Override @@ -301,5 +274,10 @@ private HashMap asHashMap(Map map) { return (map instanceof HashMap) ? (HashMap)map : new HashMap<>(map); } + @Override + protected String getProviderKeyName() { + return "EZID"; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java new file mode 100644 index 00000000000..0182c745cd0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java @@ -0,0 +1,78 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; + +public abstract class DOIServiceBean extends AbstractGlobalIdServiceBean { + + public static final String DOI_PROTOCOL = "doi"; + public static final String DOI_RESOLVER_URL = "https://doi.org/"; + public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; + public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; + public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; + + public DOIServiceBean() { + super(); + } + + @Override + public GlobalId parsePersistentId(String pidString) { + if (pidString.startsWith(DOI_RESOLVER_URL)) { + pidString = pidString.replace(DOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } else if (pidString.startsWith(HTTP_DOI_RESOLVER_URL)) { + pidString = pidString.replace(HTTP_DOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } else if (pidString.startsWith(DXDOI_RESOLVER_URL)) { + pidString = pidString.replace(DXDOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } + return super.parsePersistentId(pidString); + } + + @Override + public GlobalId parsePersistentId(String protocol, String identifierString) { + + if (!DOI_PROTOCOL.equals(protocol)) { + return null; + } + GlobalId globalId = super.parsePersistentId(protocol, identifierString); + if (globalId!=null && !GlobalIdServiceBean.checkDOIAuthority(globalId.getAuthority())) { + return null; + } + return globalId; + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + + if (!DOI_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + public String getUrlPrefix() { + return DOI_RESOLVER_URL; + } + + 
@Override + public boolean isConfigured() { + if (configured == null) { + if (getProviderKeyName() == null) { + configured = false; + } else { + String doiProvider = settingsService.getValueForKey(Key.DoiProvider, ""); + if (getProviderKeyName().equals(doiProvider)) { + configured = true; + } else if (!doiProvider.isEmpty()) { + configured = false; + } + } + } + return super.isConfigured(); + } + + protected String getProviderKeyName() { + return null; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java b/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java index 5b6cdd23775..c37c3f52bc7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java @@ -5,23 +5,21 @@ */ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAISet; import edu.harvard.iq.dataverse.harvest.server.OAISetServiceBean; -import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.application.FacesMessage; -import javax.faces.context.FacesContext; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.context.FacesContext; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; /** * @@ -97,12 +95,8 @@ public int getNumberOfConfiguredHarvestClients() { } public long getNumberOfHarvestedDatasets() { - List configuredHarvestingClients = harvestingClientService.getAllHarvestingClients(); - if (configuredHarvestingClients == null || configuredHarvestingClients.isEmpty()) { - return 0L; - } - Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetByClients(configuredHarvestingClients); + Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetsByAllClients(); if (numOfDatasets != null && numOfDatasets > 0L) { return numOfDatasets; @@ -142,7 +136,7 @@ public String getHarvestClientsInfoLabel() { infoLabel = configuredHarvestingClients.size() + " harvesting clients configured; "; } - Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetByClients(configuredHarvestingClients); + Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetsByAllClients(); if (numOfDatasets != null && numOfDatasets > 0L) { return infoLabel + numOfDatasets + " harvested datasets"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index abe3cc3e6d7..9b4b89db44f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -14,7 +14,6 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; -import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -27,7 +26,7 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; -import javax.ejb.EJBException; +import jakarta.ejb.EJBException; import 
javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; @@ -57,7 +56,7 @@ public class DataCitation { private String publisher; private boolean direct; private List funders; - private String seriesTitle; + private List seriesTitles; private String description; private List datesOfCollection; private List keywords; @@ -135,7 +134,7 @@ private void getCommonValuesFrom(DatasetVersion dsv) { datesOfCollection = dsv.getDatesOfCollection(); title = dsv.getTitle(); - seriesTitle = dsv.getSeriesTitle(); + seriesTitles = dsv.getSeriesTitles(); keywords = dsv.getKeywords(); languages = dsv.getLanguages(); spatialCoverages = dsv.getSpatialCoverages(); @@ -207,7 +206,7 @@ public String toString(boolean html, boolean anonymized) { if (persistentId != null) { // always show url format - citationList.add(formatURL(persistentId.toURL().toString(), persistentId.toURL().toString(), html)); + citationList.add(formatURL(persistentId.asURL(), persistentId.asURL(), html)); } citationList.add(formatString(publisher, html)); citationList.add(version); @@ -298,7 +297,7 @@ public void writeAsBibtexCitation(OutputStream os) throws IOException { out.write(persistentId.getIdentifier()); out.write("},\r\n"); out.write("url = {"); - out.write(persistentId.toURL().toString()); + out.write(persistentId.asURL()); out.write("}\r\n"); out.write("}\r\n"); out.flush(); @@ -330,8 +329,10 @@ public void writeAsRISCitation(OutputStream os) throws IOException { out.write("TY - DATA" + "\r\n"); out.write("T1 - " + getTitle() + "\r\n"); } - if (seriesTitle != null) { - out.write("T3 - " + seriesTitle + "\r\n"); + if (seriesTitles != null) { + for (String seriesTitle : seriesTitles) { + out.write("T3 - " + seriesTitle + "\r\n"); + } } /* Removing abstract/description per Request from G. 
King in #3759 if(description!=null) { @@ -387,7 +388,7 @@ public void writeAsRISCitation(OutputStream os) throws IOException { out.write("SE - " + date + "\r\n"); - out.write("UR - " + persistentId.toURL().toString() + "\r\n"); + out.write("UR - " + persistentId.asURL() + "\r\n"); out.write("PB - " + publisher + "\r\n"); // a DataFile citation also includes filename und UNF, if applicable: @@ -505,12 +506,22 @@ private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException { xmlw.writeCharacters(title); xmlw.writeEndElement(); // title } - - if (seriesTitle != null) { - xmlw.writeStartElement("tertiary-title"); - xmlw.writeCharacters(seriesTitle); + + /* + If I say just !"isEmpty" for series titles I get a failure + on testToEndNoteString_withoutTitleAndAuthor + with a null pointer on build -SEK 3/31/23 + */ + if (seriesTitles != null && !seriesTitles.isEmpty() ) { + xmlw.writeStartElement("tertiary-titles"); + for (String seriesTitle : seriesTitles){ + xmlw.writeStartElement("tertiary-title"); + xmlw.writeCharacters(seriesTitle); + xmlw.writeEndElement(); // tertiary-title + } xmlw.writeEndElement(); // tertiary-title } + xmlw.writeEndElement(); // titles xmlw.writeStartElement("section"); @@ -584,7 +595,7 @@ private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException { xmlw.writeStartElement("urls"); xmlw.writeStartElement("related-urls"); xmlw.writeStartElement("url"); - xmlw.writeCharacters(getPersistentId().toURL().toString()); + xmlw.writeCharacters(getPersistentId().asURL()); xmlw.writeEndElement(); // url xmlw.writeEndElement(); // related-urls xmlw.writeEndElement(); // urls @@ -781,18 +792,13 @@ private GlobalId getPIDFrom(DatasetVersion dsv, DvObject dv) { || HarvestingClient.HARVEST_STYLE_ICPSR.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) || HarvestingClient.HARVEST_STYLE_DATAVERSE .equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())) { - // creating a global id like this: - // persistentId = new GlobalId(dv.getGlobalId()); - // you end up doing new GlobalId((New GlobalId(dv)).toString()) - // - doing an extra formatting-and-parsing-again - // This achieves the same thing: if(!isDirect()) { if (!StringUtils.isEmpty(dsv.getDataset().getIdentifier())) { - return new GlobalId(dsv.getDataset()); + return dsv.getDataset().getGlobalId(); } } else { if (!StringUtils.isEmpty(dv.getIdentifier())) { - return new GlobalId(dv); + return dv.getGlobalId(); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index cb43dff0e20..0f83ae3c5c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -5,12 +5,11 @@ import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.annotations.Expose; -import com.google.gson.annotations.SerializedName; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; +import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import edu.harvard.iq.dataverse.ingest.IngestReport; import edu.harvard.iq.dataverse.ingest.IngestRequest; @@ -19,21 +18,21 @@ import edu.harvard.iq.dataverse.util.ShapefileHandler; import 
edu.harvard.iq.dataverse.util.StringUtil; import java.io.IOException; +import java.util.Date; import java.util.List; import java.util.ArrayList; import java.util.Objects; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.Files; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import java.util.Set; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonArrayBuilder; -import javax.persistence.*; -import javax.validation.constraints.Pattern; +import java.util.stream.Collectors; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.persistence.*; +import jakarta.validation.constraints.Pattern; import org.hibernate.validator.constraints.NotBlank; /** @@ -47,9 +46,9 @@ query = "SELECT o FROM DataFile o WHERE o.creator.id=:creatorId"), @NamedQuery(name = "DataFile.findByReleaseUserId", query = "SELECT o FROM DataFile o WHERE o.releaseUser.id=:releaseUserId"), - @NamedQuery(name="DataFile.findDataFileByIdProtocolAuth", + @NamedQuery(name="DataFile.findDataFileByIdProtocolAuth", query="SELECT s FROM DataFile s WHERE s.identifier=:identifier AND s.protocol=:protocol AND s.authority=:authority"), - @NamedQuery(name="DataFile.findDataFileThatReplacedId", + @NamedQuery(name="DataFile.findDataFileThatReplacedId", query="SELECT s.id FROM DataFile s WHERE s.previousDataFileId=:identifier") }) @Entity @@ -73,7 +72,10 @@ public class DataFile extends DvObject implements Comparable { @Column( nullable = false ) @Pattern(regexp = "^.*/.*$", message = "{contenttype.slash}") private String contentType; - + + public void setFileAccessRequests(List fileAccessRequests) { + this.fileAccessRequests = fileAccessRequests; + } // @Expose // @SerializedName("storageIdentifier") @@ -416,7 +418,7 @@ public String getIngestReportMessage() { return ingestReports.get(0).getReport(); } } - return "Ingest failed. 
No further information is available."; + return BundleUtil.getStringFromBundle("file.ingestFailed"); } public boolean isTabularData() { @@ -569,7 +571,7 @@ public FileMetadata getLatestPublishedFileMetadata() throws UnsupportedOperation if(fmd == null) { throw new UnsupportedOperationException("No published metadata version for DataFile " + this.getId()); } - + return fmd; } @@ -747,22 +749,71 @@ public String getUnf() { } return null; } - - @ManyToMany - @JoinTable(name = "fileaccessrequests", - joinColumns = @JoinColumn(name = "datafile_id"), - inverseJoinColumns = @JoinColumn(name = "authenticated_user_id")) - private List fileAccessRequesters; + @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, orphanRemoval = true) + private List fileAccessRequests; - public List getFileAccessRequesters() { - return fileAccessRequesters; + public List getFileAccessRequests() { + return fileAccessRequests; } - public void setFileAccessRequesters(List fileAccessRequesters) { - this.fileAccessRequesters = fileAccessRequesters; + public void addFileAccessRequester(AuthenticatedUser authenticatedUser) { + if (this.fileAccessRequests == null) { + this.fileAccessRequests = new ArrayList<>(); + } + + Set existingUsers = this.fileAccessRequests.stream() + .map(FileAccessRequest::getAuthenticatedUser) + .collect(Collectors.toSet()); + + if (existingUsers.contains(authenticatedUser)) { + return; + } + + FileAccessRequest request = new FileAccessRequest(); + request.setCreationTime(new Date()); + request.setDataFile(this); + request.setAuthenticatedUser(authenticatedUser); + + FileAccessRequest.FileAccessRequestKey key = new FileAccessRequest.FileAccessRequestKey(); + key.setAuthenticatedUser(authenticatedUser.getId()); + key.setDataFile(this.getId()); + + request.setId(key); + + this.fileAccessRequests.add(request); } - + + public boolean removeFileAccessRequester(RoleAssignee roleAssignee) { + if (this.fileAccessRequests == null) { + return false; + } + + FileAccessRequest request = this.fileAccessRequests.stream() + .filter(fileAccessRequest -> fileAccessRequest.getAuthenticatedUser().equals(roleAssignee)) + .findFirst() + .orElse(null); + + if (request != null) { + this.fileAccessRequests.remove(request); + return true; + } + + return false; + } + + public boolean containsFileAccessRequestFromUser(RoleAssignee roleAssignee) { + if (this.fileAccessRequests == null) { + return false; + } + + Set existingUsers = this.fileAccessRequests.stream() + .map(FileAccessRequest::getAuthenticatedUser) + .collect(Collectors.toSet()); + + return existingUsers.contains(roleAssignee); + } + public boolean isHarvested() { Dataset ownerDataset = this.getOwner(); @@ -956,7 +1007,7 @@ public JsonObject asGsonObject(boolean prettyPrint){ // https://github.com/IQSS/dataverse/issues/761, https://github.com/IQSS/dataverse/issues/2110, https://github.com/IQSS/dataverse/issues/3191 // datasetMap.put("title", thisFileMetadata.getDatasetVersion().getTitle()); - datasetMap.put("persistentId", getOwner().getGlobalIdString()); + datasetMap.put("persistentId", getOwner().getGlobalId().asString()); datasetMap.put("url", getOwner().getPersistentURL()); datasetMap.put("version", thisFileMetadata.getDatasetVersion().getSemanticVersion()); datasetMap.put("id", getOwner().getId()); @@ -1034,6 +1085,10 @@ public String getCreateDateFormattedYYYYMMDD() { return null; } + @Override + public String getTargetUrl() { + return DataFile.TARGET_URL; + } } // end of class diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DataFileCategory.java b/src/main/java/edu/harvard/iq/dataverse/DataFileCategory.java index f569a69b13a..f5abe9ac78a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileCategory.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileCategory.java @@ -10,16 +10,16 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToMany; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToMany; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java index 3fa4691a6dd..29dcb22c3ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java @@ -3,8 +3,8 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; -import javax.ejb.EJB; -import javax.ejb.Stateless; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileConverter.java b/src/main/java/edu/harvard/iq/dataverse/DataFileConverter.java index 18531f5203d..701e826f12e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileConverter.java @@ -1,13 +1,13 @@ package edu.harvard.iq.dataverse; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.enterprise.inject.spi.CDI; +import jakarta.ejb.EJB; +import jakarta.enterprise.inject.spi.CDI; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.convert.Converter; -import javax.faces.convert.FacesConverter; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.convert.Converter; +import jakarta.faces.convert.FacesConverter; @FacesConverter("dataFileConverter") public class DataFileConverter implements Converter { diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 0b935183182..98ee3351458 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,7 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.AccessRequest; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -11,34 +9,28 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import 
edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.ejb.TransactionAttribute; -import javax.ejb.TransactionAttributeType; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.StoredProcedureQuery; -import javax.persistence.TypedQuery; -import org.apache.commons.lang3.RandomStringUtils; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.TypedQuery; /** * @@ -73,7 +65,7 @@ public class DataFileServiceBean implements java.io.Serializable { // Assorted useful mime types: // 3rd-party and/or proprietary tabular data formasts that we know - // how to ingest: + // how to ingest: private static final String MIME_TYPE_STATA = "application/x-stata"; private static final String MIME_TYPE_STATA13 = "application/x-stata-13"; @@ -155,7 +147,7 @@ public DataFile find(Object pk) { }*/ public DataFile findByGlobalId(String globalId) { - return (DataFile) dvObjectService.findByGlobalId(globalId, DataFile.DATAFILE_DTYPE_STRING); + return (DataFile) dvObjectService.findByGlobalId(globalId, DvObject.DType.DataFile); } public List findByCreatorId(Long creatorId) { @@ -199,6 +191,18 @@ public List findByDatasetId(Long studyId) { .setParameter("studyId", studyId).getResultList(); } + /** + * + * @param collectionId numeric id of the parent collection ("dataverse") + * @return list of files in the datasets that are *direct* children of the collection specified + * (i.e., no datafiles in sub-collections of this collection will be included) + */ + public List findByDirectCollectionOwner(Long collectionId) { + String queryString = "select f from DataFile f, Dataset d where f.owner.id = d.id and d.owner.id = :collectionId order by f.id"; + return em.createQuery(queryString, DataFile.class) + .setParameter("collectionId", collectionId).getResultList(); + } + public List findAllRelatedByRootDatafileId(Long datafileId) { /* Get all files with the same root datafile id @@ -357,7 +361,7 @@ public DataFile findCheapAndEasy(Long id) { Object[] result; try { - result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t3.ID, t2.AUTHORITY, t2.IDENTIFIER, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.AUTHORITY, T0.PROTOCOL, T0.IDENTIFIER FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE 
((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult(); + result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t3.ID, t2.AUTHORITY, t2.IDENTIFIER, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.AUTHORITY, T0.PROTOCOL, T0.IDENTIFIER, t2.PROTOCOL FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE ((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult(); } catch (Exception ex) { return null; } @@ -501,7 +505,9 @@ public DataFile findCheapAndEasy(Long id) { if (identifier != null) { dataFile.setIdentifier(identifier); } - + + owner.setProtocol((String) result[25]); + dataFile.setOwner(owner); // If content type indicates it's tabular data, spend 2 extra queries @@ -559,365 +565,6 @@ public DataFile findCheapAndEasy(Long id) { return dataFile; } - /* - * This is an experimental method for populating the versions of - * the datafile with the filemetadatas, optimized for making as few db - * queries as possible. - * It should only be used to retrieve filemetadata for the DatasetPage! - * It is not guaranteed to adequately perform anywhere else. - */ - - public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion version, AuthenticatedUser au) { - List dataFiles = new ArrayList<>(); - List dataTables = new ArrayList<>(); - //List retList = new ArrayList<>(); - - // TODO: - // replace these maps with simple lists and run binary search on them. 
-- 4.2.1 - - Map userMap = new HashMap<>(); - Map filesMap = new HashMap<>(); - Map datatableMap = new HashMap<>(); - Map categoryMap = new HashMap<>(); - Map> fileTagMap = new HashMap<>(); - List accessRequestFileIds = new ArrayList(); - - List fileTagLabels = DataFileTag.listTags(); - - - int i = 0; - //Cache responses - Map embargoMap = new HashMap(); - - List dataTableResults = em.createNativeQuery("SELECT t0.ID, t0.DATAFILE_ID, t0.UNF, t0.CASEQUANTITY, t0.VARQUANTITY, t0.ORIGINALFILEFORMAT, t0.ORIGINALFILESIZE, t0.ORIGINALFILENAME FROM dataTable t0, dataFile t1, dvObject t2 WHERE ((t0.DATAFILE_ID = t1.ID) AND (t1.ID = t2.ID) AND (t2.OWNER_ID = " + owner.getId() + ")) ORDER BY t0.ID").getResultList(); - - for (Object[] result : dataTableResults) { - DataTable dataTable = new DataTable(); - long fileId = ((Number) result[1]).longValue(); - - dataTable.setId(((Number) result[1]).longValue()); - - dataTable.setUnf((String)result[2]); - - dataTable.setCaseQuantity((Long)result[3]); - - dataTable.setVarQuantity((Long)result[4]); - - dataTable.setOriginalFileFormat((String)result[5]); - - dataTable.setOriginalFileSize((Long)result[6]); - - dataTable.setOriginalFileName((String)result[7]); - - dataTables.add(dataTable); - datatableMap.put(fileId, i++); - - } - - logger.fine("Retrieved "+dataTables.size()+" DataTable objects."); - - List dataTagsResults = em.createNativeQuery("SELECT t0.DATAFILE_ID, t0.TYPE FROM DataFileTag t0, dvObject t1 WHERE (t1.ID = t0.DATAFILE_ID) AND (t1.OWNER_ID="+ owner.getId() + ")").getResultList(); - for (Object[] result : dataTagsResults) { - Long datafile_id = (Long) result[0]; - Integer tagtype_id = (Integer) result[1]; - if (fileTagMap.get(datafile_id) == null) { - fileTagMap.put(datafile_id, new HashSet<>()); - } - fileTagMap.get(datafile_id).add(tagtype_id); - } - logger.fine("Retrieved "+dataTagsResults.size()+" data tags."); - dataTagsResults = null; - - //Only need to check for access requests if there is an authenticated user - if (au != null) { - List accessRequests = em.createNativeQuery("SELECT t0.ID FROM DVOBJECT t0, FILEACCESSREQUESTS t1 WHERE t1.datafile_id = t0.id and t0.OWNER_ID = " + owner.getId() + " and t1.AUTHENTICATED_USER_ID = " + au.getId() + " ORDER BY t0.ID").getResultList(); - for (Object result : accessRequests) { - accessRequestFileIds.add(Long.valueOf((Integer)result)); - } - logger.fine("Retrieved " + accessRequests.size() + " access requests."); - accessRequests = null; - } - - i = 0; - - List fileResults = em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.PROTOCOL, t0.AUTHORITY, t0.IDENTIFIER, t1.EMBARGO_ID FROM DVOBJECT t0, DATAFILE t1 WHERE ((t0.OWNER_ID = " + owner.getId() + ") AND ((t1.ID = t0.ID) AND (t0.DTYPE = 'DataFile'))) ORDER BY t0.ID").getResultList(); - - for (Object[] result : fileResults) { - Integer file_id = (Integer) result[0]; - - DataFile dataFile = new DataFile(); - dataFile.setMergeable(false); - - dataFile.setId(file_id.longValue()); - - Timestamp createDate = (Timestamp) result[1]; - Timestamp indexTime = (Timestamp) result[2]; - Timestamp modificationTime = (Timestamp) result[3]; - Timestamp permissionIndexTime = (Timestamp) result[4]; - Timestamp permissionModificationTime = (Timestamp) result[5]; - 
Timestamp publicationDate = (Timestamp) result[6]; - - dataFile.setCreateDate(createDate); - dataFile.setIndexTime(indexTime); - dataFile.setModificationTime(modificationTime); - dataFile.setPermissionIndexTime(permissionIndexTime); - dataFile.setPermissionModificationTime(permissionModificationTime); - dataFile.setPublicationDate(publicationDate); - - Long creatorId = (Long) result[7]; - if (creatorId != null) { - AuthenticatedUser creator = userMap.get(creatorId); - if (creator == null) { - creator = userService.find(creatorId); - if (creator != null) { - userMap.put(creatorId, creator); - } - } - if (creator != null) { - dataFile.setCreator(creator); - } - } - - dataFile.setOwner(owner); - - Long releaseUserId = (Long) result[8]; - if (releaseUserId != null) { - AuthenticatedUser releaseUser = userMap.get(releaseUserId); - if (releaseUser == null) { - releaseUser = userService.find(releaseUserId); - if (releaseUser != null) { - userMap.put(releaseUserId, releaseUser); - } - } - if (releaseUser != null) { - dataFile.setReleaseUser(releaseUser); - } - } - - String contentType = (String) result[9]; - - if (contentType != null) { - dataFile.setContentType(contentType); - } - - String storageIdentifier = (String) result[10]; - - if (storageIdentifier != null) { - dataFile.setStorageIdentifier(storageIdentifier); - } - - Long fileSize = (Long) result[11]; - - if (fileSize != null) { - dataFile.setFilesize(fileSize); - } - - if (result[12] != null) { - String ingestStatusString = (String) result[12]; - dataFile.setIngestStatus(ingestStatusString.charAt(0)); - } - - String md5 = (String) result[13]; - - if (md5 != null) { - dataFile.setChecksumValue(md5); - } - - Boolean restricted = (Boolean) result[14]; - if (restricted != null) { - dataFile.setRestricted(restricted); - } - - String checksumType = (String) result[15]; - if (checksumType != null) { - try { - // In the database we store "SHA1" rather than "SHA-1". - DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType); - dataFile.setChecksumType(typeFromStringInDatabase); - } catch (IllegalArgumentException ex) { - logger.info("Exception trying to convert " + checksumType + " to enum: " + ex); - } - } - - Long previousDataFileId = (Long) result[16]; - if (previousDataFileId != null) { - dataFile.setPreviousDataFileId(previousDataFileId); - } - - Long rootDataFileId = (Long) result[17]; - if (rootDataFileId != null) { - dataFile.setRootDataFileId(rootDataFileId); - } - - String protocol = (String) result[18]; - if (protocol != null) { - dataFile.setProtocol(protocol); - } - - String authority = (String) result[19]; - if (authority != null) { - dataFile.setAuthority(authority); - } - - String identifier = (String) result[20]; - if (identifier != null) { - dataFile.setIdentifier(identifier); - } - - Long embargo_id = (Long) result[21]; - if (embargo_id != null) { - if (embargoMap.containsKey(embargo_id)) { - dataFile.setEmbargo(embargoMap.get(embargo_id)); - } else { - Embargo e = embargoService.findByEmbargoId(embargo_id); - dataFile.setEmbargo(e); - embargoMap.put(embargo_id, e); - } - } - - // TODO: - // - if ingest status is "bad", look up the ingest report; - // - is it a dedicated thumbnail for the dataset? (do we ever need that info?? - not on the dataset page, I don't think...) - - // Is this a tabular file? 
- - if (datatableMap.get(dataFile.getId()) != null) { - dataTables.get(datatableMap.get(dataFile.getId())).setDataFile(dataFile); - dataFile.setDataTable(dataTables.get(datatableMap.get(dataFile.getId()))); - - } - - if (fileTagMap.get(dataFile.getId()) != null) { - for (Integer tag_id : fileTagMap.get(dataFile.getId())) { - DataFileTag tag = new DataFileTag(); - tag.setTypeByLabel(fileTagLabels.get(tag_id)); - tag.setDataFile(dataFile); - dataFile.addTag(tag); - } - } - - if (dataFile.isRestricted() && accessRequestFileIds.contains(dataFile.getId())) { - dataFile.setFileAccessRequesters(Collections.singletonList(au)); - } - - dataFiles.add(dataFile); - filesMap.put(dataFile.getId(), i++); - } - fileResults = null; - - logger.fine("Retrieved and cached "+i+" datafiles."); - - i = 0; - for (DataFileCategory fileCategory : owner.getCategories()) { - //logger.fine("category: id="+fileCategory.getId()); - categoryMap.put(fileCategory.getId(), i++); - } - - logger.fine("Retrieved "+i+" file categories attached to the dataset."); - - version.setFileMetadatas(retrieveFileMetadataForVersion(owner, version, dataFiles, filesMap, categoryMap)); - logger.fine("Retrieved " + version.getFileMetadatas().size() + " filemetadatas for the version " + version.getId()); - owner.setFiles(dataFiles); - } - - private List retrieveFileMetadataForVersion(Dataset dataset, DatasetVersion version, List dataFiles, Map filesMap, Map categoryMap) { - List retList = new ArrayList<>(); - Map> categoryMetaMap = new HashMap<>(); - - List categoryResults = em.createNativeQuery("select t0.filecategories_id, t0.filemetadatas_id from filemetadata_datafilecategory t0, filemetadata t1 where (t0.filemetadatas_id = t1.id) AND (t1.datasetversion_id = "+version.getId()+")").getResultList(); - int i = 0; - for (Object[] result : categoryResults) { - Long category_id = (Long) result[0]; - Long filemeta_id = (Long) result[1]; - if (categoryMetaMap.get(filemeta_id) == null) { - categoryMetaMap.put(filemeta_id, new HashSet<>()); - } - categoryMetaMap.get(filemeta_id).add(category_id); - i++; - } - logger.fine("Retrieved and mapped "+i+" file categories attached to files in the version "+version.getId()); - - List metadataResults = em.createNativeQuery("select id, datafile_id, DESCRIPTION, LABEL, RESTRICTED, DIRECTORYLABEL, prov_freeform from FileMetadata where datasetversion_id = "+version.getId() + " ORDER BY LABEL").getResultList(); - - for (Object[] result : metadataResults) { - Integer filemeta_id = (Integer) result[0]; - - if (filemeta_id == null) { - continue; - } - - Long file_id = (Long) result[1]; - if (file_id == null) { - continue; - } - - Integer file_list_id = filesMap.get(file_id); - if (file_list_id == null) { - continue; - } - FileMetadata fileMetadata = new FileMetadata(); - fileMetadata.setId(filemeta_id.longValue()); - fileMetadata.setCategories(new LinkedList<>()); - - if (categoryMetaMap.get(fileMetadata.getId()) != null) { - for (Long cat_id : categoryMetaMap.get(fileMetadata.getId())) { - if (categoryMap.get(cat_id) != null) { - fileMetadata.getCategories().add(dataset.getCategories().get(categoryMap.get(cat_id))); - } - } - } - - fileMetadata.setDatasetVersion(version); - - // Link the FileMetadata object to the DataFile: - fileMetadata.setDataFile(dataFiles.get(file_list_id)); - // ... 
and the DataFile back to the FileMetadata: - fileMetadata.getDataFile().getFileMetadatas().add(fileMetadata); - - String description = (String) result[2]; - - if (description != null) { - fileMetadata.setDescription(description); - } - - String label = (String) result[3]; - - if (label != null) { - fileMetadata.setLabel(label); - } - - Boolean restricted = (Boolean) result[4]; - if (restricted != null) { - fileMetadata.setRestricted(restricted); - } - - String dirLabel = (String) result[5]; - if (dirLabel != null){ - fileMetadata.setDirectoryLabel(dirLabel); - } - - String provFreeForm = (String) result[6]; - if (provFreeForm != null){ - fileMetadata.setProvFreeForm(provFreeForm); - } - - retList.add(fileMetadata); - } - - logger.fine("Retrieved "+retList.size()+" file metadatas for version "+version.getId()+" (inside the retrieveFileMetadataForVersion method)."); - - - /* - We no longer perform this sort here, just to keep this filemetadata - list as identical as possible to when it's produced by the "traditional" - EJB method. When it's necessary to have the filemetadatas sorted by - FileMetadata.compareByLabel, the DatasetVersion.getFileMetadatasSorted() - method should be called. - - Collections.sort(retList, FileMetadata.compareByLabel); */ - - return retList; - } public List findIngestsInProgress() { if ( em.isOpen() ) { @@ -1427,75 +1074,6 @@ public List selectFilesWithMissingOriginalSizes() { } } - public String generateDataFileIdentifier(DataFile datafile, GlobalIdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - - String prepend = ""; - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){ - //If format is dependent then pre-pend the dataset identifier - prepend = datafile.getOwner().getIdentifier() + "/"; - } else { - //If there's a shoulder prepend independent identifiers with it - prepend = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); - } - - switch (doiIdentifierType) { - case "randomString": - return generateIdentifierAsRandomString(datafile, idServiceBean, prepend); - case "storedProcGenerated": - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return generateIdentifierFromStoredProcedureIndependent(datafile, idServiceBean, prepend); - } else { - return generateIdentifierFromStoredProcedureDependent(datafile, idServiceBean, prepend); - } - default: - /* Should we throw an exception instead?? -- L.A. 
4.6.2 */ - return generateIdentifierAsRandomString(datafile, idServiceBean, prepend); - } - } - - private String generateIdentifierAsRandomString(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { - String identifier = null; - do { - identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); - } while (!isGlobalIdUnique(identifier, datafile, idServiceBean)); - - return identifier; - } - - - private String generateIdentifierFromStoredProcedureIndependent(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { - String identifier; - do { - StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); - query.execute(); - String identifierFromStoredProcedure = (String) query.getOutputParameterValue(1); - // some diagnostics here maybe - is it possible to determine that it's failing - // because the stored procedure hasn't been created in the database? - if (identifierFromStoredProcedure == null) { - return null; - } - identifier = prepend + identifierFromStoredProcedure; - } while (!isGlobalIdUnique(identifier, datafile, idServiceBean)); - - return identifier; - } - - private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { - String identifier; - Long retVal; - - retVal = new Long(0); - - do { - retVal++; - identifier = prepend + retVal.toString(); - - } while (!isGlobalIdUnique(identifier, datafile, idServiceBean)); - - return identifier; - } /** * Check that a identifier entered by the user is unique (not currently used @@ -1506,44 +1084,16 @@ private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, * @param idServiceBean * @return {@code true} iff the global identifier is unique. 
*/ - public boolean isGlobalIdUnique(String userIdentifier, DataFile datafile, GlobalIdServiceBean idServiceBean) { - String testProtocol = ""; - String testAuthority = ""; - if (datafile.getAuthority() != null){ - testAuthority = datafile.getAuthority(); - } else { - testAuthority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority); - } - if (datafile.getProtocol() != null){ - testProtocol = datafile.getProtocol(); - } else { - testProtocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol); - } - - boolean u = em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") - .setParameter("protocol", testProtocol) - .setParameter("authority", testAuthority) - .setParameter("identifier",userIdentifier) - .getResultList().isEmpty(); - - try{ - if (idServiceBean.alreadyExists(new GlobalId(testProtocol, testAuthority, userIdentifier))) { - u = false; - } - } catch (Exception e){ - //we can live with failure - means identifier not found remotely - } - - - return u; - } - public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException { // Verify that the DataFile no longer exists: if (find(dataFileId) != null) { throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } + if(storageLocation == null || storageLocation.isBlank()) { + throw new IOException("Attempted to delete a physical file with no location " + + "(id: " + dataFileId + ", location: " + storageLocation); + } StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java index 275d47cf1de..f4f66d3c874 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java @@ -11,15 +11,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java index 614e7394583..a17d8c65138 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java @@ -7,26 +7,23 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import java.util.ArrayList; import java.util.List; -import javax.persistence.CascadeType; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; -import javax.validation.constraints.Size; -import 
javax.persistence.OrderBy; -import org.hibernate.validator.constraints.NotBlank; -import org.hibernate.validator.constraints.URL; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OneToMany; +import jakarta.validation.constraints.Size; +import jakarta.persistence.OrderBy; import edu.harvard.iq.dataverse.datavariable.DataVariable; import java.util.Objects; -import javax.persistence.Column; -import javax.persistence.Index; -import javax.persistence.Table; +import jakarta.persistence.Column; +import jakarta.persistence.Index; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTagsAPITestingBean.java b/src/main/java/edu/harvard/iq/dataverse/DataTagsAPITestingBean.java index 2f987dde82b..713c86190fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTagsAPITestingBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTagsAPITestingBean.java @@ -5,11 +5,11 @@ import java.io.Serializable; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.enterprise.context.SessionScoped; -import javax.faces.context.FacesContext; -import javax.inject.Named; -import javax.json.JsonObject; +import jakarta.ejb.EJB; +import jakarta.enterprise.context.SessionScoped; +import jakarta.faces.context.FacesContext; +import jakarta.inject.Named; +import jakarta.json.JsonObject; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTagsContainer.java b/src/main/java/edu/harvard/iq/dataverse/DataTagsContainer.java index 5cf9c623bde..eeda70c1f17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTagsContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTagsContainer.java @@ -1,7 +1,7 @@ package edu.harvard.iq.dataverse; -import javax.ejb.Stateless; -import javax.json.JsonObject; +import jakarta.ejb.Stateless; +import jakarta.json.JsonObject; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..620e66c6c54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -17,22 +17,24 @@ import java.util.List; import java.util.Objects; import java.util.Set; -import javax.persistence.CascadeType; -import javax.persistence.Entity; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.NamedStoredProcedureQuery; -import javax.persistence.OneToMany; -import javax.persistence.OneToOne; -import javax.persistence.OrderBy; -import javax.persistence.ParameterMode; -import javax.persistence.StoredProcedureParameter; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Entity; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.NamedStoredProcedureQuery; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OneToOne; +import jakarta.persistence.OrderBy; 
+import jakarta.persistence.ParameterMode; +import jakarta.persistence.StoredProcedureParameter; +import jakarta.persistence.Table; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -41,6 +43,10 @@ * @author skraffmiller */ @NamedQueries({ + // Dataset.findById should only be used if you're going to iterate over files (otherwise, lazy loading in DatasetService.find() is better). + // If you are going to iterate over files, preferably call the DatasetService.findDeep() method i.s.o. using this query directly. + @NamedQuery(name = "Dataset.findById", + query = "SELECT o FROM Dataset o LEFT JOIN FETCH o.files WHERE o.id=:id"), @NamedQuery(name = "Dataset.findIdStale", query = "SELECT d.id FROM Dataset d WHERE d.indexTime is NULL OR d.indexTime < d.modificationTime"), @NamedQuery(name = "Dataset.findIdStalePermission", @@ -256,7 +262,7 @@ public void setFileAccessRequest(boolean fileAccessRequest) { } public String getPersistentURL() { - return new GlobalId(this).toURL().toString(); + return this.getGlobalId().asURL(); } public List getFiles() { @@ -391,19 +397,21 @@ private DatasetVersion createNewDatasetVersion(Template template, FileMetadata f /** * The "edit version" is the most recent *draft* of a dataset, and if the - * latest version of a dataset is published, a new draft will be created. - * + * latest version of a dataset is published, a new draft will be created. If + * you don't want to create a new version, you should be using + * getLatestVersion. + * * @return The edit version {@code this}. */ - public DatasetVersion getEditVersion() { - return getEditVersion(null, null); + public DatasetVersion getOrCreateEditVersion() { + return getOrCreateEditVersion(null, null); } - public DatasetVersion getEditVersion(FileMetadata fm) { - return getEditVersion(null, fm); + public DatasetVersion getOrCreateEditVersion(FileMetadata fm) { + return getOrCreateEditVersion(null, fm); } - public DatasetVersion getEditVersion(Template template, FileMetadata fm) { + public DatasetVersion getOrCreateEditVersion(Template template, FileMetadata fm) { DatasetVersion latestVersion = this.getLatestVersion(); if (!latestVersion.isWorkingCopy() || template != null) { // if the latest version is released or archived, create a new version for editing @@ -528,11 +536,8 @@ private Collection getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { @@ -764,13 +769,13 @@ public String getLocalURL() { public String getRemoteArchiveURL() { if (isHarvested()) { if (HarvestingClient.HARVEST_STYLE_DATAVERSE.equals(this.getHarvestedFrom().getHarvestStyle())) { - return this.getHarvestedFrom().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalIdString(); + return this.getHarvestedFrom().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalId().asString(); } else if 
(HarvestingClient.HARVEST_STYLE_VDC.equals(this.getHarvestedFrom().getHarvestStyle())) { String rootArchiveUrl = this.getHarvestedFrom().getHarvestingUrl(); int c = rootArchiveUrl.indexOf("/OAIHandler"); if (c > 0) { rootArchiveUrl = rootArchiveUrl.substring(0, c); - return rootArchiveUrl + "/faces/study/StudyPage.xhtml?globalId=" + getGlobalIdString(); + return rootArchiveUrl + "/faces/study/StudyPage.xhtml?globalId=" + getGlobalId().asString(); } } else if (HarvestingClient.HARVEST_STYLE_ICPSR.equals(this.getHarvestedFrom().getHarvestStyle())) { // For the ICPSR, it turns out that the best thing to do is to @@ -880,7 +885,12 @@ public T accept(Visitor v) { @Override public String getDisplayName() { DatasetVersion dsv = getReleasedVersion(); - return dsv != null ? dsv.getTitle() : getLatestVersion().getTitle(); + String result = dsv != null ? dsv.getTitle() : getLatestVersion().getTitle(); + boolean resultIsEmpty = result == null || "".equals(result); + if (resultIsEmpty && getGlobalId() != null) { + return getGlobalId().asString(); + } + return result; } @Override @@ -914,4 +924,8 @@ public DatasetThumbnail getDatasetThumbnail(DatasetVersion datasetVersion, int s return DatasetUtil.getThumbnail(this, datasetVersion, size); } + @Override + public String getTargetUrl() { + return Dataset.TARGET_URL; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetConverter.java b/src/main/java/edu/harvard/iq/dataverse/DatasetConverter.java index 2d19cf5fe06..b779e084250 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetConverter.java @@ -6,12 +6,12 @@ package edu.harvard.iq.dataverse; -import javax.ejb.EJB; -import javax.enterprise.inject.spi.CDI; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.convert.Converter; -import javax.faces.convert.FacesConverter; +import jakarta.ejb.EJB; +import jakarta.enterprise.inject.spi.CDI; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.convert.Converter; +import jakarta.faces.convert.FacesConverter; @FacesConverter("datasetConverter") public class DatasetConverter implements Converter { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetDistributor.java b/src/main/java/edu/harvard/iq/dataverse/DatasetDistributor.java index 00936b9365a..3252b7f0367 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetDistributor.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetDistributor.java @@ -7,7 +7,7 @@ package edu.harvard.iq.dataverse; import java.util.Comparator; -import javax.persistence.Version; +import jakarta.persistence.Version; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java index 31d08f84c02..c836a20893f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java @@ -19,20 +19,20 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import javax.persistence.CascadeType; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.JoinTable; -import javax.persistence.ManyToMany; -import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; -import 
javax.persistence.OrderBy; -import javax.persistence.Table; -import javax.persistence.Transient; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.JoinTable; +import jakarta.persistence.ManyToMany; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Table; +import jakarta.persistence.Transient; import org.apache.commons.lang3.StringUtils; @Entity diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java index 5d83f1e4f8d..c679cd7edad 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java @@ -14,17 +14,17 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import javax.persistence.CascadeType; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; -import javax.persistence.OrderBy; -import javax.persistence.Table; -import javax.persistence.Transient; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Table; +import jakarta.persistence.Transient; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 6d26c0cba58..1621b80df55 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -6,8 +6,8 @@ package edu.harvard.iq.dataverse; -import javax.enterprise.context.Dependent; -import javax.inject.Named; +import jakarta.enterprise.context.Dependent; +import jakarta.inject.Named; /** * @@ -112,8 +112,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String geographicUnit="geographicUnit"; public final static String westLongitude="westLongitude"; public final static String eastLongitude="eastLongitude"; - public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB - public final static String southLatitude="southLongitude"; //Incorrect in DB + public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB: https://github.com/IQSS/dataverse/issues/5645 + public final static String southLatitude="southLongitude"; //Incorrect in DB: https://github.com/IQSS/dataverse/issues/5645 public final static String unitOfAnalysis="unitOfAnalysis"; public final static String universe="universe"; public final static String kindOfData="kindOfData"; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java index bad482dbca9..7746099818e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java @@ -8,18 +8,18 @@ import java.io.Serializable; import java.util.Collection; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; -import javax.persistence.OrderBy; -import javax.persistence.Table; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 9bc5a5c09a7..620d4bf3e09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -17,22 +17,24 @@ import java.util.Set; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonException; -import javax.json.JsonObject; -import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.NonUniqueResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.TypedQuery; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonException; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonReader; +import jakarta.json.JsonString; +import jakarta.json.JsonValue; +import jakarta.json.JsonValue.ValueType; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.NonUniqueResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.TypedQuery; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.httpclient.HttpException; @@ -343,33 +345,33 @@ public Map getCVocConf(boolean byTermUriField){ public void registerExternalVocabValues(DatasetField df) { DatasetFieldType dft =df.getDatasetFieldType(); logger.fine("Registering for field: " + dft.getName()); - JsonObject cvocEntry = getCVocConf(false).get(dft.getId()); - if(dft.isPrimitive()) { - for(DatasetFieldValue dfv: df.getDatasetFieldValues()) { + JsonObject cvocEntry = getCVocConf(true).get(dft.getId()); + if (dft.isPrimitive()) { + for (DatasetFieldValue dfv : 
df.getDatasetFieldValues()) { registerExternalTerm(cvocEntry, dfv.getValue()); } - } else { - if (df.getDatasetFieldType().isCompound()) { - DatasetFieldType termdft = findByNameOpt(cvocEntry.getString("term-uri-field")); - for (DatasetFieldCompoundValue cv : df.getDatasetFieldCompoundValues()) { - for (DatasetField cdf : cv.getChildDatasetFields()) { - logger.fine("Found term uri field type id: " + cdf.getDatasetFieldType().getId()); - if(cdf.getDatasetFieldType().equals(termdft)) { - registerExternalTerm(cvocEntry, cdf.getValue()); - } + } else { + if (df.getDatasetFieldType().isCompound()) { + DatasetFieldType termdft = findByNameOpt(cvocEntry.getString("term-uri-field")); + for (DatasetFieldCompoundValue cv : df.getDatasetFieldCompoundValues()) { + for (DatasetField cdf : cv.getChildDatasetFields()) { + logger.fine("Found term uri field type id: " + cdf.getDatasetFieldType().getId()); + if (cdf.getDatasetFieldType().equals(termdft)) { + registerExternalTerm(cvocEntry, cdf.getValue()); } } } } + } } /** * Retrieves indexable strings from a cached externalvocabularyvalue entry. * * This method assumes externalvocabularyvalue entries have been filtered and - * the externalvocabularyvalue entry contain a single JsonObject whose values - * are either Strings or an array of objects with "lang" and "value" keys. The - * string, or the "value"s for each language are added to the set. + * the externalvocabularyvalue entry contain a single JsonObject whose "personName" or "termName" values + * are either Strings or an array of objects with "lang" and ("value" or "content") keys. The + * string, or the "value/content"s for each language are added to the set. * * Any parsing error results in no entries (there can be unfiltered entries with * unknown structure - getting some strings from such an entry could give fairly @@ -385,16 +387,25 @@ public Set getStringsFor(String termUri) { if (jo != null) { try { for (String key : jo.keySet()) { - JsonValue jv = jo.get(key); - if (jv.getValueType().equals(JsonValue.ValueType.STRING)) { - logger.fine("adding " + jo.getString(key) + " for " + termUri); - strings.add(jo.getString(key)); - } else { - if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { - JsonArray jarr = jv.asJsonArray(); - for (int i = 0; i < jarr.size(); i++) { - logger.fine("adding " + jarr.getJsonObject(i).getString("value") + " for " + termUri); - strings.add(jarr.getJsonObject(i).getString("value")); + if (key.equals("termName") || key.equals("personName")) { + JsonValue jv = jo.get(key); + if (jv.getValueType().equals(JsonValue.ValueType.STRING)) { + logger.fine("adding " + jo.getString(key) + " for " + termUri); + strings.add(jo.getString(key)); + } else { + if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { + JsonArray jarr = jv.asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + JsonObject entry = jarr.getJsonObject(i); + if (entry.containsKey("value")) { + logger.fine("adding " + entry.getString("value") + " for " + termUri); + strings.add(entry.getString("value")); + } else if (entry.containsKey("content")) { + logger.fine("adding " + entry.getString("content") + " for " + termUri); + strings.add(entry.getString("content")); + + } + } } } } @@ -410,7 +421,7 @@ public Set getStringsFor(String termUri) { } /** - * Perform a query to retrieve a cached valie from the externalvocabularvalue table + * Perform a query to retrieve a cached value from the externalvocabularvalue table * @param termUri * @return - the entry's value as a JsonObject */ @@ -444,9 +455,25 @@ 
public void registerExternalTerm(JsonObject cvocEntry, String term) { logger.fine("Ingoring blank term"); return; } + boolean isExternal = false; + JsonObject vocabs = cvocEntry.getJsonObject("vocabs"); + for (String key: vocabs.keySet()) { + JsonObject vocab = vocabs.getJsonObject(key); + if (vocab.containsKey("uriSpace")) { + if (term.startsWith(vocab.getString("uriSpace"))) { + isExternal = true; + break; + } + } + } + if (!isExternal) { + logger.fine("Ignoring free text entry: " + term); + return; + } logger.fine("Registering term: " + term); try { - URI uri = new URI(term); + //Assure the term is in URI form - should be if the uriSpace entry was correct + new URI(term); ExternalVocabularyValue evv = null; try { evv = em.createQuery("select object(o) from ExternalVocabularyValue as o where o.uri=:uri", @@ -542,37 +569,7 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S String[] pathParts = param.split("/"); logger.fine("PP: " + String.join(", ", pathParts)); JsonValue curPath = readObject; - for (int j = 0; j < pathParts.length - 1; j++) { - if (pathParts[j].contains("=")) { - JsonArray arr = ((JsonArray) curPath); - for (int k = 0; k < arr.size(); k++) { - String[] keyVal = pathParts[j].split("="); - logger.fine("Looking for object where " + keyVal[0] + " is " + keyVal[1]); - JsonObject jo = arr.getJsonObject(k); - String val = jo.getString(keyVal[0]); - String expected = keyVal[1]; - if (expected.equals("@id")) { - expected = termUri; - } - if (val.equals(expected)) { - logger.fine("Found: " + jo.toString()); - curPath = jo; - break; - } - } - } else { - curPath = ((JsonObject) curPath).get(pathParts[j]); - logger.fine("Found next Path object " + curPath.toString()); - } - } - JsonValue jv = ((JsonObject) curPath).get(pathParts[pathParts.length - 1]); - if (jv.getValueType().equals(JsonValue.ValueType.STRING)) { - vals.add(i, ((JsonString) jv).getString()); - } else if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { - vals.add(i, jv); - } else if (jv.getValueType().equals(JsonValue.ValueType.OBJECT)) { - vals.add(i, jv); - } + vals.add(i, processPathSegment(0, pathParts, curPath, termUri)); logger.fine("Added param value: " + i + ": " + vals.get(i)); } else { logger.fine("Param is: " + param); @@ -615,6 +612,7 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S } catch (Exception e) { logger.warning("External Vocabulary: " + termUri + " - Failed to find value for " + filterKey + ": " + e.getMessage()); + e.printStackTrace(); } } } @@ -628,6 +626,66 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S } } + Object processPathSegment(int index, String[] pathParts, JsonValue curPath, String termUri) { + if (index < pathParts.length - 1) { + if (pathParts[index].contains("=")) { + JsonArray arr = ((JsonArray) curPath); + String[] keyVal = pathParts[index].split("="); + logger.fine("Looking for object where " + keyVal[0] + " is " + keyVal[1]); + String expected = keyVal[1]; + + if (!expected.equals("*")) { + if (expected.equals("@id")) { + expected = termUri; + } + for (int k = 0; k < arr.size(); k++) { + JsonObject jo = arr.getJsonObject(k); + String val = jo.getString(keyVal[0]); + if (val.equals(expected)) { + logger.fine("Found: " + jo.toString()); + curPath = jo; + return processPathSegment(index + 1, pathParts, curPath, termUri); + } + } + } else { + JsonArrayBuilder parts = Json.createArrayBuilder(); + for (JsonValue subPath : arr) { + if (subPath instanceof JsonObject) { + 
JsonValue nextValue = ((JsonObject) subPath).get(keyVal[0]); + Object obj = processPathSegment(index + 1, pathParts, nextValue, termUri); + if (obj instanceof String) { + parts.add((String) obj); + } else { + parts.add((JsonValue) obj); + } + } + } + return parts.build(); + } + + } else { + curPath = ((JsonObject) curPath).get(pathParts[index]); + logger.fine("Found next Path object " + curPath.toString()); + return processPathSegment(index + 1, pathParts, curPath, termUri); + } + } else { + logger.fine("Last segment: " + curPath.toString()); + logger.fine("Looking for : " + pathParts[index]); + JsonValue jv = ((JsonObject) curPath).get(pathParts[index]); + ValueType type =jv.getValueType(); + if (type.equals(JsonValue.ValueType.STRING)) { + return ((JsonString) jv).getString(); + } else if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { + return jv; + } else if (jv.getValueType().equals(JsonValue.ValueType.OBJECT)) { + return jv; + } + } + + return null; + + } + /** * Supports validation of externally controlled values. If the value is a URI it * must be in the namespace (start with) one of the uriSpace values of an @@ -669,8 +727,20 @@ public boolean isValidCVocValue(DatasetFieldType dft, String value) { public List getVocabScripts( Map cvocConf) { //ToDo - only return scripts that are needed (those fields are set on display pages, those blocks/fields are allowed in the Dataverse collection for create/edit)? Set scripts = new HashSet(); - for(JsonObject jo: cvocConf.values()) { - scripts.add(jo.getString("js-url")); + for (JsonObject jo : cvocConf.values()) { + // Allow either a single script (a string) or an array of scripts (used, for + // example, to allow use of the common cvocutils.js script along with a main + // script for the field.) + JsonValue scriptValue = jo.get("js-url"); + ValueType scriptType = scriptValue.getValueType(); + if (scriptType.equals(ValueType.STRING)) { + scripts.add(((JsonString) scriptValue).getString()); + } else if (scriptType.equals(ValueType.ARRAY)) { + JsonArray scriptArray = ((JsonArray) scriptValue); + for (int i = 0; i < scriptArray.size(); i++) { + scripts.add(scriptArray.getString(i)); + } + } } String customScript = settingsService.getValueForKey(SettingsServiceBean.Key.ControlledVocabularyCustomJavaScript); if (customScript != null && !customScript.isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java index df126514308..824b486a42d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java @@ -13,8 +13,8 @@ import java.util.Set; import java.util.TreeMap; import java.util.MissingResourceException; -import javax.faces.model.SelectItem; -import javax.persistence.*; +import jakarta.faces.model.SelectItem; +import jakarta.persistence.*; /** * Defines the meaning and constraints of a metadata field and its values. 
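Referring back to the getVocabScripts() change in DatasetFieldServiceBean above: a "js-url" entry in the controlled-vocabulary configuration may now be either a single script URL or an array of URLs. The following is a minimal standalone sketch of that string-or-array branching, not part of the patch; the class name and example URLs are hypothetical, and only the jakarta.json calls mirror the code above.

import jakarta.json.Json;
import jakarta.json.JsonObject;
import jakarta.json.JsonString;
import jakarta.json.JsonValue;
import java.util.HashSet;
import java.util.Set;

public class VocabScriptConfigSketch {
    public static void main(String[] args) {
        // One hypothetical field config with a single script, one with an array of scripts.
        JsonObject single = Json.createObjectBuilder()
                .add("js-url", "https://example.org/keywords.js")
                .build();
        JsonObject multiple = Json.createObjectBuilder()
                .add("js-url", Json.createArrayBuilder()
                        .add("https://example.org/cvocutils.js")
                        .add("https://example.org/authors.js"))
                .build();

        Set<String> scripts = new HashSet<>();
        for (JsonObject jo : new JsonObject[] { single, multiple }) {
            JsonValue scriptValue = jo.get("js-url");
            if (scriptValue.getValueType().equals(JsonValue.ValueType.STRING)) {
                // older form: a single script URL
                scripts.add(((JsonString) scriptValue).getString());
            } else if (scriptValue.getValueType().equals(JsonValue.ValueType.ARRAY)) {
                // newer form: several scripts, e.g. a shared utility script plus the field's main script
                for (JsonValue v : scriptValue.asJsonArray()) {
                    scripts.add(((JsonString) v).getString());
                }
            }
        }
        // scripts now holds all three distinct URLs
        System.out.println(scripts);
    }
}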
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java index 3ded24d7a59..6d3fda2812d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java @@ -5,11 +5,11 @@ */ package edu.harvard.iq.dataverse; -import javax.validation.ConstraintValidator; -import javax.validation.ConstraintValidatorContext; +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; import edu.harvard.iq.dataverse.util.BundleUtil; -import java.util.Collections; + import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java index 2447a6478fd..1064187ccd6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java @@ -10,17 +10,17 @@ import edu.harvard.iq.dataverse.util.MarkupChecker; import java.io.Serializable; import java.util.Comparator; -import java.util.ResourceBundle; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; -import javax.persistence.Transient; + +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import jakarta.persistence.Transient; import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index 8b807f78bca..b6c21014f04 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -13,13 +13,13 @@ import java.util.GregorianCalendar; import java.util.logging.Logger; import java.util.regex.Pattern; -import javax.validation.ConstraintValidator; -import javax.validation.ConstraintValidatorContext; +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.validation.EMailValidator; import edu.harvard.iq.dataverse.validation.URLValidator; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.validator.routines.UrlValidator; /** * @@ -59,7 +59,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat()); if (!valid) { try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid entry.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation(); } catch (NullPointerException e) { return false; } @@ 
-128,7 +128,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte } if (!valid) { try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid date. \"" + YYYYformat + "\" is a supported format.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidDate") ).addConstraintViolation(); } catch (NullPointerException npe) { } @@ -143,7 +143,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte } catch (Exception e) { logger.fine("Float value failed validation: " + value.getValue() + " (" + dsfType.getDisplayName() + ")"); try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid number.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidNumber") ).addConstraintViolation(); } catch (NullPointerException npe) { } @@ -157,7 +157,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte Integer.parseInt(value.getValue()); } catch (Exception e) { try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid integer.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidInteger") ).addConstraintViolation(); } catch (NullPointerException npe) { } @@ -170,7 +170,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte if (fieldType.equals(FieldType.URL) && !lengthOnly) { boolean isValidUrl = URLValidator.isURLValid(value.getValue()); if (!isValidUrl) { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " {url.invalid}").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidURL")).addConstraintViolation(); return false; } } @@ -178,7 +178,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte if (fieldType.equals(FieldType.EMAIL) && !lengthOnly) { boolean isValidMail = EMailValidator.isEmailValid(value.getValue()); if (!isValidMail) { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " {email.invalid}").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEmail")).addConstraintViolation(); return false; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingDataverse.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingDataverse.java index 8f8e9b103c1..dec07a09643 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingDataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingDataverse.java @@ -2,19 +2,19 @@ import java.io.Serializable; import java.util.Date; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import 
javax.persistence.OneToOne; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingServiceBean.java index 3789efcd443..39c82bfa3f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLinkingServiceBean.java @@ -8,12 +8,13 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.TypedQuery; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.TypedQuery; /** * @@ -63,7 +64,7 @@ public DatasetLinkingDataverse findDatasetLinkingDataverse(Long datasetId, Long .setParameter("datasetId", datasetId) .setParameter("linkingDataverseId", linkingDataverseId) .getSingleResult(); - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { logger.fine("no datasetLinkingDataverse found for datasetId " + datasetId + " and linkingDataverseId " + linkingDataverseId); return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java index 7b857545c20..cc0078ecbc5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java @@ -20,25 +20,24 @@ package edu.harvard.iq.dataverse; -import static edu.harvard.iq.dataverse.DatasetLock.Reason.Workflow; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import java.util.Date; import java.io.Serializable; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import 
jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0a8db69bf5b..d20175b6e1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -33,13 +33,14 @@ import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; -import edu.harvard.iq.dataverse.export.spi.Exporter; +import io.gdcc.spi.export.ExportException; +import io.gdcc.spi.export.Exporter; import edu.harvard.iq.dataverse.ingest.IngestRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -48,6 +49,7 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.ArchiverUtil; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.DataFileComparator; import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; @@ -56,6 +58,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -78,27 +81,29 @@ import java.util.Set; import java.util.Collection; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.faces.application.FacesMessage; -import javax.faces.context.FacesContext; -import javax.faces.event.ActionEvent; -import javax.faces.event.ValueChangeEvent; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import java.util.stream.Collectors; + +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.context.FacesContext; +import jakarta.faces.event.ActionEvent; +import jakarta.faces.event.ValueChangeEvent; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import org.apache.commons.lang3.StringUtils; import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; -import javax.validation.ConstraintViolation; +import jakarta.validation.ConstraintViolation; import org.apache.commons.httpclient.HttpClient; //import org.primefaces.context.RequestContext; import java.util.Arrays; import java.util.HashSet; -import javax.faces.model.SelectItem; -import javax.faces.validator.ValidatorException; +import jakarta.faces.model.SelectItem; +import jakarta.faces.validator.ValidatorException; import java.util.logging.Level; import 
edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; @@ -119,12 +124,12 @@ import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import java.util.Collections; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; +import jakarta.faces.component.UIComponent; +import jakarta.faces.component.UIInput; -import javax.faces.event.AjaxBehaviorEvent; -import javax.servlet.ServletOutputStream; -import javax.servlet.http.HttpServletResponse; +import jakarta.faces.event.AjaxBehaviorEvent; +import jakarta.servlet.ServletOutputStream; +import jakarta.servlet.http.HttpServletResponse; import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.mutable.MutableBoolean; @@ -141,6 +146,8 @@ import edu.harvard.iq.dataverse.search.SearchServiceBean; import edu.harvard.iq.dataverse.search.SearchUtil; import edu.harvard.iq.dataverse.search.SolrClientService; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SignpostingResources; import edu.harvard.iq.dataverse.util.FileMetadataUtil; import java.util.Comparator; import org.apache.solr.client.solrj.SolrQuery; @@ -228,6 +235,8 @@ public enum DisplayMode { ExternalToolServiceBean externalToolService; @EJB SolrClientService solrClientService; + @EJB + DvObjectServiceBean dvObjectService; @Inject DataverseRequestServiceBean dvRequestService; @Inject @@ -336,7 +345,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private Boolean hasRsyncScript = false; - private Boolean hasTabular = false; + /*private Boolean hasTabular = false;*/ /** @@ -345,6 +354,12 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { * sometimes you want to know about the current version ("no tabular files * currently"). Like all files, tabular files can be deleted. */ + /** + * There doesn't seem to be an actual real life case where we need to know + * if this dataset "has ever had a tabular file" - for all practical purposes + * only the versionHasTabular appears to be in use. I'm going to remove the + * other boolean. + */ private boolean versionHasTabular = false; private boolean showIngestSuccess; @@ -373,6 +388,8 @@ public void setShowIngestSuccess(boolean showIngestSuccess) { Map> previewToolsByFileId = new HashMap<>(); // TODO: Consider renaming "previewTools" to "filePreviewTools". List previewTools = new ArrayList<>(); + Map> fileQueryToolsByFileId = new HashMap<>(); + List fileQueryTools = new ArrayList<>(); private List datasetExploreTools; public Boolean isHasRsyncScript() { @@ -501,6 +518,16 @@ public void setRemoveUnusedTags(boolean removeUnusedTags) { private String fileSortField; private String fileSortOrder; + private boolean tagPresort = true; + private boolean folderPresort = true; + // Due to what may be a bug in PrimeFaces, the call to select a new page of + // files appears to reset the two presort booleans to false. The following + // values are a flag and duplicate booleans to remember what the new values were + // so that they can be set only in real checkbox changes. Further comments where + // these are used. 
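The comment above describes a workaround for what may be a PrimeFaces paginator bug: presort setters fired during a page flip must be ignored, and only real checkbox changes applied. A minimal standalone sketch of that flag-and-duplicate pattern, stripped of JSF and of the getSortOrder() guard used in the real bean; the class and method names here are illustrative, not part of DatasetPage.

    class PresortStateSketch {
        private boolean tagPresort = true;      // value actually used when sorting
        private boolean newTagPresort = true;   // last value reported by the setter
        private boolean pageFlip = false;

        void onPageFlip() {
            // the paginator listener runs before the setters, so they see this flag
            pageFlip = true;
        }

        void setTagPresort(boolean value) {
            newTagPresort = value;              // always remember the incoming value
            if (!pageFlip) {
                tagPresort = value;             // apply only real checkbox changes
            }
        }

        void onSortClicked() {
            pageFlip = false;
            tagPresort = newTagPresort;         // commit the remembered value
        }
    }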
+ boolean isPageFlip = false; + private boolean newTagPresort = true; + private boolean newFolderPresort = true; public List> getCartList() { if (session.getUser() instanceof AuthenticatedUser) { @@ -663,70 +690,46 @@ public void showAll(){ } private List selectFileMetadatasForDisplay() { - Set searchResultsIdSet = null; - - if (isIndexedVersion()) { + final Set searchResultsIdSet; + if (isIndexedVersion() && StringUtil.isEmpty(fileLabelSearchTerm) && StringUtil.isEmpty(fileTypeFacet) && StringUtil.isEmpty(fileAccessFacet) && StringUtil.isEmpty(fileTagsFacet)) { + // Indexed version: we need facets, they are set as a side effect of getFileIdsInVersionFromSolr method. + // But, no search terms were specified, we will return the full + // list of the files in the version: we discard the result from getFileIdsInVersionFromSolr. + getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); + // Since the search results should include the full set of fmds if all the + // terms/facets are empty, setting them to null should just be + // an optimization to skip the loop below + searchResultsIdSet = null; + } else if (isIndexedVersion()) { // We run the search even if no search term and/or facets are // specified - to generate the facet labels list: searchResultsIdSet = getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); - // But, if no search terms were specified, we can immediately return the full - // list of the files in the version: - if (StringUtil.isEmpty(fileLabelSearchTerm) - && StringUtil.isEmpty(fileTypeFacet) - && StringUtil.isEmpty(fileAccessFacet) - && StringUtil.isEmpty(fileTagsFacet)) { - if ((StringUtil.isEmpty(fileSortField) || fileSortField.equals("name")) && StringUtil.isEmpty(fileSortOrder)) { - return workingVersion.getFileMetadatasSorted(); - } else { - searchResultsIdSet = null; - } - } - - } else { + } else if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { // No, this is not an indexed version. // If the search term was specified, we'll run a search in the db; // if not - return the full list of files in the version. // (no facets without solr!) 
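The rewritten selectFileMetadatasForDisplay() in the hunk that continues below treats a null searchResultsIdSet as "no filtering" and otherwise filters the version's file metadatas by data-file id with a stream. A small self-contained sketch of that idiom, with a made-up Item record standing in for FileMetadata and its DataFile id:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Set;
    import java.util.stream.Collectors;

    class IdFilterSketch {
        // illustrative stand-in for FileMetadata + DataFile id
        record Item(long id, String label) {}

        static List<Item> select(List<Item> all, Set<Long> matchingIds) {
            if (matchingIds == null) {
                // no search ran (or all terms/facets were empty): keep the full list
                return new ArrayList<>(all);
            }
            return all.stream()
                      .filter(item -> matchingIds.contains(item.id()))
                      .collect(Collectors.toList());
        }
    }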
- if (StringUtil.isEmpty(this.fileLabelSearchTerm)) { - if ((StringUtil.isEmpty(fileSortField) || fileSortField.equals("name")) && StringUtil.isEmpty(fileSortOrder)) { - return workingVersion.getFileMetadatasSorted(); - } - } else { - searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); - } - } - - List retList = new ArrayList<>(); - - for (FileMetadata fileMetadata : workingVersion.getFileMetadatasSorted()) { - if (searchResultsIdSet == null || searchResultsIdSet.contains(fileMetadata.getDataFile().getId())) { - retList.add(fileMetadata); - } + searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); + } else { + searchResultsIdSet = null; } - if ((StringUtil.isEmpty(fileSortOrder) && !("name".equals(fileSortField))) - || ("desc".equals(fileSortOrder) || !("name".equals(fileSortField)))) { - sortFileMetadatas(retList); - + final List md = workingVersion.getFileMetadatas(); + final List retList; + if (searchResultsIdSet == null) { + retList = new ArrayList<>(md); + } else { + retList = md.stream().filter(x -> searchResultsIdSet.contains(x.getDataFile().getId())).collect(Collectors.toList()); } - + sortFileMetadatas(retList); return retList; } - private void sortFileMetadatas(List fileList) { - if ("name".equals(fileSortField) && "desc".equals(fileSortOrder)) { - Collections.sort(fileList, compareByLabelZtoA); - } else if ("date".equals(fileSortField)) { - if ("desc".equals(fileSortOrder)) { - Collections.sort(fileList, compareByOldest); - } else { - Collections.sort(fileList, compareByNewest); - } - } else if ("type".equals(fileSortField)) { - Collections.sort(fileList, compareByType); - } else if ("size".equals(fileSortField)) { - Collections.sort(fileList, compareBySize); - } + private void sortFileMetadatas(final List fileList) { + + final DataFileComparator dfc = new DataFileComparator(); + final Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); + Collections.sort(fileList, comp); } private Boolean isIndexedVersion = null; @@ -1845,7 +1848,20 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } - + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } + + private void setIdByPersistentId() { + GlobalId gid = PidUtil.parseAsGlobalID(persistentId); + Long id = dvObjectService.findIdByGlobalId(gid, DvObject.DType.Dataset); + if (id == null) { + id = dvObjectService.findIdByAltGlobalId(gid, DvObject.DType.Dataset); + } + if (id != null) { + this.setId(id); + } + } private String init(boolean initFull) { @@ -1857,7 +1873,12 @@ private String init(boolean initFull) { String nonNullDefaultIfKeyNotFound = ""; protocol = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); authority = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - if (this.getId() != null || versionId != null || persistentId != null) { // view mode for a dataset + String sortOrder = getSortOrder(); + if(sortOrder != null) { + FileMetadata.setCategorySortOrder(sortOrder); + } + + if (dataset.getId() != null || versionId != null || persistentId != null) { // view mode for a dataset DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse 
= null; @@ -1865,44 +1886,60 @@ private String init(boolean initFull) { // Set the workingVersion and Dataset // --------------------------------------- if (persistentId != null) { - logger.fine("initializing DatasetPage with persistent ID " + persistentId); - // Set Working Version and Dataset by PersistentID - dataset = datasetService.findByGlobalId(persistentId); - if (dataset == null) { - logger.warning("No such dataset: "+persistentId); - return permissionsWrapper.notFound(); - } - logger.fine("retrieved dataset, id="+dataset.getId()); - - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); - logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); - - } else if (this.getId() != null) { + setIdByPersistentId(); + } + + if (this.getId() != null) { // Set Working Version and Dataset by Datasaet Id and Version + + // We are only performing these lookups to obtain the database id + // of the version that we are displaying, and then we will use it + // to perform a .findDeep(versionId); see below. + + // TODO: replace the code block below, the combination of + // datasetService.find(id) and datasetVersionService.selectRequestedVersion() + // with some optimized, direct query-based way of obtaining + // the numeric id of the requested DatasetVersion (and that's + // all we need, we are not using any of the entities produced + // below. + dataset = datasetService.find(this.getId()); + if (dataset == null) { logger.warning("No such dataset: "+dataset); return permissionsWrapper.notFound(); } //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); + if (retrieveDatasetVersionResponse == null) { + return permissionsWrapper.notFound(); + } this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); - logger.info("retreived version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); - - } else if (versionId != null) { - // TODO: 4.2.1 - this method is broken as of now! - // Set Working Version and Dataset by DatasaetVersion Id - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); - + logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); + + versionId = workingVersion.getId(); + + this.workingVersion = null; + this.dataset = null; + + } + + // ... 
And now the "real" working version lookup: + + if (versionId != null) { + this.workingVersion = datasetVersionService.findDeep(versionId); + dataset = workingVersion.getDataset(); + } + + if (workingVersion == null) { + logger.warning("Failed to retrieve version"); + return permissionsWrapper.notFound(); } + this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId()); - if (retrieveDatasetVersionResponse == null) { - return permissionsWrapper.notFound(); - } + switch (selectTab){ case "dataFilesTab": @@ -1919,16 +1956,6 @@ private String init(boolean initFull) { break; } - //this.dataset = this.workingVersion.getDataset(); - - // end: Set the workingVersion and Dataset - // --------------------------------------- - // Is the DatasetVersion or Dataset null? - // - if (workingVersion == null || this.dataset == null) { - return permissionsWrapper.notFound(); - } - // Is the Dataset harvested? if (dataset.isHarvested()) { @@ -1956,7 +1983,7 @@ private String init(boolean initFull) { return permissionsWrapper.notAuthorized(); } - if (!retrieveDatasetVersionResponse.wasRequestedVersionRetrieved()) { + if (retrieveDatasetVersionResponse != null && !retrieveDatasetVersionResponse.wasRequestedVersionRetrieved()) { //msg("checkit " + retrieveDatasetVersionResponse.getDifferentVersionMessage()); JsfHelper.addWarningMessage(retrieveDatasetVersionResponse.getDifferentVersionMessage());//BundleUtil.getStringFromBundle("dataset.message.metadataSuccess")); } @@ -1977,11 +2004,6 @@ private String init(boolean initFull) { // init the list of FileMetadatas if (workingVersion.isDraft() && canUpdateDataset()) { readOnly = false; - } else { - // an attempt to retreive both the filemetadatas and datafiles early on, so that - // we don't have to do so later (possibly, many more times than necessary): - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? 
(AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); } // This will default to all the files in the version, if the search term // parameter hasn't been specified yet: @@ -2045,7 +2067,7 @@ private String init(boolean initFull) { if ( isEmpty(dataset.getIdentifier()) && systemConfig.directUploadEnabled(dataset) ) { CommandContext ctxt = commandEngine.getContext(); GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); + dataset.setIdentifier(idServiceBean.generateDatasetIdentifier(dataset)); } dataverseTemplates.addAll(dataverseService.find(ownerId).getTemplates()); if (!dataverseService.find(ownerId).isTemplateRoot()) { @@ -2067,7 +2089,7 @@ private String init(boolean initFull) { } //Initalize with the default if there is one dataset.setTemplate(selectedTemplate); - workingVersion = dataset.getEditVersion(selectedTemplate, null); + workingVersion = dataset.getOrCreateEditVersion(selectedTemplate, null); updateDatasetFieldInputLevels(); } else { workingVersion = dataset.getCreateVersion(licenseServiceBean.getDefault()); @@ -2108,23 +2130,18 @@ private String init(boolean initFull) { displayLockInfo(dataset); displayPublishMessage(); + // TODO: replace this loop, and the loop in the method that calculates + // the total "originals" size of the dataset with direct custom queries; + // then we'll be able to drop the lookup hint for DataTable from the + // findDeep() method for the version and further speed up the lookup + // a little bit. for (FileMetadata fmd : workingVersion.getFileMetadatas()) { if (fmd.getDataFile().isTabularData()) { versionHasTabular = true; break; } } - for(DataFile f : dataset.getFiles()) { - // TODO: Consider uncommenting this optimization. 
-// if (versionHasTabular) { -// hasTabular = true; -// break; -// } - if(f.isTabularData()) { - hasTabular = true; - break; - } - } + //Show ingest success message if refresh forces a page reload after ingest success //This is needed to display the explore buttons (the fileDownloadHelper needs to be reloaded via page if (showIngestSuccess) { @@ -2134,6 +2151,7 @@ private String init(boolean initFull) { configureTools = externalToolService.findFileToolsByType(ExternalTool.Type.CONFIGURE); exploreTools = externalToolService.findFileToolsByType(ExternalTool.Type.EXPLORE); previewTools = externalToolService.findFileToolsByType(ExternalTool.Type.PREVIEW); + fileQueryTools = externalToolService.findFileToolsByType(ExternalTool.Type.QUERY); datasetExploreTools = externalToolService.findDatasetToolsByType(ExternalTool.Type.EXPLORE); rowsPerPage = 10; if (dataset.getId() != null && canUpdateDataset()) { @@ -2167,10 +2185,29 @@ private void displayPublishMessage(){ if (workingVersion.isDraft() && workingVersion.getId() != null && canUpdateDataset() && !dataset.isLockedFor(DatasetLock.Reason.finalizePublication) && (canPublishDataset() || !dataset.isLockedFor(DatasetLock.Reason.InReview) )){ - JsfHelper.addWarningMessage(datasetService.getReminderString(dataset, canPublishDataset())); + JsfHelper.addWarningMessage(datasetService.getReminderString(dataset, canPublishDataset(), false, isValid())); } } + Boolean valid = null; + + public boolean isValid() { + if (valid == null) { + DatasetVersion version = dataset.getLatestVersion(); + if (!version.isDraft()) { + valid = true; + } + DatasetVersion newVersion = version.cloneDatasetVersion(); + newVersion.setDatasetFields(newVersion.initDatasetFields()); + valid = newVersion.isValid(); + } + return valid; + } + + public boolean isValidOrCanReviewIncomplete() { + return isValid() || JvmSettings.UI_ALLOW_REVIEW_INCOMPLETE.lookupOptional(Boolean.class).orElse(false); + } + private void displayLockInfo(Dataset dataset) { // Various info messages, when the dataset is locked (for various reasons): if (dataset.isLocked() && canUpdateDataset()) { @@ -2239,6 +2276,19 @@ private void displayLockInfo(Dataset dataset) { } + public String getSortOrder() { + return settingsWrapper.getValueForKey(SettingsServiceBean.Key.CategoryOrder, null); + } + + public boolean orderByFolder() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.OrderByFolder, true); + } + + public boolean allowUserManagementOfOrder() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.AllowUserManagementOfOrder, false); + } + + private Boolean fileTreeViewRequired = null; public boolean isFileTreeViewRequired() { @@ -2261,6 +2311,7 @@ public String getFileDisplayMode() { } public void setFileDisplayMode(String fileDisplayMode) { + isPageFlip = true; if ("Table".equals(fileDisplayMode)) { this.fileDisplayMode = FileDisplayStyle.TABLE; } else { @@ -2272,13 +2323,6 @@ public boolean isFileDisplayTable() { return fileDisplayMode == FileDisplayStyle.TABLE; } - public void toggleFileDisplayMode() { - if (fileDisplayMode == FileDisplayStyle.TABLE) { - fileDisplayMode = FileDisplayStyle.TREE; - } else { - fileDisplayMode = FileDisplayStyle.TABLE; - } - } public boolean isFileDisplayTree() { return fileDisplayMode == FileDisplayStyle.TREE; } @@ -2382,9 +2426,9 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p return fileNode; } - public boolean isHasTabular() { + /*public boolean isHasTabular() { return hasTabular; - } + }*/ public boolean 
isVersionHasTabular() { return versionHasTabular; @@ -2401,7 +2445,7 @@ private void resetVersionUI() { AuthenticatedUser au = (AuthenticatedUser) session.getUser(); //On create set pre-populated fields - for (DatasetField dsf : dataset.getEditVersion().getDatasetFields()) { + for (DatasetField dsf : dataset.getOrCreateEditVersion().getDatasetFields()) { if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.depositor) && dsf.isEmpty()) { dsf.getDatasetFieldValues().get(0).setValue(au.getLastName() + ", " + au.getFirstName()); } @@ -2458,7 +2502,7 @@ private void refreshSelectedFiles(List filesToRefresh){ } String termsOfAccess = workingVersion.getTermsOfUseAndAccess().getTermsOfAccess(); boolean requestAccess = workingVersion.getTermsOfUseAndAccess().isFileAccessRequest(); - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); workingVersion.getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess); workingVersion.getTermsOfUseAndAccess().setFileAccessRequest(requestAccess); List newSelectedFiles = new ArrayList<>(); @@ -2521,7 +2565,7 @@ public void edit(EditMode editMode) { if (this.readOnly) { dataset = datasetService.find(dataset.getId()); } - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); clone = workingVersion.cloneDatasetVersion(); if (editMode.equals(EditMode.METADATA)) { datasetVersionUI = datasetVersionUI.initDatasetVersionUI(workingVersion, true); @@ -2798,54 +2842,52 @@ public void refresh(ActionEvent e) { refresh(); } + + public void sort() { + // This is called as the presort checkboxes' listener when the user is actually + // clicking in the checkbox. It does appear to happen after the setTagPresort + // and setFolderPresort calls. + // So -we know this isn't a pageflip and at this point can update to use the new + // values. + isPageFlip = false; + if (!newTagPresort == tagPresort) { + tagPresort = newTagPresort; + } + if (!newFolderPresort == folderPresort) { + folderPresort = newFolderPresort; + } + sortFileMetadatas(fileMetadatasSearch); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("file.results.presort.change.success")); + } + public String refresh() { logger.fine("refreshing"); //dataset = datasetService.find(dataset.getId()); dataset = null; + workingVersion = null; logger.fine("refreshing working version"); DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; - if (persistentId != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - dataset = datasetService.findByGlobalId(persistentId); - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - } else if (versionId != null) { - retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); - } else if (dataset.getId() != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); - dataset = datasetService.find(dataset.getId()); - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - } + if (versionId != null) { + // versionId must have been set by now, in the init() method, + // regardless of how the page was originally called - by the dataset + // database id, by the persistent identifier, or by the db id of + // the version. 
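A hedged sketch of the refresh pattern that comment describes, consolidated from the refresh() code that continues just below: init() normalizes whatever identifier the page was opened with (dataset id, persistent id, or version id) down to a version database id, so refresh() only ever needs one deep lookup. The names here are illustrative stand-ins, not the real service API.

    class RefreshSketch {
        interface VersionLookup {
            Object findDeep(Long versionId);   // stands in for the version service's deep fetch
        }

        private Long versionId;                // set once during init()
        private Object workingVersion;         // stands in for DatasetVersion

        void refresh(VersionLookup lookup) {
            workingVersion = null;
            if (versionId != null) {
                workingVersion = lookup.findDeep(versionId);   // single eager fetch per refresh
            }
            // a null workingVersion at this point means the version is gone: show "not found"
        }
    }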
+ this.workingVersion = datasetVersionService.findDeep(versionId); + dataset = workingVersion.getDataset(); + } + - if (retrieveDatasetVersionResponse == null) { + if (this.workingVersion == null) { // TODO: // should probably redirect to the 404 page, if we can't find // this version anymore. // -- L.A. 4.2.3 return ""; } - this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); - - if (this.workingVersion == null) { - // TODO: - // same as the above - - return ""; - } - - if (dataset == null) { - // this would be the case if we were retrieving the version by - // the versionId, above. - this.dataset = this.workingVersion.getDataset(); - } - - if (readOnly) { - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); - } fileMetadatasSearch = selectFileMetadatasForDisplay(); @@ -2858,9 +2900,9 @@ public String refresh() { //SEK 12/20/2019 - since we are ingesting a file we know that there is a current draft version lockedDueToIngestVar = null; if (canViewUnpublishedDataset()) { - return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&showIngestSuccess=true&version=DRAFT&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&showIngestSuccess=true&version=DRAFT&faces-redirect=true"; } else { - return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&showIngestSuccess=true&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&showIngestSuccess=true&faces-redirect=true"; } } @@ -3024,19 +3066,32 @@ public void setTooLargeToDownload(boolean tooLargeToDownload) { this.tooLargeToDownload = tooLargeToDownload; } + private Long sizeOfDatasetArchival = null; + private Long sizeOfDatasetOriginal = null; + + public Long getSizeOfDatasetNumeric() { - if (this.hasTabular){ + if (this.versionHasTabular){ return Math.min(getSizeOfDatasetOrigNumeric(), getSizeOfDatasetArchivalNumeric()); } return getSizeOfDatasetOrigNumeric(); } public Long getSizeOfDatasetOrigNumeric() { - return DatasetUtil.getDownloadSizeNumeric(workingVersion, true); + if (versionHasTabular) { + if (sizeOfDatasetOriginal == null) { + sizeOfDatasetOriginal = DatasetUtil.getDownloadSizeNumeric(workingVersion, true); + } + return sizeOfDatasetOriginal; + } + return getSizeOfDatasetArchivalNumeric(); } public Long getSizeOfDatasetArchivalNumeric() { - return DatasetUtil.getDownloadSizeNumeric(workingVersion, false); + if (sizeOfDatasetArchival == null) { + sizeOfDatasetArchival = DatasetUtil.getDownloadSizeNumeric(workingVersion, false); + } + return sizeOfDatasetArchival; } public String getSizeOfSelectedAsString(){ @@ -3452,7 +3507,7 @@ private void deleteFiles(List filesToDelete) { if (markedForDelete.getId() != null) { // This FileMetadata has an id, i.e., it exists in the database. // We are going to remove this filemetadata from the version: - dataset.getEditVersion().getFileMetadatas().remove(markedForDelete); + dataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete); // But the actual delete will be handled inside the UpdateDatasetCommand // (called later on). 
The list "filesToBeDeleted" is passed to the // command as a parameter: @@ -3591,7 +3646,7 @@ public String save() { //ToDo - could drop use of selectedTemplate and just use the persistent dataset.getTemplate() if ( selectedTemplate != null ) { if ( isSessionUserAuthenticated() ) { - cmd = new CreateNewDatasetCommand(dataset, dvRequestService.getDataverseRequest(), false, selectedTemplate); + cmd = new CreateNewDatasetCommand(dataset, dvRequestService.getDataverseRequest(), selectedTemplate); } else { JH.addMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("dataset.create.authenticatedUsersOnly")); return null; @@ -3618,9 +3673,9 @@ public String save() { ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); } dataset = commandEngine.submit(cmd); - for (DatasetField df : dataset.getLatestVersion().getDatasetFields()) { + for (DatasetField df : dataset.getLatestVersion().getFlatDatasetFields()) { logger.fine("Found id: " + df.getDatasetFieldType().getId()); - if (fieldService.getCVocConf(false).containsKey(df.getDatasetFieldType().getId())) { + if (fieldService.getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) { fieldService.registerExternalVocabValues(df); } } @@ -3678,7 +3733,7 @@ public String save() { // have been created in the dataset. dataset = datasetService.find(dataset.getId()); - List filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null, true); + List filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getOrCreateEditVersion(), newFiles, null, true); newFiles.clear(); // and another update command: @@ -3787,7 +3842,7 @@ private String returnToLatestVersion(){ setReleasedVersionTabList(resetReleasedVersionTabList()); newFiles.clear(); editMode = null; - return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&version="+ workingVersion.getFriendlyVersionNumber() + "&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&version="+ workingVersion.getFriendlyVersionNumber() + "&faces-redirect=true"; } private String returnToDatasetOnly(){ @@ -3797,7 +3852,7 @@ private String returnToDatasetOnly(){ } private String returnToDraftVersion(){ - return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&version=DRAFT" + "&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&version=DRAFT" + "&faces-redirect=true"; } public String cancel() { @@ -4410,6 +4465,8 @@ public List< String[]> getExporters(){ try { exporter = ExportService.getInstance().getExporter(formatName); } catch (ExportException ex) { + logger.warning("Failed to get : " + formatName); + logger.warning(ex.getLocalizedMessage()); exporter = null; } if (exporter != null && exporter.isAvailableToUsers()) { @@ -4418,7 +4475,7 @@ public List< String[]> getExporters(){ String[] temp = new String[2]; temp[0] = formatDisplayName; - temp[1] = myHostURL + "/api/datasets/export?exporter=" + formatName + "&persistentId=" + dataset.getGlobalIdString(); + temp[1] = myHostURL + "/api/datasets/export?exporter=" + formatName + "&persistentId=" + dataset.getGlobalId().asString(); retList.add(temp); } } @@ -5038,10 +5095,9 @@ public boolean isFileAccessRequestMultiButtonRequired(){ // return false; } for (FileMetadata fmd : workingVersion.getFileMetadatas()){ + AuthenticatedUser authenticatedUser = (AuthenticatedUser) session.getUser(); //Change here so that if all restricted files have pending requests there's no Request Button - if 
((!this.fileDownloadHelper.canDownloadFile(fmd) && (fmd.getDataFile().getFileAccessRequesters() == null - || ( fmd.getDataFile().getFileAccessRequesters() != null - && !fmd.getDataFile().getFileAccessRequesters().contains((AuthenticatedUser)session.getUser()))))){ + if ((!this.fileDownloadHelper.canDownloadFile(fmd) && !fmd.getDataFile().containsFileAccessRequestFromUser(authenticatedUser))) { return true; } } @@ -5452,12 +5508,25 @@ public boolean isShowPreviewButton(Long fileId) { List previewTools = getPreviewToolsForDataFile(fileId); return previewTools.size() > 0; } + + public boolean isShowQueryButton(Long fileId) { + DataFile dataFile = datafileService.find(fileId); + + if(dataFile.isRestricted() || !dataFile.isReleased() || FileUtil.isActivelyEmbargoed(dataFile)){ + return false; + } + + List fileQueryTools = getQueryToolsForDataFile(fileId); + return fileQueryTools.size() > 0; + } public List getPreviewToolsForDataFile(Long fileId) { return getCachedToolsForDataFile(fileId, ExternalTool.Type.PREVIEW); } - + public List getQueryToolsForDataFile(Long fileId) { + return getCachedToolsForDataFile(fileId, ExternalTool.Type.QUERY); + } public List getConfigureToolsForDataFile(Long fileId) { return getCachedToolsForDataFile(fileId, ExternalTool.Type.CONFIGURE); } @@ -5482,6 +5551,10 @@ public List getCachedToolsForDataFile(Long fileId, ExternalTool.Ty cachedToolsByFileId = previewToolsByFileId; externalTools = previewTools; break; + case QUERY: + cachedToolsByFileId = fileQueryToolsByFileId; + externalTools = fileQueryTools; + break; default: break; } @@ -5490,7 +5563,7 @@ public List getCachedToolsForDataFile(Long fileId, ExternalTool.Ty return cachedTools; } DataFile dataFile = datafileService.find(fileId); - cachedTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + cachedTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); cachedToolsByFileId.put(fileId, cachedTools); //add to map so we don't have to do the lifting again return cachedTools; } @@ -5552,6 +5625,10 @@ public void clearSelection() { } public void fileListingPaginatorListener(PageEvent event) { + // Changing to a new page of files - set this so we can ignore changes to the + // presort checkboxes. (This gets called before the set calls for the presorts + // get called.) + isPageFlip=true; setFilePaginatorPage(event.getPage()); } @@ -5668,52 +5745,34 @@ public boolean isSomeVersionArchived() { return someVersionArchived; } - private static Date getFileDateToCompare(FileMetadata fileMetadata) { - DataFile datafile = fileMetadata.getDataFile(); - - if (datafile.isReleased()) { - return datafile.getPublicationDate(); - } - - return datafile.getCreateDate(); - } - - private static final Comparator compareByLabelZtoA = new Comparator() { - @Override - public int compare(FileMetadata o1, FileMetadata o2) { - return o2.getLabel().toUpperCase().compareTo(o1.getLabel().toUpperCase()); + public boolean isTagPresort() { + return this.tagPresort; } - }; - private static final Comparator compareByNewest = new Comparator() { - @Override - public int compare(FileMetadata o1, FileMetadata o2) { - return getFileDateToCompare(o2).compareTo(getFileDateToCompare(o1)); + public void setTagPresort(boolean tagPresort) { + // Record the new value + newTagPresort = tagPresort && (null != getSortOrder()); + // If this is not a page flip, it should be a real change to the presort + // boolean that we should use. 
+ if (!isPageFlip) { + this.tagPresort = tagPresort && (null != getSortOrder()); + } } - }; - private static final Comparator compareByOldest = new Comparator() { - @Override - public int compare(FileMetadata o1, FileMetadata o2) { - return getFileDateToCompare(o1).compareTo(getFileDateToCompare(o2)); + public boolean isFolderPresort() { + return this.folderPresort; } - }; - private static final Comparator compareBySize = new Comparator() { - @Override - public int compare(FileMetadata o1, FileMetadata o2) { - return (new Long(o1.getDataFile().getFilesize())).compareTo(new Long(o2.getDataFile().getFilesize())); + public void setFolderPresort(boolean folderPresort) { + //Record the new value + newFolderPresort = folderPresort && orderByFolder(); + // If this is not a page flip, it should be a real change to the presort + // boolean that we should use. + if (!isPageFlip) { + this.folderPresort = folderPresort && orderByFolder(); + } } - }; - private static final Comparator compareByType = new Comparator() { - @Override - public int compare(FileMetadata o1, FileMetadata o2) { - String type1 = StringUtil.isEmpty(o1.getDataFile().getFriendlyType()) ? "" : o1.getDataFile().getContentType(); - String type2 = StringUtil.isEmpty(o2.getDataFile().getFriendlyType()) ? "" : o2.getDataFile().getContentType(); - return type1.compareTo(type2); - } - }; public void explore(ExternalTool externalTool) { ApiToken apiToken = null; @@ -5793,7 +5852,7 @@ public Set> getMetadataLanguages() { } public List getVocabScripts() { - return fieldService.getVocabScripts(settingsWrapper.getCVocConf()); + return fieldService.getVocabScripts(settingsWrapper.getCVocConf(false)); } public String getFieldLanguage(String languages) { @@ -6042,8 +6101,7 @@ public boolean downloadingRestrictedFiles() { } return false; } - - + //Determines whether this Dataset uses a public store and therefore doesn't support embargoed or restricted files public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); @@ -6062,4 +6120,40 @@ public void startGlobusTransfer() { } PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) 
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } + + /** + * Add Signposting + * + * @return String + */ + + String signpostingLinkHeader = null; + + public String getSignpostingLinkHeader() { + if (!workingVersion.isReleased()) { + return null; + } + if (signpostingLinkHeader == null) { + SignpostingResources sr = new SignpostingResources(systemConfig, workingVersion, + JvmSettings.SIGNPOSTING_LEVEL1_AUTHOR_LIMIT.lookupOptional().orElse(""), + JvmSettings.SIGNPOSTING_LEVEL1_ITEM_LIMIT.lookupOptional().orElse("")); + signpostingLinkHeader = sr.getLinks(); + } + return signpostingLinkHeader; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetRelMaterial.java b/src/main/java/edu/harvard/iq/dataverse/DatasetRelMaterial.java index f432e4f5bbf..53ea62f566a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetRelMaterial.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetRelMaterial.java @@ -6,14 +6,14 @@ package edu.harvard.iq.dataverse; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Version; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Version; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 91ec050fe5c..52eb5868c35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -30,21 +31,21 @@ import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.Asynchronous; -import javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.ejb.Stateless; -import javax.ejb.TransactionAttribute; -import javax.ejb.TransactionAttributeType; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.StoredProcedureQuery; -import javax.persistence.TypedQuery; -import org.apache.commons.lang3.RandomStringUtils; -import org.ocpsoft.common.util.Strings; +import jakarta.ejb.Asynchronous; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.LockModeType; +import jakarta.persistence.NoResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.StoredProcedureQuery; +import jakarta.persistence.TypedQuery; +import org.apache.commons.lang3.StringUtils; 
/** * @@ -105,6 +106,38 @@ public Dataset find(Object pk) { return em.find(Dataset.class, pk); } + /** + * Retrieve a dataset with the deep underlying structure in one query execution. + * This is a more optimal choice when accessing files of a dataset. + * In a contrast, the find() method does not pre-fetch the file objects and results in point queries when accessing these objects. + * Since the files have a deep structure, many queries can be prevented by using the findDeep() method, especially for large datasets + * containing many files, and when iterating over all the files. + * When you are not going to access the file objects, the default find() method is better because of the lazy loading. + * @return a dataset with pre-fetched file objects + */ + public Dataset findDeep(Object pk) { + return (Dataset) em.createNamedQuery("Dataset.findById") + .setParameter("id", pk) + // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files + .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") + .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "o.files.dataTables") + .setHint("eclipselink.left-join-fetch", "o.files.auxiliaryFiles") + .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") + .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories") + //.setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses") + .setHint("eclipselink.left-join-fetch", "o.files.embargo") + .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") + .setHint("eclipselink.left-join-fetch", "o.files.owner") + .setHint("eclipselink.left-join-fetch", "o.files.releaseUser") + .setHint("eclipselink.left-join-fetch", "o.files.creator") + .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") + .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") + .getSingleResult(); + } + public List findByOwnerId(Long ownerId) { return findByOwnerId(ownerId, false); } @@ -199,8 +232,10 @@ public List findAllUnindexed() { } //Used in datasets listcurationstatus API - public List findAllUnpublished() { - return em.createQuery("SELECT object(o) FROM Dataset o, DvObject d WHERE d.id=o.id and d.publicationDate IS null ORDER BY o.id ASC", Dataset.class).getResultList(); + public List findAllWithDraftVersion() { + TypedQuery query = em.createQuery("SELECT object(d) FROM Dataset d, DatasetVersion v WHERE d.id=v.dataset.id and v.versionState=:state ORDER BY d.id ASC", Dataset.class); + query.setParameter("state", VersionState.DRAFT); + return query.getResultList(); } /** @@ -280,12 +315,12 @@ public Dataset merge( Dataset ds ) { } public Dataset findByGlobalId(String globalId) { - Dataset retVal = (Dataset) dvObjectService.findByGlobalId(globalId, "Dataset"); + Dataset retVal = (Dataset) dvObjectService.findByGlobalId(globalId, DvObject.DType.Dataset); if (retVal != null){ return retVal; } else { //try to find with alternative PID - return (Dataset) dvObjectService.findByGlobalId(globalId, "Dataset", true); + return (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset); } } @@ -295,7 +330,7 @@ public Dataset findByGlobalId(String globalId) { * in the dataset components, a ConstraintViolationException will be thrown, * which can be further parsed to detect the 
specific offending values. * @param id the id of the dataset - * @throws javax.validation.ConstraintViolationException + * @throws ConstraintViolationException */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) @@ -316,85 +351,11 @@ public void instantiateDatasetInNewTransaction(Long id, boolean includeVariables } } - public String generateDatasetIdentifier(Dataset dataset, GlobalIdServiceBean idServiceBean) { - String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); - - switch (identifierType) { - case "randomString": - return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); - case "storedProcGenerated": - return generateIdentifierFromStoredProcedure(dataset, idServiceBean, shoulder); - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); - } - } - private String generateIdentifierAsRandomString(Dataset dataset, GlobalIdServiceBean idServiceBean, String shoulder) { - String identifier = null; - do { - identifier = shoulder + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); - } while (!isIdentifierLocallyUnique(identifier, dataset)); - - return identifier; - } - - private String generateIdentifierFromStoredProcedure(Dataset dataset, GlobalIdServiceBean idServiceBean, String shoulder) { - - String identifier; - do { - StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); - query.execute(); - String identifierFromStoredProcedure = (String) query.getOutputParameterValue(1); - // some diagnostics here maybe - is it possible to determine that it's failing - // because the stored procedure hasn't been created in the database? - if (identifierFromStoredProcedure == null) { - return null; - } - identifier = shoulder + identifierFromStoredProcedure; - } while (!isIdentifierLocallyUnique(identifier, dataset)); - - return identifier; - } - - /** - * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network) also check for duplicate - * in EZID if needed - * @param userIdentifier - * @param dataset - * @param persistentIdSvc - * @return {@code true} if the identifier is unique, {@code false} otherwise. - */ - public boolean isIdentifierUnique(String userIdentifier, Dataset dataset, GlobalIdServiceBean persistentIdSvc) { - if ( ! isIdentifierLocallyUnique(userIdentifier, dataset) ) return false; // duplication found in local database - - // not in local DB, look in the persistent identifier service - try { - return ! 
persistentIdSvc.alreadyExists(dataset); - } catch (Exception e){ - //we can live with failure - means identifier not found remotely - } - - return true; - } - - public boolean isIdentifierLocallyUnique(Dataset dataset) { - return isIdentifierLocallyUnique(dataset.getIdentifier(), dataset); - } - - public boolean isIdentifierLocallyUnique(String identifier, Dataset dataset) { - return em.createNamedQuery("Dataset.findByIdentifierAuthorityProtocol") - .setParameter("identifier", identifier) - .setParameter("authority", dataset.getAuthority()) - .setParameter("protocol", dataset.getProtocol()) - .getResultList().isEmpty(); - } public Long getMaximumExistingDatafileIdentifier(Dataset dataset) { //Cannot rely on the largest table id having the greatest identifier counter - long zeroFiles = new Long(0); + long zeroFiles = 0L; Long retVal = zeroFiles; Long testVal; List idResults; @@ -411,7 +372,7 @@ public Long getMaximumExistingDatafileIdentifier(Dataset dataset) { for (Object raw: idResults){ String identifier = (String) raw; identifier = identifier.substring(identifier.lastIndexOf("/") + 1); - testVal = new Long(identifier) ; + testVal = Long.valueOf(identifier) ; if (testVal > retVal){ retVal = testVal; } @@ -437,7 +398,7 @@ public DatasetVersionUser getDatasetVersionUser(DatasetVersion version, User use query.setParameter("userId", au.getId()); try { return query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { return null; } } @@ -552,7 +513,7 @@ public List listLocks(DatasetLock.Reason lockType, AuthenticatedUse } try { return query.getResultList(); - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { return null; } } @@ -633,7 +594,7 @@ public Map getArchiveDescriptionsForHarvestedDatasets(Set da return null; } - String datasetIdStr = Strings.join(datasetIds, ", "); + String datasetIdStr = StringUtils.join(datasetIds, ", "); String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; List searchResults; @@ -781,10 +742,10 @@ public void exportAllDatasets(boolean forceReExport) { countAll++; try { recordService.exportAllFormatsInNewTransaction(dataset); - exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString()); + exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString()); countSuccess++; } catch (Exception ex) { - exportLogger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString() + "; " + ex.getMessage()); + exportLogger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex); countError++; } } @@ -801,7 +762,6 @@ public void exportAllDatasets(boolean forceReExport) { } } - @Asynchronous public void reExportDatasetAsync(Dataset dataset) { @@ -810,20 +770,20 @@ public void reExportDatasetAsync(Dataset dataset) { public void exportDataset(Dataset dataset, boolean forceReExport) { if (dataset != null) { - // Note that the logic for handling a dataset is similar to what is implemented in exportAllDatasets, + // Note that the logic for handling a dataset is similar to what is implemented in exportAllDatasets, // but when only one dataset is exported we do not log in a separate export logging file if (dataset.isReleased() && dataset.getReleasedVersion() != null && 
!dataset.isDeaccessioned()) { - // can't trust dataset.getPublicationDate(), no. + // can't trust dataset.getPublicationDate(), no. Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! :) if (forceReExport || (publicationDate != null && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(publicationDate)))) { try { recordService.exportAllFormatsInNewTransaction(dataset); - logger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString()); + logger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString()); } catch (Exception ex) { - logger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString() + "; " + ex.getMessage()); + logger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex); } } } @@ -831,13 +791,9 @@ public void exportDataset(Dataset dataset, boolean forceReExport) { } - public String getReminderString(Dataset dataset, boolean canPublishDataset) { - return getReminderString( dataset, canPublishDataset, false); - } - //get a string to add to save success message //depends on page (dataset/file) and user privleges - public String getReminderString(Dataset dataset, boolean canPublishDataset, boolean filePage) { + public String getReminderString(Dataset dataset, boolean canPublishDataset, boolean filePage, boolean isValid) { String reminderString; @@ -863,6 +819,10 @@ public String getReminderString(Dataset dataset, boolean canPublishDataset, bool } } + if (!isValid) { + reminderString = reminderString + "
" + BundleUtil.getStringFromBundle("dataset.message.incomplete.warning") + ""; + } + if (reminderString != null) { return reminderString; } else { @@ -1019,7 +979,7 @@ public void obtainPersistentIdentifiersForDatafiles(Dataset dataset) { maxIdentifier++; datafile.setIdentifier(datasetIdentifier + "/" + maxIdentifier.toString()); } else { - datafile.setIdentifier(fileService.generateDataFileIdentifier(datafile, idServiceBean)); + datafile.setIdentifier(idServiceBean.generateDataFileIdentifier(datafile)); } if (datafile.getProtocol() == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetTopicClass.java b/src/main/java/edu/harvard/iq/dataverse/DatasetTopicClass.java index f253e1810a1..91a4ff3cf5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetTopicClass.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetTopicClass.java @@ -6,8 +6,8 @@ package edu.harvard.iq.dataverse; -import javax.persistence.Column; -import javax.persistence.Version; +import jakarta.persistence.Column; +import jakarta.persistence.Version; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..93f45bd288e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,11 +1,14 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.MarkupChecker; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.DataFileComparator; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -14,9 +17,7 @@ import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.Serializable; -import java.net.URL; import java.sql.Timestamp; -import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.*; @@ -24,38 +25,37 @@ import java.util.logging.Logger; import java.util.stream.Collectors; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonArrayBuilder; -import javax.json.JsonObject; -import javax.json.JsonObjectBuilder; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.OneToMany; -import javax.persistence.OneToOne; -import javax.persistence.OrderBy; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; -import javax.persistence.Transient; -import javax.persistence.UniqueConstraint; -import javax.persistence.Version; -import javax.validation.ConstraintViolation; -import javax.validation.Validation; -import 
javax.validation.Validator; -import javax.validation.ValidatorFactory; -import javax.validation.constraints.Size; +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OneToOne; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Table; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; +import jakarta.persistence.Transient; +import jakarta.persistence.UniqueConstraint; +import jakarta.persistence.Version; +import jakarta.validation.ConstraintViolation; +import jakarta.validation.Validation; +import jakarta.validation.Validator; +import jakarta.validation.constraints.Size; import org.apache.commons.lang3.StringUtils; /** @@ -66,7 +66,9 @@ @NamedQueries({ @NamedQuery(name = "DatasetVersion.findUnarchivedReleasedVersion", query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.dataset.harvestedFrom IS NULL and o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" - )}) + ), + @NamedQuery(name = "DatasetVersion.findById", + query = "SELECT o FROM DatasetVersion o LEFT JOIN FETCH o.fileMetadatas WHERE o.id=:id")}) @Entity @@ -76,6 +78,7 @@ public class DatasetVersion implements Serializable { private static final Logger logger = Logger.getLogger(DatasetVersion.class.getCanonicalName()); + private static final Validator validator = Validation.buildDefaultValidatorFactory().getValidator(); /** * Convenience comparator to compare dataset versions by their version number. @@ -242,14 +245,34 @@ public List getFileMetadatas() { } public List getFileMetadatasSorted() { - Collections.sort(fileMetadatas, FileMetadata.compareByLabel); + + /* + * fileMetadatas can sometimes be an + * org.eclipse.persistence.indirection.IndirectList When that happens, the + * comparator in the Collections.sort below is not called, possibly due to + * https://bugs.eclipse.org/bugs/show_bug.cgi?id=446236 which is Java 1.8+ + * specific Converting to an ArrayList solves the problem, but the longer term + * solution may be in avoiding the IndirectList or moving to a new version of + * the jar it is in. 
+ */ + if(!(fileMetadatas instanceof ArrayList)) { + List newFMDs = new ArrayList(); + for(FileMetadata fmd: fileMetadatas) { + newFMDs.add(fmd); + } + setFileMetadatas(newFMDs); + } + + DataFileComparator dfc = new DataFileComparator(); + Collections.sort(fileMetadatas, dfc.compareBy(true, null!=FileMetadata.getCategorySortOrder(), "name", true)); return fileMetadatas; } public List getFileMetadatasSortedByLabelAndFolder() { ArrayList fileMetadatasCopy = new ArrayList<>(); fileMetadatasCopy.addAll(fileMetadatas); - Collections.sort(fileMetadatasCopy, FileMetadata.compareByLabelAndFolder); + DataFileComparator dfc = new DataFileComparator(); + Collections.sort(fileMetadatasCopy, dfc.compareBy(true, null!=FileMetadata.getCategorySortOrder(), "name", true)); return fileMetadatasCopy; } @@ -388,7 +411,7 @@ public void setDeaccessionLink(String deaccessionLink) { } public GlobalId getDeaccessionLinkAsGlobalId() { - return new GlobalId(deaccessionLink); + return PidUtil.parseAsGlobalID(deaccessionLink); } public Date getCreateTime() { @@ -842,12 +865,26 @@ public String getDescriptionPlainText() { return MarkupChecker.stripAllTags(getDescription()); } - public List getDescriptionsPlainText() { - List plainTextDescriptions = new ArrayList<>(); + /* This method is (only) used in creating schema.org json-jd where Google requires a text description <5000 chars. + * + * @returns - a single string composed of all descriptions (joined with \n if more than one) truncated with a trailing '...' if >=5000 chars + */ + public String getDescriptionsPlainTextTruncated() { + List plainTextDescriptions = new ArrayList(); + for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - return plainTextDescriptions; + String description = String.join("\n", plainTextDescriptions); + if (description.length() >= 5000) { + int endIndex = description.substring(0, 4997).lastIndexOf(" "); + if (endIndex == -1) { + //There are no spaces so just break anyway + endIndex = 4997; + } + description = description.substring(0, endIndex) + "..."; + } + return description; } /** @@ -1352,17 +1389,14 @@ public List getUniqueGrantAgencyValues() { } /** - * @return String containing the version's series title + * @return List of Strings containing the version's series title(s) */ - public String getSeriesTitle() { + public List getSeriesTitles() { List seriesNames = getCompoundChildFieldValues(DatasetFieldConstant.series, DatasetFieldConstant.seriesName); - if (seriesNames.size() > 1) { - logger.warning("More than one series title found for datasetVersion: " + this.id); - } if (!seriesNames.isEmpty()) { - return seriesNames.get(0); + return seriesNames; } else { return null; } @@ -1674,8 +1708,6 @@ public String getSemanticVersion() { public List> validateRequired() { List> returnListreturnList = new ArrayList<>(); - ValidatorFactory factory = Validation.buildDefaultValidatorFactory(); - Validator validator = factory.getValidator(); for (DatasetField dsf : this.getFlatDatasetFields()) { dsf.setValidationMessage(null); // clear out any existing validation message Set> constraintViolations = validator.validate(dsf); @@ -1689,11 +1721,13 @@ public List> validateRequired() { return returnListreturnList; } + public boolean isValid() { + return validate().isEmpty(); + } + public Set validate() { Set returnSet = new HashSet<>(); - ValidatorFactory factory = Validation.buildDefaultValidatorFactory(); - Validator validator = factory.getValidator(); for (DatasetField dsf : 
this.getFlatDatasetFields()) { dsf.setValidationMessage(null); // clear out any existing validation message @@ -1780,7 +1814,7 @@ public String getPublicationDateAsString() { // So something will need to be modified to accommodate this. -- L.A. /** * We call the export format "Schema.org JSON-LD" and extensive Javadoc can - * be found in {@link SchemaDotOrgExporter}. + * be found in {@link edu.harvard.iq.dataverse.export.SchemaDotOrgExporter}. */ public String getJsonLd() { // We show published datasets only for "datePublished" field below. @@ -1802,27 +1836,46 @@ public String getJsonLd() { for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) { JsonObjectBuilder author = Json.createObjectBuilder(); String name = datasetAuthor.getName().getDisplayValue(); + String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { - affiliation = datasetAuthor.getAffiliation().getDisplayValue(); - } - // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization". - //author.add("@type", "Person"); - author.add("name", name); - // We are aware that the following error is thrown by https://search.google.com/structured-data/testing-tool - // "The property affiliation is not recognized by Google for an object of type Thing." - // Someone at Google has said this is ok. - // This logic could be moved into the `if (authorAffiliation != null)` block above. - if (!StringUtil.isEmpty(affiliation)) { - author.add("affiliation", affiliation); + affiliation = datasetAuthor.getAffiliation().getValue(); } - String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); - if (identifierAsUrl != null) { - // It would be valid to provide an array of identifiers for authors but we have decided to only provide one. - author.add("@id", identifierAsUrl); - author.add("identifier", identifierAsUrl); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl!=null)); + String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; + String familyName= entity.containsKey("familyName") ? 
entity.getString("familyName"):null; + + if (entity.getBoolean("isPerson")) { + // Person + author.add("@type", "Person"); + if (givenName != null) { + author.add("givenName", givenName); + } + if (familyName != null) { + author.add("familyName", familyName); + } + if (!StringUtil.isEmpty(affiliation)) { + author.add("affiliation", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } + //Currently all possible identifier URLs are for people not Organizations + if(identifierAsUrl != null) { + author.add("sameAs", identifierAsUrl); + //Legacy - not sure if these are still useful + author.add("@id", identifierAsUrl); + author.add("identifier", identifierAsUrl); + + } + } else { + // Organization + author.add("@type", "Organization"); + if (!StringUtil.isEmpty(affiliation)) { + author.add("parentOrganization", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } } + // Both cases + author.add("name", entity.getString("fullName")); + //And add to the array authors.add(author); } JsonArray authorsArray = authors.build(); @@ -1859,16 +1912,8 @@ public String getJsonLd() { job.add("dateModified", this.getPublicationDateAsString()); job.add("version", this.getVersionNumber().toString()); - JsonArrayBuilder descriptionsArray = Json.createArrayBuilder(); - List descriptions = this.getDescriptionsPlainText(); - for (String description : descriptions) { - descriptionsArray.add(description); - } - /** - * In Dataverse 4.8.4 "description" was a single string but now it's an - * array. - */ - job.add("description", descriptionsArray); + String description = this.getDescriptionsPlainTextTruncated(); + job.add("description", description); /** * "keywords" - contains subject(s), datasetkeyword(s) and topicclassification(s) @@ -1892,11 +1937,16 @@ public String getJsonLd() { job.add("keywords", keywords); /** - * citation: (multiple) related publication citation and URLs, if - * present. + * citation: (multiple) related publication citation and URLs, if present. * - * In Dataverse 4.8.4 "citation" was an array of strings but now it's an - * array of objects. + * Schema.org allows text or a CreativeWork object. Google recommends text with + * either the full citation or the PID URL. This code adds an object if we have + * the citation text for the work and/or an entry in the URL field (i.e. + * https://doi.org/...) The URL is reported as the 'url' field while the + * citation text (which would normally include the name) is reported as 'name' + * since there doesn't appear to be a better field ('text', which was used + * previously, is the actual text of the creative work). 
+ * */ List relatedPublications = getRelatedPublications(); if (!relatedPublications.isEmpty()) { @@ -1911,11 +1961,12 @@ public String getJsonLd() { JsonObjectBuilder citationEntry = Json.createObjectBuilder(); citationEntry.add("@type", "CreativeWork"); if (pubCitation != null) { - citationEntry.add("text", pubCitation); + citationEntry.add("name", pubCitation); } if (pubUrl != null) { citationEntry.add("@id", pubUrl); citationEntry.add("identifier", pubUrl); + citationEntry.add("url", pubUrl); } if (addToArray) { jsonArrayBuilder.add(citationEntry); @@ -1957,13 +2008,14 @@ public String getJsonLd() { job.add("license",DatasetUtil.getLicenseURI(this)); } + String installationBrandName = BrandingUtil.getInstallationBrandName(); + job.add("includedInDataCatalog", Json.createObjectBuilder() .add("@type", "DataCatalog") - .add("name", BrandingUtil.getRootDataverseCollectionName()) + .add("name", installationBrandName) .add("url", SystemConfig.getDataverseSiteUrlStatic()) ); - - String installationBrandName = BrandingUtil.getInstallationBrandName(); + /** * Both "publisher" and "provider" are included but they have the same * values. Some services seem to prefer one over the other. @@ -2006,13 +2058,11 @@ public String getJsonLd() { for (FileMetadata fileMetadata : fileMetadatasSorted) { JsonObjectBuilder fileObject = NullSafeJsonBuilder.jsonObjectBuilder(); String filePidUrlAsString = null; - URL filePidUrl = fileMetadata.getDataFile().getGlobalId().toURL(); - if (filePidUrl != null) { - filePidUrlAsString = filePidUrl.toString(); - } + GlobalId gid = fileMetadata.getDataFile().getGlobalId(); + filePidUrlAsString = gid != null ? gid.asURL() : null; fileObject.add("@type", "DataDownload"); fileObject.add("name", fileMetadata.getLabel()); - fileObject.add("fileFormat", fileMetadata.getDataFile().getContentType()); + fileObject.add("encodingFormat", fileMetadata.getDataFile().getContentType()); fileObject.add("contentSize", fileMetadata.getDataFile().getFilesize()); fileObject.add("description", fileMetadata.getDescription()); fileObject.add("@id", filePidUrlAsString); @@ -2021,10 +2071,8 @@ public String getJsonLd() { if (hideFilesBoolean != null && hideFilesBoolean.equals("true")) { // no-op } else { - if (FileUtil.isPubliclyDownloadable(fileMetadata)) { - String nullDownloadType = null; - fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); - } + String nullDownloadType = null; + fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); } fileArray.add(fileObject); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionConverter.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionConverter.java index 98f0d707bdc..b670fb18afc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionConverter.java @@ -5,12 +5,12 @@ */ package edu.harvard.iq.dataverse; -import javax.ejb.EJB; -import javax.enterprise.inject.spi.CDI; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.convert.Converter; -import javax.faces.convert.FacesConverter; +import jakarta.ejb.EJB; +import jakarta.enterprise.inject.spi.CDI; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.convert.Converter; 
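For orientation, here is a minimal, self-contained sketch of the author entry that the reworked getJsonLd() now emits for a person. The values are hypothetical placeholders; the real code derives givenName/familyName via PersonOrOrgUtil and only adds the identifier fields when an author identifier URL is present.

    import jakarta.json.Json;
    import jakarta.json.JsonObjectBuilder;

    public class AuthorJsonLdSketch {
        public static void main(String[] args) {
            // Hypothetical stand-ins for a parsed DatasetAuthor
            String givenName = "Ada";
            String familyName = "Lovelace";
            String affiliation = "Example University";
            String identifierAsUrl = "https://orcid.org/0000-0000-0000-0000";

            JsonObjectBuilder author = Json.createObjectBuilder()
                    .add("@type", "Person")
                    .add("givenName", givenName)
                    .add("familyName", familyName)
                    // affiliation is now a nested Organization object rather than a bare string
                    .add("affiliation", Json.createObjectBuilder()
                            .add("@type", "Organization")
                            .add("name", affiliation))
                    .add("sameAs", identifierAsUrl)
                    .add("name", givenName + " " + familyName);

            System.out.println(author.build().toString());
        }
    }

Organizations take the parallel branch ("@type": "Organization" with an optional parentOrganization), and a single truncated description string now replaces the former description array.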
+import jakarta.faces.convert.FacesConverter; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java index e844a3f1ca8..eca0c84ae84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java @@ -2,28 +2,29 @@ import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.VarGroup; -import edu.harvard.iq.dataverse.datavariable.VariableMetadata; import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.util.StringUtil; import java.util.ArrayList; import java.util.Collections; -import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.logging.Logger; import org.apache.commons.lang3.StringUtils; import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.FileUtil; - import java.util.Arrays; import java.util.Date; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; /** * * @author skraffmiller */ public final class DatasetVersionDifference { + private static final Logger logger = Logger.getLogger(DatasetVersionDifference.class.getCanonicalName()); private DatasetVersion newVersion; private DatasetVersion originalVersion; @@ -1713,4 +1714,109 @@ public void setDatasetFilesDiffList(List datasetFiles this.datasetFilesDiffList = datasetFilesDiffList; } + /* + * Static methods to compute which blocks have changes between the two + * DatasetVersions. Currently used to assess whether 'system metadatablocks' + * (protected by a separate key) have changed. (Simplified from the methods + * above that track all the individual changes) + * + */ + public static Set getBlocksWithChanges(DatasetVersion newVersion, DatasetVersion originalVersion) { + Set changedBlockSet = new HashSet(); + + // Compare Data + List newDatasetFields = new LinkedList(newVersion.getDatasetFields()); + if (originalVersion == null) { + // Every field is new, just list blocks used + Iterator dsfnIter = newDatasetFields.listIterator(); + while (dsfnIter.hasNext()) { + DatasetField dsfn = dsfnIter.next(); + if (!changedBlockSet.contains(dsfn.getDatasetFieldType().getMetadataBlock())) { + changedBlockSet.add(dsfn.getDatasetFieldType().getMetadataBlock()); + } + } + + } else { + List originalDatasetFields = new LinkedList(originalVersion.getDatasetFields()); + Iterator dsfoIter = originalDatasetFields.listIterator(); + while (dsfoIter.hasNext()) { + DatasetField dsfo = dsfoIter.next(); + boolean deleted = true; + Iterator dsfnIter = newDatasetFields.listIterator(); + + while (dsfnIter.hasNext()) { + DatasetField dsfn = dsfnIter.next(); + if (dsfo.getDatasetFieldType().equals(dsfn.getDatasetFieldType())) { + deleted = false; + if (!changedBlockSet.contains(dsfo.getDatasetFieldType().getMetadataBlock())) { + logger.fine("Checking " + dsfo.getDatasetFieldType().getName()); + if (dsfo.getDatasetFieldType().isPrimitive()) { + if (fieldsAreDifferent(dsfo, dsfn, false)) { + logger.fine("Adding block for " + dsfo.getDatasetFieldType().getName()); + changedBlockSet.add(dsfo.getDatasetFieldType().getMetadataBlock()); + } + } else { + if (fieldsAreDifferent(dsfo, dsfn, true)) { + logger.fine("Adding block for " + dsfo.getDatasetFieldType().getName()); + changedBlockSet.add(dsfo.getDatasetFieldType().getMetadataBlock()); + } + } + } + dsfnIter.remove(); + break; // if found go to 
next dataset field + } + } + + if (deleted) { + logger.fine("Adding block for deleted " + dsfo.getDatasetFieldType().getName()); + changedBlockSet.add(dsfo.getDatasetFieldType().getMetadataBlock()); + } + dsfoIter.remove(); + } + // Only fields left are non-matching ones but they may be empty + for (DatasetField dsfn : newDatasetFields) { + if (!dsfn.isEmpty()) { + logger.fine("Adding block for added " + dsfn.getDatasetFieldType().getName()); + changedBlockSet.add(dsfn.getDatasetFieldType().getMetadataBlock()); + } + } + } + return changedBlockSet; + } + + private static boolean fieldsAreDifferent(DatasetField originalField, DatasetField newField, boolean compound) { + String originalValue = ""; + String newValue = ""; + + if (compound) { + for (DatasetFieldCompoundValue datasetFieldCompoundValueOriginal : originalField + .getDatasetFieldCompoundValues()) { + int loopIndex = 0; + if (newField.getDatasetFieldCompoundValues().size() >= loopIndex + 1) { + for (DatasetField dsfo : datasetFieldCompoundValueOriginal.getChildDatasetFields()) { + if (!dsfo.getDisplayValue().isEmpty()) { + originalValue += dsfo.getDisplayValue() + ", "; + } + } + for (DatasetField dsfn : newField.getDatasetFieldCompoundValues().get(loopIndex) + .getChildDatasetFields()) { + if (!dsfn.getDisplayValue().isEmpty()) { + newValue += dsfn.getDisplayValue() + ", "; + } + } + if (!originalValue.trim().equals(newValue.trim())) { + return true; + } + } + loopIndex++; + } + } else { + originalValue = originalField.getDisplayValue(); + newValue = newField.getDisplayValue(); + if (!originalValue.equalsIgnoreCase(newValue)) { + return true; + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionNoteValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionNoteValidator.java index c086fed3b10..a5ea487a68f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionNoteValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionNoteValidator.java @@ -6,8 +6,8 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.BundleUtil; -import javax.validation.ConstraintValidator; -import javax.validation.ConstraintValidatorContext; +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 23fc1961b7d..28243c37eee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.ingest.IngestUtil; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -12,7 +13,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.io.IOException; + import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -21,22 +22,21 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; -import 
javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonObjectBuilder; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.TypedQuery; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.TypedQuery; import org.apache.commons.lang3.StringUtils; -import org.apache.solr.client.solrj.SolrServerException; - + /** * * @author skraffmiller @@ -153,6 +153,21 @@ public DatasetVersion getDatasetVersion(){ public DatasetVersion find(Object pk) { return em.find(DatasetVersion.class, pk); } + + public DatasetVersion findDeep(Object pk) { + return (DatasetVersion) em.createNamedQuery("DatasetVersion.findById") + .setParameter("id", pk) + // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.ingestRequest") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.dataTables") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.fileCategories") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.embargo") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.datasetVersion") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.releaseUser") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.creator") + .getSingleResult(); + } public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendlyVersionNumber) { Long majorVersionNumber = null; @@ -180,7 +195,7 @@ public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendl query.setParameter("majorVersionNumber", majorVersionNumber); query.setParameter("minorVersionNumber", minorVersionNumber); foundDatasetVersion = (DatasetVersion) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { logger.warning("no ds version found: " + datasetId + " " + friendlyVersionNumber); // DO nothing, just return null. } @@ -208,7 +223,7 @@ public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendl } } return retVal; - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { logger.warning("no ds version found: " + datasetId + " " + friendlyVersionNumber); // DO nothing, just return null. 
} @@ -435,7 +450,7 @@ private DatasetVersion getDatasetVersionByQuery(String queryString){ msg("Found: " + ds); return ds; - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { msg("DatasetVersion not found: " + queryString); logger.log(Level.FINE, "DatasetVersion not found: {0}", queryString); return null; @@ -559,7 +574,7 @@ public RetrieveDatasetVersionResponse retrieveDatasetVersionByPersistentId(Strin */ GlobalId parsedId; try{ - parsedId = new GlobalId(persistentId); // [ protocol, authority, identifier] + parsedId = PidUtil.parseAsGlobalID(persistentId); // [ protocol, authority, identifier] } catch (IllegalArgumentException e){ logger.log(Level.WARNING, "Failed to parse persistentID: {0}", persistentId); return null; @@ -892,7 +907,7 @@ public void populateDatasetSearchCard(SolrSearchResult solrSearchResult) { if (searchResult.length == 5) { Dataset datasetEntity = new Dataset(); String globalIdentifier = solrSearchResult.getIdentifier(); - GlobalId globalId = new GlobalId(globalIdentifier); + GlobalId globalId = PidUtil.parseAsGlobalID(globalIdentifier); datasetEntity.setProtocol(globalId.getProtocol()); datasetEntity.setAuthority(globalId.getAuthority()); @@ -1117,13 +1132,7 @@ public JsonObjectBuilder fixMissingUnf(String datasetVersionId, boolean forceRec // reindexing the dataset, to make sure the new UNF is in SOLR: boolean doNormalSolrDocCleanUp = true; - try { - Future indexingResult = indexService.indexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post UNF update indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + datasetVersion.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, datasetVersion.getDataset()); - } + indexService.asyncIndexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); return info; } @@ -1207,7 +1216,7 @@ public List getUnarchivedDatasetVersions(){ try { List dsl = em.createNamedQuery("DatasetVersion.findUnarchivedReleasedVersion", DatasetVersion.class).getResultList(); return dsl; - } catch (javax.persistence.NoResultException e) { + } catch (NoResultException e) { logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}"); return null; } catch (EJBException e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java index d09457c86bf..55b98c178bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java @@ -6,25 +6,21 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.MarkupChecker; -import edu.harvard.iq.dataverse.util.StringUtil; + import java.io.Serializable; -import java.sql.Timestamp; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Collections; import java.util.Comparator; import java.util.Date; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.TreeMap; -import static java.util.stream.Collectors.toList; -import javax.ejb.EJB; -import javax.faces.view.ViewScoped; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; + +import jakarta.ejb.EJB; +import jakarta.faces.view.ViewScoped; +import 
jakarta.inject.Inject; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; /** * @@ -35,6 +31,9 @@ public class DatasetVersionUI implements Serializable { @EJB DataverseServiceBean dataverseService; + @Inject + SettingsWrapper settingsWrapper; + @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -400,6 +399,9 @@ public void setMetadataValueBlocks(DatasetVersion datasetVersion) { //TODO: A lot of clean up on the logic of this method metadataBlocksForView.clear(); metadataBlocksForEdit.clear(); + + List systemMDBlocks = settingsWrapper.getSystemMetadataBlocks(); + Long dvIdForInputLevel = datasetVersion.getDataset().getOwner().getId(); if (!dataverseService.find(dvIdForInputLevel).isMetadataBlockRoot()){ @@ -442,7 +444,7 @@ public void setMetadataValueBlocks(DatasetVersion datasetVersion) { if (!datasetFieldsForView.isEmpty()) { metadataBlocksForView.put(mdb, datasetFieldsForView); } - if (!datasetFieldsForEdit.isEmpty()) { + if (!datasetFieldsForEdit.isEmpty() && !systemMDBlocks.contains(mdb)) { metadataBlocksForEdit.put(mdb, datasetFieldsForEdit); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUser.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUser.java index eda62a080f8..e56fad71253 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUser.java @@ -4,20 +4,20 @@ import edu.harvard.iq.dataverse.authorization.users.User; import java.io.Serializable; import java.sql.Timestamp; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; +import jakarta.persistence.Id; +import jakarta.persistence.Index; -import javax.persistence.JoinColumn; +import jakarta.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.Table; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; /** * Records the last time a {@link User} handled a {@link DatasetVersion}. 
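As a side note on the setMetadataValueBlocks() change above: blocks returned by settingsWrapper.getSystemMetadataBlocks() are still populated for viewing but are skipped when building the edit map. A rough, self-contained sketch of that filtering step, using plain strings in place of the MetadataBlock/DatasetField types (the block names below are hypothetical):

    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class SystemBlockFilterSketch {
        public static void main(String[] args) {
            // Hypothetical: block names protected as "system" metadata blocks
            List<String> systemMDBlocks = List.of("internalWorkflowBlock");

            Map<String, List<String>> allBlocks = new LinkedHashMap<>();
            allBlocks.put("citation", List.of("title", "author"));
            allBlocks.put("internalWorkflowBlock", List.of("workflowState"));

            Map<String, List<String>> metadataBlocksForEdit = new LinkedHashMap<>();
            for (Map.Entry<String, List<String>> entry : allBlocks.entrySet()) {
                // mirrors: !datasetFieldsForEdit.isEmpty() && !systemMDBlocks.contains(mdb)
                if (!entry.getValue().isEmpty() && !systemMDBlocks.contains(entry.getKey())) {
                    metadataBlocksForEdit.put(entry.getKey(), entry.getValue());
                }
            }
            System.out.println(metadataBlocksForEdit); // {citation=[title, author]}
        }
    }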
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetWidgetsPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetWidgetsPage.java index 9cc611e146a..1dd42903118 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetWidgetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetWidgetsPage.java @@ -14,10 +14,10 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; @@ -164,7 +164,7 @@ public String save() { try { DatasetThumbnail datasetThumbnailFromCommand = commandEngine.submit(updateDatasetThumbnailCommand); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.thumbnailsAndWidget.success")); - return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&faces-redirect=true"; } catch (CommandException ex) { String error = ex.getLocalizedMessage(); /** @@ -179,7 +179,7 @@ public String save() { public String cancel() { logger.fine("cancel clicked"); - return "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&faces-redirect=true"; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index bc8716b6129..682c1dc6744 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.DataverseRole; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -13,29 +12,28 @@ import java.util.List; import java.util.Objects; import java.util.Set; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.FetchType; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.JoinTable; -import javax.persistence.ManyToMany; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.OneToMany; -import javax.persistence.OneToOne; -import javax.persistence.OrderBy; -import javax.persistence.Table; -import javax.persistence.Transient; -import javax.validation.constraints.NotNull; -import javax.validation.constraints.Pattern; -import javax.validation.constraints.Size; - -import org.apache.commons.lang3.StringUtils; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.FetchType; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.JoinTable; +import jakarta.persistence.ManyToMany; +import 
jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OneToOne; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Table; +import jakarta.persistence.Transient; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; + import org.hibernate.validator.constraints.NotBlank; import org.hibernate.validator.constraints.NotEmpty; @@ -590,8 +588,34 @@ public void setCitationDatasetFieldTypes(List citationDatasetF this.citationDatasetFieldTypes = citationDatasetFieldTypes; } - + /** + * @Note: this setting is Nullable, with {@code null} indicating that the + * desired behavior is not explicitly configured for this specific collection. + * See the comment below. + */ + @Column(nullable = true) + private Boolean filePIDsEnabled; + /** + * Specifies whether the PIDs for Datafiles should be registered when publishing + * datasets in this Collection, if the behavior is explicitly configured. + * @return {@code Boolean.TRUE} if explicitly enabled, {@code Boolean.FALSE} if explicitly disabled. + * {@code null} indicates that the behavior is not explicitly defined, in which + * case the behavior should follow the explicit configuration of the first + * direct ancestor collection, or the instance-wide configuration, if none + * present. + * @Note: If present, this configuration therefore by default applies to all + * the sub-collections, unless explicitly overwritten there. + * @author landreev + */ + public Boolean getFilePIDsEnabled() { + return filePIDsEnabled; + } + + public void setFilePIDsEnabled(boolean filePIDsEnabled) { + this.filePIDsEnabled = filePIDsEnabled; + } + public List getDataverseFacets() { return getDataverseFacets(false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java b/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java index 46021ddbc9b..d77767985eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java @@ -7,15 +7,15 @@ import java.io.Serializable; import java.util.Objects; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; import edu.harvard.iq.dataverse.validation.ValidateEmail; import org.hibernate.validator.constraints.NotBlank; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseConverter.java b/src/main/java/edu/harvard/iq/dataverse/DataverseConverter.java index 7d09c300dde..d802117043b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseConverter.java @@ -6,12 +6,12 @@ package edu.harvard.iq.dataverse; -import javax.ejb.EJB; -import javax.enterprise.inject.spi.CDI; -import javax.faces.component.UIComponent; -import 
javax.faces.context.FacesContext; -import javax.faces.convert.Converter; -import javax.faces.convert.FacesConverter; +import jakarta.ejb.EJB; +import jakarta.enterprise.inject.spi.CDI; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.convert.Converter; +import jakarta.faces.convert.FacesConverter; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java index bfd465b8f54..768c2308e50 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java @@ -8,16 +8,16 @@ import java.io.Serializable; import java.util.Objects; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.Table; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFacetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFacetServiceBean.java index 67bf6a820e2..5c77989f6d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFacetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFacetServiceBean.java @@ -2,11 +2,11 @@ import edu.harvard.iq.dataverse.util.LruCache; import java.util.List; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java index 662ee74c3bf..39ad6ca9520 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java @@ -2,16 +2,16 @@ import java.io.Serializable; import java.util.Objects; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.Table; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java 
b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java index 92b1ff7c2cf..c4749be0cb3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java @@ -6,17 +6,17 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.Table; -import javax.persistence.UniqueConstraint; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; +import jakarta.persistence.UniqueConstraint; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java index 42a1290fdbd..66c700f59ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java @@ -7,13 +7,13 @@ import edu.harvard.iq.dataverse.util.LruCache; import java.util.List; -import java.util.logging.Logger; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; + +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java b/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java index 1e1353a11fc..389b85c19d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java @@ -19,11 +19,11 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.context.FacesContext; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.context.FacesContext; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingDataverse.java b/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingDataverse.java index 788308dce1e..3030922ea5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingDataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingDataverse.java @@ -7,18 +7,18 @@ import java.io.Serializable; import java.util.Date; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import 
javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.OneToOne; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingServiceBean.java index c823deddb64..834ff96e392 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseLinkingServiceBean.java @@ -8,13 +8,13 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.TypedQuery; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.TypedQuery; /** * @@ -66,7 +66,7 @@ public DataverseLinkingDataverse findDataverseLinkingDataverse(Long dataverseId, .setParameter("dataverseId", dataverseId) .setParameter("linkingDataverseId", linkingDataverseId) .getSingleResult(); - } catch (javax.persistence.NoResultException e) { + } catch (jakarta.persistence.NoResultException e) { logger.fine("No DataverseLinkingDataverse found for dataverseId " + dataverseId + " and linkedDataverseId " + linkingDataverseId); return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java b/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java index a2659b81974..c93144b2e97 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java @@ -1,13 +1,13 @@ package edu.harvard.iq.dataverse; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; import java.io.Serializable; import java.util.Objects; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index b48ff725e1e..daf33f444d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -28,27 +28,26 @@ import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.List; -import javax.ejb.EJB; -import javax.faces.application.FacesMessage; -import javax.faces.context.FacesContext; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.context.FacesContext; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; +import jakarta.faces.component.UIComponent; +import jakarta.faces.component.UIInput; import org.primefaces.model.DualListModel; -import javax.ejb.EJBException; -import javax.faces.event.ValueChangeEvent; -import javax.faces.model.SelectItem; +import jakarta.ejb.EJBException; +import jakarta.faces.event.ValueChangeEvent; +import jakarta.faces.model.SelectItem; import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; import org.primefaces.PrimeFaces; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseRequestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseRequestServiceBean.java index e193b535412..58a3837dbf9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseRequestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseRequestServiceBean.java @@ -1,11 +1,11 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import javax.annotation.PostConstruct; -import javax.enterprise.context.RequestScoped; -import javax.inject.Inject; -import javax.inject.Named; -import javax.servlet.http.HttpServletRequest; +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.servlet.http.HttpServletRequest; /** * The service bean to go to when one needs the current {@link DataverseRequest}. 
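Most of the edits in this stretch are the mechanical javax.* to jakarta.* package rename that comes with the move to the Jakarta EE 9+ namespace: the API classes and annotations keep their names, only the top-level package changes. A minimal illustrative bean using the renamed imports (the bean itself is hypothetical and not part of this changeset; the persistence unit name is the one used elsewhere in these files):

    import jakarta.ejb.Stateless;                 // was javax.ejb.Stateless
    import jakarta.inject.Named;                  // was javax.inject.Named
    import jakarta.persistence.EntityManager;     // was javax.persistence.EntityManager
    import jakarta.persistence.PersistenceContext;

    @Stateless
    @Named
    public class ExampleCountServiceBean {
        @PersistenceContext(unitName = "VDCNet-ejbPU")
        private EntityManager em;

        public long countDvObjects() {
            return em.createQuery("SELECT COUNT(o) FROM DvObject o", Long.class).getSingleResult();
        }
    }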
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java index 9d09d0580e2..78d5eaf3414 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java @@ -17,13 +17,13 @@ import java.util.Set; import java.util.logging.Logger; import java.util.stream.Collectors; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.TypedQuery; -//import javax.validation.constraints.NotNull; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.TypedQuery; +//import jakarta.validation.constraints.NotNull; /** * @@ -303,7 +303,7 @@ public Set availableRoles(Long dvId) { Set roles = dv.getRoles(); roles.addAll(findBuiltinRoles()); - while (!dv.isEffectivelyPermissionRoot()) { + while (dv.getOwner() != null) { dv = dv.getOwner(); roles.addAll(dv.getRoles()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index e092f209acd..7194a1ef31e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -30,18 +30,18 @@ import java.util.Map; import java.util.logging.Logger; import java.util.Properties; -import java.util.concurrent.Future; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Inject; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArrayBuilder; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.NonUniqueResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.TypedQuery; + +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.NonUniqueResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.TypedQuery; import org.apache.solr.client.solrj.SolrServerException; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseSession.java b/src/main/java/edu/harvard/iq/dataverse/DataverseSession.java index c6016939c08..e8d76e1825e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseSession.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseSession.java @@ -18,13 +18,13 @@ import java.util.List; import java.util.Locale; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.enterprise.context.SessionScoped; -import javax.faces.context.FacesContext; -import javax.inject.Inject; -import javax.inject.Named; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpSession; +import jakarta.ejb.EJB; +import jakarta.enterprise.context.SessionScoped; +import jakarta.faces.context.FacesContext; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpSession; /** * diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java b/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java index 0c6341db485..539669328a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java @@ -8,16 +8,16 @@ import java.io.Serializable; import java.util.Objects; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.OneToOne; -import javax.persistence.Table; +import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DefaultValueSet.java b/src/main/java/edu/harvard/iq/dataverse/DefaultValueSet.java index ad48f15fc54..a2dc785c470 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DefaultValueSet.java +++ b/src/main/java/edu/harvard/iq/dataverse/DefaultValueSet.java @@ -8,13 +8,13 @@ import java.io.Serializable; import java.util.List; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.OneToMany; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.OneToMany; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 09a2ef85893..9e7f3f3fe96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -1,14 +1,17 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.sql.Timestamp; import java.text.SimpleDateFormat; -import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.Objects; import java.util.Set; -import javax.persistence.*; +import java.util.logging.Logger; + +import jakarta.persistence.*; /** * Base of the object hierarchy for "anything that can be inside a dataverse". 
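One behavioral change worth calling out from DataverseRoleServiceBean above: availableRoles() no longer stops at the first effectively-permission-root collection but climbs the owner chain to the root, accumulating roles defined at every ancestor. A small self-contained sketch of that traversal, with hypothetical stand-in types:

    import java.util.HashSet;
    import java.util.Set;

    public class RoleChainSketch {
        // Hypothetical minimal stand-ins for Dataverse and DataverseRole
        static class Node {
            Node owner;
            Set<String> roles = new HashSet<>();
            Node(Node owner) { this.owner = owner; }
        }

        // Mirrors the updated loop: climb via getOwner() until the root is reached
        static Set<String> availableRoles(Node dv) {
            Set<String> roles = new HashSet<>(dv.roles);
            while (dv.owner != null) {
                dv = dv.owner;
                roles.addAll(dv.roles);
            }
            return roles;
        }

        public static void main(String[] args) {
            Node root = new Node(null);
            root.roles.add("admin");
            Node child = new Node(root);
            child.roles.add("curator");
            System.out.println(availableRoles(child)); // prints both roles, e.g. [admin, curator]
        }
    }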
@@ -26,9 +29,13 @@ query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), @NamedQuery(name = "DvObject.findByGlobalId", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByGlobalId", + query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByAlternativeGlobalId", query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByAlternativeGlobalId", + query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"), @@ -51,10 +58,19 @@ uniqueConstraints = {@UniqueConstraint(columnNames = {"authority,protocol,identifier"}),@UniqueConstraint(columnNames = {"owner_id,storageidentifier"})}) public abstract class DvObject extends DataverseEntity implements java.io.Serializable { - public static final String DATAVERSE_DTYPE_STRING = "Dataverse"; - public static final String DATASET_DTYPE_STRING = "Dataset"; - public static final String DATAFILE_DTYPE_STRING = "DataFile"; - public static final List DTYPE_LIST = Arrays.asList(DATAVERSE_DTYPE_STRING, DATASET_DTYPE_STRING, DATAFILE_DTYPE_STRING); + private static final Logger logger = Logger.getLogger(DvObject.class.getCanonicalName()); + + public enum DType { + Dataverse("Dataverse"), Dataset("Dataset"),DataFile("DataFile"); + + String dtype; + DType(String dt) { + dtype = dt; + } + public String getDType() { + return dtype; + } + } public static final Visitor NamePrinter = new Visitor(){ @@ -140,6 +156,8 @@ public String visit(DataFile df) { private boolean identifierRegistered; + private transient GlobalId globalId = null; + @OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true) private Set alternativePersistentIndentifiers; @@ -272,6 +290,8 @@ public String getProtocol() { public void setProtocol(String protocol) { this.protocol = protocol; + //Remove cached value + globalId=null; } public String getAuthority() { @@ -280,6 +300,8 @@ public String getAuthority() { public void setAuthority(String authority) { this.authority = authority; + //Remove cached value + globalId=null; } public Date getGlobalIdCreateTime() { @@ -296,6 +318,8 @@ public String getIdentifier() { public void setIdentifier(String identifier) { this.identifier = identifier; + //Remove cached value + globalId=null; } public boolean isIdentifierRegistered() { @@ -306,22 +330,13 @@ public void setIdentifierRegistered(boolean identifierRegistered) { this.identifierRegistered = identifierRegistered; } - /** - * - * @return This object's global id in a string form. - * @deprecated use {@code dvobj.getGlobalId().asString()}. - */ - public String getGlobalIdString() { - final GlobalId globalId = getGlobalId(); - return globalId != null ? 
globalId.asString() : null; - } - public void setGlobalId( GlobalId pid ) { if ( pid == null ) { setProtocol(null); setAuthority(null); setIdentifier(null); } else { + //These reset globalId=null setProtocol(pid.getProtocol()); setAuthority(pid.getAuthority()); setIdentifier(pid.getIdentifier()); @@ -329,10 +344,11 @@ public void setGlobalId( GlobalId pid ) { } public GlobalId getGlobalId() { - // FIXME should return NULL when the fields are null. Currenntly, - // a lot of code depends call this method, so this fix can't be - // a part of the current PR. - return new GlobalId(getProtocol(), getAuthority(), getIdentifier()); + // Cache this + if ((globalId == null) && !(getProtocol() == null || getAuthority() == null || getIdentifier() == null)) { + globalId = PidUtil.parseAsGlobalID(getProtocol(), getAuthority(), getIdentifier()); + } + return globalId; } public abstract T accept(Visitor v); @@ -420,17 +436,7 @@ public String getAuthorString(){ } public String getTargetUrl(){ - if (this instanceof Dataverse){ - throw new UnsupportedOperationException("Not supported yet."); - } - if (this instanceof Dataset){ - return Dataset.TARGET_URL; - } - if (this instanceof DataFile){ - return DataFile.TARGET_URL; - } throw new UnsupportedOperationException("Not supported yet. New DVObject Instance?"); - } public String getYearPublishedCreated(){ diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index 6ff01ef3ea8..a322a25103e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -2,8 +2,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.util.Locale; -import javax.persistence.MappedSuperclass; +import jakarta.persistence.MappedSuperclass; import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 01b0890d588..d4219c36149 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -1,6 +1,8 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.sql.Timestamp; import java.util.ArrayList; import java.util.Date; @@ -10,17 +12,18 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.Stateless; -import javax.ejb.TransactionAttribute; -import static javax.ejb.TransactionAttributeType.REQUIRES_NEW; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.NonUniqueResultException; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import static jakarta.ejb.TransactionAttributeType.REQUIRES_NEW; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.NoResultException; +import jakarta.persistence.NonUniqueResultException; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.StoredProcedureQuery; + import org.apache.commons.lang3.StringUtils; -import org.ocpsoft.common.util.Strings; 
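Note: DvObject.getGlobalId() now parses the PID once via PidUtil.parseAsGlobalID(protocol, authority, identifier) and caches it in the transient globalId field; the protocol/authority/identifier setters null the cache so the next call re-parses. A short fragment sketching that behavior, assuming the Dataverse classes from this patch are on the classpath and a PID provider matching the "doi" protocol is configured; the identifier values are hypothetical.

    // Sketch: the cached GlobalId is rebuilt after any PID component changes.
    Dataset ds = new Dataset();
    ds.setProtocol("doi");               // hypothetical values
    ds.setAuthority("10.5072");
    ds.setIdentifier("FK2/EXAMPLE");
    GlobalId first = ds.getGlobalId();   // parsed via PidUtil.parseAsGlobalID and cached
    ds.setIdentifier("FK2/OTHER");       // setter clears the cached value
    GlobalId second = ds.getGlobalId();  // re-parsed with the new identifier on next access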
/** * Your goto bean for everything {@link DvObject}, that's not tied to any @@ -79,46 +82,108 @@ public boolean checkExists(Long id) { Long result =(Long)query.getSingleResult(); return result > 0; } - // FIXME This type-by-string has to go, in favor of passing a class parameter. - public DvObject findByGlobalId(String globalIdString, String typeString) { - return findByGlobalId(globalIdString, typeString, false); + + public DvObject findByGlobalId(String globalIdString, DvObject.DType dtype) { + try { + GlobalId gid = PidUtil.parseAsGlobalID(globalIdString); + return findByGlobalId(gid, dtype); + } catch (IllegalArgumentException iae) { + logger.fine("Invalid identifier: " + globalIdString); + return null; + } + } - // FIXME This type-by-string has to go, in favor of passing a class parameter. - public DvObject findByGlobalId(String globalIdString, String typeString, Boolean altId) { - + public DvObject findByAltGlobalId(String globalIdString, DvObject.DType dtype) { try { - GlobalId gid = new GlobalId(globalIdString); + GlobalId gid = PidUtil.parseAsGlobalID(globalIdString); + return findByAltGlobalId(gid, dtype); + } catch (IllegalArgumentException iae) { + logger.fine("Invalid alternate identifier: " + globalIdString); + return null; + } - DvObject foundDvObject = null; - try { - Query query; - if (altId) { - query = em.createNamedQuery("DvObject.findByAlternativeGlobalId"); - } else{ - query = em.createNamedQuery("DvObject.findByGlobalId"); - } - query.setParameter("identifier", gid.getIdentifier()); - query.setParameter("protocol", gid.getProtocol()); - query.setParameter("authority", gid.getAuthority()); - query.setParameter("dtype", typeString); - foundDvObject = (DvObject) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { - // (set to .info, this can fill the log file with thousands of - // these messages during a large harvest run) - logger.fine("no dvObject found: " + globalIdString); - // DO nothing, just return null. 
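Note: the old type-by-string lookups are replaced with the DvObject.DType enum introduced above, and the string-based entry points now delegate to PID-typed helpers. A minimal sketch of the new signature, assuming an injected DvObjectServiceBean and a hypothetical identifier.

    // Sketch: resolving a dataset by persistent identifier with the new enum-based lookup.
    public DvObject findDataset(DvObjectServiceBean dvObjectService) {
        String pid = "doi:10.5072/FK2/EXAMPLE"; // hypothetical identifier
        // DType.Dataset.getDType() yields the legacy dtype string "Dataset" used by the named queries;
        // an unparseable identifier is logged at fine level and returns null rather than throwing.
        return dvObjectService.findByGlobalId(pid, DvObject.DType.Dataset);
    }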
- return null; - } catch (Exception ex) { - logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage()); - return null; - } - return foundDvObject; + } - } catch (IllegalArgumentException iae) { - logger.info("Invalid identifier: " + globalIdString); + public DvObject findByGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findByGlobalId"); + return runFindByGlobalId(query, globalId, dtype); + } + + public DvObject findByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findByAlternativeGlobalId"); + return runFindByGlobalId(query, globalId, dtype); + } + + public Long findIdByGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findIdByGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + + public Long findIdByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findIdByAlternativeGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + + private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { + DvObject foundDvObject = null; + try { + query.setParameter("identifier", gid.getIdentifier()); + query.setParameter("protocol", gid.getProtocol()); + query.setParameter("authority", gid.getAuthority()); + query.setParameter("dtype", dtype.getDType()); + foundDvObject = (DvObject) query.getSingleResult(); + } catch (NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + gid.asString()); + // DO nothing, just return null. + return null; + } catch (Exception ex) { + logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage()); + return null; + } + return foundDvObject; + } + + private Long runFindIdByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { + Long foundDvObject = null; + try { + query.setParameter("identifier", gid.getIdentifier()); + query.setParameter("protocol", gid.getProtocol()); + query.setParameter("authority", gid.getAuthority()); + query.setParameter("dtype", dtype.getDType()); + foundDvObject = (Long) query.getSingleResult(); + } catch (NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + gid.asString()); + // DO nothing, just return null. 
+ return null; + } catch (Exception ex) { + logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage()); return null; } + return foundDvObject; + } + + public DvObject findByGlobalId(GlobalId globalId) { + try { + return (DvObject) em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") + .setParameter("identifier", globalId.getIdentifier()) + .setParameter("authority", globalId.getAuthority()).setParameter("protocol", globalId.getProtocol()) + .getSingleResult(); + } catch (NoResultException nre) { + return null; + } + } + + public boolean isGlobalIdLocallyUnique(GlobalId globalId) { + return em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") + .setParameter("identifier", globalId.getIdentifier()) + .setParameter("authority", globalId.getAuthority()) + .setParameter("protocol", globalId.getProtocol()) + .getResultList().isEmpty(); } public DvObject updateContentIndexTime(DvObject dvObject) { @@ -257,7 +322,7 @@ public Map getObjectPathsByIds(Set objectIds){ return null; } - String datasetIdStr = Strings.join(objectIds, ", "); + String datasetIdStr = StringUtils.join(objectIds, ", "); String qstr = "WITH RECURSIVE path_elements AS ((" + " SELECT id, owner_id FROM dvobject WHERE id in (" + datasetIdStr + "))" + @@ -317,4 +382,11 @@ public Map getObjectPathsByIds(Set objectIds){ } return ret; } + + public String generateNewIdentifierByStoredProcedure() { + StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); + query.execute(); + return (String) query.getOutputParameterValue(1); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java index 1bf6bee82eb..883baeedef4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java @@ -4,8 +4,8 @@ import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import org.apache.commons.text.StringEscapeUtils; -import javax.ejb.Stateless; -import javax.inject.Inject; +import jakarta.ejb.Stateless; +import jakarta.inject.Inject; import java.util.Arrays; import java.util.List; import java.util.Optional; diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..02a148f8cc5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -5,7 +5,9 @@ import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -31,11 +33,13 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import 
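Note: besides the typed entity lookups, DvObjectServiceBean now offers id-only queries (no entity load), a dtype-agnostic PID lookup, a local uniqueness check, and a wrapper around the "Dataset.generateIdentifierFromStoredProcedure" named stored-procedure query. A sketch of that surface, assuming an injected bean, an already-parsed GlobalId, and (for the last call) that the backing database procedure is installed for the stored-procedure identifier style.

    // Sketch: the new lookup and identifier helpers on DvObjectServiceBean.
    public void pidLookups(DvObjectServiceBean svc, GlobalId gid) {
        Long id = svc.findIdByGlobalId(gid, DvObject.DType.DataFile);  // id only, avoids loading the entity
        DvObject obj = svc.findByGlobalId(gid);                        // dtype-agnostic lookup
        boolean unclaimed = svc.isGlobalIdLocallyUnique(gid);          // true when no local DvObject uses this PID
        String next = svc.generateNewIdentifierByStoredProcedure();    // raw identifier part from the DB procedure
        System.out.println(id + " " + obj + " " + unclaimed + " " + next);
    }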
edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -53,23 +57,23 @@ import java.util.Map; import java.util.Optional; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.faces.application.FacesMessage; -import javax.faces.context.FacesContext; -import javax.faces.event.ActionEvent; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.context.FacesContext; +import jakarta.faces.event.ActionEvent; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; -import javax.json.Json; -import javax.json.JsonObject; -import javax.json.JsonObjectBuilder; -import javax.json.JsonArray; -import javax.json.JsonReader; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonArray; +import jakarta.json.JsonReader; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.io.IOUtils; import org.apache.commons.httpclient.methods.GetMethod; @@ -77,10 +81,10 @@ import java.util.Collection; import java.util.Set; import java.util.logging.Level; -import javax.faces.event.AjaxBehaviorEvent; -import javax.faces.event.FacesEvent; -import javax.servlet.ServletOutputStream; -import javax.servlet.http.HttpServletResponse; +import jakarta.faces.event.AjaxBehaviorEvent; +import jakarta.faces.event.FacesEvent; +import jakarta.servlet.ServletOutputStream; +import jakarta.servlet.http.HttpServletResponse; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.mutable.MutableBoolean; import org.primefaces.PrimeFaces; @@ -539,7 +543,7 @@ public String init() { return permissionsWrapper.notFound(); } - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); //TODO: review if we we need this check; // as getEditVersion should either return the exisiting draft or create a new one @@ -586,8 +590,7 @@ public String init() { datafileService, permissionService, commandEngine, - systemConfig, - licenseServiceBean); + systemConfig); fileReplacePageHelper = new FileReplacePageHelper(addReplaceFileHelper, dataset, @@ -890,7 +893,7 @@ private void deleteFiles(List filesForDelete) { // ToDo - FileMetadataUtil.removeFileMetadataFromList should handle these two // removes so they could be put after this if clause and the else clause could // be removed. - dataset.getEditVersion().getFileMetadatas().remove(markedForDelete); + dataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete); fileMetadatas.remove(markedForDelete); filesToBeDeleted.add(markedForDelete); @@ -907,7 +910,7 @@ private void deleteFiles(List filesForDelete) { // 1. delete the filemetadata from the local display list: FileMetadataUtil.removeFileMetadataFromList(fileMetadatas, markedForDelete); // 2. 
delete the filemetadata from the version: - FileMetadataUtil.removeFileMetadataFromList(dataset.getEditVersion().getFileMetadatas(), markedForDelete); + FileMetadataUtil.removeFileMetadataFromList(dataset.getOrCreateEditVersion().getFileMetadatas(), markedForDelete); } if (markedForDelete.getDataFile().getId() == null) { @@ -1201,7 +1204,7 @@ public String save() { */ } - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); logger.fine("working version id: " + workingVersion.getId()); if (FileEditMode.EDIT == mode && Referrer.FILE == referrer) { @@ -2425,10 +2428,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; @@ -3067,6 +3068,10 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } + + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); @@ -3106,4 +3111,18 @@ public void setFileAccessRequest(boolean fileAccessRequest) { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) 
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index b4efe7ec41d..bad8903c091 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -18,16 +18,16 @@ import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.IndexBatchServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SearchServiceBean; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -40,16 +40,16 @@ import java.util.Stack; import java.util.logging.Level; import java.util.logging.Logger; -import javax.annotation.Resource; -import javax.ejb.EJBContext; -import javax.ejb.EJBException; -import javax.ejb.TransactionAttribute; -import static javax.ejb.TransactionAttributeType.REQUIRES_NEW; -import static javax.ejb.TransactionAttributeType.SUPPORTS; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.validation.ConstraintViolation; -import javax.validation.ConstraintViolationException; +import jakarta.annotation.Resource; +import jakarta.ejb.EJBContext; +import jakarta.ejb.EJBException; +import jakarta.ejb.TransactionAttribute; +import static jakarta.ejb.TransactionAttributeType.REQUIRES_NEW; +import static jakarta.ejb.TransactionAttributeType.SUPPORTS; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.validation.ConstraintViolation; +import jakarta.validation.ConstraintViolationException; /** * An EJB capable of executing {@link Command}s in a JEE environment. 
@@ -124,6 +124,9 @@ public class EjbDataverseEngine { @EJB HandlenetServiceBean handleNet; + @EJB + PermaLinkPidProviderServiceBean permaLinkProvider; + @EJB SettingsServiceBean settings; @@ -496,6 +499,11 @@ public HandlenetServiceBean handleNet() { return handleNet; } + @Override + public PermaLinkPidProviderServiceBean permaLinkProvider() { + return permaLinkProvider; + } + @Override public SettingsServiceBean settings() { return settings; diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngineInner.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngineInner.java index d8339dce856..891fe91dc66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngineInner.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngineInner.java @@ -4,13 +4,13 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import javax.annotation.Resource; -import javax.ejb.EJBContext; -import javax.ejb.Stateless; -import javax.ejb.TransactionAttribute; -import static javax.ejb.TransactionAttributeType.REQUIRED; +import jakarta.annotation.Resource; +import jakarta.ejb.EJBContext; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import static jakarta.ejb.TransactionAttributeType.REQUIRED; -import javax.inject.Named; +import jakarta.inject.Named; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/Embargo.java b/src/main/java/edu/harvard/iq/dataverse/Embargo.java index eac83edd296..29959b9f2d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Embargo.java +++ b/src/main/java/edu/harvard/iq/dataverse/Embargo.java @@ -1,7 +1,7 @@ package edu.harvard.iq.dataverse; -import javax.persistence.*; +import jakarta.persistence.*; import edu.harvard.iq.dataverse.util.BundleUtil; diff --git a/src/main/java/edu/harvard/iq/dataverse/EmbargoServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/EmbargoServiceBean.java index afbeab404c7..d0a8d214959 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EmbargoServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/EmbargoServiceBean.java @@ -3,12 +3,12 @@ import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; import java.util.List; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalVocabularyValue.java b/src/main/java/edu/harvard/iq/dataverse/ExternalVocabularyValue.java index 3618da79630..7ebfa0302ac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalVocabularyValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalVocabularyValue.java @@ -9,13 +9,13 @@ import java.io.Serializable; import java.sql.Timestamp; import java.util.Objects; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; 
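Note: EjbDataverseEngine now exposes the new PermaLinkPidProviderServiceBean next to the existing DOI and Handle beans; the @Override on permaLinkProvider() implies CommandContext gained a matching accessor (that interface change is not shown in this section). A hedged fragment under that assumption, showing how a command body would reach the provider.

    // Sketch (fragment of a hypothetical command's execute method).
    // Assumes CommandContext declares permaLinkProvider(), as the @Override above implies.
    PermaLinkPidProviderServiceBean permaLinks = ctxt.permaLinkProvider();
    // ... use it the same way commands already use ctxt.doiDataCite() or ctxt.handleNet() ...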
+import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/FacetConverter.java b/src/main/java/edu/harvard/iq/dataverse/FacetConverter.java index 75ef62200bf..fd41315dbc0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FacetConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/FacetConverter.java @@ -5,13 +5,13 @@ */ package edu.harvard.iq.dataverse; -import javax.ejb.EJB; -import javax.enterprise.inject.spi.CDI; +import jakarta.ejb.EJB; +import jakarta.enterprise.inject.spi.CDI; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.convert.Converter; -import javax.faces.convert.FacesConverter; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.convert.Converter; +import jakarta.faces.convert.FacesConverter; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/FeaturedDataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FeaturedDataverseServiceBean.java index e7362587c36..d4d701cb02f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FeaturedDataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FeaturedDataverseServiceBean.java @@ -9,11 +9,11 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java new file mode 100644 index 00000000000..6f68815c2ca --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java @@ -0,0 +1,91 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; + +import jakarta.persistence.Column; +import jakarta.persistence.Embeddable; +import jakarta.persistence.EmbeddedId; +import jakarta.persistence.Entity; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.MapsId; +import jakarta.persistence.Table; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; +import java.io.Serializable; +import java.util.Date; + +@Entity +@Table(name = "fileaccessrequests") +public class FileAccessRequest { + @EmbeddedId + private FileAccessRequestKey id; + @ManyToOne + @MapsId("dataFile") + @JoinColumn(name = "datafile_id") + private DataFile dataFile; + @ManyToOne + @MapsId("authenticatedUser") + @JoinColumn(name = "authenticated_user_id") + private AuthenticatedUser authenticatedUser; + + @Temporal(value = TemporalType.TIMESTAMP) + @Column(name = "creation_time") + private Date creationTime; + + public FileAccessRequestKey getId() { + return id; + } + + public void setId(FileAccessRequestKey id) { + this.id = id; + } + + public DataFile getDataFile() { + return dataFile; + } + + public void setDataFile(DataFile dataFile) { + this.dataFile = dataFile; + } + + public AuthenticatedUser getAuthenticatedUser() { + return authenticatedUser; + } + + public void 
setAuthenticatedUser(AuthenticatedUser authenticatedUser) { + this.authenticatedUser = authenticatedUser; + } + + public Date getCreationTime() { + return creationTime; + } + + public void setCreationTime(Date creationTime) { + this.creationTime = creationTime; + } + + @Embeddable + public static class FileAccessRequestKey implements Serializable { + @Column(name = "datafile_id") + private Long dataFile; + @Column(name = "authenticated_user_id") + private Long authenticatedUser; + + public Long getDataFile() { + return dataFile; + } + + public void setDataFile(Long dataFile) { + this.dataFile = dataFile; + } + + public Long getAuthenticatedUser() { + return authenticatedUser; + } + + public void setAuthenticatedUser(Long authenticatedUser) { + this.authenticatedUser = authenticatedUser; + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java b/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java index e0c2b83ab65..84c033afcaf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java @@ -7,8 +7,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.validation.ConstraintValidator; -import javax.validation.ConstraintValidatorContext; +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownload.java b/src/main/java/edu/harvard/iq/dataverse/FileDownload.java index fad03d2a0a1..a79281f71f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownload.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownload.java @@ -6,18 +6,18 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; -import javax.persistence.Transient; -import javax.persistence.CascadeType; -import javax.persistence.OneToOne; -import javax.persistence.MapsId; -import javax.persistence.FetchType; -import javax.persistence.JoinColumn; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; +import jakarta.persistence.Transient; +import jakarta.persistence.CascadeType; +import jakarta.persistence.OneToOne; +import jakarta.persistence.MapsId; +import jakarta.persistence.FetchType; +import jakarta.persistence.JoinColumn; import java.util.Date; diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index ef7ed1a2010..c4b4978e0f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -18,10 +18,10 @@ import java.util.List; import java.util.Map; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import org.primefaces.PrimeFaces; //import org.primefaces.context.RequestContext; @@ -324,13 +324,12 @@ public void requestAccessIndirect() 
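Note: the new FileAccessRequest entity maps the fileaccessrequests table with a composite @EmbeddedId plus @MapsId associations to DataFile and AuthenticatedUser, and adds a creation timestamp. A minimal sketch of building one; dataFile and requestor are assumed to be managed entities, and the persist call is only illustrative since this patch adds the mapping, not the calling code.

    // Sketch: creating a FileAccessRequest for a user/file pair.
    FileAccessRequest request = new FileAccessRequest();
    FileAccessRequest.FileAccessRequestKey key = new FileAccessRequest.FileAccessRequestKey();
    key.setDataFile(dataFile.getId());
    key.setAuthenticatedUser(requestor.getId());
    request.setId(key);
    request.setDataFile(dataFile);            // @MapsId("dataFile") ties the key to this association
    request.setAuthenticatedUser(requestor);  // @MapsId("authenticatedUser") likewise
    request.setCreationTime(new java.util.Date());
    // em.persist(request);                   // persistence handled elsewhere; shown for illustration only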
{ private boolean processRequestAccess(DataFile file, Boolean sendNotification) { if (fileDownloadService.requestAccess(file.getId())) { // update the local file object so that the page properly updates - if(file.getFileAccessRequesters() == null){ - file.setFileAccessRequesters(new ArrayList()); - } - file.getFileAccessRequesters().add((AuthenticatedUser) session.getUser()); + AuthenticatedUser user = (AuthenticatedUser) session.getUser(); + file.addFileAccessRequester(user); + // create notification if necessary if (sendNotification) { - fileDownloadService.sendRequestFileAccessNotification(file, (AuthenticatedUser) session.getUser()); + fileDownloadService.sendRequestFileAccessNotification(file, user); } JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("file.accessRequested.success")); return true; diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 65e6b259bf4..e2b07717358 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -18,7 +18,6 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import java.io.IOException; @@ -29,16 +28,16 @@ import java.util.List; import java.util.UUID; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.faces.context.FacesContext; -import javax.inject.Inject; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.servlet.ServletOutputStream; -import javax.servlet.http.HttpServletResponse; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.faces.context.FacesContext; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.servlet.ServletOutputStream; +import jakarta.servlet.http.HttpServletResponse; import org.primefaces.PrimeFaces; //import org.primefaces.context.RequestContext; @@ -299,7 +298,7 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter ApiToken apiToken = null; User user = session.getUser(); DatasetVersion version = fmd.getDatasetVersion(); - if (version.isDraft() || (fmd.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fmd))) { + if (version.isDraft() || fmd.getDatasetVersion().isDeaccessioned() || (fmd.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fmd))) { apiToken = getApiToken(user); } DataFile dataFile = null; @@ -489,7 +488,7 @@ public boolean requestAccess(Long fileId) { return false; } DataFile file = datafileService.find(fileId); - if (!file.getFileAccessRequesters().contains((AuthenticatedUser)session.getUser())) { + if (!file.containsFileAccessRequestFromUser(session.getUser())) { try { commandEngine.submit(new RequestAccessCommand(dvRequestService.getDataverseRequest(), file)); return true; diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java index 6262b6204f4..461c8b14e46 100644 --- 
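Note: the page and service code no longer reach into getFileAccessRequesters() directly; DataFile now offers addFileAccessRequester(...) and containsFileAccessRequestFromUser(...). A short fragment mirroring the guard-then-add flow above; session, dataFile, and fileDownloadService are assumed to be the injected/page objects of the same names.

    // Sketch: request access only if this user has not already asked.
    AuthenticatedUser user = (AuthenticatedUser) session.getUser();
    if (!dataFile.containsFileAccessRequestFromUser(user)) {
        dataFile.addFileAccessRequester(user);
        fileDownloadService.sendRequestFileAccessNotification(dataFile, user); // optional notification
    }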
a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java @@ -13,30 +13,33 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.Date; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonArrayBuilder; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.CascadeType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.JoinTable; -import javax.persistence.ManyToMany; -import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; -import javax.persistence.OrderBy; -import javax.persistence.Table; -import javax.persistence.Transient; -import javax.persistence.Version; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.JoinTable; +import jakarta.persistence.ManyToMany; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OneToMany; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Table; +import jakarta.persistence.Transient; +import jakarta.persistence.Version; import edu.harvard.iq.dataverse.datavariable.CategoryMetadata; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -46,12 +49,12 @@ import edu.harvard.iq.dataverse.util.StringUtil; import java.util.HashSet; import java.util.Set; -import javax.validation.ConstraintViolation; -import javax.validation.Validation; -import javax.validation.Validator; -import javax.validation.ValidatorFactory; +import jakarta.validation.ConstraintViolation; +import jakarta.validation.Validation; +import jakarta.validation.Validator; +import jakarta.validation.ValidatorFactory; import org.hibernate.validator.constraints.NotBlank; -import javax.validation.constraints.Pattern; +import jakarta.validation.constraints.Pattern; /** @@ -203,6 +206,25 @@ public void setVarGroups(List varGroups) { private List fileCategories; public List getCategories() { + if (fileCategories != null) { + /* + * fileCategories can sometimes be an + * org.eclipse.persistence.indirection.IndirectList When that happens, the + * comparator in the Collections.sort below is not called, possibly due to + * https://bugs.eclipse.org/bugs/show_bug.cgi?id=446236 which is Java 1.8+ + * specific Converting to an ArrayList solves the problem, but the longer term + * solution may be in avoiding the IndirectList or moving to a new version of + * the jar it is in. 
+ */ + if (!(fileCategories instanceof ArrayList)) { + List newDFCs = new ArrayList(); + for (DataFileCategory fdc : fileCategories) { + newDFCs.add(fdc); + } + setCategories(newDFCs); + } + Collections.sort(fileCategories, FileMetadata.compareByNameWithSortCategories); + } return fileCategories; } @@ -228,7 +250,7 @@ public List getCategoriesByName() { return ret; } - for (DataFileCategory fileCategory : fileCategories) { + for (DataFileCategory fileCategory : getCategories()) { ret.add(fileCategory.getName()); } // fileCategories.stream() @@ -237,7 +259,6 @@ public List getCategoriesByName() { return ret; } - public JsonArrayBuilder getCategoryNamesAsJsonArrayBuilder() { JsonArrayBuilder builder = Json.createArrayBuilder(); @@ -537,7 +558,7 @@ public boolean compareContent(FileMetadata other){ @Override public String toString() { - return "edu.harvard.iq.dvn.core.study.FileMetadata[id=" + id + "]"; + return "edu.harvard.iq.dataverse.FileMetadata[id=" + id + "]"; } public static final Comparator compareByLabel = new Comparator() { @@ -547,28 +568,37 @@ public int compare(FileMetadata o1, FileMetadata o2) { } }; - public static final Comparator compareByLabelAndFolder = new Comparator() { + static Map categoryMap=null; + + public static void setCategorySortOrder(String categories) { + categoryMap=new HashMap(); + long i=1; + for(String cat: categories.split(",\\s*")) { + categoryMap.put(cat.toUpperCase(), i); + i++; + } + } + + public static Map getCategorySortOrder() { + return categoryMap; + } + + + public static final Comparator compareByNameWithSortCategories = new Comparator() { @Override - public int compare(FileMetadata o1, FileMetadata o2) { - String folder1 = o1.getDirectoryLabel() == null ? "" : o1.getDirectoryLabel().toUpperCase(); - String folder2 = o2.getDirectoryLabel() == null ? 
"" : o2.getDirectoryLabel().toUpperCase(); - - - // We want to the files w/ no folders appear *after* all the folders - // on the sorted list: - if ("".equals(folder1) && !"".equals(folder2)) { - return 1; - } - - if ("".equals(folder2) && !"".equals(folder1)) { - return -1; - } - - int comp = folder1.compareTo(folder2); - if (comp != 0) { - return comp; + public int compare(DataFileCategory o1, DataFileCategory o2) { + if (categoryMap != null) { + //If one is in the map and one is not, the former is first, otherwise sort by name + boolean o1InMap = categoryMap.containsKey(o1.getName().toUpperCase()); + boolean o2InMap = categoryMap.containsKey(o2.getName().toUpperCase()); + if(o1InMap && !o2InMap) { + return (-1); + } + if(!o1InMap && o2InMap) { + return 1; + } } - return o1.getLabel().toUpperCase().compareTo(o2.getLabel().toUpperCase()); + return(o1.getName().toUpperCase().compareTo(o2.getName().toUpperCase())); } }; diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 7f2c6dfca5c..49c904c3ac3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -22,16 +22,14 @@ import edu.harvard.iq.dataverse.engine.command.impl.PersistProvFreeFormCommand; import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; -import edu.harvard.iq.dataverse.export.spi.Exporter; +import io.gdcc.spi.export.ExportException; +import io.gdcc.spi.export.Exporter; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -39,8 +37,8 @@ import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; + import java.io.IOException; -import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -48,16 +46,19 @@ import java.util.List; import java.util.Set; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.validator.ValidatorException; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; -import javax.validation.ConstraintViolation; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.validator.ValidatorException; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import 
jakarta.json.JsonArray; +import jakarta.json.JsonObject; +import jakarta.json.JsonValue; +import jakarta.validation.ConstraintViolation; import org.primefaces.PrimeFaces; import org.primefaces.component.tabview.TabView; @@ -76,6 +77,7 @@ public class FilePage implements java.io.Serializable { private FileMetadata fileMetadata; private Long fileId; private String version; + private String toolType; private DataFile file; private GuestbookResponse guestbookResponse; private int selectedTabIndex; @@ -87,6 +89,7 @@ public class FilePage implements java.io.Serializable { private List configureTools; private List exploreTools; private List toolsWithPreviews; + private List queryTools; private Long datasetVersionId; /** * Have the terms been met so that the Preview tab can show the preview? @@ -125,6 +128,8 @@ public class FilePage implements java.io.Serializable { ExternalToolServiceBean externalToolService; @EJB PrivateUrlServiceBean privateUrlService; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; @Inject DataverseRequestServiceBean dvRequestService; @@ -146,7 +151,6 @@ public String init() { if (fileId != null || persistentId != null) { - // --------------------------------------- // Set the file and datasetVersion // --------------------------------------- @@ -236,13 +240,28 @@ public String init() { } configureTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.CONFIGURE, contentType); exploreTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.EXPLORE, contentType); + queryTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.QUERY, contentType); Collections.sort(exploreTools, CompareExternalToolName); toolsWithPreviews = sortExternalTools(); - if(!toolsWithPreviews.isEmpty()){ - setSelectedTool(toolsWithPreviews.get(0)); + + if (toolType != null) { + if (toolType.equals("PREVIEW")) { + if (!toolsWithPreviews.isEmpty()) { + setSelectedTool(toolsWithPreviews.get(0)); + } + } + if (toolType.equals("QUERY")) { + if (!queryTools.isEmpty()) { + setSelectedTool(queryTools.get(0)); + } + } + } else { + if (!getAllAvailableTools().isEmpty()){ + setSelectedTool(getAllAvailableTools().get(0)); + } } - } else { + } else { return permissionsWrapper.notFound(); } @@ -260,10 +279,19 @@ public String init() { private void displayPublishMessage(){ if (fileMetadata.getDatasetVersion().isDraft() && canUpdateDataset() && (canPublishDataset() || !fileMetadata.getDatasetVersion().getDataset().isLockedFor(DatasetLock.Reason.InReview))){ - JsfHelper.addWarningMessage(datasetService.getReminderString(fileMetadata.getDatasetVersion().getDataset(), canPublishDataset(), true)); + JsfHelper.addWarningMessage(datasetService.getReminderString(fileMetadata.getDatasetVersion().getDataset(), canPublishDataset(), true, isValid())); } } + public boolean isValid() { + if (!fileMetadata.getDatasetVersion().isDraft()) { + return true; + } + DatasetVersion newVersion = fileMetadata.getDatasetVersion().cloneDatasetVersion(); + newVersion.setDatasetFields(newVersion.initDatasetFields()); + return newVersion.isValid(); + } + private boolean canViewUnpublishedDataset() { return permissionsWrapper.canViewUnpublishedDataset( dvRequestService.getDataverseRequest(), fileMetadata.getDatasetVersion().getDataset()); } @@ -285,8 +313,15 @@ public void setDatasetVersionId(Long datasetVersionId) { this.datasetVersionId = datasetVersionId; } + // findPreviewTools would be a better name private List sortExternalTools(){ - List retList = 
externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + List retList = new ArrayList<>(); + List previewTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + for (ExternalTool previewTool : previewTools) { + if (externalToolService.meetsRequirements(previewTool, file)) { + retList.add(previewTool); + } + } Collections.sort(retList, CompareExternalToolName); return retList; } @@ -351,9 +386,9 @@ public List< String[]> getExporters(){ // Not all metadata exports should be presented to the web users! // Some are only for harvesting clients. - String[] temp = new String[2]; + String[] temp = new String[2]; temp[0] = formatDisplayName; - temp[1] = myHostURL + "/api/datasets/export?exporter=" + formatName + "&persistentId=" + fileMetadata.getDatasetVersion().getDataset().getGlobalIdString(); + temp[1] = myHostURL + "/api/datasets/export?exporter=" + formatName + "&persistentId=" + fileMetadata.getDatasetVersion().getDataset().getGlobalId().asString(); retList.add(temp); } } @@ -365,7 +400,7 @@ public String saveProvFreeform(String freeformTextInput, DataFile dataFileFromPo file.setProvEntityName(dataFileFromPopup.getProvEntityName()); //passing this value into the file being saved here is pretty hacky. Command cmd; - for (FileMetadata fmw : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmw : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmw.getDataFile().equals(this.fileMetadata.getDataFile())) { cmd = new PersistProvFreeFormCommand(dvRequestService.getDataverseRequest(), file, freeformTextInput); commandEngine.submit(cmd); @@ -381,15 +416,15 @@ public String restrictFile(boolean restricted) throws CommandException{ String fileNames = null; editDataset = this.file.getOwner(); if (restricted) { // get values from access popup - editDataset.getEditVersion().getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess); - editDataset.getEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(fileAccessRequest); + editDataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess); + editDataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(fileAccessRequest); } //using this method to update the terms for datasets that are out of compliance // with Terms of Access requirement - may get her with a file that is already restricted // we'll allow it try { Command cmd; - for (FileMetadata fmw : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmw : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmw.getDataFile().equals(this.fileMetadata.getDataFile())) { fileNames += fmw.getLabel(); cmd = new RestrictFileCommand(fmw.getDataFile(), dvRequestService.getDataverseRequest(), restricted); @@ -424,7 +459,7 @@ public String deleteFile() { FileMetadata markedForDelete = null; - for (FileMetadata fmd : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmd : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmd.getDataFile().getId().equals(fileId)) { markedForDelete = fmd; @@ -435,17 +470,17 @@ public String deleteFile() { // the file already exists as part of this dataset // so all we remove is the file from the fileMetadatas (for display) // and let the delete be handled in the command (by adding it to the filesToBeDeleted list - editDataset.getEditVersion().getFileMetadatas().remove(markedForDelete); + 
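Note: with getGlobalIdString() removed from DvObject, callers now go through getGlobalId().asString(), as in the export-link change above (and the preview-tool list is now filtered through externalToolService.meetsRequirements(tool, file)). A fragment of the URL construction; myHostURL, formatName, and fileMetadata are the surrounding page variables.

    // Sketch: metadata export link for the dataset that owns this file, using GlobalId.asString().
    String exportUrl = myHostURL + "/api/datasets/export?exporter=" + formatName
            + "&persistentId=" + fileMetadata.getDatasetVersion().getDataset().getGlobalId().asString();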
editDataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete); filesToBeDeleted.add(markedForDelete); } else { List filesToKeep = new ArrayList<>(); - for (FileMetadata fmo : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmo : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (!fmo.getDataFile().getId().equals(this.getFile().getId())) { filesToKeep.add(fmo); } } - editDataset.getEditVersion().setFileMetadatas(filesToKeep); + editDataset.getOrCreateEditVersion().setFileMetadatas(filesToKeep); } fileDeleteInProgress = true; @@ -612,7 +647,7 @@ public void setTermsMet(boolean termsMet) { public String save() { // Validate - Set constraintViolations = editDataset.getEditVersion().validate(); + Set constraintViolations = editDataset.getOrCreateEditVersion().validate(); if (!constraintViolations.isEmpty()) { //JsfHelper.addFlashMessage(JH.localize("dataset.message.validationError")); fileDeleteInProgress = false; @@ -629,7 +664,7 @@ public String save() { if (!filesToBeDeleted.isEmpty()) { // We want to delete the file (there's always only one file with this page) - editDataset.getEditVersion().getFileMetadatas().remove(filesToBeDeleted.get(0)); + editDataset.getOrCreateEditVersion().getFileMetadatas().remove(filesToBeDeleted.get(0)); deleteFileId = filesToBeDeleted.get(0).getDataFile().getId(); deleteStorageLocation = datafileService.getPhysicalFileToDelete(filesToBeDeleted.get(0).getDataFile()); } @@ -713,7 +748,7 @@ public boolean isThumbnailAvailable(FileMetadata fileMetadata) { private String returnToDatasetOnly(){ - return "/dataset.xhtml?persistentId=" + editDataset.getGlobalIdString() + "&version=DRAFT" + "&faces-redirect=true"; + return "/dataset.xhtml?persistentId=" + editDataset.getGlobalId().asString() + "&version=DRAFT" + "&faces-redirect=true"; } private String returnToDraftVersion(){ @@ -845,9 +880,9 @@ public String getComputeUrl() throws IOException { swiftObject.open(); //generate a temp url for a file if (isHasPublicStore()) { - return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" + this.getFile().getOwner().getGlobalIdString() + "=" + swiftObject.getSwiftFileName(); + return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" + this.getFile().getOwner().getGlobalId().asString() + "=" + swiftObject.getSwiftFileName(); } - return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" + this.getFile().getOwner().getGlobalIdString() + "=" + swiftObject.getSwiftFileName() + "&temp_url_sig=" + swiftObject.getTempUrlSignature() + "&temp_url_expires=" + swiftObject.getTempUrlExpiry(); + return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" 
+ this.getFile().getOwner().getGlobalId().asString() + "=" + swiftObject.getSwiftFileName() + "&temp_url_sig=" + swiftObject.getTempUrlSignature() + "&temp_url_expires=" + swiftObject.getTempUrlExpiry(); } return ""; } @@ -970,6 +1005,30 @@ public List getToolsWithPreviews() { return toolsWithPreviews; } + public List getQueryTools() { + return queryTools; + } + + + public List getAllAvailableTools(){ + List externalTools = new ArrayList<>(); + externalTools.addAll(queryTools); + for (ExternalTool pt : toolsWithPreviews){ + if (!externalTools.contains(pt)){ + externalTools.add(pt); + } + } + return externalTools; + } + + public String getToolType() { + return toolType; + } + + public void setToolType(String toolType) { + this.toolType = toolType; + } + private ExternalTool selectedTool; public ExternalTool getSelectedTool() { @@ -983,7 +1042,7 @@ public void setSelectedTool(ExternalTool selectedTool) { public String preview(ExternalTool externalTool) { ApiToken apiToken = null; User user = session.getUser(); - if (fileMetadata.getDatasetVersion().isDraft() || (fileMetadata.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fileMetadata))) { + if (fileMetadata.getDatasetVersion().isDraft() || fileMetadata.getDatasetVersion().isDeaccessioned() || (fileMetadata.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fileMetadata))) { apiToken=fileDownloadService.getApiToken(user); } if(externalTool == null){ @@ -1162,7 +1221,22 @@ public String getEmbargoPhrase() { return BundleUtil.getStringFromBundle("embargoed.willbeuntil"); } } - + + public String getToolTabTitle(){ + if (getAllAvailableTools().size() > 1) { + return BundleUtil.getStringFromBundle("file.toolTab.header"); + } + if( getSelectedTool() != null ){ + if(getSelectedTool().isPreviewTool()){ + return BundleUtil.getStringFromBundle("file.previewTab.header"); + } + if(getSelectedTool().isQueryTool()){ + return BundleUtil.getStringFromBundle("file.queryTab.header"); + } + } + return BundleUtil.getStringFromBundle("file.toolTab.header"); + } + public String getIngestMessage() { return BundleUtil.getStringFromBundle("file.ingestFailed.message", Arrays.asList(settingsWrapper.getGuidesBaseUrl(), settingsWrapper.getGuidesVersion())); } @@ -1172,4 +1246,13 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))); } + /** + * This method only exists because in file-edit-button-fragment.xhtml we + * call bean.editFileMetadata() and we need both FilePage (this bean) and + * DatasetPage to have the method defined to prevent errors in server.log. 
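Note: preview() (like explore() in FileDownloadServiceBean earlier in this patch) now also passes an API token when the version is deaccessioned, not just for drafts, restricted files, and active embargoes. A fragment restating that condition; version and fmd are the page-local DatasetVersion and FileMetadata.

    // Sketch: when external tools receive an API token, per the updated condition.
    boolean tokenNeeded = version.isDraft() || version.isDeaccessioned()
            || fmd.getDataFile().isRestricted() || FileUtil.isActivelyEmbargoed(fmd);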
+ */ + public String editFileMetadata(){ + return ""; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileUploadRenderer.java b/src/main/java/edu/harvard/iq/dataverse/FileUploadRenderer.java index 5e73ef65f25..ce3b0d65875 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileUploadRenderer.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileUploadRenderer.java @@ -6,8 +6,8 @@ package edu.harvard.iq.dataverse; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFieldMapping.java b/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFieldMapping.java index 40d219d2638..db83ab953a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFieldMapping.java +++ b/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFieldMapping.java @@ -3,13 +3,8 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.*; +import jakarta.persistence.*; import java.util.Collection; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFormatMapping.java b/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFormatMapping.java index 0fac75257c8..eb7b97b1a84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFormatMapping.java +++ b/src/main/java/edu/harvard/iq/dataverse/ForeignMetadataFormatMapping.java @@ -7,18 +7,18 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; import java.util.List; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Index; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.OneToMany; -import javax.persistence.Table; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Index; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToMany; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 20b280771fc..890b146a61c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -6,7 +6,7 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; import java.net.MalformedURLException; @@ -16,7 +16,6 @@ import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.ejb.EJB; /** * @@ -24,55 +23,28 @@ */ public class GlobalId implements java.io.Serializable { - public static final String DOI_PROTOCOL = "doi"; - public static final String HDL_PROTOCOL = "hdl"; - public static final String DOI_RESOLVER_URL = 
"https://doi.org/"; - public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; - public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; - public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; - public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; - public static final String HTTP_HDL_RESOLVER_URL = "http://hdl.handle.net/"; - - public static Optional parse(String identifierString) { - try { - return Optional.of(new GlobalId(identifierString)); - } catch ( IllegalArgumentException _iae) { - return Optional.empty(); - } - } - private static final Logger logger = Logger.getLogger(GlobalId.class.getName()); - - @EJB - SettingsServiceBean settingsService; - /** - * - * @param identifier The string to be parsed - * @throws IllegalArgumentException if the passed string cannot be parsed. - */ - public GlobalId(String identifier) { - // set the protocol, authority, and identifier via parsePersistentId - if ( ! parsePersistentId(identifier) ){ - throw new IllegalArgumentException("Failed to parse identifier: " + identifier); - } - } - - public GlobalId(String protocol, String authority, String identifier) { + public GlobalId(String protocol, String authority, String identifier, String separator, String urlPrefix, String providerName) { this.protocol = protocol; this.authority = authority; this.identifier = identifier; + if(separator!=null) { + this.separator = separator; + } + this.urlPrefix = urlPrefix; + this.managingProviderName = providerName; } - public GlobalId(DvObject dvObject) { - this.authority = dvObject.getAuthority(); - this.protocol = dvObject.getProtocol(); - this.identifier = dvObject.getIdentifier(); - } - + // protocol the identifier system, e.g. "doi" + // authority the namespace that the authority manages in the identifier system + // identifier the local identifier part private String protocol; private String authority; private String identifier; + private String managingProviderName; + private String separator = "/"; + private String urlPrefix; /** * Tests whether {@code this} instance has all the data required for a @@ -87,161 +59,50 @@ public String getProtocol() { return protocol; } - public void setProtocol(String protocol) { - this.protocol = protocol; - } - public String getAuthority() { return authority; } - public void setAuthority(String authority) { - this.authority = authority; - } - public String getIdentifier() { return identifier; } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } + public String getProvider() { + return managingProviderName; + } + public String toString() { return asString(); } /** - * Returns {@code this}' string representation. Differs from {@link #toString} - * which can also contain debug data, if needed. + * Concatenate the parts that make up a Global Identifier. * - * @return The string representation of this global id. + * @return the Global Identifier, e.g. 
"doi:10.12345/67890" */ public String asString() { if (protocol == null || authority == null || identifier == null) { return ""; } - return protocol + ":" + authority + "/" + identifier; + return protocol + ":" + authority + separator + identifier; } - public URL toURL() { + public String asURL() { URL url = null; if (identifier == null){ return null; } try { - if (protocol.equals(DOI_PROTOCOL)){ - url = new URL(DOI_RESOLVER_URL + authority + "/" + identifier); - } else if (protocol.equals(HDL_PROTOCOL)){ - url = new URL(HDL_RESOLVER_URL + authority + "/" + identifier); - } + url = new URL(urlPrefix + authority + separator + identifier); + return url.toExternalForm(); } catch (MalformedURLException ex) { logger.log(Level.SEVERE, null, ex); - } - return url; - } - - - /** - * Parse a Persistent Id and set the protocol, authority, and identifier - * - * Example 1: doi:10.5072/FK2/BYM3IW - * protocol: doi - * authority: 10.5072 - * identifier: FK2/BYM3IW - * - * Example 2: hdl:1902.1/111012 - * protocol: hdl - * authority: 1902.1 - * identifier: 111012 - * - * @param identifierString - * @param separator the string that separates the authority from the identifier. - * @param destination the global id that will contain the parsed data. - * @return {@code destination}, after its fields have been updated, or - * {@code null} if parsing failed. - */ - private boolean parsePersistentId(String identifierString) { - - if (identifierString == null) { - return false; - } - int index1 = identifierString.indexOf(':'); - if (index1 > 0) { // ':' found with one or more characters before it - int index2 = identifierString.indexOf('/', index1 + 1); - if (index2 > 0 && (index2 + 1) < identifierString.length()) { // '/' found with one or more characters - // between ':' - protocol = identifierString.substring(0, index1); // and '/' and there are characters after '/' - if (!"doi".equals(protocol) && !"hdl".equals(protocol)) { - return false; - } - //Strip any whitespace, ; and ' from authority (should finding them cause a failure instead?) - authority = formatIdentifierString(identifierString.substring(index1 + 1, index2)); - if(testforNullTerminator(authority)) return false; - if (protocol.equals(DOI_PROTOCOL)) { - if (!this.checkDOIAuthority(authority)) { - return false; - } - } - // Passed all checks - //Strip any whitespace, ; and ' from identifier (should finding them cause a failure instead?) 
- identifier = formatIdentifierString(identifierString.substring(index2 + 1)); - if(testforNullTerminator(identifier)) return false; - } else { - logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string", identifierString); - return false; - } - } else { - logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", identifierString); - return false; - } - return true; - } - - private static String formatIdentifierString(String str){ - - if (str == null){ - return null; - } - // remove whitespace, single quotes, and semicolons - return str.replaceAll("\\s+|'|;",""); - - /* - < (%3C) -> (%3E) -{ (%7B) -} (%7D) -^ (%5E) -[ (%5B) -] (%5D) -` (%60) -| (%7C) -\ (%5C) -+ - */ - // http://www.doi.org/doi_handbook/2_Numbering.html - } - - private static boolean testforNullTerminator(String str){ - if(str == null) { - return false; } - return str.indexOf('\u0000') > 0; - } - - private boolean checkDOIAuthority(String doiAuthority){ - - if (doiAuthority==null){ - return false; - } - - if (!(doiAuthority.startsWith("10."))){ - return false; - } - - return true; + return null; } + + /** * Verifies that the pid only contains allowed characters. * @@ -257,26 +118,5 @@ public static boolean verifyImportCharacters(String pidParam) { return m.matches(); } - /** - * Convenience method to get the internal form of a PID string when it may be in - * the https:// or http:// form ToDo -refactor class to allow creating a - * GlobalID from any form (which assures it has valid syntax) and then have methods to get - * the form you want. - * - * @param pidUrlString - a string assumed to be a valid PID in some form - * @return the internal form as a String - */ - public static String getInternalFormOfPID(String pidUrlString) { - String pidString = pidUrlString; - if(pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) { - pidString = pidUrlString.replace(GlobalId.DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":")); - } else if(pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) { - pidString = pidUrlString.replace(GlobalId.HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":")); - } else if(pidUrlString.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL)) { - pidString = pidUrlString.replace(GlobalId.HTTP_DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":")); - } else if(pidUrlString.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL)) { - pidString = pidUrlString.replace(GlobalId.HTTP_HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":")); - } - return pidString; - } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java index 0d64c1050b8..aebf13778c3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java @@ -2,6 +2,8 @@ import static edu.harvard.iq.dataverse.GlobalIdServiceBean.logger; import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import java.util.*; @@ -13,11 +15,28 @@ public interface GlobalIdServiceBean { static final Logger logger = Logger.getLogger(GlobalIdServiceBean.class.getCanonicalName()); - boolean alreadyExists(DvObject dvo) throws Exception; + boolean alreadyRegistered(DvObject dvo) throws Exception; + + /** + * This call reports whether a PID is registered with the external Provider + * service. 
For providers like DOIs/Handles with an external service, this call + * should accurately report whether the PID has been registered in the service. + * For providers with no external service, the call should return true if the + * PID is defined locally. If it isn't, these no-service providers need to know + * whether use case of the caller requires that the returned value should + * default to true or false - via the noProviderDefault parameter. + * + * @param globalId + * @param noProviderDefault - when there is no external service, and no local + * use of the PID, this should be returned + * @return whether the PID should be considered registered or not. + * @throws Exception + */ + boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; - boolean alreadyExists(GlobalId globalId) throws Exception; - boolean registerWhenPublished(); + boolean canManagePID(); + boolean isConfigured(); List getProviderInformation(); @@ -25,15 +44,6 @@ public interface GlobalIdServiceBean { Map getIdentifierMetadata(DvObject dvo); - /** - * Concatenate the parts that make up a Global Identifier. - * @param protocol the identifier system, e.g. "doi" - * @param authority the namespace that the authority manages in the identifier system - * @param identifier the local identifier part - * @return the Global Identifier, e.g. "doi:10.12345/67890" - */ - String getIdentifierForLookup(String protocol, String authority, String identifier); - String modifyIdentifierTargetURL(DvObject dvo) throws Exception; void deleteIdentifier(DvObject dvo) throws Exception; @@ -42,18 +52,27 @@ public interface GlobalIdServiceBean { Map getMetadataForTargetURL(DvObject dvObject); - Map lookupMetadataFromIdentifier(String protocol, String authority, String identifier); - DvObject generateIdentifier(DvObject dvObject); String getIdentifier(DvObject dvObject); boolean publicizeIdentifier(DvObject studyIn); + String generateDatasetIdentifier(Dataset dataset); + String generateDataFileIdentifier(DataFile datafile); + boolean isGlobalIdUnique(GlobalId globalId); + + String getUrlPrefix(); + String getSeparator(); + static GlobalIdServiceBean getBean(String protocol, CommandContext ctxt) { final Function protocolHandler = BeanDispatcher.DISPATCHER.get(protocol); if ( protocolHandler != null ) { - return protocolHandler.apply(ctxt); + GlobalIdServiceBean theBean = protocolHandler.apply(ctxt); + if(theBean != null && theBean.isConfigured()) { + logger.fine("getBean returns " + theBean.getProviderInformation().get(0) + " for protocol " + protocol); + } + return theBean; } else { logger.log(Level.SEVERE, "Unknown protocol: {0}", protocol); return null; @@ -64,8 +83,113 @@ static GlobalIdServiceBean getBean(CommandContext ctxt) { return getBean(ctxt.settings().getValueForKey(Key.Protocol, ""), ctxt); } + public static Optional parse(String identifierString) { + try { + return Optional.of(PidUtil.parseAsGlobalID(identifierString)); + } catch ( IllegalArgumentException _iae) { + return Optional.empty(); + } + } + + /** + * Parse a Persistent Id and set the protocol, authority, and identifier + * + * Example 1: doi:10.5072/FK2/BYM3IW + * protocol: doi + * authority: 10.5072 + * identifier: FK2/BYM3IW + * + * Example 2: hdl:1902.1/111012 + * protocol: hdl + * authority: 1902.1 + * identifier: 111012 + * + * @param identifierString + * @param separator the string that separates the authority from the identifier. + * @param destination the global id that will contain the parsed data. 
+ * @return {@code destination}, after its fields have been updated, or + * {@code null} if parsing failed. + */ + public GlobalId parsePersistentId(String identifierString); + public GlobalId parsePersistentId(String protocol, String authority, String identifier); + + + + public static boolean isValidGlobalId(String protocol, String authority, String identifier) { + if (protocol == null || authority == null || identifier == null) { + return false; + } + if(!authority.equals(GlobalIdServiceBean.formatIdentifierString(authority))) { + return false; + } + if (GlobalIdServiceBean.testforNullTerminator(authority)) { + return false; + } + if(!identifier.equals(GlobalIdServiceBean.formatIdentifierString(identifier))) { + return false; + } + if (GlobalIdServiceBean.testforNullTerminator(identifier)) { + return false; + } + return true; + } + + static String formatIdentifierString(String str){ + + if (str == null){ + return null; + } + // remove whitespace, single quotes, and semicolons + return str.replaceAll("\\s+|'|;",""); + + /* + < (%3C) +> (%3E) +{ (%7B) +} (%7D) +^ (%5E) +[ (%5B) +] (%5D) +` (%60) +| (%7C) +\ (%5C) ++ + */ + // http://www.doi.org/doi_handbook/2_Numbering.html + } + + static boolean testforNullTerminator(String str){ + if(str == null) { + return false; + } + return str.indexOf('\u0000') > 0; + } + + static boolean checkDOIAuthority(String doiAuthority){ + + if (doiAuthority==null){ + return false; + } + + if (!(doiAuthority.startsWith("10."))){ + return false; + } + + return true; + } } + +/* + * ToDo - replace this with a mechanism like BrandingUtilHelper that would read + * the config and create PidProviders, one per set of config values and serve + * those as needed. The helper has to be a bean to autostart and to hand the + * required service beans to the PidProviders. That may boil down to just the + * dvObjectService (to check for local identifier conflicts) since it will be + * the helper that has to read settings/get systemConfig values. + * + */ + /** * Static utility class for dispatching implementing beans, based on protocol and providers. 
* @author michael @@ -86,5 +210,7 @@ class BeanDispatcher { return null; } }); + + DISPATCHER.put(PermaLinkPidProviderServiceBean.PERMA_PROTOCOL, ctxt->ctxt.permaLinkProvider() ); } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/Guestbook.java b/src/main/java/edu/harvard/iq/dataverse/Guestbook.java index 18913bfd5bf..2ef23d1f925 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Guestbook.java +++ b/src/main/java/edu/harvard/iq/dataverse/Guestbook.java @@ -3,27 +3,25 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.Serializable; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; -import javax.persistence.CascadeType; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.OneToMany; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToMany; import java.util.List; import java.util.Objects; -import javax.persistence.Column; -import javax.persistence.ManyToOne; -import javax.persistence.OrderBy; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; -import javax.persistence.Transient; +import jakarta.persistence.Column; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.OrderBy; +import jakarta.persistence.Temporal; +import jakarta.persistence.TemporalType; +import jakarta.persistence.Transient; import edu.harvard.iq.dataverse.util.DateUtil; -import org.apache.commons.text.StringEscapeUtils; import org.hibernate.validator.constraints.NotBlank; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index 7cbb69e5c1d..9fb584a9133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -17,13 +17,13 @@ import java.util.Iterator; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.faces.application.FacesMessage; -import javax.faces.context.FacesContext; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.context.FacesContext; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 69404482fce..0057fbeddab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -13,8 +13,8 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; -import javax.persistence.*; -import javax.validation.constraints.Size; +import jakarta.persistence.*; +import jakarta.validation.constraints.Size; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 
f4cf38979c5..bd598d2dca0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.io.OutputStream; import java.text.SimpleDateFormat; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; @@ -20,17 +21,17 @@ import java.util.List; import java.util.Map; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.ejb.TransactionAttribute; -import javax.ejb.TransactionAttributeType; -import javax.faces.model.SelectItem; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; -import javax.persistence.StoredProcedureQuery; -import javax.persistence.TypedQuery; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; +import jakarta.faces.model.SelectItem; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; +import jakarta.persistence.StoredProcedureQuery; +import jakarta.persistence.TypedQuery; import org.apache.commons.text.StringEscapeUtils; /** * @@ -910,8 +911,17 @@ public Long getCountGuestbookResponsesByDataFileId(Long dataFileId) { } public Long getCountGuestbookResponsesByDatasetId(Long datasetId) { + return getCountGuestbookResponsesByDatasetId(datasetId, null); + } + + public Long getCountGuestbookResponsesByDatasetId(Long datasetId, LocalDate date) { // dataset id is null, will return 0 - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId); + Query query; + if(date != null) { + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "'"); + }else { + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId); + } return (Long) query.getSingleResult(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java index 23aac4a24a3..c53df93def8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java @@ -6,20 +6,19 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; -import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.List; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.application.FacesMessage; -import javax.faces.context.FacesContext; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; -import javax.servlet.ServletOutputStream; -import javax.servlet.http.HttpServletResponse; +import jakarta.ejb.EJB; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.context.FacesContext; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.servlet.ServletOutputStream; +import jakarta.servlet.http.HttpServletResponse; /** * diff --git 
a/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java index 5394ddc652a..fcd4e91d455 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java @@ -5,12 +5,11 @@ */ package edu.harvard.iq.dataverse; -import java.util.List; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; -import javax.persistence.Query; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 1a8ee8a85e8..4942db9e7ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -20,20 +20,21 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.io.File; import java.io.FileInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; import java.security.PrivateKey; /* Handlenet imports: */ +import edu.harvard.iq.dataverse.util.SystemConfig; import net.handle.hdllib.AbstractMessage; import net.handle.hdllib.AbstractResponse; import net.handle.hdllib.AdminRecord; @@ -65,14 +66,17 @@ public class HandlenetServiceBean extends AbstractGlobalIdServiceBean { @EJB DataverseServiceBean dataverseService; @EJB - SettingsServiceBean settingsService; + SettingsServiceBean settingsService; private static final Logger logger = Logger.getLogger(HandlenetServiceBean.class.getCanonicalName()); - private static final String HANDLE_PROTOCOL_TAG = "hdl"; - int handlenetIndex = System.getProperty("dataverse.handlenet.index")!=null? Integer.parseInt(System.getProperty("dataverse.handlenet.index")) : 300; + public static final String HDL_PROTOCOL = "hdl"; + int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); + public static final String HTTP_HDL_RESOLVER_URL = "http://hdl.handle.net/"; + public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; public HandlenetServiceBean() { logger.log(Level.FINE,"Constructor"); + configured = true; } @Override @@ -82,7 +86,7 @@ public boolean registerWhenPublished() { public void reRegisterHandle(DvObject dvObject) { logger.log(Level.FINE,"reRegisterHandle"); - if (!HANDLE_PROTOCOL_TAG.equals(dvObject.getProtocol())) { + if (!HDL_PROTOCOL.equals(dvObject.getProtocol())) { logger.log(Level.WARNING, "reRegisterHandle called on a dvObject with the non-handle global id: {0}", dvObject.getId()); } @@ -227,8 +231,8 @@ private ResolutionRequest buildResolutionRequest(final String handle) { private PublicKeyAuthenticationInfo getAuthInfo(String handlePrefix) { logger.log(Level.FINE,"getAuthInfo"); byte[] key = null; - String adminCredFile = System.getProperty("dataverse.handlenet.admcredfile"); - int handlenetIndex = System.getProperty("dataverse.handlenet.index")!=null? 
Integer.parseInt(System.getProperty("dataverse.handlenet.index")) : 300; + String adminCredFile = JvmSettings.HANDLENET_KEY_PATH.lookup(); + int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); key = readKey(adminCredFile); PrivateKey privkey = null; @@ -247,21 +251,7 @@ private String getRegistrationUrl(DvObject dvObject) { } public String getSiteUrl() { - logger.log(Level.FINE,"getSiteUrl"); - String hostUrl = System.getProperty("dataverse.siteUrl"); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty("dataverse.fqdn"); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; + return SystemConfig.getDataverseSiteUrlStatic(); } private byte[] readKey(final String file) { @@ -283,13 +273,13 @@ private byte[] readKey(final String file) { private PrivateKey readPrivKey(byte[] key, final String file) { logger.log(Level.FINE,"readPrivKey"); - PrivateKey privkey=null; + PrivateKey privkey = null; - String secret = System.getProperty("dataverse.handlenet.admprivphrase"); - byte secKey[] = null; try { + byte[] secKey = null; if ( Util.requiresSecretKey(key) ) { - secKey = secret.getBytes(); + String secret = JvmSettings.HANDLENET_KEY_PASSPHRASE.lookup(); + secKey = secret.getBytes(StandardCharsets.UTF_8); } key = Util.decrypt(key, secKey); privkey = Util.getPrivateKeyFromBytes(key, 0); @@ -324,13 +314,13 @@ private String getAuthenticationHandle(String handlePrefix) { } @Override - public boolean alreadyExists(DvObject dvObject) throws Exception { + public boolean alreadyRegistered(DvObject dvObject) throws Exception { String handle = getDvObjectHandle(dvObject); return isHandleRegistered(handle); } @Override - public boolean alreadyExists(GlobalId pid) throws Exception { + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { String handle = pid.getAuthority() + "/" + pid.getIdentifier(); return isHandleRegistered(handle); } @@ -340,11 +330,6 @@ public Map getIdentifierMetadata(DvObject dvObject) { throw new NotImplementedException(); } - @Override - public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) { - throw new NotImplementedException(); - } - @Override public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { logger.log(Level.FINE,"modifyIdentifier"); @@ -362,9 +347,9 @@ public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { public void deleteIdentifier(DvObject dvObject) throws Exception { String handle = getDvObjectHandle(dvObject); String authHandle = getAuthenticationHandle(dvObject); - - String adminCredFile = System.getProperty("dataverse.handlenet.admcredfile"); - int handlenetIndex = System.getProperty("dataverse.handlenet.index")!=null? 
Integer.parseInt(System.getProperty("dataverse.handlenet.index")) : 300; + + String adminCredFile = JvmSettings.HANDLENET_KEY_PATH.lookup(); + int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); byte[] key = readKey(adminCredFile); PrivateKey privkey = readPrivKey(key, adminCredFile); @@ -398,12 +383,7 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) { @Override public List getProviderInformation(){ - ArrayList providerInfo = new ArrayList<>(); - String providerName = "Handle"; - String providerLink = "https://hdl.handle.net"; - providerInfo.add(providerName); - providerInfo.add(providerLink); - return providerInfo; + return List.of("Handle", "https://hdl.handle.net"); } @@ -427,7 +407,37 @@ public boolean publicizeIdentifier(DvObject dvObject) { } -} + @Override + public GlobalId parsePersistentId(String pidString) { + if (pidString.startsWith(HDL_RESOLVER_URL)) { + pidString = pidString.replace(HDL_RESOLVER_URL, (HDL_PROTOCOL + ":")); + } else if (pidString.startsWith(HTTP_HDL_RESOLVER_URL)) { + pidString = pidString.replace(HTTP_HDL_RESOLVER_URL, (HDL_PROTOCOL + ":")); + } + return super.parsePersistentId(pidString); + } + @Override + public GlobalId parsePersistentId(String protocol, String identifierString) { + if (!HDL_PROTOCOL.equals(protocol)) { + return null; + } + GlobalId globalId = super.parsePersistentId(protocol, identifierString); + return globalId; + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + if (!HDL_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + @Override + public String getUrlPrefix() { + return HDL_RESOLVER_URL; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index bc83c15dcd7..f008db1403f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; @@ -24,22 +23,21 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; -import javax.faces.context.FacesContext; -import javax.faces.event.ActionEvent; -import javax.faces.model.SelectItem; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; -import javax.servlet.http.HttpServletRequest; +import jakarta.ejb.EJB; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.component.UIComponent; +import jakarta.faces.component.UIInput; +import jakarta.faces.context.FacesContext; +import jakarta.faces.event.ActionEvent; +import 
jakarta.faces.model.SelectItem; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.servlet.http.HttpServletRequest; import org.apache.commons.lang3.StringUtils; /** @@ -79,7 +77,7 @@ public class HarvestingClientsPage implements java.io.Serializable { private Dataverse dataverse; private Long dataverseId = null; private HarvestingClient selectedClient; - private boolean setListTruncated = false; + private boolean setListTruncated = false; //private static final String solrDocIdentifierDataset = "dataset_"; @@ -245,6 +243,7 @@ public void editClient(HarvestingClient harvestingClient) { this.newNickname = harvestingClient.getName(); this.newHarvestingUrl = harvestingClient.getHarvestingUrl(); + this.customHeader = harvestingClient.getCustomHttpHeaders(); this.initialSettingsValidated = false; // TODO: do we want to try and contact the server, again, to make @@ -340,6 +339,7 @@ public void createClient(ActionEvent ae) { getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient); newHarvestingClient.setHarvestingUrl(newHarvestingUrl); + newHarvestingClient.setCustomHttpHeaders(customHeader); if (!StringUtils.isEmpty(newOaiSet)) { newHarvestingClient.setHarvestingSet(newOaiSet); } @@ -426,6 +426,7 @@ public void saveClient(ActionEvent ae) { // nickname is not editable for existing clients: //harvestingClient.setName(newNickname); harvestingClient.setHarvestingUrl(newHarvestingUrl); + harvestingClient.setCustomHttpHeaders(customHeader); harvestingClient.setHarvestingSet(newOaiSet); harvestingClient.setMetadataPrefix(newMetadataFormat); harvestingClient.setHarvestStyle(newHarvestingStyle); @@ -554,6 +555,9 @@ public boolean validateServerUrlOAI() { if (!StringUtils.isEmpty(getNewHarvestingUrl())) { OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl()); + if (getNewCustomHeader() != null) { + oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader())); + } boolean success = true; String message = null; @@ -635,6 +639,23 @@ public boolean validateServerUrlOAI() { return false; } + public boolean validateCustomHeader() { + if (!StringUtils.isEmpty(getNewCustomHeader())) { + // TODO: put this method somewhere else as a static utility + + // check that it's looking like "{header-name}: {header value}" at least + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) { + FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), + new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); + + return false; + } + } + + // this setting is optional + return true; + } + public void validateInitialSettings() { if (isHarvestTypeOAI()) { boolean nicknameValidated = true; @@ -644,9 +665,10 @@ public void validateInitialSettings() { destinationDataverseValidated = validateSelectedDataverse(); } boolean urlValidated = validateServerUrlOAI(); + boolean customHeaderValidated = validateCustomHeader(); - if (nicknameValidated && destinationDataverseValidated && urlValidated) { - // In Create mode we want to run all 3 validation tests; this is why + if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) { + // In Create mode we want to run all 4 validation tests; this is why // we are not doing "if ((validateNickname() && validateServerUrlOAI())" // in the line above. -- L.A. 4.4 May 2016. 
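The validateCustomHeader() check above only asserts that the new client's custom header roughly looks like "{header-name}: {header value}" (the pattern ^[a-zA-Z0-9\_\-]+:.*) before the raw string is handed to OaiHandler.makeCustomHeaders(). As a rough sketch of the static utility the TODO comment asks for, the hypothetical helper below (CustomHeaderUtil.parseCustomHeader is not part of this changeset; its name and shape are assumptions) validates a raw header with the same pattern and splits it into a name/value pair:

import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Map;
import java.util.regex.Pattern;

// Hypothetical utility sketched from the validation in
// HarvestingClientsPage.validateCustomHeader(); not part of this changeset.
public final class CustomHeaderUtil {

    // Same shape the page validates: "{header-name}: {header value}"
    private static final Pattern HEADER_PATTERN = Pattern.compile("^[a-zA-Z0-9\\_\\-]+:.*");

    private CustomHeaderUtil() {
    }

    // Returns the header as a name/value pair, or null if the raw string
    // does not match the expected "Name: value" shape.
    public static Map.Entry<String, String> parseCustomHeader(String raw) {
        if (raw == null || !HEADER_PATTERN.matcher(raw).matches()) {
            return null;
        }
        int colon = raw.indexOf(':');
        return new SimpleImmutableEntry<>(raw.substring(0, colon).trim(),
                raw.substring(colon + 1).trim());
    }
}

Page or handler code could then call something like CustomHeaderUtil.parseCustomHeader(getNewCustomHeader()) once and pass the two parts along, instead of re-splitting the raw string wherever the header is consumed.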
@@ -688,6 +710,7 @@ public void backToStepThree() { UIInput newClientNicknameInputField; UIInput newClientUrlInputField; + UIInput newClientCustomHeaderInputField; UIInput hiddenInputField; /*UISelectOne*/ UIInput metadataFormatMenu; UIInput remoteArchiveStyleMenu; @@ -695,6 +718,7 @@ public void backToStepThree() { private String newNickname = ""; private String newHarvestingUrl = ""; + private String customHeader = null; private boolean initialSettingsValidated = false; private String newOaiSet = ""; private String newMetadataFormat = ""; @@ -718,6 +742,7 @@ public void initNewClient(ActionEvent ae) { //this.selectedClient = new HarvestingClient(); this.newNickname = ""; this.newHarvestingUrl = ""; + this.customHeader = null; this.initialSettingsValidated = false; this.newOaiSet = ""; this.newMetadataFormat = ""; @@ -762,6 +787,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } + public String getNewCustomHeader() { + return customHeader; + } + + public void setNewCustomHeader(String customHeader) { + this.customHeader = customHeader; + } + public int getHarvestTypeRadio() { return this.harvestTypeRadio; } @@ -871,6 +904,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) { this.newClientUrlInputField = newClientInputField; } + public UIInput getNewClientCustomHeaderInputField() { + return newClientCustomHeaderInputField; + } + + public void setNewClientCustomHeaderInputField(UIInput newClientInputField) { + this.newClientCustomHeaderInputField = newClientInputField; + } + public UIInput getHiddenInputField() { return hiddenInputField; } diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java index 28df6e19e65..6709b978c47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java @@ -6,16 +6,16 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.OneToOne; -import javax.persistence.Table; +import jakarta.persistence.CascadeType; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java index 432683a5797..6dbba34920b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java @@ -6,11 +6,6 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; -import 
edu.harvard.iq.dataverse.harvest.client.HarvestingClient; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAIRecord; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAISet; @@ -26,15 +21,15 @@ import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; -import javax.faces.context.FacesContext; -import javax.faces.event.ActionEvent; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; +import jakarta.ejb.EJB; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.component.UIComponent; +import jakarta.faces.component.UIInput; +import jakarta.faces.context.FacesContext; +import jakarta.faces.event.ActionEvent; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/HomepageServlet.java b/src/main/java/edu/harvard/iq/dataverse/HomepageServlet.java index ef9b3267db4..e1864194436 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HomepageServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/HomepageServlet.java @@ -7,12 +7,12 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.io.IOException; -import javax.ejb.EJB; -import javax.servlet.RequestDispatcher; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; +import jakarta.ejb.EJB; +import jakarta.servlet.RequestDispatcher; +import jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServlet; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/LinkValidator.java b/src/main/java/edu/harvard/iq/dataverse/LinkValidator.java index 2ecfc55f67e..7d540f0a425 100644 --- a/src/main/java/edu/harvard/iq/dataverse/LinkValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/LinkValidator.java @@ -5,13 +5,13 @@ */ package edu.harvard.iq.dataverse; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; -import javax.faces.context.FacesContext; -import javax.faces.validator.FacesValidator; -import javax.faces.validator.Validator; -import javax.faces.validator.ValidatorException; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.component.UIComponent; +import jakarta.faces.component.UIInput; +import jakarta.faces.context.FacesContext; +import jakarta.faces.validator.FacesValidator; +import jakarta.faces.validator.Validator; +import jakarta.faces.validator.ValidatorException; import edu.harvard.iq.dataverse.util.BundleUtil; @FacesValidator(value = "linkValidator") diff --git a/src/main/java/edu/harvard/iq/dataverse/LoginPage.java b/src/main/java/edu/harvard/iq/dataverse/LoginPage.java index 2420ce08550..16d2cc53cb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/LoginPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/LoginPage.java @@ -9,30 +9,27 @@ import edu.harvard.iq.dataverse.authorization.exceptions.AuthenticationFailedException; import 
edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; -import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.JsfHelper; -import edu.harvard.iq.dataverse.util.SessionUtil; -import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.context.FacesContext; -import javax.faces.event.AjaxBehaviorEvent; -import javax.faces.validator.ValidatorException; -import javax.faces.view.ViewScoped; -import javax.inject.Inject; -import javax.inject.Named; -import javax.servlet.http.HttpServletRequest; +import jakarta.ejb.EJB; +import jakarta.faces.application.FacesMessage; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.FacesContext; +import jakarta.faces.event.AjaxBehaviorEvent; +import jakarta.faces.validator.ValidatorException; +import jakarta.faces.view.ViewScoped; +import jakarta.inject.Inject; +import jakarta.inject.Named; +import jakarta.servlet.http.HttpServletRequest; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 2bfd342d899..f17732df7b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -5,12 +5,12 @@ */ package edu.harvard.iq.dataverse; -import com.sun.mail.smtp.SMTPSendFailedException; import edu.harvard.iq.dataverse.authorization.groups.Group; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -23,25 +23,23 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Date; -import java.util.Properties; -import java.util.Map; -import java.util.HashMap; import java.util.List; import java.util.Set; import java.util.logging.Logger; -import javax.annotation.Resource; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.mail.Address; -import javax.mail.Message; -import javax.mail.MessagingException; -import javax.mail.Session; -import javax.mail.Transport; -import javax.mail.internet.AddressException; -import javax.mail.internet.InternetAddress; -import javax.mail.internet.MimeMessage; +import jakarta.annotation.Resource; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.mail.Address; +import jakarta.mail.Message; +import jakarta.mail.MessagingException; +import jakarta.mail.Session; +import jakarta.mail.Transport; +import jakarta.mail.internet.AddressException; +import 
jakarta.mail.internet.InternetAddress; +import jakarta.mail.internet.MimeMessage; import edu.harvard.iq.dataverse.validation.EMailValidator; +import jakarta.json.JsonObject; import org.apache.commons.lang3.StringUtils; /** @@ -81,37 +79,6 @@ public class MailServiceBean implements java.io.Serializable { public MailServiceBean() { } - public void sendMail(String host, String reply, String to, String subject, String messageText) { - Properties props = System.getProperties(); - props.put("mail.smtp.host", host); - Session session = Session.getDefaultInstance(props, null); - - try { - MimeMessage msg = new MimeMessage(session); - String[] recipientStrings = to.split(","); - InternetAddress[] recipients = new InternetAddress[recipientStrings.length]; - try { - InternetAddress fromAddress = getSystemAddress(); - setContactDelegation(reply, fromAddress); - msg.setFrom(fromAddress); - msg.setReplyTo(new Address[] {new InternetAddress(reply, charset)}); - for (int i = 0; i < recipients.length; i++) { - recipients[i] = new InternetAddress(recipientStrings[i], "", charset); - } - } catch (UnsupportedEncodingException ex) { - logger.severe(ex.getMessage()); - } - msg.setRecipients(Message.RecipientType.TO, recipients); - msg.setSubject(subject, charset); - msg.setText(messageText, charset); - Transport.send(msg, recipients); - } catch (AddressException ae) { - ae.printStackTrace(System.out); - } catch (MessagingException me) { - me.printStackTrace(System.out); - } - } - @Resource(name = "mail/notifyMailSession") private Session session; @@ -154,9 +121,9 @@ public boolean sendSystemEmail(String to, String subject, String messageText, bo try { Transport.send(msg, recipients); sent = true; - } catch (SMTPSendFailedException ssfe) { + } catch (MessagingException ssfe) { logger.warning("Failed to send mail to: " + to); - logger.warning("SMTPSendFailedException Message: " + ssfe); + logger.warning("MessagingException Message: " + ssfe); } } else { logger.fine("Skipping sending mail to " + to + ", because the \"no-reply\" address not set (" + Key.SystemEmail + " setting)."); @@ -177,11 +144,7 @@ public InternetAddress getSystemAddress() { } //@Resource(name="mail/notifyMailSession") - public void sendMail(String from, String to, String subject, String messageText) { - sendMail(from, to, subject, messageText, new HashMap<>()); - } - - public void sendMail(String reply, String to, String subject, String messageText, Map extraHeaders) { + public void sendMail(String reply, String to, String cc, String subject, String messageText) { try { MimeMessage msg = new MimeMessage(session); // Always send from system address to avoid email being blocked @@ -202,18 +165,12 @@ public void sendMail(String reply, String to, String subject, String messageText msg.setSentDate(new Date()); msg.setRecipients(Message.RecipientType.TO, InternetAddress.parse(to, false)); + if (cc != null) { + msg.setRecipients(Message.RecipientType.CC, InternetAddress.parse(cc, false)); + } msg.setSubject(subject, charset); msg.setText(messageText, charset); - if (extraHeaders != null) { - for (Object key : extraHeaders.keySet()) { - String headerName = key.toString(); - String headerValue = extraHeaders.get(key).toString(); - - msg.addHeader(headerName, headerValue); - } - } - Transport.send(msg); } catch (AddressException ae) { ae.printStackTrace(System.out); @@ -283,11 +240,11 @@ private String getDatasetManageFileAccessLink(DataFile datafile){ } private String getDatasetLink(Dataset dataset){ - return systemConfig.getDataverseSiteUrl() + 
"/dataset.xhtml?persistentId=" + dataset.getGlobalIdString(); + return systemConfig.getDataverseSiteUrl() + "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString(); } private String getDatasetDraftLink(Dataset dataset){ - return systemConfig.getDataverseSiteUrl() + "/dataset.xhtml?persistentId=" + dataset.getGlobalIdString() + "&version=DRAFT" + "&faces-redirect=true"; + return systemConfig.getDataverseSiteUrl() + "/dataset.xhtml?persistentId=" + dataset.getGlobalId().asString() + "&version=DRAFT" + "&faces-redirect=true"; } private String getDataverseLink(Dataverse dataverse){ @@ -535,7 +492,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio case STATUSUPDATED: version = (DatasetVersion) targetObject; pattern = BundleUtil.getStringFromBundle("notification.email.status.change"); - String[] paramArrayStatus = {version.getDataset().getDisplayName(), (version.getExternalStatusLabel()==null) ? "" : version.getExternalStatusLabel()}; + String[] paramArrayStatus = {version.getDataset().getDisplayName(), (version.getExternalStatusLabel()==null) ? "" : DatasetUtil.getLocaleExternalStatus(version.getExternalStatusLabel())}; messageText += MessageFormat.format(pattern, paramArrayStatus); return messageText; case CREATEACC: @@ -555,7 +512,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio case CHECKSUMFAIL: dataset = (Dataset) targetObject; String checksumFailMsg = BundleUtil.getStringFromBundle("notification.checksumfail", Arrays.asList( - dataset.getGlobalIdString() + dataset.getGlobalId().asString() )); logger.fine("checksumFailMsg: " + checksumFailMsg); return messageText += checksumFailMsg; @@ -564,7 +521,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio version = (DatasetVersion) targetObject; String fileImportMsg = BundleUtil.getStringFromBundle("notification.mail.import.filesystem", Arrays.asList( systemConfig.getDataverseSiteUrl(), - version.getDataset().getGlobalIdString(), + version.getDataset().getGlobalId().asString(), version.getDataset().getDisplayName() )); logger.fine("fileImportMsg: " + fileImportMsg); @@ -575,7 +532,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String uploadCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.completed", Arrays.asList( systemConfig.getDataverseSiteUrl(), - dataset.getGlobalIdString(), + dataset.getGlobalId().asString(), dataset.getDisplayName(), comment )) ; @@ -586,7 +543,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String downloadCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completed", Arrays.asList( systemConfig.getDataverseSiteUrl(), - dataset.getGlobalIdString(), + dataset.getGlobalId().asString(), dataset.getDisplayName(), comment )) ; @@ -596,7 +553,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String uploadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.completedWithErrors", Arrays.asList( systemConfig.getDataverseSiteUrl(), - dataset.getGlobalIdString(), + dataset.getGlobalId().asString(), 
dataset.getDisplayName(), comment )) ; @@ -607,7 +564,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String downloadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completedWithErrors", Arrays.asList( systemConfig.getDataverseSiteUrl(), - dataset.getGlobalIdString(), + dataset.getGlobalId().asString(), dataset.getDisplayName(), comment )) ; @@ -616,7 +573,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio case CHECKSUMIMPORT: version = (DatasetVersion) targetObject; String checksumImportMsg = BundleUtil.getStringFromBundle("notification.import.checksum", Arrays.asList( - version.getDataset().getGlobalIdString(), + version.getDataset().getGlobalId().asString(), version.getDataset().getDisplayName() )); logger.fine("checksumImportMsg: " + checksumImportMsg); @@ -632,7 +589,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String ingestedCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.ingest.completed", Arrays.asList( systemConfig.getDataverseSiteUrl(), - dataset.getGlobalIdString(), + dataset.getGlobalId().asString(), dataset.getDisplayName(), systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion(), @@ -645,7 +602,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String ingestedCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.ingest.completedwitherrors", Arrays.asList( systemConfig.getDataverseSiteUrl(), - dataset.getGlobalIdString(), + dataset.getGlobalId().asString(), dataset.getDisplayName(), systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion(), @@ -656,7 +613,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio case DATASETMENTIONED: String additionalInfo = userNotification.getAdditionalInfo(); dataset = (Dataset) targetObject; - javax.json.JsonObject citingResource = null; + JsonObject citingResource = null; citingResource = JsonUtil.getJsonObject(additionalInfo); diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java index 09f067f772c..1b4af29c915 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java @@ -5,13 +5,13 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.authorization.AuthenticationProvider; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.RoleAssigneeDisplayInfo; -import edu.harvard.iq.dataverse.authorization.groups.Group; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -20,24 +20,22 @@ import 
 import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand;
 import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.DateUtil;
 import edu.harvard.iq.dataverse.util.JsfHelper;
 import static edu.harvard.iq.dataverse.util.JsfHelper.JH;
 import java.sql.Timestamp;
 import java.util.*;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import javax.ejb.EJB;
-import javax.faces.application.FacesMessage;
-import javax.faces.event.ActionEvent;
-import javax.faces.view.ViewScoped;
-import javax.inject.Inject;
-import javax.inject.Named;
-import javax.persistence.EntityManager;
-import javax.persistence.PersistenceContext;
-import org.apache.commons.lang3.StringUtils;
-import org.primefaces.event.SelectEvent;
-import org.primefaces.event.ToggleSelectEvent;
-import org.primefaces.event.UnselectEvent;
+import jakarta.ejb.EJB;
+import jakarta.faces.application.FacesMessage;
+import jakarta.faces.event.ActionEvent;
+import jakarta.faces.view.ViewScoped;
+import jakarta.inject.Inject;
+import jakarta.inject.Named;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+import org.apache.commons.lang3.ObjectUtils;

 /**
  *
@@ -83,7 +81,12 @@ public class ManageFilePermissionsPage implements java.io.Serializable {
     Dataset dataset = new Dataset();
     private final TreeMap<RoleAssignee,List<RoleAssignmentRow>> roleAssigneeMap = new TreeMap<>();
     private final TreeMap<DataFile,List<RoleAssignmentRow>> fileMap = new TreeMap<>();
-    private final TreeMap<AuthenticatedUser,List<DataFile>> fileAccessRequestMap = new TreeMap<>();
+
+    public TreeMap<AuthenticatedUser, List<FileAccessRequest>> getFileAccessRequestMap() {
+        return fileAccessRequestMap;
+    }
+
+    private final TreeMap<AuthenticatedUser, List<FileAccessRequest>> fileAccessRequestMap = new TreeMap<>();
     private boolean showDeleted = true;

     public boolean isShowDeleted() {
@@ -110,11 +113,6 @@ public TreeMap<DataFile,List<RoleAssignmentRow>> getFileMap() {
         return fileMap;
     }

-    public TreeMap<AuthenticatedUser,List<DataFile>> getFileAccessRequestMap() {
-        return fileAccessRequestMap;
-    }
-
-
     private boolean backingShowDeleted = true;

     public void showDeletedCheckboxChange() {
@@ -125,7 +123,7 @@ public void showDeletedCheckboxChange() {
         }
     }
-
+
     public String init() {
         if (dataset.getId() != null) {
             dataset = datasetService.find(dataset.getId());
@@ -142,17 +140,17 @@ public String init() {
         initMaps();
         return "";
     }
-
+
     private void initMaps() {
         // initialize files and usergroup list
         roleAssigneeMap.clear();
         fileMap.clear();
-        fileAccessRequestMap.clear();
-
+        fileAccessRequestMap.clear();
+
         for (DataFile file : dataset.getFiles()) {
-
+
             // only include if the file is restricted (or its draft version is restricted)
-            //Added a null check in case there are files that have no metadata records SEK
+            //Added a null check in case there are files that have no metadata records SEK
             //for 6587 make sure that a file is in the current version befor adding to the fileMap SEK 2/11/2020
             if (file.getFileMetadata() != null && (file.isRestricted() || file.getFileMetadata().isRestricted())) {
                 //only test if file is deleted if it's restricted
@@ -169,35 +167,67 @@ private void initMaps() {
                 for (RoleAssignment ra : ras) {
                     // for files, only show role assignments which can download
                     if (ra.getRole().permissions().contains(Permission.DownloadFile)) {
-                        raList.add(new RoleAssignmentRow(ra, roleAssigneeService.getRoleAssignee(ra.getAssigneeIdentifier(), true).getDisplayInfo(), fileIsDeleted));
-                        addFileToRoleAssignee(ra, fileIsDeleted);
+                        raList.add(new RoleAssignmentRow(ra, roleAssigneeService.getRoleAssignee(ra.getAssigneeIdentifier(), true).getDisplayInfo(), fileIsDeleted));
+                        addFileToRoleAssignee(ra, fileIsDeleted);
                     }
                 }
-
+
                 file.setDeleted(fileIsDeleted);
-
+
                 fileMap.put(file, raList);
-
+
                 // populate the file access requests map
-                for (AuthenticatedUser au : file.getFileAccessRequesters()) {
-                    List<DataFile> requestedFiles = fileAccessRequestMap.get(au);
-                    if (requestedFiles == null) {
-                        requestedFiles = new ArrayList<>();
-                        AuthenticatedUser withProvider = authenticationService.getAuthenticatedUserWithProvider(au.getUserIdentifier());
-                        fileAccessRequestMap.put(withProvider, requestedFiles);
-                    }
-                    requestedFiles.add(file);
+                for (FileAccessRequest fileAccessRequest : file.getFileAccessRequests()) {
+                    List<FileAccessRequest> requestedFiles = fileAccessRequestMap.get(fileAccessRequest.getAuthenticatedUser());
+                    if (requestedFiles == null) {
+                        requestedFiles = new ArrayList<>();
+                        AuthenticatedUser withProvider = authenticationService.getAuthenticatedUserWithProvider(fileAccessRequest.getAuthenticatedUser().getUserIdentifier());
+                        fileAccessRequestMap.put(withProvider, requestedFiles);
+                    }
+                    requestedFiles.add(fileAccessRequest);
                 }
-            }
+            }
         }
     }
-
+
     public String getAuthProviderFriendlyName(String authProviderId){
-
         return AuthenticationProvider.getFriendlyName(authProviderId);
     }
-
+
+    Date getAccessRequestDate(List<FileAccessRequest> fileAccessRequests){
+        if (fileAccessRequests == null) {
+            return null;
+        }
+
+        // find the oldest date in the list of available and return a formatted date, or null if no dates were found
+        return fileAccessRequests.stream()
+                .filter(fileAccessRequest -> fileAccessRequest.getCreationTime() != null)
+                .min((a, b) -> ObjectUtils.compare(a.getCreationTime(), b.getCreationTime(), true))
+                .map(FileAccessRequest::getCreationTime)
+                .orElse(null);
+    }
+
+    public String formatAccessRequestDate(List<FileAccessRequest> fileAccessRequests){
+        Date date = getAccessRequestDate(fileAccessRequests);
+
+        if (date == null) {
+            return null;
+        }
+
+        return DateUtil.formatDate(date);
+    }
+
+
+    public String formatAccessRequestTimestamp(List<FileAccessRequest> fileAccessRequests){
+        Date date = getAccessRequestDate(fileAccessRequests);
+
+        if (date == null) {
+            return null;
+        }
+
+        return Util.getDateTimeFormat().format(date);
+    }
+
     private void addFileToRoleAssignee(RoleAssignment assignment, boolean fileDeleted) {
         RoleAssignee ra = roleAssigneeService.getRoleAssignee(assignment.getAssigneeIdentifier());
         List<RoleAssignmentRow> assignments = roleAssigneeMap.get(ra);
@@ -354,7 +384,10 @@ public void initAssignDialogForFileRequester(AuthenticatedUser au) {
         fileRequester = au;
         selectedRoleAssignees = null;
         selectedFiles.clear();
-        selectedFiles.addAll(fileAccessRequestMap.get(au));
+
+        for (FileAccessRequest fileAccessRequest : fileAccessRequestMap.get(au)) {
+            selectedFiles.add(fileAccessRequest.getDataFile());
+        }
         showUserGroupMessages();
     }

@@ -374,20 +407,19 @@ public void grantAccess(ActionEvent evt) {
                         sendNotification = true;
                     }
                     // remove request, if it exist
-                    if (file.getFileAccessRequesters().remove(roleAssignee)) {
+                    if (file.removeFileAccessRequester(roleAssignee)) {
                         datafileService.save(file);
-                    }
-                }
-
+                    }
+                }
             }

             if (sendNotification) {
                 for (AuthenticatedUser au : roleAssigneeService.getExplicitUsers(roleAssignee)) {
-                    userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.GRANTFILEACCESS, dataset.getId());
+                    userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.GRANTFILEACCESS, dataset.getId());
                 }
             }
         }
-
+
         initMaps();
     }
@@ -396,23 +428,31 @@ public void grantAccessToRequests(AuthenticatedUser au) {
     }

     public void grantAccessToAllRequests(AuthenticatedUser au) {
-        grantAccessToRequests(au, fileAccessRequestMap.get(au));
-    }
+        List<DataFile> files = new ArrayList<>();
+
+        for (FileAccessRequest fileAccessRequest : fileAccessRequestMap.get(au)) {
+            files.add(fileAccessRequest.getDataFile());
+        }
+
+        grantAccessToRequests(au, files);
+    }

     private void grantAccessToRequests(AuthenticatedUser au, List<DataFile> files) {
         boolean actionPerformed = false;
         // Find the built in file downloader role (currently by alias)
         DataverseRole fileDownloaderRole = roleService.findBuiltinRoleByAlias(DataverseRole.FILE_DOWNLOADER);
         for (DataFile file : files) {
-            if (assignRole(au, file, fileDownloaderRole)) {
-                file.getFileAccessRequesters().remove(au);
-                datafileService.save(file);
+            if (assignRole(au, file, fileDownloaderRole)) {
+                if (file.removeFileAccessRequester(au)) {
+                    datafileService.save(file);
+                }
                 actionPerformed = true;
             }
         }
+
         if (actionPerformed) {
             JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("permission.fileAccessGranted", Arrays.asList(au.getDisplayInfo().getTitle())));
-            userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.GRANTFILEACCESS, dataset.getId());
+            userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.GRANTFILEACCESS, dataset.getId());

             initMaps();
         }
@@ -423,24 +463,29 @@ public void rejectAccessToRequests(AuthenticatedUser au) {
     }

     public void rejectAccessToAllRequests(AuthenticatedUser au) {
-        rejectAccessToRequests(au, fileAccessRequestMap.get(au));
-    }
+        List<DataFile> files = new ArrayList<>();
+
+        for (FileAccessRequest fileAccessRequest : fileAccessRequestMap.get(au)) {
+            files.add(fileAccessRequest.getDataFile());
+        }
+
+        rejectAccessToRequests(au, files);
+    }

     private void rejectAccessToRequests(AuthenticatedUser au, List<DataFile> files) {
-        boolean actionPerformed = false;
-        for (DataFile file : files) {
-            file.getFileAccessRequesters().remove(au);
+        boolean actionPerformed = false;
+        for (DataFile file : files) {
+            file.removeFileAccessRequester(au);
             datafileService.save(file);
             actionPerformed = true;
         }
-
         if (actionPerformed) {
             JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("permission.fileAccessRejected", Arrays.asList(au.getDisplayInfo().getTitle())));
-            userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.REJECTFILEACCESS, dataset.getId());
+            userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.REJECTFILEACCESS, dataset.getId());

             initMaps();
         }
-    }
+    }

     private boolean assignRole(RoleAssignee ra, DataFile file, DataverseRole r) {
         try {
diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java
index 8513ca33b47..583e195ab0d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java
@@ -22,17 +22,17 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.regex.Pattern;
-import javax.ejb.EJB;
-import javax.faces.application.FacesMessage;
-import javax.faces.component.UIComponent;
-import javax.faces.component.UIInput;
-import javax.faces.context.FacesContext;
-import javax.faces.event.ActionEvent;
-import javax.faces.view.ViewScoped;
-import javax.inject.Inject;
-import javax.inject.Named;
-import javax.persistence.EntityManager;
-import javax.persistence.PersistenceContext;
+import jakarta.ejb.EJB;
+import jakarta.faces.application.FacesMessage;
+import jakarta.faces.component.UIComponent;
+import jakarta.faces.component.UIInput;
+import jakarta.faces.context.FacesContext;
+import jakarta.faces.event.ActionEvent;
+import jakarta.faces.view.ViewScoped;
+import jakarta.inject.Inject;
+import jakarta.inject.Named;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
 import org.apache.commons.lang3.StringUtils;
diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java
index 7db0ecc0767..cc89cfd9d56 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java
@@ -11,17 +11,19 @@ import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import javax.ejb.EJB;
-import javax.faces.application.FacesMessage;
-import javax.faces.context.FacesContext;
-import javax.faces.event.ActionEvent;
-import javax.faces.view.ViewScoped;
-import javax.inject.Inject;
-import javax.inject.Named;
-import javax.persistence.EntityManager;
-import javax.persistence.PersistenceContext;
-import javax.servlet.ServletOutputStream;
-import javax.servlet.http.HttpServletResponse;
+import jakarta.ejb.EJB;
+import jakarta.faces.application.FacesMessage;
+import jakarta.faces.context.FacesContext;
+import jakarta.faces.event.AbortProcessingException;
+import jakarta.faces.event.ActionEvent;
+import jakarta.faces.event.AjaxBehaviorEvent;
+import jakarta.faces.view.ViewScoped;
+import jakarta.inject.Inject;
+import jakarta.inject.Named;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+import jakarta.servlet.ServletOutputStream;
+import jakarta.servlet.http.HttpServletResponse;

 /**
  *
@@ -325,7 +327,7 @@ public void setDisplayDownloadAll(boolean displayDownloadAll) {
         this.displayDownloadAll = displayDownloadAll;
     }

-    public String updateGuestbooksRoot(javax.faces.event.AjaxBehaviorEvent event) throws javax.faces.event.AbortProcessingException {
+    public String updateGuestbooksRoot(AjaxBehaviorEvent event) throws AbortProcessingException {
         try {
             dataverse = engineService.submit(
                     new UpdateDataverseGuestbookRootCommand(!isInheritGuestbooksValue(),
diff --git a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java
index e71e04bc42f..bf78b9d088f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java
@@ -29,18 +29,17 @@ import java.util.Date;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.ResourceBundle;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import javax.ejb.EJB;
-import javax.faces.application.FacesMessage;
-import javax.faces.event.ActionEvent;
-import javax.faces.view.ViewScoped;
-import javax.inject.Inject;
-import javax.inject.Named;
-import javax.persistence.EntityManager;
-import javax.persistence.PersistenceContext;
+import jakarta.ejb.EJB;
+import jakarta.faces.application.FacesMessage;
+import jakarta.faces.event.ActionEvent;
+import jakarta.faces.view.ViewScoped;
+import jakarta.inject.Inject;
+import jakarta.inject.Named;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
 import org.apache.commons.text.StringEscapeUtils;

 /**
diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageTemplatesPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageTemplatesPage.java
index 4578a01e693..98369a2eab3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ManageTemplatesPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ManageTemplatesPage.java
@@ -14,15 +14,17 @@ import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import javax.ejb.EJB;
-import javax.faces.application.FacesMessage;
-import javax.faces.event.ActionEvent;
-import javax.faces.view.ViewScoped;
-import javax.inject.Inject;
-import javax.inject.Named;
-import javax.persistence.EntityManager;
-import javax.persistence.PersistenceContext;
+import jakarta.ejb.EJB;
+import jakarta.faces.application.FacesMessage;
+import jakarta.faces.event.ActionEvent;
+import jakarta.faces.view.ViewScoped;
+import jakarta.inject.Inject;
+import jakarta.inject.Named;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
 import edu.harvard.iq.dataverse.util.BundleUtil;
+import jakarta.faces.event.AbortProcessingException;
+import jakarta.faces.event.AjaxBehaviorEvent;

 /**
  *
  * @author skraffmiller
@@ -60,6 +62,9 @@ public class ManageTemplatesPage implements java.io.Serializable {

     @Inject
     LicenseServiceBean licenseServiceBean;
+
+    @Inject
+    SettingsWrapper settingsWrapper;

     private List