diff --git a/doc/sphinx-guides/source/_static/util/robots.txt b/doc/sphinx-guides/source/_static/util/robots.txt new file mode 100644 index 00000000000..ee906363016 --- /dev/null +++ b/doc/sphinx-guides/source/_static/util/robots.txt @@ -0,0 +1,6 @@ +User-agent: * +Allow: /$ +Allow: /dataverse.xhtml +Allow: /dataset.xhtml +Disallow: / +Crawl-delay: 20 diff --git a/doc/sphinx-guides/source/admin/geoconnect-worldmap.rst b/doc/sphinx-guides/source/admin/geoconnect-worldmap.rst index 21fa4a00a1e..b12f0b26fe8 100644 --- a/doc/sphinx-guides/source/admin/geoconnect-worldmap.rst +++ b/doc/sphinx-guides/source/admin/geoconnect-worldmap.rst @@ -3,7 +3,33 @@ Geoconnect and WorldMap .. contents:: :local: -One of the optional components listed under "Architecture and Components" in the :doc:`/installation/prep` section of the Installation Guide is `Geoconnect `_, piece of middleware that allows Dataverse users to create maps in `WorldMap `_ based on geospatial data stored in Dataverse. For more details on the feature from the user perspective, see the :doc:`/user/data-exploration/worldmap` section of the User Guide. +One of the optional components listed under "Architecture and Components" in the :doc:`/installation/prep` section of the Installation Guide is `Geoconnect `_, a piece of middleware that allows Dataverse users to create maps in `WorldMap `_ based on geospatial data stored in Dataverse. For more details on the feature from the user perspective, see the :doc:`/user/data-exploration/worldmap` section of the User Guide. + +Update "mapitlink" +------------------ + +SQL commands to point a Dataverse installation at different Geoconnect servers: + + +**Geoconnect Production** *geoconnect.datascience.iq.harvard.edu* + +.. code-block:: sql + + update worldmapauth_tokentype set mapitlink = 'https://geoconnect.datascience.iq.harvard.edu/shapefile/map-it', hostname='geoconnect.datascience.iq.harvard.edu' where name = 'GEOCONNECT'; + +**Heroku Test** *geoconnect-dev.herokuapp.com* + +.. code-block:: sql + + update worldmapauth_tokentype set mapitlink = 'https://geoconnect-dev.herokuapp.com/shapefile/map-it', hostname='geoconnect-dev.herokuapp.com' where name = 'GEOCONNECT'; + + +**View Current Settings** + +.. code-block:: sql + + SELECT * from worldmapauth_tokentype; + Removing Dead Explore Links --------------------------- diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index cca57dc4497..53317e4c459 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -74,11 +74,11 @@ Additional Tools Please see also the :doc:`/developers/tools` page, which lists additional tools that are very useful but not essential. -Setting up your dev environment +Setting Up Your Dev Environment ------------------------------- -SSH keys -~~~~~~~~ +Set Up SSH Keys +~~~~~~~~~~~~~~~ You can use git with passwords over HTTPS, but it's much nicer to set up SSH keys. https://github.com/settings/ssh is the place to manage the ssh keys GitHub knows about for you.
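Generating a key pair to paste into that page might look like this (a minimal sketch, assuming a recent version of OpenSSH; the email comment is a placeholder):

.. code-block:: bash

   # generate a new key pair, accepting the default file location
   ssh-keygen -t rsa -C "you@example.edu"
   # print the public half so you can paste it into https://github.com/settings/ssh
   cat ~/.ssh/id_rsa.pub
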
That page also links to a nice howto: https://help.github.com/articles/generating-ssh-keys @@ -135,7 +135,7 @@ Once Solr is up and running you should be able to see a "Solr Admin" dashboard a Once some dataverses, datasets, and files have been created and indexed, you can experiment with searches directly from Solr at http://localhost:8983/solr/#/collection1/query and look at the JSON output of searches, such as this wildcard search: http://localhost:8983/solr/collection1/select?q=*%3A*&wt=json&indent=true . You can also get JSON output of static fields Solr knows about: http://localhost:8983/solr/schema/fields -Run installer +Run Installer ~~~~~~~~~~~~~ Once you install Glassfish and PostgreSQL, you need to configure the environment for the Dataverse app - configure the database connection, set some options, etc. We have a new installer script that should do it all for you. Again, assuming that the clone of the Dataverse repository was retrieved using NetBeans and that it is saved in the path ~/NetBeansProjects: @@ -150,10 +150,26 @@ The script is a variation of the old installer from DVN 3.x that calls another s All the future changes to the configuration that are Glassfish-specific and can be done through ``asadmin`` should now go into ``scripts/install/glassfish-setup.sh``. +Rebuilding Your Dev Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you have an old copy of the database and old Solr data and want to start fresh, here are the recommended steps: + +- drop your old database +- clear out your existing Solr index: ``scripts/search/clear`` +- run the installer script above - it will create the db, deploy the app, populate the db with reference data and run all the scripts that create the domain metadata fields. You no longer need to perform these steps separately. +- confirm you are using the latest Dataverse-specific Solr schema.xml per the "Installing and Running Solr" section of this guide +- confirm http://localhost:8080 is up +- If you want to set some dataset-specific facets, go to the root dataverse (or any dataverse; the selections can be inherited) and click "General Information" and make choices under "Select Facets". There is a ticket to automate this: https://github.com/IQSS/dataverse/issues/619 + +You may also find https://github.com/IQSS/dataverse/blob/develop/scripts/deploy/phoenix.dataverse.org/deploy and related scripts interesting because they demonstrate how we have at least partially automated the process of tearing down a Dataverse installation and having it rise again, hence the name "phoenix." See also "Fresh Reinstall" in the :doc:`/installation/installation-main` section of the Installation Guide. + Shibboleth and OAuth -------------------- -If you are working on anything related to users, please keep in mind that your changes will likely affect Shibboleth and OAuth users. Rather than setting up Shibboleth on your laptop, developers are advised to simply add a value to their database to enable Shibboleth "dev mode" like this: +If you are working on anything related to users, please keep in mind that your changes will likely affect Shibboleth and OAuth users. For some background on user accounts in Dataverse, see "Auth Modes: Local vs. Remote vs. Both" in the :doc:`/installation/config` section of the Installation Guide.
+ +Rather than setting up Shibboleth on your laptop, developers are advised to simply add a value to their database to enable Shibboleth "dev mode" like this: ``curl http://localhost:8080/api/admin/settings/:DebugShibAccountType -X PUT -d RANDOM`` @@ -171,14 +187,9 @@ For a list of possible values, please "find usages" on the settings key above an Now when you go to http://localhost:8080/oauth2/firstLogin.xhtml you should be prompted to create a Shibboleth account. -Rebuilding your dev environment ------------------------------- +Geoconnect +---------- -If you have an old copy of the database and old Solr data and want to start fresh, here are the recommended steps: +Geoconnect works as a middle layer, allowing geospatial data files in Dataverse to be visualized with Harvard WorldMap. To set up a Geoconnect development environment, you can follow the steps outlined in the `local_setup.md `_ guide. You will need Python and a few other prerequisites. -- drop your old database -- clear out your existing Solr index: ``scripts/search/clear`` -- run the installer script above - it will create the db, deploy the app, populate the db with reference data and run all the scripts that create the domain metadata fields. You no longer need to perform these steps separately. -- confirm you are using the latest Dataverse-specific Solr schema.xml per the "Installing and Running Solr" section of this guide -- confirm http://localhost:8080 is up -- If you want to set some dataset-specific facets, go to the root dataverse (or any dataverse; the selections can be inherited) and click "General Information" and make choices under "Select Facets". There is a ticket to automate this: https://github.com/IQSS/dataverse/issues/619 +As mentioned under "Architecture and Components" in the :doc:`/installation/prep` section of the Installation Guide, Geoconnect is an optional component of Dataverse, so this section is only necessary to follow if you are working on an issue related to this feature. diff --git a/doc/sphinx-guides/source/developers/geospatial.rst b/doc/sphinx-guides/source/developers/geospatial.rst new file mode 100644 index 00000000000..55fbf9e6808 --- /dev/null +++ b/doc/sphinx-guides/source/developers/geospatial.rst @@ -0,0 +1,174 @@ +=============== +Geospatial Data +=============== + +How Dataverse Ingests Shapefiles +-------------------------------- + +A shapefile is a set of files, often uploaded/transferred in ``.zip`` format. This set may contain up to fifteen files. A minimum of three specific files (``.shp``, ``.shx``, ``.dbf``) is needed for a valid shapefile, and a fourth file (``.prj``) is required for WorldMap -- or any type of meaningful visualization. + +For ingest and connecting to WorldMap, four files are the minimum required: + +- ``.shp`` - shape format; the feature geometry itself +- ``.shx`` - shape index format; a positional index of the feature geometry to allow seeking forwards and backwards quickly +- ``.dbf`` - attribute format; columnar attributes for each shape, in dBase IV format +- ``.prj`` - projection format; the coordinate system and projection information, a plain text file describing the projection using well-known text format + +Ingest +~~~~~~ + +When uploaded to Dataverse, the ``.zip`` is unpacked (same as all ``.zip`` files). Shapefile sets are recognized by the same base name and specific extensions. These individual files constitute a shapefile set.
The first four are the minimum required (``.shp``, ``.shx``, ``.dbf``, ``.prj``). + +For example: + +- bicycles.shp (required extension) +- bicycles.shx (required extension) +- bicycles.prj (required extension) +- bicycles.dbf (required extension) +- bicycles.sbx (NOT required extension) +- bicycles.sbn (NOT required extension) + +Upon recognition of the four required files, Dataverse will group them as well as any other relevant files into a shapefile set. Files with these extensions will be included in the shapefile set: + +- Required: ``.shp``, ``.shx``, ``.dbf``, ``.prj`` +- Optional: ``.sbn``, ``.sbx``, ``.fbn``, ``.fbx``, ``.ain``, ``.aih``, ``.ixs``, ``.mxs``, ``.atx``, ``.cpg``, ``shp.xml`` + +Then Dataverse creates a new ``.zip`` whose mimetype identifies it as a shapefile. The shapefile set will persist as this new ``.zip``. + +Example +~~~~~~~ + +**1a.** Original ``.zip`` contents: + +A file named ``bikes_and_subways.zip`` is uploaded to Dataverse. This ``.zip`` contains the following files: + +- ``bicycles.shp`` (shapefile set #1) +- ``bicycles.shx`` (shapefile set #1) +- ``bicycles.prj`` (shapefile set #1) +- ``bicycles.dbf`` (shapefile set #1) +- ``bicycles.sbx`` (shapefile set #1) +- ``bicycles.sbn`` (shapefile set #1) +- ``bicycles.txt`` +- ``the_bikes.md`` +- ``readme.txt`` +- ``subway_line.shp`` (shapefile set #2) +- ``subway_line.shx`` (shapefile set #2) +- ``subway_line.prj`` (shapefile set #2) +- ``subway_line.dbf`` (shapefile set #2) + +**1b.** Dataverse unzips and re-zips files: + +Upon ingest, Dataverse unpacks the file ``bikes_and_subways.zip``. Upon recognizing the shapefile sets, it groups those files together into new ``.zip`` files: + +- files making up the "bicycles" shapefile become a new ``.zip`` +- files making up the "subway_line" shapefile become a new ``.zip`` +- remaining files will stay as they are + +To ensure that a shapefile set remains intact, individual files such as ``bicycles.sbn`` are kept in the set -- even though they are not used for mapping. + +**1c.** Dataverse final file listing: + +- ``bicycles.zip`` (contains shapefile set #1: ``bicycles.shp``, ``bicycles.shx``, ``bicycles.prj``, ``bicycles.dbf``, ``bicycles.sbx``, ``bicycles.sbn``) +- ``bicycles.txt`` (separate, not part of a shapefile set) +- ``the_bikes.md`` (separate, not part of a shapefile set) +- ``readme.txt`` (separate, not part of a shapefile set) +- ``subway_line.zip`` (contains shapefile set #2: ``subway_line.shp``, ``subway_line.shx``, ``subway_line.prj``, ``subway_line.dbf``) + +For two "final" shapefile sets, ``bicycles.zip`` and ``subway_line.zip``, a new mimetype is used: + +- Mimetype: ``application/zipped-shapefile`` +- Mimetype Label: "Shapefile as ZIP Archive" + +WorldMap JoinTargets + API Endpoint +----------------------------------- + +WorldMap supplies target layers -- or JoinTargets -- that a tabular file may be mapped against. A JSON description of these `CGA `_-curated JoinTargets may be retrieved via API at ``http://worldmap.harvard.edu/datatables/api/jointargets/``. Please note: login is required. You may use any WorldMap account credentials via HTTP Basic Auth. + +Example of JoinTarget information returned via the API: + +..
code-block:: json + + { + "data":[ + { + "layer":"geonode:census_tracts_2010_boston_6f6", + "name":"Census Tracts, Boston (GEOID10: State+County+Tract)", + "geocode_type_slug":"us-census-tract", + "geocode_type":"US Census Tract", + "attribute":{ + "attribute":"CT_ID_10", + "type":"xsd:string" + }, + "abstract":"As of the 2010 census, Boston, MA contains 7,288 city blocks [truncated for example]", + "title":"Census Tracts 2010, Boston (BARI)", + "expected_format":{ + "expected_zero_padded_length":-1, + "is_zero_padded":false, + "description":"Concatenation of state, county and tract for 2010 Census Tracts. Reference: https://www.census.gov/geo/maps-data/data/tract_rel_layout.html\r\n\r\nNote: Across the US, this can be a zero-padded \"string\" but the original Boston layer has this column as \"numeric\" ", + "name":"2010 Census Boston GEOID10 (State+County+Tract)" + }, + "year":2010, + "id":28 + }, + { + "layer":"geonode:addresses_2014_boston_1wr", + "name":"Addresses, Boston", + "geocode_type_slug":"boston-administrative-geography", + "geocode_type":"Boston, Administrative Geography", + "attribute":{ + "attribute":"LocationID", + "type":"xsd:int" + }, + "abstract":"Unique addresses present in the parcels data set, which itself is derived from [truncated for example]", + "title":"Addresses 2015, Boston (BARI)", + "expected_format":{ + "expected_zero_padded_length":-1, + "is_zero_padded":false, + "description":"Boston, Administrative Geography, Boston Address Location ID. Example: 1, 2, 3...nearly 120000", + "name":"Boston Address Location ID (integer)" + }, + "year":2015, + "id":18 + }, + { + "layer":"geonode:bra_neighborhood_statistical_areas_2012__ug9", + "name":"BRA Neighborhood Statistical Areas, Boston", + "geocode_type_slug":"boston-administrative-geography", + "geocode_type":"Boston, Administrative Geography", + "attribute":{ + "attribute":"BOSNA_R_ID", + "type":"xsd:double" + }, + "abstract":"BRA Neighborhood Statistical Areas 2015, Boston. Provided by [truncated for example]", + "title":"BRA Neighborhood Statistical Areas 2015, Boston (BARI)", + "expected_format":{ + "expected_zero_padded_length":-1, + "is_zero_padded":false, + "description":"Boston, Administrative Geography, Boston BRA Neighborhood Statistical Area ID (integer). Examples: 1, 2, 3, ... 68, 69", + "name":"Boston BRA Neighborhood Statistical Area ID (integer)" + }, + "year":2015, + "id":17 + } + ], + "success":true + } + +How Geoconnect Uses Join Target Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When a user attempts to map a tabular file, the application looks in the Geoconnect database for ``JoinTargetInformation``. If this information is more than 10 minutes* old, the application will retrieve fresh information and save it to the db. + +(* Change the timing via the Django settings variable ``JOIN_TARGET_UPDATE_TIME``.) + +This JoinTarget info is used to populate HTML forms used to match a tabular file column to a JoinTarget column. Once a JoinTarget is chosen, the JoinTarget ID is an essential piece of information used to make an API call to WorldMap and attempt to map the file. + +Retrieving Join Target Information from WorldMap API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``get_join_targets()`` function in ``dataverse_layer_services.py`` uses the WorldMap API to retrieve a list of available tabular file JoinTargets. (See the `dataverse_layer_services code in GitHub `_.)
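Because the endpoint only requires HTTP Basic Auth, you can also exercise it by hand; a minimal sketch with ``curl``, where ``myuser:mypassword`` stands in for real WorldMap account credentials:

.. code-block:: bash

   # fetch the JSON description of CGA-curated JoinTargets (login required)
   curl -u myuser:mypassword http://worldmap.harvard.edu/datatables/api/jointargets/
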
+ +Saving Join Target Information to Geoconnect Database +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``get_latest_jointarget_information()`` function in ``utils.py`` retrieves recent JoinTarget Information from the database. (See the `utils code in GitHub `_.) \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index f16953a3265..f37afbbd173 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -20,4 +20,5 @@ Contents: making-releases tools unf/index + geospatial selinux diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7c95b3453fd..28b6920a168 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -85,6 +85,8 @@ Even if you have no interest in Shibboleth nor TwoRavens, you may want to front Still not convinced you should put Glassfish behind another web server? Even if you manage to get your SSL certificate into Glassfish, how are you going to run Glassfish on low ports such as 80 and 443? Are you going to run Glassfish as root? Bad idea. This is a security risk. Under "Additional Recommendations" under "Securing Your Installation" above you are advised to configure Glassfish to run as a user other than root. (The Dataverse team will close https://github.com/IQSS/dataverse/issues/1934 after updating the Glassfish init script provided in the :doc:`prerequisites` section to not require root.) +There's also the issue of serving a production-ready version of robots.txt. By using a proxy such as Apache, this is a one-time "set it and forget it" step as explained below in the "Going Live" section. + If you are convinced you'd like to try fronting Glassfish with Apache, the :doc:`shibboleth` section should be a good resource for you. If you really don't want to front Glassfish with any proxy (not recommended), you can configure Glassfish to run HTTPS on port 443 like this: @@ -149,6 +151,42 @@ Enabling a second authentication provider will result in the Log In page showing - ``:AllowSignUp`` is set to "false" per the :doc:`config` section to prevent users from creating local accounts via the web interface. Please note that local accounts can also be created via API, and the way to prevent this is to block the ``builtin-users`` endpoint or scramble (or remove) the ``BuiltinUsers.KEY`` database setting per the :doc:`config` section. - The "builtin" authentication provider has been disabled. Note that disabling the builtin auth provider means that the API endpoint for converting an account from a remote auth provider will not work. This is the main reason why https://github.com/IQSS/dataverse/issues/2974 is still open. Converting directly from one remote authentication provider to another (e.g., from GitHub to Google) is not supported. Conversion from remote is always to builtin. Then the user initiates a conversion from builtin to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide.
In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. +Going Live: Launching Your Production Deployment +------------------------------------------------ + +This guide has attempted to take you from kicking the tires on Dataverse to finalizing your installation before letting real users in. In theory, all this work could be done on a single server but better would be to have separate staging and production environments so that you can deploy upgrades to staging before deploying to production. This "Going Live" section is about launching your **production** environment. + +Before going live with your installation of Dataverse, you must take the steps above under "Securing Your Installation" and you should at least review the various configuration options listed below. An attempt has been made to put the more commonly-configured options earlier in the list. + +Out of the box, Dataverse attempts to block search engines from crawling your installation of Dataverse so that test datasets do not appear in search results until you're ready. + +Letting Search Engines Crawl Your Installation +++++++++++++++++++++++++++++++++++++++++++++++ + +For a public production Dataverse installation, it is probably desired that search agents be able to index published pages (aka - pages that are visible to an unauthenticated user). +Polite crawlers usually respect the `Robots Exclusion Standard `_; we have provided an example of a production robots.txt :download:`here `). + +You have a couple of options for putting an updated robots.txt file into production. If you are fronting Glassfish with Apache as recommended above, you can place robots.txt in the root of the directory specified in your ``VirtualHost`` and to your Apache config a ``ProxyPassMatch`` line like the one below to prevent Glassfish from serving the version of robots.txt that embedded in the Dataverse war file: + +.. code-block:: text + + # don't let Glassfish serve its version of robots.txt + ProxyPassMatch ^/robots.txt$ ! + +For more of an explanation of ``ProxyPassMatch`` see the :doc:`shibboleth` section. + +If you are not fronting Glassfish with Apache you'll need to prevent Glassfish from serving the robots.txt file embedded in the war file by overwriting robots.txt after the war file has been deployed. The downside of this technique is that you will have to remember to overwrite robots.txt in the "exploded" war file each time you deploy the war file, which probably means each time you upgrade to a new version of Dataverse. Furthermore, since the version of Dataverse is always incrementing and the version can be part of the file path, you will need to be conscious of where on disk you need to replace the file. For example, for Dataverse 4.6.1 the path to robots.txt may be ``/usr/local/glassfish4/glassfish/domains/domain1/applications/dataverse-4.6.1/robots.txt`` with the version number ``4.6.1`` as part of the path. + +Putting Your Dataverse Installation on the Map at dataverse.org ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Congratulations! You've gone live! It's time to announce your new data respository to the world! You are also welcome to contact support@dataverse.org to have the Dataverse team add your installation to the map at http://dataverse.org . Thank you for installing Datavese! 
+ +Administration of Your Dataverse Installation ++++++++++++++++++++++++++++++++++++++++++++++ + +Now that you're live, you'll want to review the :doc:`/admin/index`. Please note that there is also an :doc:`administration` section of this Installation Guide that will be moved to the newer Admin Guide in the future. + JVM Options ----------- diff --git a/doc/sphinx-guides/source/installation/geoconnect.rst b/doc/sphinx-guides/source/installation/geoconnect.rst new file mode 100644 index 00000000000..5ac5f360f73 --- /dev/null +++ b/doc/sphinx-guides/source/installation/geoconnect.rst @@ -0,0 +1,14 @@ +Geoconnect +========== + +Geoconnect works as a middle layer, allowing geospatial data files in Dataverse to be visualized with Harvard WorldMap. + +To understand the feature from the user perspective, see the :doc:`/user/data-exploration/worldmap` section of the User Guide. + +As of this writing, the README at https://github.com/IQSS/geoconnect recommends not installing Geoconnect at this time due to an ongoing rewrite of the WorldMap code. If you are not deterred by this, read on! + +To set up a Geoconnect development environment, you can follow the steps outlined in the `local_setup.md `_ guide. Although those instructions are for a local development setup, they may assist in installing Geoconnect in your production environment. See also "Geoconnect" under the :doc:`/developers/dev-environment` section of the Developer Guide. + +Harvard Dataverse runs Geoconnect on Heroku. To make use of Heroku, you will need a Heroku account, as well as a few other prerequisites. Follow the instructions outlined in the `heroku_setup.md `_ guide. The `heroku.py `_ settings file may also be adapted for other environments. Please note: for the production environment, remember to set ``DEBUG=False``. + +See also the :doc:`/admin/geoconnect-worldmap` section of the Admin Guide. diff --git a/doc/sphinx-guides/source/installation/index.rst b/doc/sphinx-guides/source/installation/index.rst index 5004093090a..469bb75a481 100755 --- a/doc/sphinx-guides/source/installation/index.rst +++ b/doc/sphinx-guides/source/installation/index.rst @@ -18,5 +18,6 @@ Contents: administration upgrading r-rapache-tworavens + geoconnect shibboleth oauth2 diff --git a/doc/sphinx-guides/source/installation/installation-main.rst b/doc/sphinx-guides/source/installation/installation-main.rst index 85551366502..e5c002c62ab 100755 --- a/doc/sphinx-guides/source/installation/installation-main.rst +++ b/doc/sphinx-guides/source/installation/installation-main.rst @@ -154,4 +154,4 @@ Rerun Installer With all the data cleared out, you should be ready to rerun the installer per above. -Related to all this is a series of scripts at https://github.com/IQSS/dataverse/blob/develop/scripts/deploy/phoenix.dataverse.org/deploy that Dataverse developers use have the test server http://phoenix.dataverse.org rise from the ashes before integration tests are run against it. Your mileage may vary. :) +Related to all this is a series of scripts at https://github.com/IQSS/dataverse/blob/develop/scripts/deploy/phoenix.dataverse.org/deploy that Dataverse developers use to have the test server http://phoenix.dataverse.org rise from the ashes before integration tests are run against it. Your mileage may vary. :) For more on this topic, see "Rebuilding Your Dev Environment" in the :doc:`/developers/dev-environment` section of the Developer Guide.
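The gist of that teardown-and-rise sequence, as a minimal sketch (assuming the installer's default ``dvndb`` database name and a shell at the root of the source tree; the installer invocation shown is the interactive ``scripts/install/install`` script, so adjust paths and names for your installation):

.. code-block:: bash

   # drop the old database (the default name is "dvndb"; yours may differ)
   psql -U postgres -c 'DROP DATABASE "dvndb"'
   # clear out the existing Solr index
   scripts/search/clear
   # rerun the installer, which recreates the db and redeploys the app
   cd scripts/install && ./install
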
diff --git a/doc/sphinx-guides/source/installation/prep.rst b/doc/sphinx-guides/source/installation/prep.rst index e33d9bfae51..d6d531cdfaf 100644 --- a/doc/sphinx-guides/source/installation/prep.rst +++ b/doc/sphinx-guides/source/installation/prep.rst @@ -65,7 +65,7 @@ There are a number of optional components you may choose to install or configure - Apache: a web server that can "reverse proxy" Glassfish applications and rewrite HTTP traffic. - Shibboleth: an authentication system described in :doc:`shibboleth`. Its use with Dataverse requires Apache. - OAuth2: an authentication system described in :doc:`oauth2`. -- Geoconnect: :doc:`/user/data-exploration/worldmap` describes the feature and the code can be downloaded from https://github.com/IQSS/geoconnect +- Geoconnect: a system that allows users to create maps from geospatial files, described in :doc:`geoconnect`. System Requirements ------------------- diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-1.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-1.png new file mode 100644 index 00000000000..33198c3c0dc Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-1.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-2.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-2.png new file mode 100644 index 00000000000..f2a0b0fdf42 Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-2.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-3.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-3.png new file mode 100644 index 00000000000..55b4ff9c918 Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-3.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-4.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-4.png new file mode 100644 index 00000000000..fc9664db94d Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-4.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-5.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-5.png new file mode 100644 index 00000000000..806c290381c Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-5.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-6.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-6.png new file mode 100644 index 00000000000..1df343e594b Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-6.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-7.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-7.png new file mode 100644 index 00000000000..c688992d260 Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-7.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-8.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-8.png new file mode 100644 index 00000000000..9bbc6a1dac7 Binary files /dev/null and 
b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-8.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-9.png b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-9.png new file mode 100644 index 00000000000..0996ff0513f Binary files /dev/null and b/doc/sphinx-guides/source/user/data-exploration/img/geoconnect-tabular-9.png differ diff --git a/doc/sphinx-guides/source/user/data-exploration/worldmap.rst b/doc/sphinx-guides/source/user/data-exploration/worldmap.rst index 69f7bdccbdf..b3a7ea7c815 100644 --- a/doc/sphinx-guides/source/user/data-exploration/worldmap.rst +++ b/doc/sphinx-guides/source/user/data-exploration/worldmap.rst @@ -3,29 +3,148 @@ WorldMap: Geospatial Data Exploration +++++++++++++++++++++++++++++++++++++ -WorldMap -======== +.. contents:: :local: -`WorldMap `_ is developed by the Center for Geographic Analysis (CGA) at Harvard and is an open source software that helps researchers visualize and explore their data in maps. The WorldMap and Dataverse collaboration allows researchers to be able to upload shapefiles to Dataverse for long term storage and receive a persistent identifier (through DOI) as well as be able to easily move into WorldMap to interact with the data and save to WorldMap as well. GeoConnect is the platform integrating Dataverse and WorldMap together and what you will use to visualize your data. +Dataverse and WorldMap +====================== -Uploading Shapefiles to Dataverse -================================= +`WorldMap `_ is developed by the Center for Geographic Analysis (CGA) at Harvard and is open source software that helps researchers visualize and explore their data in maps. The WorldMap and Dataverse collaboration allows researchers to upload shapefiles or tabular files to Dataverse for long term storage and receive a persistent identifier (through DOI), then easily navigate into WorldMap to interact with the data and save to WorldMap as well. -To get started, you will need to create a dataset in Dataverse. For more detailed instructions on creating a dataset, read the `Dataset + File Management `_ portion of this user guide. +Note: WorldMap hosts its own `user guide `_ that covers some of the same material as this page. -Dataverse recognizes ZIP files that contain the components of a shapefile and will ingest them as a ZIP. +What is Geoconnect? +=================== -Once you have uploaded your ZIP files comprising a shapefile, a Map Data button will appear next to the file in the dataset. +Geoconnect is a platform that integrates Dataverse and WorldMap, allowing researchers to visualize their geospatial data. Geoconnect can be used to create maps of shapefiles or of tabular files containing geospatial information. Geoconnect is an optional component of Dataverse, so if you are interested in this feature but don't see it in the installation of Dataverse you are using, you should contact the support team for that installation and ask them to enable the Geoconnect feature. -Mapping your data with Geoconnect -================================= +If a data file's owner has created a map of that data using Geoconnect, you can view the map by clicking the "Explore" button. If the data is in the form of a shapefile, the button takes you right to the map. If it's a tabular file, the Explore button will be a dropdown, and you'll need to select "Worldmap". -In order to use the WorldMap and Dataverse integration, your dataset will need to be published.
Once it has been published, you will be able to use the MapData button. Click on the Map Data button to be brought to GeoConnect, the portal between Dataverse and WorldMap that will process your shapefile and send it to WorldMap. +Mapping shapefiles with Geoconnect +================================== -To get started with visualizing your shapefile, click on the blue Visualize on WorldMap button in GeoConnect. It may take 30 seconds or longer for the data to be sent to WorldMap and then back to GeoConnect +Geoconnect is capable of mapping shapefiles that are uploaded to Dataverse in .zip format. Specifically, Dataverse recognizes a zipped shapefile by: -Once the visualizing has finished, you will be able to style your map through Attribute, Classification Method, Number of Intervals, and Colors. At any time, you can view the map on WorldMap if you would like to see how it will be displayed there. +1. Examining the contents of the .zip file -After styling your map, you can delete it or return to Dataverse. If you decide to delete the map, it will no longer appear on WorldMap. By returning to Dataverse, you will send the styled map layer to WorldMap as well as to Dataverse where a preview will be available of the map layer you styled using GeoConnect. +2. Checking for the existence of four similarly named files with the following extensions: .dbf, .prj, .shp, .shx -To map the shapefile again, all you will need to do is click the Map Data button again. +Once you have uploaded your .zip shapefile, a Map Data button will appear next to the file in the dataset. In order to use this button, you'll need to publish your dataset. Once your dataset has been published, you can click on the Map Data button to be brought to Geoconnect, the portal between Dataverse and WorldMap that will allow you to create your map. + +To get started with visualizing your shapefile, click on the blue "Visualize on WorldMap" button in Geoconnect. It may take up to 45 seconds for the data to be sent to WorldMap and then back to Geoconnect. + +Once this process has finished, you will be taken to a new page where you can style your map through Attribute, Classification Method, Number of Intervals, and Colors. Clicking "View on WorldMap" will open WorldMap in a new tab, allowing you to see how your map will be displayed there. + +After styling your map, you can either save it by clicking "Return to Dataverse" or delete it with the "Delete" button. If you decide to delete the map, it will no longer appear on WorldMap. Returning to Dataverse will send the styled map layer to both Dataverse and WorldMap. A preview of your map will now be visible on your file page and your dataset page. + +To replace your shapefile's map with a new one, simply click the Map Data button again. + +Mapping tabular files with Geoconnect +===================================== + +Geoconnect can map tabular files that contain geospatial information such as latitude/longitude coordinates, census tracts, zip codes, Boston election wards, etc. + + +Preparing a tabular file to be mapped +------------------------------------- + +**1. Ingest** + +Geospatial tabular files need a bit of preparation in Dataverse before they can be mapped in Geoconnect. When you upload your file, Dataverse will take about ten seconds to ingest it. During the ingest process it will identify the file as tabular data. + +|image1| + + +**2. Tag as Geospatial** + +Next, you'll need to let Dataverse know that your tabular file contains geospatial data.
Select your file, click the "Edit Files" button, and select "Tags" from the dropdown menu. This will take you to the Edit Tags menu (pictured below). Under the "Tabular Data Tags" dropdown, select "Geospatial". Then click "Save Changes". + +|image2| + + +**3. Publish & Map Data** + +At this point, a "Map data" button will appear next to your file. Publish this new version of your dataset to activate this button. + +|image3| + + +Creating the map +---------------- + +If your tabular file contains **latitude and longitude** columns, then the process is simple: those columns may be directly mapped. Otherwise, you will need to use a *spatial join*. Spatial joins tell WorldMap how to read your tabular data file in order to create a map that accurately represents it. + +To carry out a spatial join, you'll manually connect + +- Geospatial column(s) from your Dataverse tabular file + - e.g., a census tract column from your table + +with + +- A WorldMap "target layer" that contains the same geospatial information + - e.g., WorldMap's "target layer" containing census tract parameters + +The following screenshots illustrate the mapping process: + +**1. Once you've pressed the "Map Data" button, you're brought to this page:** + +|image4| + +**2. Choose a Geospatial Data Type** + +|image5| + +**3. Choose a column from your file to match the WorldMap Layer you selected** + +|image6| + +**4. Choose from the list of WorldMap Layers available for the Geospatial Data Type you selected** + +|image7| + +**5.Submit the data for mapping!** + +|image8| + +**6. View Results** + +At this point you will be presented with a basic map that can be styled to your specifications. The example pictured below includes an error message - some of the rows weren't able to be matched properly. In this case, you could still go forward with your map, but without the information from the unmatched rows. + +|image9| + +Finalizing your map +=================== + +Now that you have created your map: + +- It exists on the WorldMap platform and may be viewed there -- with all of WorldMap's capabilities. + +- Dataverse will contain a preview of the map and links to the larger version on WorldMap. + +The map editor (pictured above) provides a set of options you can use to style your map. The "Return to the Dataverse" button saves your map and brings you back to Dataverse. "View on WorldMap" takes you to the map's page on WorldMap, which offers additional views and options. + +If you'd like to make future changes to your map, you can return to the editor by clicking the "Map Data" button on your file. + +Removing your map +================= + +You can delete your map at any time. If you are on Dataverse, click "Map Data" and click the "Delete Map" button on the upper right. This completely removes the map and underlying data from the WorldMap platform. + + +.. |image1| image:: ./img/geoconnect-tabular-1.png + :class: img-responsive +.. |image2| image:: ./img/geoconnect-tabular-2.png + :class: img-responsive +.. |image3| image:: ./img/geoconnect-tabular-3.png + :class: img-responsive +.. |image4| image:: ./img/geoconnect-tabular-4.png + :class: img-responsive +.. |image5| image:: ./img/geoconnect-tabular-5.png + :class: img-responsive +.. |image6| image:: ./img/geoconnect-tabular-6.png + :class: img-responsive +.. |image7| image:: ./img/geoconnect-tabular-7.png + :class: img-responsive +.. |image8| image:: ./img/geoconnect-tabular-8.png + :class: img-responsive +.. 
|image9| image:: ./img/geoconnect-tabular-9.png :class: img-responsive diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index d9db1fe9a54..a5fb52320ed 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -239,7 +239,25 @@ The Widgets feature provides you with code for your personal website so your dat In the Widgets tab, you can copy and paste the code snippets for the widget you would like to add to your website. If you need to adjust the height of the widget on your website, you may do so by editing the `heightPx=500` parameter in the code snippet. -To give someone access to your restricted files, click on the "Grant Access to Users/Groups" button in the Users/Groups section. +Dataset Widget +~~~~~~~~~~~~~~ + +The Dataset Widget allows the citation, metadata, files and terms of your dataset to be displayed on your website. When someone downloads a data file in the widget, it will download directly from the datasets on your website. If a file is restricted, they will be directed to your dataverse to log in, instead of logging in through the widget on your site. + +To edit your dataset, you will need to return to the Dataverse repository where the dataset is stored. You can easily do this by clicking on the link that says "Data Stored in (Name) Dataverse" found at the bottom of the widget. + +Dataset Citation Widget +~~~~~~~~~~~~~~~~~~~~~~~ + +The Dataset Citation Widget will provide a citation for your dataset on your personal or project website. Users can download the citation in various formats by using the Cite Data button. The persistent URL in the citation will direct users to the dataset in your dataverse. + +Adding Widgets to an OpenScholar Website +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#. Log in to your OpenScholar website +#. Either build a new page or navigate to the page you would like to use to show the Dataverse widgets. +#. Click on the Settings Cog and select Layout +#. At the top right, select Add New Widget and under Misc. you will see the Dataverse Dataset and the Dataverse Dataset Citation Widgets. Click on the widget you would like to add, fill out the form, and then drag it to where you would like it to display in the page. Publish Dataset =============== @@ -312,35 +330,6 @@ If you deaccession the most recently published version of the dataset but not al **Important Note**: A tombstone landing page with the basic citation metadata will always be accessible to the public if they use the persistent URL (Handle or DOI) provided in the citation for that dataset. Users will not be able to see any of the files or additional metadata that were previously available prior to deaccession. -.. _dataset-widgets: - -Widgets -======= - -The Widgets feature provides you with code for your personal website so your dataset can be displayed. There are two types of Widgets for a dataset: the Dataset Widget and the Dataset Citation Widget. The Widgets are found by going to your dataset page, clicking the Edit button (the one with the pencil icon) and selecting Widgets from the dropdown menu. - -On the Widgets page, you can copy and paste the code snippets for the widget you would like to add to your website. If you need to adjust the height of the widget on your website, you may do so by editing the `heightPx=500` parameter in the code snippet.
- -Dataset Widget --------------- - -The Dataset Widget allows the citation, metadata, files and terms of your dataset to be displayed on your website. When someone downloads a data file in the widget, it will download directly from the datasets on your website. If a file is restricted, they will be directed to your dataverse to log in, instead of logging in through the widget on your site. - -To edit your dataset, you will need to return to the Dataverse repository where the dataset is stored. You can easily do this by clicking on the link that says "Data Stored in (Name) Dataverse" found in the bottom of the widget. - -Dataset Citation Widget ------------------------ - -The Dataset Citation Widget will provide a citation for your dataset on your personal or project website. Users can download the citation in various formats by using the Cite Data button. The persistent URL in the citation will direct users to the dataset in your dataverse. - -Adding Widgets to an OpenScholar Website ----------------------------------------- - -#. Log in to your OpenScholar website -#. Either build a new page or navigate to the page you would like to use to show the Dataverse widgets. -#. Click on the Settings Cog and select Layout -#. At the top right, select Add New Widget and under Misc. you will see the Dataverse Dataset and the Dataverse Dataset Citation Widgets. Click on the widget you would like to add, fill out the form, and then drag it to where you would like it to display in the page. - .. |image1| image:: ./img/DatasetDiagram.png :class: img-responsive .. |image2| image:: ./img/data-download.png diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 739e4123540..e9df7015a07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -5,7 +5,6 @@ */ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; @@ -36,6 +35,25 @@ @Stateless @Named public class GuestbookResponseServiceBean { + + public static final String BASE_QUERY_STRING_WITH_GUESTBOOK = "select r.id, g.name, v.value, r.responsetime, r.downloadtype, m.label, r.dataFile_id, r.name, r.email, r.institution, r.position from guestbookresponse r," + + " datasetfieldvalue v, filemetadata m, dvobject o, guestbook g " + + " where " + + " v.datasetfield_id = (select id from datasetfield f where datasetfieldtype_id = 1 " + + " and datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id )) " + + " and m.datasetversion_id = (select max(datasetversion_id) from filemetadata where datafile_id =r.datafile_id ) " + + " and m.datafile_id = r.datafile_id " + + " and r.dataset_id = o.id " + + " and r.guestbook_id = g.id "; + + public static final String BASE_QUERY_STRING_WITHOUT_GUESTBOOK = "select r.id, v.value, r.responsetime, r.downloadtype, m.label, r.name from guestbookresponse r," + + " datasetfieldvalue v, filemetadata m , dvobject o " + + " where " + + " v.datasetfield_id = (select id from datasetfield f where datasetfieldtype_id = 1 " + + " and datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id )) " + + " and 
m.datasetversion_id = (select max(datasetversion_id) from filemetadata where datafile_id =r.datafile_id ) " + + " and m.datafile_id = r.datafile_id " + + " and r.dataset_id = o.id "; @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -66,15 +84,7 @@ public List findAllByGuestbookId(Long guestbookId) { public List findArrayByDataverseId (Long dataverseId){ - String queryString = "select r.id, g.name, v.value, r.responsetime, r.downloadtype, m.label, r.dataFile_id, r.name, r.email, r.institution, r.position from guestbookresponse r," - + " datasetfieldvalue v, filemetadata m, dvobject o, guestbook g " - + " where " - + " v.datasetfield_id = (select id from datasetfield f where datasetfieldtype_id = 1 " - + " and datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id )) " - + " and m.datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id ) " - + " and m.datafile_id = r.datafile_id " - + " and r.dataset_id = o.id " - + " and r.guestbook_id = g.id " + String queryString = BASE_QUERY_STRING_WITH_GUESTBOOK + " and o.owner_id = " + dataverseId.toString() + ";"; @@ -84,15 +94,7 @@ public List findArrayByDataverseId (Long dataverseId){ public List findArrayByDataverseIdAndGuestbookId (Long dataverseId, Long guestbookId){ - String queryString = "select r.id, g.name, v.value, r.responsetime, r.downloadtype, m.label, r.dataFile_id, r.name, r.email, r.institution, r.position from guestbookresponse r," - + " datasetfieldvalue v, filemetadata m, dvobject o, guestbook g " - + " where " - + " v.datasetfield_id = (select id from datasetfield f where datasetfieldtype_id = 1 " - + " and datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id )) " - + " and m.datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id ) " - + " and m.datafile_id = r.datafile_id " - + " and r.dataset_id = o.id " - + " and r.guestbook_id = g.id " + String queryString = BASE_QUERY_STRING_WITH_GUESTBOOK + " and o.owner_id = " + dataverseId.toString() + " and r.guestbook_id = " @@ -153,14 +155,7 @@ public List findArrayByGuestbookIdAndDataverseId (Long guestbookId, Lo boolean hasCustomQuestions = gbIn.getCustomQuestions() != null; List retVal = new ArrayList<>(); - String queryString = "select r.id, v.value, r.responsetime, r.downloadtype, m.label, r.name from guestbookresponse r," - + " datasetfieldvalue v, filemetadata m , dvobject o " - + " where " - + " v.datasetfield_id = (select id from datasetfield f where datasetfieldtype_id = 1 " - + " and datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id )) " - + " and m.datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id ) " - + " and m.datafile_id = r.datafile_id " - + " and r.dataset_id = o.id " + String queryString = BASE_QUERY_STRING_WITHOUT_GUESTBOOK + " and o.owner_id = " + dataverseId.toString() + " and r.guestbook_id = " @@ -409,7 +404,7 @@ public GuestbookResponse initGuestbookResponseForFragment(Dataset dataset, FileM } else { workingVersion = dataset.getLatestVersion(); } - + GuestbookResponse guestbookResponse = new GuestbookResponse(); @@ -417,6 +412,8 @@ public GuestbookResponse initGuestbookResponseForFragment(Dataset dataset, FileM guestbookResponse.setWriteResponse(false); } + // guestbookResponse.setDatasetVersion(workingVersion); + if (fileMetadata != null){ guestbookResponse.setDataFile(fileMetadata.getDataFile()); } @@ -441,6 +438,7 @@ public 
GuestbookResponse initGuestbookResponseForFragment(Dataset dataset, FileM guestbookResponse.setDataset(dataset); + return guestbookResponse; }