Skip to content

Commit

Permalink
Add DataCite usage example
Browse files Browse the repository at this point in the history
  • Loading branch information
dspinellis committed Jul 2, 2024
1 parent ab77e89 commit 5277c3c
Show file tree
Hide file tree
Showing 10 changed files with 376 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/app-eg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ Data source examples and metric measurements
set <https://github.com/dspinellis/alexandria3k/tree/main/examples/crossref-standalone>`__
- `Report metrics associated with ORCID
data <https://github.com/dspinellis/alexandria3k/tree/main/examples/orcid>`__
- `Report metrics associated with DataCite
data <https://github.com/dspinellis/alexandria3k/tree/main/examples/datacite>`__
- `Report research organization registry
metrics <https://github.com/dspinellis/alexandria3k/tree/main/examples/ror-metrics>`__

Expand Down
1 change: 1 addition & 0 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export ROR?=$(TOP_DIR)/tests/data/ror.zip
export DOAJ?=$(TOP_DIR)/tests/data/doaj.csv
export JOURNAL_NAMES?=$(TOP_DIR)/tests/data/titleFile.csv
export PUBMED_DIR?=$(TOP_DIR)/tests/data/pubmed-sample
export DATACITE?=$(TOP_DIR)/tests/data/datacite.tar.gz

# Use source code tree implementation
export A3K?=$(TOP_DIR)/bin/a3k
Expand Down
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ The following examples are available.
### Data source examples and metric measurements
* [Report metrics associated with the Crossref data set](crossref-standalone)
* [Report metrics associated with ORCID data](orcid)
* [Report metrics associated with DataCite data](datacite)
* [Report research organization registry metrics](ror-metrics)

### Impact and productivity calculations
Expand Down
4 changes: 4 additions & 0 deletions examples/common/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ USPTO_DIR?=../common/uspto-data
ORCID_SUMMARIES?=../common/ORCID_2022_10_summaries.tar.gz
ROR?=../common/ror-v1.17.1-2022-12-16.zip
PUBMED_DIR?=../common/pubmed
# Default location of the DataCite archive. Assigned with ?= like the
# other data paths so that a value supplied through the environment
# (for example, one exported by a parent make) or the command line
# is not overridden here.
DATACITE?=../common/datacite.tar.gz

V?=1
TIME?=time
Expand All @@ -36,6 +37,9 @@ $(ORCID_SUMMARIES):
$(ROR):
curl -L 'https://zenodo.org/record/7448410/files/v1.17.1-2022-12-16-ror-data.zip?download=1' >$@

# This rule does not fetch the archive; it only tells the user where to
# get it. Fail explicitly so that targets depending on the archive do not
# run their recipes against a file that is still missing.
$(DATACITE):
	@echo "Download the DataCite Public Data File as datacite.tar.gz from https://datafiles.datacite.org/" >&2
	@exit 1

# TODO when using this Makefile: Add rule in including Makefile named
# "populate" to populate the database with required data

Expand Down
8 changes: 8 additions & 0 deletions examples/datacite/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.depend
.depend.all
nohup.out
populate
reports
rolap
simple-rolap
tables
14 changes: 14 additions & 0 deletions examples/datacite/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#
# DataCite example: compute metrics over the DataCite tables
#

# Database name (overridable) and the target exported as DEPENDENCIES
# for the shared Makefile included below
export MAINDB?=datacite
export DEPENDENCIES=populate

include ../common/Makefile

# Load the DataCite archive into "$(MAINDB).db"; on success an empty
# file named "populate" is touched to serve as the target's timestamp
populate: $(DATACITE)
	$(TIME) $(A3K) --progress populate "$(MAINDB).db" datacite "$(DATACITE)"
	touch $@
254 changes: 254 additions & 0 deletions examples/datacite/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
# DataCite metrics
Populate a database with all DataCite data and report corresponding
metrics as well as the codes used in the employed schemes.

Below are the metric and scheme results
obtained from the 2023 DataCite Public Data File.
For each scheme, only the ten most frequently occurring values are listed.

## Metrics

| Value | Count |
|:------|------:|
|contributor\_affiliations|3635578|
|contributor\_name\_identifiers|4576444|
|creator\_affiliations|22560989|
|creator\_name\_identifiers|12830187|
|work\_contributors|24236641|
|work\_creators|202655948|
|work\_dates|81364966|
|work\_descriptions|41259535|
|work\_funding\_references|2305529|
|work\_geo\_locations|19969035|
|work\_related\_identifiers|483494659|
|work\_rights|26632935|
|work\_subjects|117682396|
|work\_titles|57359160|
|works|52863283|


## dc\_contributor\_name\_identifiers.name\_identifier\_scheme

| Value | Count |
|:------|------:|
|ORCID|1649199|
|ROR|782619|
|VIAF|589092|
|GRID|200583|
|GND|169558|
|ISNI|90200|
|BNF|87558|
|LCCN|87558|
|NKC|87558|
|SUDOC|87558|

## dc\_creator\_name\_identifiers.scheme\_uri

| Value | Count |
|:------|------:|
|https://orcid.org|9385742|
|(none)|2601705|
|https://orcid.org/|200060|
|https://d-nb.info/gnd/|86161|
|(none)|83605|
|http://orcid.org/|52468|
|https://www.jacow.org/|31388|
|http://isni.org/isni/|30290|
|http://lccn.loc.gov/|29186|
|https://aleph.nkp.cz/F/?func=find-c&local\_base=aut&CON\_LNG=ENG&ccl\_term=ica=|29186|

## dc\_work\_contributors.contributor\_type

| Value | Count |
|:------|------:|
|ContactPerson|5952672|
|Other|3854570|
|DataManager|3635356|
|HostingInstitution|2358178|
|Researcher|2260431|
|DataCollector|1777541|
|Funder|683551|
|Distributor|564472|
|Editor|518802|
|(none)|460972|

## dc\_work\_creators.name\_type

| Value | Count |
|:------|------:|
|Personal|155758469|
|(none)|41788325|
|Organizational|5109154|

## dc\_work\_dates.date\_type

| Value | Count |
|:------|------:|
|Issued|33869505|
|Updated|18217771|
|Created|10375387|
|Available|8191381|
|Submitted|4422420|
|Accepted|3138445|
|Collected|2968553|
|Copyrighted|108668|
|Valid|44016|
|Withdrawn|13512|

## dc\_work\_descriptions.description\_type

| Value | Count |
|:------|------:|
|Abstract|29843289|
|Other|9688729|
|SeriesInformation|1040621|
|Methods|250449|
|TechnicalInfo|244637|
|TableOfContents|119599|
|(none)|72210|
|abstract|1|

## dc\_work\_funding\_references.funder\_identifier\_type

| Value | Count |
|:------|------:|
|Crossref Funder ID|985694|
|(none)|691805|
|ROR|426936|
|GRID|168655|
|ISNI|29975|
|Other|2377|
|"Other">China Geological Survey project|36|
|Fondation Martine Aublet|20|
|Fondation Martine Aublet (Paris, France)|16|
|Fondation Martine Aublet (Paris, France).|2|

## dc\_work\_related\_identifiers.related\_identifier\_type

| Value | Count |
|:------|------:|
|DOI|456179995|
|URL|14859234|
|IGSN|9538696|
|LSID|799059|
|EISSN|745896|
|ISSN|414258|
|ISBN|303769|
|Handle|295596|
|LISSN|130048|
|PMID|71231|

## dc\_work\_related\_identifiers.relation\_type

| Value | Count |
|:------|------:|
|References|431691310|
|IsPartOf|13049632|
|IsIdenticalTo|11390034|
|IsSupplementTo|3944418|
|IsVersionOf|3906323|
|HasMetadata|3680959|
|HasPart|2840517|
|IsCitedBy|2684860|
|HasVersion|2656100|
|IsSourceOf|2118187|

## dc\_work\_related\_identifiers.scheme\_type

| Value | Count |
|:------|------:|
|(none)|480269100|
|DwC-A|1611524|
|XML|1611524|
|http://datacite.org/schema/kernel-3|1965|
|text/html|201|
|xsd|201|
|Text|64|
|XSD|43|
|Knowledge|34|
|DATA ACCES|1|

## dc\_work\_rights.rights\_identifier

| Value | Count |
|:------|------:|
|(none)|15617810|
|cc-by-4.0|5859089|
|cc-by-nc-4.0|1696788|
|cc0-1.0|1566760|
|cc-by-sa-4.0|668028|
|cc-by-3.0|335076|
|cc-by-nc-nd-4.0|202084|
|cc-by-nc-sa-4.0|106664|
|cc-by-1.0|83510|
|openaccess|54315|

## dc\_work\_rights.rights\_identifier\_scheme

| Value | Count |
|:------|------:|
|(none)|15705417|
|SPDX|10818162|
|info:eu-repo-Access-Terms vocabulary|54315|
|(none)|52238|
|Creative Commons|2105|
|creativecommons|399|
|b2share.legacy|181|
|spdx|74|
|custom|43|
|Local Contexts|1|

## dc\_works.identifier\_type

| Value | Count |
|:------|------:|
|(none)|45955613|
|DOI|4437411|
|URL|1318449|
|EISSN|619787|
|Handle|192467|
|ISSN|149125|
|LISSN|130011|
|ISBN|58286|
|URN|2008|
|arXiv|66|

## dc\_work\_subjects.subject\_scheme

| Value | Count |
|:------|------:|
|(none)|76872398|
|Fields of Science and Technology (FOS)|16338295|
|Parameter|4964564|
|arXiv|3858253|
|FOR|3763252|
|LCSH|1401406|
|Method|954884|
|ddc|741879|
|Project|407320|
|keyword|394498|

## dc\_work\_subjects.value\_uri

| Value | Count |
|:------|------:|
|(none)|116832029|
|http://id.loc.gov/authorities/subjects/sh85009003|176266|
|http://www.oecd.org/science/inno/38235147.pdf|25620|
|(none)|19432|
|http://www.narcis.nl/classfication/D37000|5274|
|https://core.tdar.org/browse/geographic-keyword/2810/southwestern-colorado|5013|
|https://core.tdar.org/browse/geographic-keyword/84604/southwestern-us|5012|
|https://core.tdar.org/browse/temporal-keyword/78/basketmaker-iii|4509|
|https://core.tdar.org/browse/material-type/1/ceramic|3824|
|http://astrothesaurus.org/uat/1469|3763|

## dc\_work\_titles.title\_type

| Value | Count |
|:------|------:|
|(none)|53091171|
|Subtitle|3366476|
|TranslatedTitle|449793|
|AlternativeTitle|282511|
|Other|169209|
32 changes: 32 additions & 0 deletions examples/datacite/metrics.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- Output metrics of a fully-populated DataCite database:
-- one (type, records) row per DataCite table, giving its row count.
--
-- The type labels are single-quoted string literals. Double-quoted text
-- denotes an identifier in SQL; SQLite treats it as a string only as a
-- compatibility fallback, which some builds disable.
-- The branches are combined with UNION, exactly as before, so the
-- produced result set is unchanged.

SELECT 'works' AS type,
  (SELECT Count(*) FROM dc_works) AS records UNION
SELECT 'work_rights' AS type,
  (SELECT Count(*) FROM dc_work_rights) AS records UNION
SELECT 'creator_name_identifiers' AS type,
  (SELECT Count(*) FROM dc_creator_name_identifiers) AS records UNION
SELECT 'work_creators' AS type,
  (SELECT Count(*) FROM dc_work_creators) AS records UNION
SELECT 'work_titles' AS type,
  (SELECT Count(*) FROM dc_work_titles) AS records UNION
SELECT 'creator_affiliations' AS type,
  (SELECT Count(*) FROM dc_creator_affiliations) AS records UNION
SELECT 'work_funding_references' AS type,
  (SELECT Count(*) FROM dc_work_funding_references) AS records UNION
SELECT 'work_geo_locations' AS type,
  (SELECT Count(*) FROM dc_work_geo_locations) AS records UNION
SELECT 'work_dates' AS type,
  (SELECT Count(*) FROM dc_work_dates) AS records UNION
SELECT 'work_contributors' AS type,
  (SELECT Count(*) FROM dc_work_contributors) AS records UNION
SELECT 'contributor_affiliations' AS type,
  (SELECT Count(*) FROM dc_contributor_affiliations) AS records UNION
SELECT 'work_related_identifiers' AS type,
  (SELECT Count(*) FROM dc_work_related_identifiers) AS records UNION
SELECT 'contributor_name_identifiers' AS type,
  (SELECT Count(*) FROM dc_contributor_name_identifiers) AS records UNION
SELECT 'work_subjects' AS type,
  (SELECT Count(*) FROM dc_work_subjects) AS records UNION
SELECT 'work_descriptions' AS type,
  (SELECT Count(*) FROM dc_work_descriptions) AS records;
35 changes: 35 additions & 0 deletions examples/datacite/schemes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- Show the values used in the employed DataCite schemes.
--
-- Each branch yields (field, name, value) rows: "field" is a fixed
-- 'table.column' label, "name" is a distinct value of that column, and
-- "value" is the number of rows carrying it.
--
-- The labels are single-quoted string literals. Double-quoted text
-- denotes an identifier in SQL; SQLite treats it as a string only as a
-- compatibility fallback, which some builds disable.
--
-- The funder_identifier_type branch used to appear both first and last.
-- UNION removed the repeated rows, so dropping the second copy leaves
-- the result unchanged while avoiding a second grouping pass over
-- dc_work_funding_references.

SELECT 'dc_work_funding_references.funder_identifier_type' AS field,
    funder_identifier_type AS name, Count(*) AS value
  FROM dc_work_funding_references GROUP BY funder_identifier_type UNION

SELECT 'dc_works.identifier_type' AS field,
    identifier_type AS name, Count(*) AS value
  FROM dc_works GROUP BY identifier_type UNION

SELECT 'dc_work_creators.name_type' AS field,
    name_type AS name, Count(*) AS value
  FROM dc_work_creators GROUP BY name_type UNION

SELECT 'dc_creator_name_identifiers.scheme_uri' AS field,
    scheme_uri AS name, Count(*) AS value
  FROM dc_creator_name_identifiers GROUP BY scheme_uri UNION

SELECT 'dc_work_contributors.contributor_type' AS field,
    contributor_type AS name, Count(*) AS value
  FROM dc_work_contributors GROUP BY contributor_type UNION

SELECT 'dc_contributor_name_identifiers.name_identifier_scheme' AS field,
    name_identifier_scheme AS name, Count(*) AS value
  FROM dc_contributor_name_identifiers GROUP BY name_identifier_scheme UNION

SELECT 'dc_work_titles.title_type' AS field,
    title_type AS name, Count(*) AS value
  FROM dc_work_titles GROUP BY title_type UNION

SELECT 'dc_work_subjects.subject_scheme' AS field,
    subject_scheme AS name, Count(*) AS value
  FROM dc_work_subjects GROUP BY subject_scheme UNION

SELECT 'dc_work_subjects.value_uri' AS field,
    value_uri AS name, Count(*) AS value
  FROM dc_work_subjects GROUP BY value_uri UNION

SELECT 'dc_work_dates.date_type' AS field,
    date_type AS name, Count(*) AS value
  FROM dc_work_dates GROUP BY date_type UNION

SELECT 'dc_work_related_identifiers.related_identifier_type' AS field,
    related_identifier_type AS name, Count(*) AS value
  FROM dc_work_related_identifiers GROUP BY related_identifier_type UNION

SELECT 'dc_work_related_identifiers.relation_type' AS field,
    relation_type AS name, Count(*) AS value
  FROM dc_work_related_identifiers GROUP BY relation_type UNION

SELECT 'dc_work_related_identifiers.scheme_type' AS field,
    scheme_type AS name, Count(*) AS value
  FROM dc_work_related_identifiers GROUP BY scheme_type UNION

SELECT 'dc_work_rights.rights_identifier_scheme' AS field,
    rights_identifier_scheme AS name, Count(*) AS value
  FROM dc_work_rights GROUP BY rights_identifier_scheme UNION

SELECT 'dc_work_rights.rights_identifier' AS field,
    rights_identifier AS name, Count(*) AS value
  FROM dc_work_rights GROUP BY rights_identifier UNION

SELECT 'dc_work_descriptions.description_type' AS field,
    description_type AS name, Count(*) AS value
  FROM dc_work_descriptions GROUP BY description_type;
Loading

0 comments on commit 5277c3c

Please sign in to comment.