Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#7978: linking ORCID from the metadata tab. #7979

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### Displaying author's identifier as link

In the dataset page's metadata tab the author's identifier is displayed as a clickable link, which points to the profile page in the external service (ORCID, VIAF etc.), given that the identifier scheme provides a resolvable landing page. If the identifier does not match the expected scheme, a link is not shown.
63 changes: 12 additions & 51 deletions src/main/java/edu/harvard/iq/dataverse/DatasetAuthor.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@
package edu.harvard.iq.dataverse;

import java.util.Comparator;
import java.util.regex.Pattern;


/**
*
* @author skraffmiller
*/

public class DatasetAuthor {

public static Comparator<DatasetAuthor> DisplayOrder = new Comparator<DatasetAuthor>(){
Expand Down Expand Up @@ -89,59 +86,23 @@ public boolean isEmpty() {
);
}

/**
* https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
*/
final public static String REGEX_ORCID = "^\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$";
final public static String REGEX_ISNI = "^\\d*$";
final public static String REGEX_LCNA = "^[a-z]+\\d+$";
final public static String REGEX_VIAF = "^\\d*$";
/**
* GND regex from https://www.wikidata.org/wiki/Property:P227
*/
final public static String REGEX_GND = "^1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]$";

/**
* Each author identification type has its own valid pattern/syntax.
*/
public static Pattern getValidPattern(String regex) {
return Pattern.compile(regex);
public String getIdentifierAsUrl() {
if (idType != null && !idType.isEmpty() && idValue != null && !idValue.isEmpty()) {
return getIdentifierAsUrl(idType, idValue);
}
return null;
}

public String getIdentifierAsUrl() {
public static String getIdentifierAsUrl(String idType, String idValue) {
if (idType != null && !idType.isEmpty() && idValue != null && !idValue.isEmpty()) {
DatasetFieldValueValidator datasetFieldValueValidator = new DatasetFieldValueValidator();
switch (idType) {
case "ORCID":
if (datasetFieldValueValidator.isValidAuthorIdentifier(idValue, getValidPattern(REGEX_ORCID))) {
return "https://orcid.org/" + idValue;
}
break;
case "ISNI":
if (datasetFieldValueValidator.isValidAuthorIdentifier(idValue, getValidPattern(REGEX_ISNI))) {
return "http://www.isni.org/isni/" + idValue;
}
break;
case "LCNA":
if (datasetFieldValueValidator.isValidAuthorIdentifier(idValue, getValidPattern(REGEX_LCNA))) {
return "http://id.loc.gov/authorities/names/" + idValue;
}
break;
case "VIAF":
if (datasetFieldValueValidator.isValidAuthorIdentifier(idValue, getValidPattern(REGEX_VIAF))) {
return "https://viaf.org/viaf/" + idValue;
}
break;
case "GND":
if (datasetFieldValueValidator.isValidAuthorIdentifier(idValue, getValidPattern(REGEX_GND))) {
return "https://d-nb.info/gnd/" + idValue;
}
break;
default:
break;
try {
ExternalIdentifier externalIdentifier = ExternalIdentifier.valueOf(idType);
if (externalIdentifier.isValidIdentifier(idValue))
return externalIdentifier.format(idValue);
} catch (Exception e) {
// non registered identifier
}
}
return null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import javax.persistence.CascadeType;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
Expand All @@ -25,7 +24,11 @@
import javax.persistence.OneToMany;
import javax.persistence.OrderBy;
import javax.persistence.Table;
import javax.persistence.Transient;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

/**
*
Expand Down Expand Up @@ -68,6 +71,19 @@ public static DatasetFieldCompoundValue createNewEmptyDatasetFieldCompoundValue(
@OrderBy("datasetFieldType ASC")
private List<DatasetField> childDatasetFields = new ArrayList<>();

// configurations for link creation
private static final Map<String, Pair<String, String>> linkComponents = Map.of(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like the idea!

"author", new ImmutablePair<>("authorIdentifierScheme", "authorIdentifier")
);

// field for handling links. Annotation '@Transient' prevents these fields to be saved in DB
@Transient
private Map<DatasetField, Boolean> linkMap = new LinkedHashMap<>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't a List<DatasetField> do? If on the list, it's a match! 😉

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I know maps are optimized for lookup values, so map.contains() faster than the list.contains().

@Transient
private String linkScheme = null;
@Transient
private String linkValue = null;
Comment on lines +82 to +85
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also it seems unlikely now - this is a compound field and there might be multiple links in one.
IMHO it would be better to create a Map<DatasetField,String> to save the link scheme and value.

One might argue that this could even be a suited for a inner wrapping class, holding the data and merge linkMap, linkScheme, linkValue together.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically yes, but in practice a compound field can not have multiple links. A single author might have one name, one identifier scheme and one identifier. Multiple links would be possible of the scheme and id would have its own compound field, which would be a component of the author field.
An inner wrapping class is a good idea.


public Long getId() {
return id;
}
Expand Down Expand Up @@ -133,15 +149,29 @@ public DatasetFieldCompoundValue copy(DatasetField parent) {
return compoundValue;
}

public Map<DatasetField,String> getDisplayValueMap() {
public Map<DatasetField, String> getDisplayValueMap() {
// todo - this currently only supports child datasetfields with single values
// need to determine how we would want to handle multiple
Map<DatasetField, String> fieldMap = new LinkedHashMap<>();
linkMap.clear();
boolean fixTrailingComma = false;
Pair<String, String> linkComponents = getLinkComponents();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again - what happens when there are multiple links in one compound field?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above, it is not possible now.

linkScheme = null;
linkValue = null;
for (DatasetField childDatasetField : childDatasetFields) {
fixTrailingComma = false;
// skip the value if it is empty or N/A
if (!StringUtils.isBlank(childDatasetField.getValue()) && !DatasetField.NA_VALUE.equals(childDatasetField.getValue())) {
if (linkComponents != null) {
if (fieldNameEquals(childDatasetField, linkComponents.getKey())) {
linkScheme = childDatasetField.getValue();
} else if (fieldNameEquals(childDatasetField, linkComponents.getValue())) {
linkValue = childDatasetField.getValue();
if (StringUtils.isNotBlank(linkScheme) && StringUtils.isNotBlank(linkValue))
linkMap.put(childDatasetField, true);
}
}

String format = childDatasetField.getDatasetFieldType().getDisplayFormat();
if (StringUtils.isBlank(format)) {
format = "#VALUE";
Expand All @@ -161,8 +191,8 @@ public Map<DatasetField,String> getDisplayValueMap() {
//todo: this should be handled in more generic way for any other text that can then be internationalized
// if we need to use replaceAll for regexp, then make sure to use: java.util.regex.Matcher.quoteReplacement(<target string>)
.replace("#EMAIL", BundleUtil.getStringFromBundle("dataset.email.hiddenMessage"))
.replace("#VALUE", sanitizedValue );
fieldMap.put(childDatasetField,displayValue);
.replace("#VALUE", sanitizedValue);
fieldMap.put(childDatasetField, displayValue);
}
}

Expand All @@ -172,7 +202,29 @@ public Map<DatasetField,String> getDisplayValueMap() {

return fieldMap;
}


public String getLink() {
return DatasetAuthor.getIdentifierAsUrl(linkScheme, linkValue);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really like the hardcoded behaviour here.
This should IMHO be written with extensibility in mind as already intended above by offering the Map of fields and link components.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DatasetAuthor had already this method, and at first - not being aware of that - I created my solution which lacks the validation aspect of the ID. Since there is another compound field, which has a similar link structure (related publications), in the next round when it comes to implementing those schemas, we should move this function to a different class (either here, or into a 3rd location).

}

public boolean isLink(DatasetField datasetField) {
return linkMap.containsKey(datasetField) && linkMap.get(datasetField) == true && getLink() != null;
}

private boolean fieldNameEquals(DatasetField datasetField, String linkTypeComponent) {
return datasetField.getDatasetFieldType().getName().equals(linkTypeComponent);
}

public boolean isLinkableField() {
return linkComponents.containsKey(parentDatasetField.getDatasetFieldType().getName());
}

public Pair<String, String> getLinkComponents() {
if (!isLinkableField())
return null;
return linkComponents.get(parentDatasetField.getDatasetFieldType().getName());
}

private Map<DatasetField, String> removeLastComma(Map<DatasetField, String> mapIn) {

Iterator<Map.Entry<DatasetField, String>> itr = mapIn.entrySet().iterator();
Expand All @@ -192,6 +244,5 @@ private Map<DatasetField, String> removeLastComma(Map<DatasetField, String> mapI
}

return mapIn;

}
}
58 changes: 58 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package edu.harvard.iq.dataverse;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public enum ExternalIdentifier {
ORCID("ORCID", "https://orcid.org/%s", "^\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"),
ISNI("ISNI", "http://www.isni.org/isni/%s", "^\\d*$"),
LCNA("LCNA", "http://id.loc.gov/authorities/names/%s", "^[a-z]+\\d+$"),
VIAF("VIAF", "https://viaf.org/viaf/%s", "^\\d*$"),
// GND regex from https://www.wikidata.org/wiki/Property:P227
GND("GND", "https://d-nb.info/gnd/%s", "^1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]$"),
// note: DAI is missing from this list, because it doesn't have resolvable URL
ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^[A-Z\\d][A-Z\\d-]+[A-Z\\d]$"),
ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$");

private String name;
private String template;
private Pattern pattern;
private Matcher matcher;

ExternalIdentifier(String name, String template, String regex) {
this.template = template;
this.pattern = Pattern.compile(regex);
this.matcher = pattern.matcher("");
}

public ExternalIdentifier of(String name) {
System.err.println(name);
for (ExternalIdentifier identifier : values()) {
System.err.println(" vs " + identifier.name);
if (identifier.name.toLowerCase().equals(name.toLowerCase())) {
return identifier;
}
}
return null;
}

public boolean isValidIdentifier(String userInput) {
return matcher.reset(userInput).matches();
}

public String getName() {
return name;
}

public String getTemplate() {
return template;
}

public Pattern getPattern() {
return pattern;
}

public String format(String idValue) {
return String.format(template, idValue);
}
}
27 changes: 19 additions & 8 deletions src/main/webapp/metadataFragment.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
<span class="glyphicon glyphicon-question-sign tooltip-icon"
data-toggle="tooltip" data-placement="auto right" data-original-title="#{dsf.datasetFieldType.localeDescription}"></span>
</th>
<td jsf:rendered="#{!anonymized or !settingsWrapper.shouldBeAnonymized(dsf)}">
<td jsf:rendered="#{!anonymized or !settingsWrapper.shouldBeAnonymized(dsf)}">
<!-- Primitive datasetFields -->
<ui:fragment rendered="#{dsf.datasetFieldType.primitive}">
<h:outputText value="#{dsf.value}"
Expand Down Expand Up @@ -109,12 +109,21 @@
<h:outputText value="#{bundle['dataset.contact.tip']}"/>
</p>
</ui:fragment>
<ui:repeat value="#{dsf.datasetFieldCompoundValues}" var="compoundValue">
<ui:repeat value="#{dsf.datasetFieldCompoundValues}" var="compoundValue" varStatus="compoundValuesstatus">
<ui:repeat value="#{compoundValue.displayValueMap.entrySet().toArray()}" var="cvPart" varStatus="partStatus">
<h:outputText value="#{dsf.datasetFieldType.displayFormat} " rendered="${!partStatus.first and !cvocConf.containsKey(dsf.datasetFieldType.id)}"/>
<h:outputText value="#{cvPart.value}"
escape="#{cvPart.key.datasetFieldType.isEscapeOutputText()}"
rendered="${!cvocConf.containsKey(dsf.datasetFieldType.id)}"/>
<ui:fragment rendered="#{!cvocConf.containsKey(dsf.datasetFieldType.id)}">
<ui:fragment rendered="#{compoundValue.isLink(cvPart.key)}">
<h:outputLink value="#{compoundValue.getLink()}" target="_blank">
<h:outputText value="#{cvPart.value}"
escape="#{cvPart.key.datasetFieldType.isEscapeOutputText()}"/>
</h:outputLink>
</ui:fragment>
<ui:fragment rendered="#{not compoundValue.isLink(cvPart.key)}">
<h:outputText value="#{cvPart.value}"
escape="#{cvPart.key.datasetFieldType.isEscapeOutputText()}"/>
</ui:fragment>
</ui:fragment>
<h:outputText value="#{cvPart.key.value}"
escape="#{cvPart.key.datasetFieldType.isEscapeOutputText()}"
rendered="${cvocConf.containsKey(dsf.datasetFieldType.id) and cvPart.key.datasetFieldType.name.equals(cvocConf.get(dsf.datasetFieldType.id).getString('term-uri-field'))}">
Expand All @@ -124,13 +133,15 @@
<f:passThroughAttribute name="data-cvoc-managedfields" value="#{cvocConf.get(dsf.datasetFieldType.id).get('managedfields').toString()}" />
</h:outputText>
</ui:repeat>
<br/>
<ui:fragment rendered="#{not compoundValuesstatus.last}">
<br/>
</ui:fragment>
</ui:repeat>
</ui:fragment>
</td>
<td jsf:rendered="#{anonymized and settingsWrapper.shouldBeAnonymized(dsf)}">
<td jsf:rendered="#{anonymized and settingsWrapper.shouldBeAnonymized(dsf)}">
<h:outputText value="#{bundle['dataset.anonymized.withheld']}"/>
</td>
</td>
</tr>
</ui:repeat>
</tbody>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ public static Collection<String[]> parameters() {
{ "LCNA", "n82058243", "http://id.loc.gov/authorities/names/n82058243" },
{ "VIAF", "172389567", "https://viaf.org/viaf/172389567" },
{ "GND", "4079154-3", "https://d-nb.info/gnd/4079154-3" },
{ "ResearcherID", "634082", "https://publons.com/researcher/634082/" },
{ "ResearcherID", "AAW-9289-2021", "https://publons.com/researcher/AAW-9289-2021/" },
{ "ResearcherID", "J-9733-2013", "https://publons.com/researcher/J-9733-2013/" },
{ "ScopusID", "6602344670", "https://www.scopus.com/authid/detail.uri?authorId=6602344670" },
{ null, null, null, },
});
}
Expand All @@ -44,5 +48,4 @@ public void getIdentifierAsUrl() {
}
assertEquals(expectedIdentifierAsUrl, datasetAuthor.getIdentifierAsUrl());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public void testIsValid() {
@Test
public void testIsValidAuthorIdentifierOrcid() {
DatasetFieldValueValidator validator = new DatasetFieldValueValidator();
Pattern pattern = DatasetAuthor.getValidPattern(DatasetAuthor.REGEX_ORCID);
Pattern pattern = ExternalIdentifier.valueOf("ORCID").getPattern();
assertTrue(validator.isValidAuthorIdentifier("0000-0002-1825-0097", pattern));
// An "X" at the end of an ORCID is less common but still valid.
assertTrue(validator.isValidAuthorIdentifier("0000-0002-1694-233X", pattern));
Expand All @@ -154,15 +154,15 @@ public void testIsValidAuthorIdentifierOrcid() {
@Test
public void testIsValidAuthorIdentifierIsni() {
DatasetFieldValueValidator validator = new DatasetFieldValueValidator();
Pattern pattern = DatasetAuthor.getValidPattern(DatasetAuthor.REGEX_ISNI);
Pattern pattern = ExternalIdentifier.valueOf("ISNI").getPattern();
assertTrue(validator.isValidAuthorIdentifier("0000000121032683", pattern));
assertFalse(validator.isValidAuthorIdentifier("junk", pattern));
}

@Test
public void testIsValidAuthorIdentifierLcna() {
DatasetFieldValueValidator validator = new DatasetFieldValueValidator();
Pattern pattern = DatasetAuthor.getValidPattern(DatasetAuthor.REGEX_LCNA);
Pattern pattern = ExternalIdentifier.valueOf("LCNA").getPattern();
assertTrue(validator.isValidAuthorIdentifier("n82058243", pattern));
assertTrue(validator.isValidAuthorIdentifier("foobar123", pattern));
assertFalse(validator.isValidAuthorIdentifier("junk", pattern));
Expand All @@ -171,15 +171,15 @@ public void testIsValidAuthorIdentifierLcna() {
@Test
public void testIsValidAuthorIdentifierViaf() {
DatasetFieldValueValidator validator = new DatasetFieldValueValidator();
Pattern pattern = DatasetAuthor.getValidPattern(DatasetAuthor.REGEX_VIAF);
Pattern pattern = ExternalIdentifier.valueOf("VIAF").getPattern();
assertTrue(validator.isValidAuthorIdentifier("172389567", pattern));
assertFalse(validator.isValidAuthorIdentifier("junk", pattern));
}

@Test
public void testIsValidAuthorIdentifierGnd() {
DatasetFieldValueValidator validator = new DatasetFieldValueValidator();
Pattern pattern = DatasetAuthor.getValidPattern(DatasetAuthor.REGEX_GND);
Pattern pattern = ExternalIdentifier.valueOf("GND").getPattern();
assertTrue(validator.isValidAuthorIdentifier("4079154-3", pattern));
assertFalse(validator.isValidAuthorIdentifier("junk", pattern));
}
Expand Down
Loading