diff --git a/CHANGELOG.md b/CHANGELOG.md index b78c3cda0c8..1c83e4b0910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - When searching for an identifier in the "Web search", the title of the search window is now "Identifier-based Web Search". [#10391](https://github.com/JabRef/jabref/pull/10391) - The ampersand checker now skips verbatim fields (`file`, `url`, ...). [#10419](https://github.com/JabRef/jabref/pull/10419) - If no existing document is selected for exporting "XMP annotated pdf" JabRef will now create a new PDF file with a sample text and the metadata. [#10102](https://github.com/JabRef/jabref/issues/10102) +- We modified the DOI cleanup to infer the DOI from an arXiv ID if it's present. [#10426](https://github.com/JabRef/jabref/issues/10426) ### Fixed diff --git a/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java b/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java index 3657acabb5f..1cb3f2d04e3 100644 --- a/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java +++ b/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java @@ -13,17 +13,19 @@ import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.field.UnknownField; +import org.jabref.model.entry.identifier.ArXivIdentifier; import org.jabref.model.entry.identifier.DOI; /** - * Formats the DOI (e.g. removes http part) and also moves DOIs from note, url or ee field to the doi field. + * Formats the DOI (e.g. removes http part) and also infers DOIs from the note, url, eprint or ee fields. */ public class DoiCleanup implements CleanupJob { /** * Fields to check for DOIs.
*/ - private static final List FIELDS = Arrays.asList(StandardField.NOTE, StandardField.URL, new UnknownField("ee")); + private static final List FIELDS = Arrays.asList(StandardField.NOTE, StandardField.URL, StandardField.EPRINT, + new UnknownField("ee")); @Override public List cleanup(BibEntry entry) { @@ -57,7 +59,9 @@ public List cleanup(BibEntry entry) { } else { // As the Doi field is empty we now check if note, url, or ee field contains a Doi for (Field field : FIELDS) { - Optional doi = entry.getField(field).flatMap(DOI::parse); + Optional fieldContentOpt = entry.getField(field); + + Optional doi = fieldContentOpt.flatMap(DOI::parse); if (doi.isPresent()) { // Update Doi @@ -65,6 +69,15 @@ public List cleanup(BibEntry entry) { change.ifPresent(changes::add); removeFieldValue(entry, field, changes); } + + if (StandardField.EPRINT == field) { + fieldContentOpt.flatMap(ArXivIdentifier::parse) + .flatMap(ArXivIdentifier::inferDOI) + .ifPresent(inferredDoi -> { + Optional change = entry.setField(StandardField.DOI, inferredDoi.getDOI()); + change.ifPresent(changes::add); + }); + } } } return changes; diff --git a/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java b/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java index 4db64bad268..01a9e74270f 100644 --- a/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java +++ b/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java @@ -9,10 +9,14 @@ import org.jabref.model.strings.StringUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * Identifier for the arXiv. 
See https://arxiv.org/help/arxiv_identifier */ public class ArXivIdentifier extends EprintIdentifier { + private static final Logger LOGGER = LoggerFactory.getLogger(ArXivIdentifier.class); private static final String ARXIV_PREFIX = "http(s)?://arxiv.org/(abs|pdf)/|arxiv|arXiv"; private final String identifier; @@ -71,6 +75,22 @@ public Optional getClassification() { } } + /** + * arXiv articles are automatically assigned DOIs, which consist of the DOI prefix '10.48550/' followed by + * 'arXiv:ID' with the colon replaced by a period (i.e. '10.48550/arXiv.ID'). + * <p>

+ * For more information: + * <a href="https://blog.arxiv.org/2022/02/17/new-arxiv-articles-are-now-automatically-assigned-dois/"> + * new-arxiv-articles-are-now-automatically-assigned-dois</a> + * */ + public Optional inferDOI() { + if (StringUtil.isBlank(identifier)) { + return Optional.empty(); + } + + return DOI.parse("10.48550/arxiv." + identifier); + } + @Override public String toString() { return "ArXivIdentifier{" +