From b246b9ce534ae05008185ea2e4ed9df1d8cac132 Mon Sep 17 00:00:00 2001 From: Tobias Diez Date: Tue, 2 Jan 2018 22:22:57 +0100 Subject: [PATCH] Add oaDOI fulltext fetcher (#3581) * Add oaDOI fulltext fetcher * Rename to OpenAccessDoi --- CHANGELOG.md | 1 + .../logic/importer/FulltextFetchers.java | 24 +------- .../jabref/logic/importer/WebFetchers.java | 24 ++++++++ .../logic/importer/fetcher/OpenAccessDoi.java | 58 +++++++++++++++++++ .../jabref/model/entry/identifier/DOI.java | 2 +- .../logic/importer/WebFetchersTest.java | 12 +++- .../importer/fetcher/FulltextFetcherTest.java | 4 +- .../importer/fetcher/OpenAccessDoiTest.java | 43 ++++++++++++++ 8 files changed, 141 insertions(+), 27 deletions(-) create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/OpenAccessDoi.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/OpenAccessDoiTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d74b9a2e6f8..363b6d7cb94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# ## [Unreleased] ### Changed +- We added [oaDOI](https://oadoi.org/) as a fulltext provider, so that JabRef is now able to provide fulltexts for more than 90 million open-access articles. ### Fixed diff --git a/src/main/java/org/jabref/logic/importer/FulltextFetchers.java b/src/main/java/org/jabref/logic/importer/FulltextFetchers.java index 496573d0ebb..effcb72453d 100644 --- a/src/main/java/org/jabref/logic/importer/FulltextFetchers.java +++ b/src/main/java/org/jabref/logic/importer/FulltextFetchers.java @@ -6,13 +6,6 @@ import java.util.List; import java.util.Optional; -import org.jabref.logic.importer.fetcher.ACS; -import org.jabref.logic.importer.fetcher.ArXiv; -import org.jabref.logic.importer.fetcher.DoiResolution; -import org.jabref.logic.importer.fetcher.GoogleScholar; -import org.jabref.logic.importer.fetcher.IEEE; -import org.jabref.logic.importer.fetcher.ScienceDirect; -import org.jabref.logic.importer.fetcher.SpringerLink; import org.jabref.logic.net.URLDownload; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.FieldName; @@ -30,26 +23,13 @@ public class FulltextFetchers { private final List finders = new ArrayList<>(); public FulltextFetchers(ImportFormatPreferences importFormatPreferences) { - // Ordering is important, authorities first! - // Publisher - finders.add(new DoiResolution()); - finders.add(new ScienceDirect()); - finders.add(new SpringerLink()); - finders.add(new ACS()); - finders.add(new ArXiv(importFormatPreferences)); - finders.add(new IEEE()); - // Meta search - finders.add(new GoogleScholar(importFormatPreferences)); + this(WebFetchers.getFullTextFetchers(importFormatPreferences)); } - public FulltextFetchers(List fetcher) { + FulltextFetchers(List fetcher) { finders.addAll(fetcher); } - public List getFetchers() { - return finders; - } - public Optional findFullTextPDF(BibEntry entry) { // for accuracy, fetch DOI first but do not modify entry BibEntry clonedEntry = (BibEntry) entry.clone(); diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java index de2ec910a0d..6dbafe67896 100644 --- a/src/main/java/org/jabref/logic/importer/WebFetchers.java +++ b/src/main/java/org/jabref/logic/importer/WebFetchers.java @@ -5,19 +5,25 @@ import java.util.List; import java.util.Optional; +import org.jabref.logic.importer.fetcher.ACS; import org.jabref.logic.importer.fetcher.ArXiv; import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem; import org.jabref.logic.importer.fetcher.CrossRef; import org.jabref.logic.importer.fetcher.DBLPFetcher; import org.jabref.logic.importer.fetcher.DiVA; import org.jabref.logic.importer.fetcher.DoiFetcher; +import org.jabref.logic.importer.fetcher.DoiResolution; import org.jabref.logic.importer.fetcher.GoogleScholar; import org.jabref.logic.importer.fetcher.GvkFetcher; +import org.jabref.logic.importer.fetcher.IEEE; import org.jabref.logic.importer.fetcher.IacrEprintFetcher; import org.jabref.logic.importer.fetcher.IsbnFetcher; import org.jabref.logic.importer.fetcher.LibraryOfCongress; import org.jabref.logic.importer.fetcher.MathSciNet; import org.jabref.logic.importer.fetcher.MedlineFetcher; +import org.jabref.logic.importer.fetcher.OpenAccessDoi; +import org.jabref.logic.importer.fetcher.ScienceDirect; +import org.jabref.logic.importer.fetcher.SpringerLink; import org.jabref.logic.importer.fetcher.TitleFetcher; import org.jabref.logic.importer.fetcher.zbMATH; import org.jabref.model.entry.FieldName; @@ -113,4 +119,22 @@ public static List getIdFetchers(ImportFormatPreferences importFormat list.sort(Comparator.comparing(WebFetcher::getName)); return list; } + + public static List getFullTextFetchers(ImportFormatPreferences importFormatPreferences) { + List fetchers = new ArrayList<>(); + + // Ordering is important, authorities first! + // Publisher + fetchers.add(new DoiResolution()); + fetchers.add(new ScienceDirect()); + fetchers.add(new SpringerLink()); + fetchers.add(new ACS()); + fetchers.add(new ArXiv(importFormatPreferences)); + fetchers.add(new IEEE()); + // Meta search + fetchers.add(new GoogleScholar(importFormatPreferences)); + fetchers.add(new OpenAccessDoi()); + + return fetchers; + } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/OpenAccessDoi.java b/src/main/java/org/jabref/logic/importer/fetcher/OpenAccessDoi.java new file mode 100644 index 00000000000..b19be472a33 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/OpenAccessDoi.java @@ -0,0 +1,58 @@ +package org.jabref.logic.importer.fetcher; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Objects; +import java.util.Optional; + +import org.jabref.logic.importer.FulltextFetcher; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.FieldName; +import org.jabref.model.entry.identifier.DOI; + +import com.mashape.unirest.http.HttpResponse; +import com.mashape.unirest.http.JsonNode; +import com.mashape.unirest.http.Unirest; +import com.mashape.unirest.http.exceptions.UnirestException; +import org.json.JSONObject; + +/** + * A fulltext fetcher that uses oaDOI. + * + * @implSpec API is documented at https://oadoi.org/api/v2 + */ +public class OpenAccessDoi implements FulltextFetcher { + private static String API_URL = "https://api.oadoi.org/v2/"; + + @Override + public Optional findFullText(BibEntry entry) throws IOException { + Objects.requireNonNull(entry); + + Optional doi = entry.getField(FieldName.DOI) + .flatMap(DOI::parse); + if (doi.isPresent()) { + try { + return findFullText(doi.get()); + } catch (UnirestException e) { + throw new IOException(e); + } + } else { + return Optional.empty(); + } + } + + public Optional findFullText(DOI doi) throws UnirestException, MalformedURLException { + HttpResponse jsonResponse = Unirest.get(API_URL + doi.getDOI() + "?email=developers@jabref.org") + .header("accept", "application/json") + .asJson(); + JSONObject root = jsonResponse.getBody().getObject(); + Optional url = Optional.ofNullable(root.optJSONObject("best_oa_location")) + .map(location -> location.optString("url")); + if (url.isPresent()) { + return Optional.of(new URL(url.get())); + } else { + return Optional.empty(); + } + } +} diff --git a/src/main/java/org/jabref/model/entry/identifier/DOI.java b/src/main/java/org/jabref/model/entry/identifier/DOI.java index b3e6d75b824..ca20dfa09ab 100644 --- a/src/main/java/org/jabref/model/entry/identifier/DOI.java +++ b/src/main/java/org/jabref/model/entry/identifier/DOI.java @@ -95,7 +95,7 @@ public DOI(String doi) { */ public static Optional parse(String doi) { try { - return Optional.ofNullable(new DOI(doi)); + return Optional.of(new DOI(doi)); } catch (IllegalArgumentException | NullPointerException e) { return Optional.empty(); } diff --git a/src/test/java/org/jabref/logic/importer/WebFetchersTest.java b/src/test/java/org/jabref/logic/importer/WebFetchersTest.java index 0d09defd4a1..8cc80b27522 100644 --- a/src/test/java/org/jabref/logic/importer/WebFetchersTest.java +++ b/src/test/java/org/jabref/logic/importer/WebFetchersTest.java @@ -51,11 +51,19 @@ public void getEntryBasedFetchersReturnsAllFetcherDerivingFromEntryBasedFetcher( @Test public void getSearchBasedFetchersReturnsAllFetcherDerivingFromSearchBasedFetcher() throws Exception { - List idFetchers = WebFetchers.getSearchBasedFetchers(importFormatPreferences); + List searchBasedFetchers = WebFetchers.getSearchBasedFetchers(importFormatPreferences); Set> expected = reflections.getSubTypesOf(SearchBasedFetcher.class); expected.remove(SearchBasedParserFetcher.class); - assertEquals(expected, getClasses(idFetchers)); + assertEquals(expected, getClasses(searchBasedFetchers)); + } + + @Test + public void getFullTextFetchersReturnsAllFetcherDerivingFromFullTextFetcher() throws Exception { + List fullTextFetchers = WebFetchers.getFullTextFetchers(importFormatPreferences); + + Set> expected = reflections.getSubTypesOf(FulltextFetcher.class); + assertEquals(expected, getClasses(fullTextFetchers)); } @Test diff --git a/src/test/java/org/jabref/logic/importer/fetcher/FulltextFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/FulltextFetcherTest.java index b2e25bb381b..6b649d60c7f 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/FulltextFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/FulltextFetcherTest.java @@ -4,8 +4,8 @@ import java.util.Optional; import org.jabref.logic.importer.FulltextFetcher; -import org.jabref.logic.importer.FulltextFetchers; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.WebFetchers; import org.jabref.model.entry.BibEntry; import org.junit.jupiter.params.ParameterizedTest; @@ -18,7 +18,7 @@ class FulltextFetcherTest { private static List fetcherProvider() { - return new FulltextFetchers(mock(ImportFormatPreferences.class)).getFetchers(); + return WebFetchers.getFullTextFetchers(mock(ImportFormatPreferences.class)); } @ParameterizedTest diff --git a/src/test/java/org/jabref/logic/importer/fetcher/OpenAccessDoiTest.java b/src/test/java/org/jabref/logic/importer/fetcher/OpenAccessDoiTest.java new file mode 100644 index 00000000000..9bb675fb340 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/OpenAccessDoiTest.java @@ -0,0 +1,43 @@ +package org.jabref.logic.importer.fetcher; + +import java.io.IOException; +import java.net.URL; +import java.util.Optional; + +import org.jabref.model.entry.BibEntry; +import org.jabref.testutils.category.FetcherTest; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@FetcherTest +class OpenAccessDoiTest { + + private OpenAccessDoi finder; + private BibEntry entry; + + @BeforeEach + void setUp() { + finder = new OpenAccessDoi(); + entry = new BibEntry(); + } + + @Test + void findByDOI() throws IOException { + entry.setField("doi", "10.1038/nature12373"); + + assertEquals( + Optional.of(new URL("https://dash.harvard.edu/bitstream/handle/1/12285462/Nanometer-Scale%20Thermometry.pdf?sequence=1")), + finder.findFullText(entry) + ); + } + + @Test + void notFoundByDOI() throws IOException { + entry.setField("doi", "10.1186/unknown-doi"); + + assertEquals(Optional.empty(), finder.findFullText(entry)); + } +}