Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error handling on GROBID server connection issues #7026

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ inserting new citations in a OpenOffic/LibreOffice document. [#6957](https://git
- We improved the duplicate detection when identifiers like DOI or arxiv are semantiaclly the same, but just syntactically differ (e.g. with or without http(s):// prefix). [#6707](https://github.com/JabRef/jabref/issues/6707)
- We changed in the group interface "Generate groups from keywords in a BibTeX field" by "Generate groups from keywords in the following field". [#6983](https://github.com/JabRef/jabref/issues/6983)
- We changed the name of a group type from "Searching for keywords" to "Searching for a keyword". [6995](https://github.com/JabRef/jabref/pull/6995)
- We changed connect timeouts for server requests to 30 seconds in general and 5 seconds for GROBID server (special) and improved user notifications on connection issues. [7026](https://github.com/JabRef/jabref/pull/7026)

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@
import org.jabref.gui.externalfiletype.ExternalFileTypes;
import org.jabref.gui.util.BackgroundTask;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.fetcher.GrobidCitationFetcher;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.util.FileUpdateMonitor;
import org.jabref.preferences.JabRefPreferences;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class BibtexExtractorViewModel {

private static final Logger LOGGER = LoggerFactory.getLogger(BibtexExtractorViewModel.class);

private final StringProperty inputTextProperty = new SimpleStringProperty("");
private DialogService dialogService;
private GrobidCitationFetcher currentCitationfetcher;
Expand Down Expand Up @@ -58,6 +64,15 @@ public StringProperty inputTextProperty() {
public void startParsing() {
BackgroundTask.wrap(() -> currentCitationfetcher.performSearch(inputTextProperty.getValue()))
.onRunning(() -> dialogService.notify(Localization.lang("Your text is being parsed...")))
.onFailure((e) -> {
if (e instanceof FetcherException) {
String msg = Localization.lang("There are connection issues with a JabRef server. Detailed information: %0.",
e.getMessage());
dialogService.notify(msg);
} else {
LOGGER.warn("Missing exception handling.", e);
}
})
.onSuccess(parsedEntries -> {
dialogService.notify(Localization.lang("%0 entries were parsed from your query.", String.valueOf(parsedEntries.size())));
importHandler.importEntries(parsedEntries);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.SearchBasedFetcher;
Expand All @@ -20,13 +22,18 @@
public class GrobidCitationFetcher implements SearchBasedFetcher {

private static final Logger LOGGER = LoggerFactory.getLogger(GrobidCitationFetcher.class);

private static final String GROBID_URL = "http://grobid.jabref.org:8070";
private ImportFormatPreferences importFormatPreferences;
private GrobidService grobidService;

public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) {
this(importFormatPreferences, new GrobidService(GROBID_URL));
}

GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences, GrobidService grobidService) {
this.importFormatPreferences = importFormatPreferences;
this.grobidService = new GrobidService(GROBID_URL);
this.grobidService = grobidService;
}

/**
Expand All @@ -38,9 +45,14 @@ public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) {
private Optional<String> parseUsingGrobid(String plainText) {
try {
return Optional.of(grobidService.processCitation(plainText, GrobidService.ConsolidateCitations.WITH_METADATA));
} catch (SocketTimeoutException e) {
String msg = "Connection timed out.";
LOGGER.debug(msg, e);
throw new RuntimeException(msg, e);
} catch (IOException e) {
LOGGER.debug("Could not process citation", e);
return Optional.empty();
String msg = "Could not process citation. " + e.getMessage();
LOGGER.debug(msg, e);
throw new RuntimeException(msg, e);
}
}

Expand All @@ -54,20 +66,28 @@ private Optional<BibEntry> parseBibToBibEntry(String bibtexString) {
}

@Override
public List<BibEntry> performSearch(String query) {
return Arrays
.stream(query.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+"))
.map(String::trim)
.filter(str -> !str.isBlank())
.map(reference -> parseUsingGrobid(reference))
.flatMap(Optional::stream)
.map(reference -> parseBibToBibEntry(reference))
.flatMap(Optional::stream)
.collect(Collectors.toList());
public List<BibEntry> performSearch(String query) throws FetcherException {
List<BibEntry> bibEntries = null;
try {
bibEntries = Arrays
.stream(query.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+"))
.map(String::trim)
.filter(str -> !str.isBlank())
.map(this::parseUsingGrobid)
.flatMap(Optional::stream)
.map(this::parseBibToBibEntry)
.flatMap(Optional::stream)
.collect(Collectors.toList());
} catch (RuntimeException e) {
// un-wrap the wrapped exceptions
throw new FetcherException(e.getMessage(), e.getCause());
}
return bibEntries;
}

@Override
public String getName() {
return "GROBID";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.Duration;

import org.jabref.logic.net.URLDownload;

Expand Down Expand Up @@ -47,6 +48,7 @@ public String processCitation(String rawCitation, ConsolidateCitations consolida
rawCitation = URLEncoder.encode(rawCitation, StandardCharsets.UTF_8);
URLDownload urlDownload = new URLDownload(grobidServerURL
+ "/api/processCitation");
urlDownload.setConnectTimeout(Duration.ofSeconds(5));
urlDownload.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
urlDownload.setPostData("citations=" + rawCitation + "&consolidateCitations=" + consolidateCitations);
String httpResponse = urlDownload.asString();
Expand Down
16 changes: 15 additions & 1 deletion src/main/java/org/jabref/logic/net/URLDownload.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.nio.file.StandardCopyOption;
import java.security.SecureRandom;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -62,11 +63,13 @@
public class URLDownload {

public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0";

private static final Logger LOGGER = LoggerFactory.getLogger(URLDownload.class);
private static final Duration DEFAULT_CONNECT_TIMEOUT = Duration.ofSeconds(30);

private final URL source;
private final Map<String, String> parameters = new HashMap<>();
private String postData = "";
private Duration connectTimeout = DEFAULT_CONNECT_TIMEOUT;

/**
* @param source the URL to download from
Expand Down Expand Up @@ -316,6 +319,7 @@ private void copy(InputStream in, Writer out, Charset encoding) throws IOExcepti

private URLConnection openConnection() throws IOException {
URLConnection connection = this.source.openConnection();
connection.setConnectTimeout((int) connectTimeout.toMillis());
for (Entry<String, String> entry : this.parameters.entrySet()) {
connection.setRequestProperty(entry.getKey(), entry.getValue());
}
Expand Down Expand Up @@ -346,4 +350,14 @@ private URLConnection openConnection() throws IOException {

return connection;
}

public void setConnectTimeout(Duration connectTimeout) {
if (connectTimeout != null) {
this.connectTimeout = connectTimeout;
}
}

public Duration getConnectTimeout() {
return connectTimeout;
}
}
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1637,6 +1637,7 @@ User=Benutzer
Connect=Verbinden
Connection\ error=Verbindungsfehler
Connection\ to\ %0\ server\ established.=Verbindung zum %0 Server hergestellt.
There\ are\ connection\ issues\ with\ a\ JabRef\ server.\ Detailed\ information\:\ %0.=Es gibt Verbindungsprobleme mit einem JabRef Server. Detailinformation: %0.
Required\ field\ "%0"\ is\ empty.=Erforederliches Feld "%0" ist leer.
%0\ driver\ not\ available.=%0-Treiber nicht verfügbar.
The\ connection\ to\ the\ server\ has\ been\ terminated.=Verbindung zum Server wurde abgebrochen.
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,7 @@ User=User
Connect=Connect
Connection\ error=Connection error
Connection\ to\ %0\ server\ established.=Connection to %0 server established.
There\ are\ connection\ issues\ with\ a\ JabRef\ server.\ Detailed\ information\:\ %0.=There are connection issues with a JabRef server. Detailed information: %0.
Required\ field\ "%0"\ is\ empty.=Required field "%0" is empty.
%0\ driver\ not\ available.=%0 driver not available.
The\ connection\ to\ the\ server\ has\ been\ terminated.=The connection to the server has been terminated.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.util.GrobidService;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
Expand All @@ -17,7 +20,11 @@
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

@FetcherTest
public class GrobidCitationFetcherTest {
Expand Down Expand Up @@ -76,28 +83,38 @@ public static Stream<Arguments> provideInvalidInput() {

@ParameterizedTest(name = "{0}")
@MethodSource("provideExamplesForCorrectResultTest")
public void grobidPerformSearchCorrectResultTest(String testName, BibEntry expectedBibEntry, String searchQuery) {
public void grobidPerformSearchCorrectResultTest(String testName, BibEntry expectedBibEntry, String searchQuery) throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(searchQuery);
assertEquals(List.of(expectedBibEntry), entries);
}

@Test
public void grobidPerformSearchCorrectlySplitsStringTest() {
public void grobidPerformSearchCorrectlySplitsStringTest() throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(example1 + "\n\n" + example2 + "\r\n\r\n" + example3 + "\r\r" + example4);
assertEquals(List.of(example1AsBibEntry, example2AsBibEntry, example3AsBibEntry, example4AsBibEntry), entries);
}

@Test
public void grobidPerformSearchWithEmptyStringsTest() {
public void grobidPerformSearchWithEmptyStringsTest() throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(" \n ");
assertEquals(Collections.emptyList(), entries);
}

@ParameterizedTest
@MethodSource("provideInvalidInput")
public void grobidPerformSearchWithInvalidDataTest(String invalidInput) {
public void grobidPerformSearchWithInvalidDataTest(String invalidInput) throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(invalidInput);
assertEquals(Collections.emptyList(), entries);
}

@Test
public void performSearchThrowsExceptionInCaseOfConnectionIssues() throws IOException {
GrobidService grobidServiceMock = mock(GrobidService.class);
when(grobidServiceMock.processCitation(anyString(), any())).thenThrow(new IOException("Any IO Exception"));
grobidCitationFetcher = new GrobidCitationFetcher(importFormatPreferences, grobidServiceMock);

assertThrows(FetcherException.class, () -> {
grobidCitationFetcher.performSearch("any text");
}, "performSearch should throw an FetcherException, when there are underlying IOException.");
}
}
8 changes: 8 additions & 0 deletions src/test/java/org/jabref/logic/net/URLDownloadTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,12 @@ public void testCheckConnectionFail() throws MalformedURLException {
assertThrows(UnirestException.class, nonsense::canBeReached);
}

@Test
public void connectTimeoutIsNeverNull() throws MalformedURLException {
URLDownload urlDownload = new URLDownload(new URL("http://www.example.com"));
assertNotNull(urlDownload.getConnectTimeout(), "there's a non-null default by the constructor");

urlDownload.setConnectTimeout(null);
assertNotNull(urlDownload.getConnectTimeout(), "no null value can be set");
}
}