From 07712ce29338056ba22b24f0de0226ea2c5db7e1 Mon Sep 17 00:00:00 2001 From: Benedikt Tutzer Date: Sat, 21 Aug 2021 14:43:47 +0200 Subject: [PATCH] Preferences for Grobid (#8002) --- CHANGELOG.md | 1 + src/main/java/org/jabref/gui/JabRefMain.java | 2 +- .../BibtexExtractorViewModel.java | 28 +++++++++++--- .../bibtexextractor/ExtractBibtexAction.java | 2 + .../bibtexextractor/ExtractBibtexDialog.java | 2 +- .../jabref/gui/entryeditor/EntryEditor.java | 29 +++++++++++++- .../gui/externalfiles/ImportHandler.java | 5 ++- .../gui/importer/GrobidOptInDialogHelper.java | 38 +++++++++++++++++++ .../org/jabref/gui/importer/ImportAction.java | 23 ++++++++++- .../CustomImporterTabViewModel.java | 1 + .../importexport/ImportExportTab.fxml | 7 ++++ .../importexport/ImportExportTab.java | 7 ++++ .../ImportExportTabViewModel.java | 16 +++++++- .../ExternalFilesContentImporter.java | 7 +++- .../logic/importer/ImportFormatReader.java | 10 +++-- .../jabref/logic/importer/WebFetchers.java | 5 ++- .../fetcher/GrobidCitationFetcher.java | 7 ++-- .../fileformat/PdfGrobidImporter.java | 5 ++- .../fileformat/PdfMergeMetadataImporter.java | 23 +++++------ .../ImportSettingsPreferences.java | 27 ++++++++++++- .../logic/importer/util/GrobidService.java | 14 ++++--- .../jabref/preferences/JabRefPreferences.java | 14 ++++++- src/main/resources/l10n/JabRef_en.properties | 4 ++ .../ImportFormatReaderIntegrationTest.java | 3 +- .../ImportFormatReaderTestParameterless.java | 3 +- .../logic/importer/WebFetchersTest.java | 3 +- .../fetcher/GrobidCitationFetcherTest.java | 4 +- .../fileformat/PdfGrobidImporterTest.java | 7 +++- .../PdfMergeMetadataImporterTest.java | 8 ++-- .../importer/util/GrobidServiceTest.java | 11 +++++- 30 files changed, 259 insertions(+), 57 deletions(-) create mode 100644 src/main/java/org/jabref/gui/importer/GrobidOptInDialogHelper.java diff --git a/CHANGELOG.md b/CHANGELOG.md index a82a7ec2ae2..d6a0100fe1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Note 
that this project **does not** adhere to [Semantic Versioning](http://semve - We improved the deduction of bib-entries from imported fulltext pdfs. [#7947](https://github.com/JabRef/jabref/pull/7947) - We added unprotect_terms to the list of bracketed pattern modifiers [#7826](https://github.com/JabRef/jabref/pull/7960) - We added an icon picker in group edit dialog. [#6142](https://github.com/JabRef/jabref/issues/6142) +- We added a preference to Opt-In to JabRef's online metadata extraction service (Grobid) usage. [#8002](https://github.com/JabRef/jabref/pull/8002) ### Changed diff --git a/src/main/java/org/jabref/gui/JabRefMain.java b/src/main/java/org/jabref/gui/JabRefMain.java index 11636af520a..04774744eea 100644 --- a/src/main/java/org/jabref/gui/JabRefMain.java +++ b/src/main/java/org/jabref/gui/JabRefMain.java @@ -124,7 +124,7 @@ private static void applyPreferences(PreferencesService preferences) { Globals.journalAbbreviationRepository = JournalAbbreviationLoader.loadRepository(preferences.getJournalAbbreviationPreferences()); // Build list of Import and Export formats - Globals.IMPORT_FORMAT_READER.resetImportFormats(preferences.getImportFormatPreferences(), + Globals.IMPORT_FORMAT_READER.resetImportFormats(preferences.getImportSettingsPreferences(), preferences.getImportFormatPreferences(), preferences.getXmpPreferences(), Globals.getFileUpdateMonitor()); Globals.entryTypesManager.addCustomOrModifiedTypes(preferences.getBibEntryTypes(BibDatabaseMode.BIBTEX), preferences.getBibEntryTypes(BibDatabaseMode.BIBLATEX)); diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java index eb020c0be0c..1e45cc5d989 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java @@ -1,6 +1,7 @@ package org.jabref.gui.bibtexextractor; import java.util.HashMap; 
+import java.util.List; import java.util.Map; import javax.swing.undo.UndoManager; @@ -32,7 +33,7 @@ public class BibtexExtractorViewModel { private final StringProperty inputTextProperty = new SimpleStringProperty(""); private final DialogService dialogService; - private final GrobidCitationFetcher currentCitationfetcher; + private final PreferencesService preferencesService; private final TaskExecutor taskExecutor; private final ImportHandler importHandler; @@ -45,7 +46,7 @@ public BibtexExtractorViewModel(BibDatabaseContext bibdatabaseContext, StateManager stateManager) { this.dialogService = dialogService; - currentCitationfetcher = new GrobidCitationFetcher(preferencesService.getImportFormatPreferences()); + this.preferencesService = preferencesService; this.taskExecutor = taskExecutor; this.importHandler = new ImportHandler( bibdatabaseContext, @@ -61,7 +62,22 @@ public StringProperty inputTextProperty() { } public void startParsing() { - BackgroundTask.wrap(() -> currentCitationfetcher.performSearch(inputTextProperty.getValue())) + if (preferencesService.getImportSettingsPreferences().isGrobidEnabled()) { + parseUsingGrobid(); + } else { + parseUsingBibtexExtractor(); + } + } + + private void parseUsingBibtexExtractor() { + BibEntry parsedEntry = new BibtexExtractor().extract(inputTextProperty.getValue()); + importHandler.importEntries(List.of(parsedEntry)); + trackNewEntry(parsedEntry, "ParseWithBibTeXExtractor"); + } + + private void parseUsingGrobid() { + GrobidCitationFetcher grobidCitationFetcher = new GrobidCitationFetcher(preferencesService.getImportSettingsPreferences(), preferencesService.getImportFormatPreferences()); + BackgroundTask.wrap(() -> grobidCitationFetcher.performSearch(inputTextProperty.getValue())) .onRunning(() -> dialogService.notify(Localization.lang("Your text is being parsed..."))) .onFailure((e) -> { if (e instanceof FetcherException) { @@ -76,14 +92,14 @@ public void startParsing() { dialogService.notify(Localization.lang("%0 
entries were parsed from your query.", String.valueOf(parsedEntries.size()))); importHandler.importEntries(parsedEntries); for (BibEntry bibEntry : parsedEntries) { - trackNewEntry(bibEntry); + trackNewEntry(bibEntry, "ParseWithGrobid"); } }).executeWith(taskExecutor); } - private void trackNewEntry(BibEntry bibEntry) { + private void trackNewEntry(BibEntry bibEntry, String eventMessage) { Map properties = new HashMap<>(); properties.put("EntryType", bibEntry.typeProperty().getValue().getName()); - Globals.getTelemetryClient().ifPresent(client -> client.trackEvent("ParseWithGrobid", properties, new HashMap<>())); + Globals.getTelemetryClient().ifPresent(client -> client.trackEvent(eventMessage, properties, new HashMap<>())); } } diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java index 7f742093845..86a080f73ff 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java @@ -3,6 +3,7 @@ import org.jabref.gui.DialogService; import org.jabref.gui.StateManager; import org.jabref.gui.actions.SimpleCommand; +import org.jabref.gui.importer.GrobidOptInDialogHelper; import com.airhacks.afterburner.injection.Injector; @@ -17,6 +18,7 @@ public ExtractBibtexAction(StateManager stateManager) { @Override public void execute() { DialogService dialogService = Injector.instantiateModelOrService(DialogService.class); + GrobidOptInDialogHelper.showAndWaitIfUserIsUndecided(dialogService); dialogService.showCustomDialogAndWait(new ExtractBibtexDialog()); } } diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java index b7c33fdf15d..04affccbf1a 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java +++ 
b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java @@ -46,7 +46,7 @@ public ExtractBibtexDialog() { buttonParse = (Button) getDialogPane().lookupButton(parseButtonType); buttonParse.setTooltip(new Tooltip((Localization.lang("Starts the extraction and adds the resulting entries to the currently opened database")))); - buttonParse.setOnAction(event -> viewModel.startParsing()); + buttonParse.setOnAction((event) -> viewModel.startParsing()); buttonParse.disableProperty().bind(viewModel.inputTextProperty().isEmpty()); } diff --git a/src/main/java/org/jabref/gui/entryeditor/EntryEditor.java b/src/main/java/org/jabref/gui/entryeditor/EntryEditor.java index aa3097ec004..360162e11f4 100644 --- a/src/main/java/org/jabref/gui/entryeditor/EntryEditor.java +++ b/src/main/java/org/jabref/gui/entryeditor/EntryEditor.java @@ -8,6 +8,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.SortedSet; import java.util.stream.Collectors; import javax.inject.Inject; @@ -35,6 +36,7 @@ import org.jabref.gui.externalfiles.ExternalFilesEntryLinker; import org.jabref.gui.externalfiletype.ExternalFileTypes; import org.jabref.gui.help.HelpAction; +import org.jabref.gui.importer.GrobidOptInDialogHelper; import org.jabref.gui.keyboard.KeyBinding; import org.jabref.gui.menus.ChangeEntryTypeMenu; import org.jabref.gui.mergeentries.FetchAndMergeEntry; @@ -45,6 +47,7 @@ import org.jabref.logic.help.HelpFile; import org.jabref.logic.importer.EntryBasedFetcher; import org.jabref.logic.importer.WebFetchers; +import org.jabref.logic.importer.fileformat.PdfMergeMetadataImporter; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.Field; @@ -355,11 +358,33 @@ private void setupToolBar() { // Add menu for fetching bibliographic information ContextMenu fetcherMenu = new ContextMenu(); - for (EntryBasedFetcher fetcher : 
WebFetchers.getEntryBasedFetchers(preferencesService.getImportFormatPreferences(), preferencesService.getFilePreferences(), databaseContext, preferencesService.getDefaultEncoding())) { + SortedSet entryBasedFetchers = WebFetchers.getEntryBasedFetchers( + preferencesService.getImportSettingsPreferences(), + preferencesService.getImportFormatPreferences(), + preferencesService.getFilePreferences(), + databaseContext, + preferencesService.getDefaultEncoding()); + for (EntryBasedFetcher fetcher : entryBasedFetchers) { MenuItem fetcherMenuItem = new MenuItem(fetcher.getName()); - fetcherMenuItem.setOnAction(event -> fetchAndMerge(fetcher)); + if (fetcher instanceof PdfMergeMetadataImporter.EntryBasedFetcherWrapper) { + // Handle Grobid Opt-In in case of the PdfMergeMetadataImporter + fetcherMenuItem.setOnAction(event -> { + GrobidOptInDialogHelper.showAndWaitIfUserIsUndecided(dialogService); + PdfMergeMetadataImporter.EntryBasedFetcherWrapper pdfMergeMetadataImporter = + new PdfMergeMetadataImporter.EntryBasedFetcherWrapper( + preferencesService.getImportSettingsPreferences(), + preferencesService.getImportFormatPreferences(), + preferencesService.getFilePreferences(), + databaseContext, + preferencesService.getDefaultEncoding()); + fetchAndMerge(pdfMergeMetadataImporter); + }); + } else { + fetcherMenuItem.setOnAction(event -> fetchAndMerge(fetcher)); + } fetcherMenu.getItems().add(fetcherMenuItem); } + fetcherButton.setOnMouseClicked(event -> fetcherMenu.show(fetcherButton, Side.RIGHT, 0, 0)); } diff --git a/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java b/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java index 69022aca70f..08e0d301477 100644 --- a/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java +++ b/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java @@ -57,7 +57,10 @@ public ImportHandler(BibDatabaseContext database, this.stateManager = stateManager; this.linker = new ExternalFilesEntryLinker(externalFileTypes, 
preferencesService.getFilePreferences(), database); - this.contentImporter = new ExternalFilesContentImporter(preferencesService.getImportFormatPreferences(), preferencesService.getTimestampPreferences()); + this.contentImporter = new ExternalFilesContentImporter( + preferencesService.getImportSettingsPreferences(), + preferencesService.getImportFormatPreferences(), + preferencesService.getTimestampPreferences()); this.undoManager = undoManager; } diff --git a/src/main/java/org/jabref/gui/importer/GrobidOptInDialogHelper.java b/src/main/java/org/jabref/gui/importer/GrobidOptInDialogHelper.java new file mode 100644 index 00000000000..84ad1db02cf --- /dev/null +++ b/src/main/java/org/jabref/gui/importer/GrobidOptInDialogHelper.java @@ -0,0 +1,38 @@ +package org.jabref.gui.importer; + +import org.jabref.gui.DialogService; +import org.jabref.gui.Globals; +import org.jabref.logic.l10n.Localization; +import org.jabref.preferences.PreferencesService; + +/** + * Metadata extraction from PDFs and plaintext works very well using Grobid, but we do not want to enable it by default + * due to data privacy concerns. + * To make users aware of the feature, we ask each time before querying Grobid, giving the option to opt-out. + */ +public class GrobidOptInDialogHelper { + + /** + * If Grobid is not enabled but the user has not explicitly opted-out of Grobid, we ask for permission to send data + * to Grobid using a dialog and giving an opt-out option. + * @param dialogService the DialogService to use + * @return if the user enabled Grobid, either in the past or after being asked by the dialog. 
+ */ + public static boolean showAndWaitIfUserIsUndecided(DialogService dialogService) { + PreferencesService preferences = Globals.prefs; + if (preferences.getImportSettingsPreferences().isGrobidEnabled()) { + return true; + } + if (preferences.getImportSettingsPreferences().isGrobidOptOut()) { + return false; + } + boolean grobidEnabled = dialogService.showConfirmationDialogWithOptOutAndWait( + Localization.lang("Remote services"), + Localization.lang("Allow sending PDF files and raw citation strings to a JabRef online service (Grobid) to determine Metadata. This produces better results."), + Localization.lang("Do not ask again"), + (optOut) -> preferences.storeImportSettingsPreferences(preferences.getImportSettingsPreferences().withGrobidOptOut(optOut)) + ); + preferences.storeImportSettingsPreferences(preferences.getImportSettingsPreferences().withGrobidEnabled(grobidEnabled)); + return grobidEnabled; + } +} diff --git a/src/main/java/org/jabref/gui/importer/ImportAction.java b/src/main/java/org/jabref/gui/importer/ImportAction.java index 6a2bf12c086..2d46f75e4ee 100644 --- a/src/main/java/org/jabref/gui/importer/ImportAction.java +++ b/src/main/java/org/jabref/gui/importer/ImportAction.java @@ -21,9 +21,13 @@ import org.jabref.logic.importer.ImportFormatReader; import org.jabref.logic.importer.Importer; import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.importer.fileformat.PdfGrobidImporter; +import org.jabref.logic.importer.fileformat.PdfMergeMetadataImporter; import org.jabref.logic.l10n.Localization; +import org.jabref.logic.util.StandardFileType; import org.jabref.logic.util.UpdateField; import org.jabref.model.database.BibDatabase; +import org.jabref.model.util.FileHelper; import org.jabref.preferences.PreferencesService; import org.slf4j.Logger; @@ -101,6 +105,11 @@ public void automatedImport(List filenames) { } } + private boolean fileIsPdf(Path filename) { + Optional extension = FileHelper.getFileExtension(filename); + return 
extension.isPresent() && StandardFileType.PDF.getExtensions().contains(extension.get()); + } + private List doImport(List files) { // We import all files and collect their results: List imports = new ArrayList<>(); @@ -108,11 +117,21 @@ private List doImport(List files) try { if (importer.isEmpty()) { // Unknown format: - DefaultTaskExecutor.runInJavaFXThread(() -> frame.getDialogService().notify(Localization.lang("Importing in unknown format") + "...")); + DefaultTaskExecutor.runAndWaitInJavaFXThread(() -> { + if (fileIsPdf(filename) && GrobidOptInDialogHelper.showAndWaitIfUserIsUndecided(frame.getDialogService())) { + Globals.IMPORT_FORMAT_READER.resetImportFormats(prefs.getImportSettingsPreferences(), prefs.getImportFormatPreferences(), prefs.getXmpPreferences(), Globals.getFileUpdateMonitor()); + } + frame.getDialogService().notify(Localization.lang("Importing in unknown format") + "..."); + }); // This import method never throws an IOException: imports.add(Globals.IMPORT_FORMAT_READER.importUnknownFormat(filename, prefs.getTimestampPreferences(), Globals.getFileUpdateMonitor())); } else { - DefaultTaskExecutor.runInJavaFXThread(() -> frame.getDialogService().notify(Localization.lang("Importing in %0 format", importer.get().getName()) + "...")); + DefaultTaskExecutor.runAndWaitInJavaFXThread(() -> { + if (importer.get() instanceof PdfGrobidImporter || importer.get() instanceof PdfMergeMetadataImporter && GrobidOptInDialogHelper.showAndWaitIfUserIsUndecided(frame.getDialogService())) { + Globals.IMPORT_FORMAT_READER.resetImportFormats(prefs.getImportSettingsPreferences(), prefs.getImportFormatPreferences(), prefs.getXmpPreferences(), Globals.getFileUpdateMonitor()); + } + frame.getDialogService().notify(Localization.lang("Importing in %0 format", importer.get().getName()) + "..."); + }); // Specific importer: ParserResult pr = importer.get().importDatabase(filename, Globals.prefs.getDefaultEncoding()); imports.add(new 
ImportFormatReader.UnknownFormatImport(importer.get().getName(), pr)); diff --git a/src/main/java/org/jabref/gui/preferences/customimporter/CustomImporterTabViewModel.java b/src/main/java/org/jabref/gui/preferences/customimporter/CustomImporterTabViewModel.java index c6c139591b0..a0cb014e5d3 100644 --- a/src/main/java/org/jabref/gui/preferences/customimporter/CustomImporterTabViewModel.java +++ b/src/main/java/org/jabref/gui/preferences/customimporter/CustomImporterTabViewModel.java @@ -54,6 +54,7 @@ public void storeSettings() { .map(ImporterViewModel::getLogic) .collect(Collectors.toSet())); Globals.IMPORT_FORMAT_READER.resetImportFormats( + preferences.getImportSettingsPreferences(), preferences.getImportFormatPreferences(), preferences.getXmpPreferences(), Globals.getFileUpdateMonitor()); diff --git a/src/main/java/org/jabref/gui/preferences/importexport/ImportExportTab.fxml b/src/main/java/org/jabref/gui/preferences/importexport/ImportExportTab.fxml index 88436120746..10cc58d0d5c 100644 --- a/src/main/java/org/jabref/gui/preferences/importexport/ImportExportTab.fxml +++ b/src/main/java/org/jabref/gui/preferences/importexport/ImportExportTab.fxml @@ -23,4 +23,11 @@