From 0402d0899adff9921c5a9ce53f65aa69fec910e8 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Wed, 16 Oct 2024 10:30:43 +0200 Subject: [PATCH] Fix handling of escapings in bracketed patterns (#11967) * Add references to other tests * Simplify code * Add test for extracting the first word * Relax escaping * Add link to PR * Add tests Refs https://github.com/JabRef/jabref/issues/11367 * Refine CHANGELOG.md * Refine CHANGELOG.md --- CHANGELOG.md | 2 + .../citationkeypattern/BracketedPattern.java | 16 +++++-- .../bibtexfields/RegexFormatter.java | 14 +++++-- .../BracketedPatternTest.java | 42 +++++++++++++++---- .../MakeLabelWithDatabaseTest.java | 20 +++++++-- .../MakeLabelWithoutDatabaseTest.java | 29 ++++++++++++- .../bibtexfields/RegexFormatterTest.java | 6 +++ 7 files changed, 110 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ca4820613e..2a609a8267d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - ⚠️ Renamed command line parameters `embeddBibfileInPdf` to `embedBibFileInPdf`, `writeMetadatatoPdf` to `writeMetadataToPdf`, and `writeXMPtoPdf` to `writeXmpToPdf`. [#11575](https://github.com/JabRef/jabref/pull/11575) - The browse button for a Custom theme now opens in the directory of the current used CSS file. [#11597](https://github.com/JabRef/jabref/pull/11597) - The browse button for a Custom exporter now opens in the directory of the current used exporter file. [#11717](https://github.com/JabRef/jabref/pull/11717) +- ⚠️ We relaxed the escaping requirements for [bracketed patterns](https://docs.jabref.org/setup/citationkeypatterns), which are used for the [citaton key generator](https://docs.jabref.org/advanced/entryeditor#autogenerate-citation-key) and [filename and directory patterns](https://docs.jabref.org/finding-sorting-and-cleaning-entries/filelinks#auto-linking-files). One only needs to write `\"` if a quote sign should be escaped. All other escapings are not necessary (and working) any more. [#11967](https://github.com/JabRef/jabref/pull/11967) - JabRef now uses TLS 1.2 for all HTTPS connections. [#11852](https://github.com/JabRef/jabref/pull/11852) - We improved the display of long messages in the integrity check dialog. [#11619](https://github.com/JabRef/jabref/pull/11619) - We improved the undo/redo buttons in the main toolbar and main menu to be disabled when there is nothing to undo/redo. [#8807](https://github.com/JabRef/jabref/issues/8807) @@ -90,6 +91,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - We fixed an issue where unescaped braces in the arXiv fetcher were not treated. [#11704](https://github.com/JabRef/jabref/issues/11704) - We fixed an issue where HTML instead of the fulltext pdf was downloaded when importing arXiv entries. [#4913](https://github.com/JabRef/jabref/issues/4913) - We fixed an issue where the keywords and crossref fields were not properly focused. [#11177](https://github.com/JabRef/jabref/issues/11177) +- We fixed handling of `\"` in [bracketed patterns](https://docs.jabref.org/setup/citationkeypatterns) containing a RegEx. [#11967](https://github.com/JabRef/jabref/pull/11967) - We fixed an issue where the Undo/Redo buttons were active even when all libraries are closed. [#11837](https://github.com/JabRef/jabref/issues/11837) - We fixed an issue where recently opened files were not displayed in the main menu properly. [#9042](https://github.com/JabRef/jabref/issues/9042) - We fixed an issue where the DOI lookup would show an error when a DOI was found for an entry. [#11850](https://github.com/JabRef/jabref/issues/11850) diff --git a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java index 0447f748cef..519fcdafd34 100644 --- a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java +++ b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java @@ -209,11 +209,10 @@ public static String expandBrackets(String pattern, Character keywordDelimiter, */ public static Function expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) { return (String bracket) -> { - String expandedPattern; List fieldParts = parseFieldAndModifiers(bracket); // check whether there is a modifier on the end such as // ":lower": - expandedPattern = getFieldValue(entry, fieldParts.getFirst(), keywordDelimiter, database); + String expandedPattern = getFieldValue(entry, fieldParts.getFirst(), keywordDelimiter, database); if (fieldParts.size() > 1) { // apply modifiers: expandedPattern = applyModifiers(expandedPattern, fieldParts, 1, expandBracketContent(keywordDelimiter, entry, database)); @@ -233,6 +232,7 @@ public static Function expandBracketContent(Character keywordDel public static String expandBrackets(String pattern, Function bracketContentHandler) { Objects.requireNonNull(pattern); StringBuilder expandedPattern = new StringBuilder(); + pattern = pattern.replace("\\\"", "\u0A17"); StringTokenizer parsedPattern = new StringTokenizer(pattern, "\\[]\"", true); while (parsedPattern.hasMoreTokens()) { @@ -254,7 +254,7 @@ public static String expandBrackets(String pattern, Function bra } } - return expandedPattern.toString(); + return expandedPattern.toString().replace("\u0A17", "\\\""); } /** @@ -1140,11 +1140,19 @@ protected static List parseFieldAndModifiers(String arg) { } else if (currentChar == '\\') { if (escaped) { escaped = false; - current.append(currentChar); + // Only : needs to be escaped + // " -> regex("...", "...") - escaping should be passed through to the regex parser + // : -> :abc:def + current.append('\\'); + current.append('\\'); } else { escaped = true; } } else if (escaped) { + if (currentChar != ':') { + // Only : needs to be escaped + current.append('\\'); + } current.append(currentChar); escaped = false; } else { diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java index 26859a82699..97bf67f0cff 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java @@ -15,26 +15,34 @@ public class RegexFormatter extends Formatter { public static final String KEY = "regex"; + private static final Logger LOGGER = LoggerFactory.getLogger(RegexFormatter.class); + private static final Pattern ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{"); private static final Pattern ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}"); + /** * Matches text enclosed in curly brackets. The capturing group is used to prevent part of the input from being * replaced. */ private static final Pattern ENCLOSED_IN_CURLY_BRACES = Pattern.compile("\\{.*?}"); + private static final String REGEX_CAPTURING_GROUP = "regex"; private static final String REPLACEMENT_CAPTURING_GROUP = "replacement"; + /** * Matches a valid argument to the constructor. Two capturing groups are used to parse the {@link * RegexFormatter#regex} and {@link RegexFormatter#replacement} used in {@link RegexFormatter#format(String)} */ private static final Pattern CONSTRUCTOR_ARGUMENT = Pattern.compile( "^\\(\"(?<" + REGEX_CAPTURING_GROUP + ">.*?)\" *?, *?\"(?<" + REPLACEMENT_CAPTURING_GROUP + ">.*)\"\\)$"); + // Magic arbitrary unicode char, which will never appear in bibtex files private static final String PLACEHOLDER_FOR_PROTECTED_GROUP = Character.toString('\u0A14'); private static final String PLACEHOLDER_FOR_OPENING_CURLY_BRACE = Character.toString('\u0A15'); private static final String PLACEHOLDER_FOR_CLOSING_CURLY_BRACE = Character.toString('\u0A16'); + private static final String PLACEHOLDER_FOR_QUOTE_SIGN = Character.toString('\u0A17'); + private final String regex; private final String replacement; @@ -46,11 +54,11 @@ public class RegexFormatter extends Formatter { */ public RegexFormatter(String input) { Objects.requireNonNull(input); - input = input.trim(); + input = input.trim().replace("\\\"", PLACEHOLDER_FOR_QUOTE_SIGN); Matcher constructorArgument = CONSTRUCTOR_ARGUMENT.matcher(input); if (constructorArgument.matches()) { - regex = constructorArgument.group(REGEX_CAPTURING_GROUP); - replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP); + regex = constructorArgument.group(REGEX_CAPTURING_GROUP).replace(PLACEHOLDER_FOR_QUOTE_SIGN, "\""); + replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP).replace(PLACEHOLDER_FOR_QUOTE_SIGN, "\""); } else { regex = null; replacement = null; diff --git a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java index 4d1b3649778..e7b9e643db5 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java @@ -25,6 +25,8 @@ /** * Tests based on a BibEntry are contained in {@link CitationKeyGeneratorTest} + * + * "Complete" entries are tested at {@link org.jabref.logic.citationkeypattern.MakeLabelWithDatabaseTest} */ @Execution(ExecutionMode.CONCURRENT) class BracketedPatternTest { @@ -606,14 +608,44 @@ void expandBracketsWithAuthorStartingWithBrackets() { @Test void expandBracketsWithModifierContainingRegexCharacterClass() { BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness:Managing"); - assertEquals("Wickedness.Managing", BracketedPattern.expandBrackets("[title:regex(\"[:]+\",\".\")]", null, bibEntry, null)); } + @Test + void regExForFirstWord() { + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "First Second Third"); + assertEquals("First", BracketedPattern.expandBrackets("[TITLE:regex(\"(\\w+).*\",\"$1\")]", null, bibEntry, null)); + } + + @Test + void regExWithComma() { + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "First,Second,Third"); + assertEquals("First+Second+Third", BracketedPattern.expandBrackets("[TITLE:regex(\",\",\"+\")]", null, bibEntry, null)); + } + + @Test + void regExWithEscapedQuote() { + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "First\"Second\"Third"); + assertEquals("First+Second+Third", BracketedPattern.expandBrackets("[TITLE:regex(\"\\\"\",\"+\")]", null, bibEntry, null)); + } + + @Test + void regExWithEtAlTwoAuthors() { + // Example from https://docs.jabref.org/setup/citationkeypatterns#modifiers + BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "First Last and Second Last"); + assertEquals("LastAndLast", BracketedPattern.expandBrackets("[auth.etal:regex(\"\\.etal\",\"EtAl\"):regex(\"\\.\",\"And\")]", null, bibEntry, null)); + } + + @Test + void regExWithEtAlThreeAuthors() { + // Example from https://docs.jabref.org/setup/citationkeypatterns#modifiers + BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "First Last and Second Last and Third Last"); + assertEquals("LastEtAl", BracketedPattern.expandBrackets("[auth.etal:regex(\"\\.etal\",\"EtAl\"):regex(\"\\.\",\"And\")]", null, bibEntry, null)); + } + @Test void expandBracketsEmptyStringFromEmptyBrackets() { BibEntry bibEntry = new BibEntry(); - assertEquals("", BracketedPattern.expandBrackets("[]", null, bibEntry, null)); } @@ -621,7 +653,6 @@ void expandBracketsEmptyStringFromEmptyBrackets() { void expandBracketsInstitutionAbbreviationFromProvidedAbbreviation() { BibEntry bibEntry = new BibEntry() .withField(StandardField.AUTHOR, "{European Union Aviation Safety Agency ({EUASABRACKET})}"); - assertEquals("EUASABRACKET", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null)); } @@ -629,7 +660,6 @@ void expandBracketsInstitutionAbbreviationFromProvidedAbbreviation() { void expandBracketsInstitutionAbbreviationForAuthorContainingUnion() { BibEntry bibEntry = new BibEntry() .withField(StandardField.AUTHOR, "{European Union Aviation Safety Agency}"); - assertEquals("EUASA", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null)); } @@ -637,7 +667,6 @@ void expandBracketsInstitutionAbbreviationForAuthorContainingUnion() { void expandBracketsLastNameForAuthorStartingWithOnlyLastNameStartingWithLowerCase() { BibEntry bibEntry = new BibEntry() .withField(StandardField.AUTHOR, "{eBay}"); - assertEquals("eBay", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null)); } @@ -645,7 +674,6 @@ void expandBracketsLastNameForAuthorStartingWithOnlyLastNameStartingWithLowerCas void expandBracketsLastNameWithChineseCharacters() { BibEntry bibEntry = new BibEntry() .withField(StandardField.AUTHOR, "杨秀群"); - assertEquals("杨秀群", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null)); } @@ -653,7 +681,6 @@ void expandBracketsLastNameWithChineseCharacters() { void expandBracketsUnmodifiedStringFromLongFirstPageNumber() { BibEntry bibEntry = new BibEntry() .withField(StandardField.PAGES, "2325967120921344"); - assertEquals("2325967120921344", BracketedPattern.expandBrackets("[firstpage]", null, bibEntry, null)); } @@ -661,7 +688,6 @@ void expandBracketsUnmodifiedStringFromLongFirstPageNumber() { void expandBracketsUnmodifiedStringFromLongLastPageNumber() { BibEntry bibEntry = new BibEntry() .withField(StandardField.PAGES, "2325967120921344"); - assertEquals("2325967120921344", BracketedPattern.expandBrackets("[lastpage]", null, bibEntry, null)); } diff --git a/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithDatabaseTest.java b/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithDatabaseTest.java index 3cafefc7846..724fb72f0d1 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithDatabaseTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithDatabaseTest.java @@ -14,6 +14,9 @@ import static org.jabref.logic.citationkeypattern.CitationKeyGenerator.DEFAULT_UNWANTED_CHARACTERS; import static org.junit.jupiter.api.Assertions.assertEquals; +/** + * Bracketed patterns themselves are tested at {@link org.jabref.logic.citationkeypattern.BracketedPatternTest}. + */ @Execution(ExecutionMode.CONCURRENT) class MakeLabelWithDatabaseTest { @@ -461,11 +464,22 @@ void generateKeyAuthIniMany() { } @Test - void generateKeyTitleRegexe() { - bibtexKeyPattern.setDefaultValue("[title:regex(\" \",\"-\")]"); + void generateKeyTitleRegex() { + // Note that TITLE is written in upper case to have the verbatim value of the title field + // See https://github.com/JabRef/user-documentation/blob/main/en/setup/citationkeypatterns.md#bibentry-fields for information on that. + // We cannot use "-", because this is in the set of unwanted characters and removed after the RegEx is applied + bibtexKeyPattern.setDefaultValue("[TITLE:regex(\" \",\"X\")]"); // regex(" ", "-") entry.setField(StandardField.TITLE, "Please replace the spaces"); new CitationKeyGenerator(bibtexKeyPattern, database, preferences).generateAndSetKey(entry); - assertEquals(Optional.of("PleaseReplacetheSpaces"), entry.getCitationKey()); + assertEquals(Optional.of("PleaseXreplaceXtheXspaces"), entry.getCitationKey()); + } + + @Test + void generateKeyTitleRegexFirstWord() { + bibtexKeyPattern.setDefaultValue("[TITLE:regex(\"(\\w+).*\",\"$1\")]"); + entry.setField(StandardField.TITLE, "First Second Third"); + new CitationKeyGenerator(bibtexKeyPattern, database, preferences).generateAndSetKey(entry); + assertEquals(Optional.of("First"), entry.getCitationKey()); } @Test diff --git a/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithoutDatabaseTest.java b/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithoutDatabaseTest.java index b02b69554ff..61d98eccb92 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithoutDatabaseTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/MakeLabelWithoutDatabaseTest.java @@ -5,6 +5,7 @@ import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -17,12 +18,13 @@ class MakeLabelWithoutDatabaseTest { private CitationKeyGenerator citationKeyGenerator; + private CitationKeyPatternPreferences patternPreferences; @BeforeEach void setUp() { GlobalCitationKeyPatterns keyPattern = new GlobalCitationKeyPatterns(CitationKeyPattern.NULL_CITATION_KEY_PATTERN); keyPattern.setDefaultValue("[auth]"); - CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences( + patternPreferences = new CitationKeyPatternPreferences( false, false, false, @@ -58,4 +60,29 @@ void makeEditorLabelForFileSearch() { String label = citationKeyGenerator.generateKey(entry); assertEquals("Doe", label); } + + @Test + void bamford_1972_Comprehensive_Reaction_V_7_EN() { + // Example taken from https://github.com/JabRef/jabref/issues/11367#issuecomment-2162250948 + BibEntry entry = new BibEntry(StandardEntryType.Book) + .withCitationKey("Bamford_1972_Comprehensive_Reaction_V_7_EN") + .withField(StandardField.LANGUAGE, "english") + .withField(StandardField.MAINTITLE, "Comprehensive Chemical Kinetics") + .withField(StandardField.TITLE, "Reaction of Metallic Salts and Complexes, and Organometallic Compounds") + .withField(StandardField.VOLUME, "7") + .withField(StandardField.YEAR, "1972") + .withField(StandardField.EDITOR, "Bamford, C. H. and Tipper, C. F. H."); + citationKeyGenerator = new CitationKeyGenerator(GlobalCitationKeyPatterns.fromPattern("[edtr]_[YEAR]_[MAINTITLE:regex(\"(\\w+).*\", \"$1\")]_[TITLE:regex(\"(\\w+).*\", \"$1\")]_V_[VOLUME]_[LANGUAGE:regex(\"english\", \"EN\"):regex(\"french\", \"FR\")]"), new BibDatabase(), patternPreferences); + String label = citationKeyGenerator.generateKey(entry); + assertEquals("Bamford_1972_Comprehensive_Reaction_V_7_EN", label); + } + + @Test + void frenchRegEx() { + BibEntry entry = new BibEntry(StandardEntryType.Book) + .withField(StandardField.LANGUAGE, "french"); + citationKeyGenerator = new CitationKeyGenerator(GlobalCitationKeyPatterns.fromPattern("[LANGUAGE:regex(\"english\", \"EN\"):regex(\"french\", \"FR\")]"), new BibDatabase(), patternPreferences); + String label = citationKeyGenerator.generateKey(entry); + assertEquals("FR", label); + } } diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java index 8cd7b10ab5d..dc478a9cb3d 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java @@ -79,4 +79,10 @@ void formatWithSyntaxErrorReturnUnchangedString() { formatter = new RegexFormatter("(\"(\", \"\")"); assertEquals("AaBbCc", formatter.format("AaBbCc")); } + + @Test + void extractFirstWord() { + formatter = new RegexFormatter("(\"(\\w+).*\", \"$1\")"); + assertEquals("First", formatter.format("First Second Third")); + } }