Skip to content

Commit

Permalink
Fix handling of escapings in bracketed patterns (#11967)
Browse files Browse the repository at this point in the history
* Add references to other tests

* Simplify code

* Add test for extracting the first word

* Relax escaping

* Add link to PR

* Add tests

Refs #11367

* Refine CHANGELOG.md

* Refine CHANGELOG.md
  • Loading branch information
koppor authored Oct 16, 2024
1 parent ec2059a commit 0402d08
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 19 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- ⚠️ Renamed command line parameters `embeddBibfileInPdf` to `embedBibFileInPdf`, `writeMetadatatoPdf` to `writeMetadataToPdf`, and `writeXMPtoPdf` to `writeXmpToPdf`. [#11575](https://github.com/JabRef/jabref/pull/11575)
- The browse button for a Custom theme now opens in the directory of the current used CSS file. [#11597](https://github.com/JabRef/jabref/pull/11597)
- The browse button for a Custom exporter now opens in the directory of the current used exporter file. [#11717](https://github.com/JabRef/jabref/pull/11717)
- ⚠️ We relaxed the escaping requirements for [bracketed patterns](https://docs.jabref.org/setup/citationkeypatterns), which are used for the [citaton key generator](https://docs.jabref.org/advanced/entryeditor#autogenerate-citation-key) and [filename and directory patterns](https://docs.jabref.org/finding-sorting-and-cleaning-entries/filelinks#auto-linking-files). One only needs to write `\"` if a quote sign should be escaped. All other escapings are not necessary (and working) any more. [#11967](https://github.com/JabRef/jabref/pull/11967)
- JabRef now uses TLS 1.2 for all HTTPS connections. [#11852](https://github.com/JabRef/jabref/pull/11852)
- We improved the display of long messages in the integrity check dialog. [#11619](https://github.com/JabRef/jabref/pull/11619)
- We improved the undo/redo buttons in the main toolbar and main menu to be disabled when there is nothing to undo/redo. [#8807](https://github.com/JabRef/jabref/issues/8807)
Expand Down Expand Up @@ -90,6 +91,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We fixed an issue where unescaped braces in the arXiv fetcher were not treated. [#11704](https://github.com/JabRef/jabref/issues/11704)
- We fixed an issue where HTML instead of the fulltext pdf was downloaded when importing arXiv entries. [#4913](https://github.com/JabRef/jabref/issues/4913)
- We fixed an issue where the keywords and crossref fields were not properly focused. [#11177](https://github.com/JabRef/jabref/issues/11177)
- We fixed handling of `\"` in [bracketed patterns](https://docs.jabref.org/setup/citationkeypatterns) containing a RegEx. [#11967](https://github.com/JabRef/jabref/pull/11967)
- We fixed an issue where the Undo/Redo buttons were active even when all libraries are closed. [#11837](https://github.com/JabRef/jabref/issues/11837)
- We fixed an issue where recently opened files were not displayed in the main menu properly. [#9042](https://github.com/JabRef/jabref/issues/9042)
- We fixed an issue where the DOI lookup would show an error when a DOI was found for an entry. [#11850](https://github.com/JabRef/jabref/issues/11850)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,10 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
*/
public static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
return (String bracket) -> {
String expandedPattern;
List<String> fieldParts = parseFieldAndModifiers(bracket);
// check whether there is a modifier on the end such as
// ":lower":
expandedPattern = getFieldValue(entry, fieldParts.getFirst(), keywordDelimiter, database);
String expandedPattern = getFieldValue(entry, fieldParts.getFirst(), keywordDelimiter, database);
if (fieldParts.size() > 1) {
// apply modifiers:
expandedPattern = applyModifiers(expandedPattern, fieldParts, 1, expandBracketContent(keywordDelimiter, entry, database));
Expand All @@ -233,6 +232,7 @@ public static Function<String, String> expandBracketContent(Character keywordDel
public static String expandBrackets(String pattern, Function<String, String> bracketContentHandler) {
Objects.requireNonNull(pattern);
StringBuilder expandedPattern = new StringBuilder();
pattern = pattern.replace("\\\"", "\u0A17");
StringTokenizer parsedPattern = new StringTokenizer(pattern, "\\[]\"", true);

while (parsedPattern.hasMoreTokens()) {
Expand All @@ -254,7 +254,7 @@ public static String expandBrackets(String pattern, Function<String, String> bra
}
}

return expandedPattern.toString();
return expandedPattern.toString().replace("\u0A17", "\\\"");
}

/**
Expand Down Expand Up @@ -1140,11 +1140,19 @@ protected static List<String> parseFieldAndModifiers(String arg) {
} else if (currentChar == '\\') {
if (escaped) {
escaped = false;
current.append(currentChar);
// Only : needs to be escaped
// " -> regex("...", "...") - escaping should be passed through to the regex parser
// : -> :abc:def
current.append('\\');
current.append('\\');
} else {
escaped = true;
}
} else if (escaped) {
if (currentChar != ':') {
// Only : needs to be escaped
current.append('\\');
}
current.append(currentChar);
escaped = false;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,34 @@

public class RegexFormatter extends Formatter {
public static final String KEY = "regex";

private static final Logger LOGGER = LoggerFactory.getLogger(RegexFormatter.class);

private static final Pattern ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{");
private static final Pattern ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}");

/**
* Matches text enclosed in curly brackets. The capturing group is used to prevent part of the input from being
* replaced.
*/
private static final Pattern ENCLOSED_IN_CURLY_BRACES = Pattern.compile("\\{.*?}");

private static final String REGEX_CAPTURING_GROUP = "regex";
private static final String REPLACEMENT_CAPTURING_GROUP = "replacement";

/**
* Matches a valid argument to the constructor. Two capturing groups are used to parse the {@link
* RegexFormatter#regex} and {@link RegexFormatter#replacement} used in {@link RegexFormatter#format(String)}
*/
private static final Pattern CONSTRUCTOR_ARGUMENT = Pattern.compile(
"^\\(\"(?<" + REGEX_CAPTURING_GROUP + ">.*?)\" *?, *?\"(?<" + REPLACEMENT_CAPTURING_GROUP + ">.*)\"\\)$");

// Magic arbitrary unicode char, which will never appear in bibtex files
private static final String PLACEHOLDER_FOR_PROTECTED_GROUP = Character.toString('\u0A14');
private static final String PLACEHOLDER_FOR_OPENING_CURLY_BRACE = Character.toString('\u0A15');
private static final String PLACEHOLDER_FOR_CLOSING_CURLY_BRACE = Character.toString('\u0A16');
private static final String PLACEHOLDER_FOR_QUOTE_SIGN = Character.toString('\u0A17');

private final String regex;
private final String replacement;

Expand All @@ -46,11 +54,11 @@ public class RegexFormatter extends Formatter {
*/
public RegexFormatter(String input) {
Objects.requireNonNull(input);
input = input.trim();
input = input.trim().replace("\\\"", PLACEHOLDER_FOR_QUOTE_SIGN);
Matcher constructorArgument = CONSTRUCTOR_ARGUMENT.matcher(input);
if (constructorArgument.matches()) {
regex = constructorArgument.group(REGEX_CAPTURING_GROUP);
replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP);
regex = constructorArgument.group(REGEX_CAPTURING_GROUP).replace(PLACEHOLDER_FOR_QUOTE_SIGN, "\"");
replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP).replace(PLACEHOLDER_FOR_QUOTE_SIGN, "\"");
} else {
regex = null;
replacement = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

/**
* Tests based on a BibEntry are contained in {@link CitationKeyGeneratorTest}
*
* "Complete" entries are tested at {@link org.jabref.logic.citationkeypattern.MakeLabelWithDatabaseTest}
*/
@Execution(ExecutionMode.CONCURRENT)
class BracketedPatternTest {
Expand Down Expand Up @@ -606,62 +608,86 @@ void expandBracketsWithAuthorStartingWithBrackets() {
@Test
void expandBracketsWithModifierContainingRegexCharacterClass() {
BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness:Managing");

assertEquals("Wickedness.Managing", BracketedPattern.expandBrackets("[title:regex(\"[:]+\",\".\")]", null, bibEntry, null));
}

@Test
void regExForFirstWord() {
BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "First Second Third");
assertEquals("First", BracketedPattern.expandBrackets("[TITLE:regex(\"(\\w+).*\",\"$1\")]", null, bibEntry, null));
}

@Test
void regExWithComma() {
BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "First,Second,Third");
assertEquals("First+Second+Third", BracketedPattern.expandBrackets("[TITLE:regex(\",\",\"+\")]", null, bibEntry, null));
}

@Test
void regExWithEscapedQuote() {
BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "First\"Second\"Third");
assertEquals("First+Second+Third", BracketedPattern.expandBrackets("[TITLE:regex(\"\\\"\",\"+\")]", null, bibEntry, null));
}

@Test
void regExWithEtAlTwoAuthors() {
// Example from https://docs.jabref.org/setup/citationkeypatterns#modifiers
BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "First Last and Second Last");
assertEquals("LastAndLast", BracketedPattern.expandBrackets("[auth.etal:regex(\"\\.etal\",\"EtAl\"):regex(\"\\.\",\"And\")]", null, bibEntry, null));
}

@Test
void regExWithEtAlThreeAuthors() {
// Example from https://docs.jabref.org/setup/citationkeypatterns#modifiers
BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "First Last and Second Last and Third Last");
assertEquals("LastEtAl", BracketedPattern.expandBrackets("[auth.etal:regex(\"\\.etal\",\"EtAl\"):regex(\"\\.\",\"And\")]", null, bibEntry, null));
}

@Test
void expandBracketsEmptyStringFromEmptyBrackets() {
BibEntry bibEntry = new BibEntry();

assertEquals("", BracketedPattern.expandBrackets("[]", null, bibEntry, null));
}

@Test
void expandBracketsInstitutionAbbreviationFromProvidedAbbreviation() {
BibEntry bibEntry = new BibEntry()
.withField(StandardField.AUTHOR, "{European Union Aviation Safety Agency ({EUASABRACKET})}");

assertEquals("EUASABRACKET", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null));
}

@Test
void expandBracketsInstitutionAbbreviationForAuthorContainingUnion() {
BibEntry bibEntry = new BibEntry()
.withField(StandardField.AUTHOR, "{European Union Aviation Safety Agency}");

assertEquals("EUASA", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null));
}

@Test
void expandBracketsLastNameForAuthorStartingWithOnlyLastNameStartingWithLowerCase() {
BibEntry bibEntry = new BibEntry()
.withField(StandardField.AUTHOR, "{eBay}");

assertEquals("eBay", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null));
}

@Test
void expandBracketsLastNameWithChineseCharacters() {
BibEntry bibEntry = new BibEntry()
.withField(StandardField.AUTHOR, "杨秀群");

assertEquals("杨秀群", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null));
}

@Test
void expandBracketsUnmodifiedStringFromLongFirstPageNumber() {
BibEntry bibEntry = new BibEntry()
.withField(StandardField.PAGES, "2325967120921344");

assertEquals("2325967120921344", BracketedPattern.expandBrackets("[firstpage]", null, bibEntry, null));
}

@Test
void expandBracketsUnmodifiedStringFromLongLastPageNumber() {
BibEntry bibEntry = new BibEntry()
.withField(StandardField.PAGES, "2325967120921344");

assertEquals("2325967120921344", BracketedPattern.expandBrackets("[lastpage]", null, bibEntry, null));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
import static org.jabref.logic.citationkeypattern.CitationKeyGenerator.DEFAULT_UNWANTED_CHARACTERS;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
* Bracketed patterns themselves are tested at {@link org.jabref.logic.citationkeypattern.BracketedPatternTest}.
*/
@Execution(ExecutionMode.CONCURRENT)
class MakeLabelWithDatabaseTest {

Expand Down Expand Up @@ -461,11 +464,22 @@ void generateKeyAuthIniMany() {
}

@Test
void generateKeyTitleRegexe() {
bibtexKeyPattern.setDefaultValue("[title:regex(\" \",\"-\")]");
void generateKeyTitleRegex() {
// Note that TITLE is written in upper case to have the verbatim value of the title field
// See https://github.com/JabRef/user-documentation/blob/main/en/setup/citationkeypatterns.md#bibentry-fields for information on that.
// We cannot use "-", because this is in the set of unwanted characters and removed after the RegEx is applied
bibtexKeyPattern.setDefaultValue("[TITLE:regex(\" \",\"X\")]"); // regex(" ", "-")
entry.setField(StandardField.TITLE, "Please replace the spaces");
new CitationKeyGenerator(bibtexKeyPattern, database, preferences).generateAndSetKey(entry);
assertEquals(Optional.of("PleaseReplacetheSpaces"), entry.getCitationKey());
assertEquals(Optional.of("PleaseXreplaceXtheXspaces"), entry.getCitationKey());
}

@Test
void generateKeyTitleRegexFirstWord() {
bibtexKeyPattern.setDefaultValue("[TITLE:regex(\"(\\w+).*\",\"$1\")]");
entry.setField(StandardField.TITLE, "First Second Third");
new CitationKeyGenerator(bibtexKeyPattern, database, preferences).generateAndSetKey(entry);
assertEquals(Optional.of("First"), entry.getCitationKey());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -17,12 +18,13 @@
class MakeLabelWithoutDatabaseTest {

private CitationKeyGenerator citationKeyGenerator;
private CitationKeyPatternPreferences patternPreferences;

@BeforeEach
void setUp() {
GlobalCitationKeyPatterns keyPattern = new GlobalCitationKeyPatterns(CitationKeyPattern.NULL_CITATION_KEY_PATTERN);
keyPattern.setDefaultValue("[auth]");
CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences(
patternPreferences = new CitationKeyPatternPreferences(
false,
false,
false,
Expand Down Expand Up @@ -58,4 +60,29 @@ void makeEditorLabelForFileSearch() {
String label = citationKeyGenerator.generateKey(entry);
assertEquals("Doe", label);
}

@Test
void bamford_1972_Comprehensive_Reaction_V_7_EN() {
// Example taken from https://github.com/JabRef/jabref/issues/11367#issuecomment-2162250948
BibEntry entry = new BibEntry(StandardEntryType.Book)
.withCitationKey("Bamford_1972_Comprehensive_Reaction_V_7_EN")
.withField(StandardField.LANGUAGE, "english")
.withField(StandardField.MAINTITLE, "Comprehensive Chemical Kinetics")
.withField(StandardField.TITLE, "Reaction of Metallic Salts and Complexes, and Organometallic Compounds")
.withField(StandardField.VOLUME, "7")
.withField(StandardField.YEAR, "1972")
.withField(StandardField.EDITOR, "Bamford, C. H. and Tipper, C. F. H.");
citationKeyGenerator = new CitationKeyGenerator(GlobalCitationKeyPatterns.fromPattern("[edtr]_[YEAR]_[MAINTITLE:regex(\"(\\w+).*\", \"$1\")]_[TITLE:regex(\"(\\w+).*\", \"$1\")]_V_[VOLUME]_[LANGUAGE:regex(\"english\", \"EN\"):regex(\"french\", \"FR\")]"), new BibDatabase(), patternPreferences);
String label = citationKeyGenerator.generateKey(entry);
assertEquals("Bamford_1972_Comprehensive_Reaction_V_7_EN", label);
}

@Test
void frenchRegEx() {
BibEntry entry = new BibEntry(StandardEntryType.Book)
.withField(StandardField.LANGUAGE, "french");
citationKeyGenerator = new CitationKeyGenerator(GlobalCitationKeyPatterns.fromPattern("[LANGUAGE:regex(\"english\", \"EN\"):regex(\"french\", \"FR\")]"), new BibDatabase(), patternPreferences);
String label = citationKeyGenerator.generateKey(entry);
assertEquals("FR", label);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,10 @@ void formatWithSyntaxErrorReturnUnchangedString() {
formatter = new RegexFormatter("(\"(\", \"\")");
assertEquals("AaBbCc", formatter.format("AaBbCc"));
}

@Test
void extractFirstWord() {
formatter = new RegexFormatter("(\"(\\w+).*\", \"$1\")");
assertEquals("First", formatter.format("First Second Third"));
}
}

0 comments on commit 0402d08

Please sign in to comment.