From e955c46850e4738830198a889c3f09870b88ab30 Mon Sep 17 00:00:00 2001 From: Jonatan Asketorp <2598631+k3KAW8Pnf7mkmdSMPHz27@users.noreply.github.com> Date: Tue, 13 Oct 2020 09:01:41 -0400 Subject: [PATCH] Merge parsing of bracketed patterns (#6989) * Add test cases * Fix test case use of unwanted character * Add method injection to `expandBrackets` * Fix logger message * Extract methods and change parsing Extracts code from `generateKey` as methods for readability purposes and switches out the parsing code for the one in `BracketedPattern` * Codestyle improvements * Fix use of illegal character in test case * Add log output for PatternSyntaxException * Fix handling unwanted characters between brackets * Fix Checkstyle * Add test case for incorrect regex replacement * Add JavaDoc * Add test case --- .../citationkeypattern/BracketedPattern.java | 54 +++++-- .../CitationKeyGenerator.java | 142 +++++++++++------- .../BracketedPatternTest.java | 14 ++ .../CitationKeyGeneratorTest.java | 35 ++++- 4 files changed, 170 insertions(+), 75 deletions(-) diff --git a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java index b2fa0f5221a..a3c93dfd898 100644 --- a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java +++ b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java @@ -172,6 +172,43 @@ public String expand(BibEntry bibentry, Character keywordDelimiter, BibDatabase public static String expandBrackets(String pattern, Character keywordDelimiter, BibEntry entry, BibDatabase database) { Objects.requireNonNull(pattern); Objects.requireNonNull(entry); + return expandBrackets(pattern, expandBracketContent(keywordDelimiter, entry, database)); + } + + /** + * Utility method creating a function taking the string representation of the content of a bracketed expression and + * expanding it. + * + * @param keywordDelimiter The keyword delimiter to use + * @param entry The {@link BibEntry} to use for expansion + * @param database The {@link BibDatabase} for field resolving. May be null. + * @return a function accepting a bracketed expression and returning the result of expanding it + */ + private static Function expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) { + return (String bracket) -> { + String expandedPattern; + List fieldParts = parseFieldAndModifiers(bracket); + // check whether there is a modifier on the end such as + // ":lower": + expandedPattern = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database); + if (fieldParts.size() > 1) { + // apply modifiers: + expandedPattern = applyModifiers(expandedPattern, fieldParts, 1); + } + return expandedPattern; + }; + } + + /** + * Expands a pattern. + * + * @param pattern The pattern to expand + * @param bracketContentHandler A function taking the string representation of the content of a bracketed pattern + * and expanding it + * @return The expanded pattern. Not null. + */ + public static String expandBrackets(String pattern, Function bracketContentHandler) { + Objects.requireNonNull(pattern); StringBuilder expandedPattern = new StringBuilder(); StringTokenizer parsedPattern = new StringTokenizer(pattern, "\\[]\"", true); @@ -181,23 +218,14 @@ public static String expandBrackets(String pattern, Character keywordDelimiter, case "\"" -> appendQuote(expandedPattern, parsedPattern); case "[" -> { String fieldMarker = contentBetweenBrackets(parsedPattern, pattern); - - List fieldParts = parseFieldMarker(fieldMarker); - // check whether there is a modifier on the end such as - // ":lower": - if (fieldParts.size() <= 1) { - expandedPattern.append(getFieldValue(entry, fieldMarker, keywordDelimiter, database)); - } else { - // apply modifiers: - String fieldValue = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database); - expandedPattern.append(applyModifiers(fieldValue, fieldParts, 1)); - } + expandedPattern.append(bracketContentHandler.apply(fieldMarker)); } case "\\" -> { if (parsedPattern.hasMoreTokens()) { expandedPattern.append(parsedPattern.nextToken()); + } else { + LOGGER.warn("Found a \"\\\" that is not part of an escape sequence"); } - // FIXME: else -> raise exception or log? (S.G.) } default -> expandedPattern.append(token); } @@ -1069,7 +1097,7 @@ public static String lastPage(String pages) { * @param arg The argument string. * @return An array of strings representing the parts of the marker */ - protected static List parseFieldMarker(String arg) { + protected static List parseFieldAndModifiers(String arg) { List parts = new ArrayList<>(); StringBuilder current = new StringBuilder(); boolean escaped = false; diff --git a/src/main/java/org/jabref/logic/citationkeypattern/CitationKeyGenerator.java b/src/main/java/org/jabref/logic/citationkeypattern/CitationKeyGenerator.java index 5d13cfa7484..616ba434e8b 100644 --- a/src/main/java/org/jabref/logic/citationkeypattern/CitationKeyGenerator.java +++ b/src/main/java/org/jabref/logic/citationkeypattern/CitationKeyGenerator.java @@ -1,11 +1,12 @@ package org.jabref.logic.citationkeypattern; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.function.Function; +import java.util.regex.PatternSyntaxException; import org.jabref.model.FieldChange; import org.jabref.model.database.BibDatabase; @@ -71,8 +72,7 @@ static String generateKey(BibEntry entry, String pattern, BibDatabase database) } /** - * Computes an appendix to a citation key that could make it unique. We use - * a-z for numbers 0-25, and then aa-az, ba-bz, etc. + * Computes an appendix to a citation key that could make it unique. We use a-z for numbers 0-25, and then aa-az, ba-bz, etc. * * @param number The appendix number. * @return The String to append. @@ -107,55 +107,31 @@ public static String cleanKey(String key, String unwantedCharacters) { return removeUnwantedCharacters(key, unwantedCharacters).replaceAll("\\s", ""); } + /** + * Generate a citation key for the given {@link BibEntry}. + * + * @param entry a {@link BibEntry} + * @return a citation key based on the user's preferences + */ public String generateKey(BibEntry entry) { - String key; - StringBuilder stringBuilder = new StringBuilder(); - try { - // get the type of entry - EntryType entryType = entry.getType(); - // Get the arrayList corresponding to the type - List typeList = new ArrayList<>(citeKeyPattern.getValue(entryType)); - if (!typeList.isEmpty()) { - typeList.remove(0); - } - boolean field = false; - for (String typeListEntry : typeList) { - if ("[".equals(typeListEntry)) { - field = true; - } else if ("]".equals(typeListEntry)) { - field = false; - } else if (field) { - // check whether there is a modifier on the end such as - // ":lower" - List parts = parseFieldMarker(typeListEntry); - Character delimiter = citationKeyPatternPreferences.getKeywordDelimiter(); - String pattern = "[" + parts.get(0) + "]"; - String label = removeUnwantedCharacters(expandBrackets(pattern, delimiter, entry, database), unwantedCharacters); - // apply modifier if present - if (parts.size() > 1) { - label = removeUnwantedCharacters(applyModifiers(label, parts, 1), unwantedCharacters); - } - // Remove all illegal characters from the label. - label = cleanKey(label, unwantedCharacters); - stringBuilder.append(label); - } else { - stringBuilder.append(typeListEntry); - } - } - } catch (Exception e) { - LOGGER.warn("Cannot make label", e); - } + Objects.requireNonNull(entry); + String currentKey = entry.getCitationKey().orElse(null); - key = stringBuilder.toString(); + String newKey = createCitationKeyFromPattern(entry); + newKey = replaceWithRegex(newKey); + newKey = appendLettersToKey(newKey, currentKey); - // Remove Regular Expressions while generating Keys - String regex = citationKeyPatternPreferences.getKeyPatternRegex(); - if ((regex != null) && !regex.trim().isEmpty()) { - String replacement = citationKeyPatternPreferences.getKeyPatternReplacement(); - key = key.replaceAll(regex, replacement); - } + return cleanKey(newKey, unwantedCharacters); + } - String oldKey = entry.getCitationKey().orElse(null); + /** + * A letter will be appended to the key based on the user's preferences, either always or to prevent duplicated keys. + * + * @param key the new key + * @param oldKey the old key + * @return a key, if needed, with an appended letter + */ + private String appendLettersToKey(String key, String oldKey) { long occurrences = database.getNumberOfCitationKeyOccurrences(key); if (Objects.equals(oldKey, key)) { @@ -165,14 +141,11 @@ public String generateKey(BibEntry entry) { boolean alwaysAddLetter = citationKeyPatternPreferences.getKeySuffix() == CitationKeyPatternPreferences.KeySuffix.ALWAYS; - boolean firstLetterA = citationKeyPatternPreferences.getKeySuffix() - == CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A; - - String newKey; - if (!alwaysAddLetter && (occurrences == 0)) { - newKey = key; - } else { + if (alwaysAddLetter || occurrences != 0) { // The key is already in use, so we must modify it. + boolean firstLetterA = citationKeyPatternPreferences.getKeySuffix() + == CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A; + int number = !alwaysAddLetter && !firstLetterA ? 1 : 0; String moddedKey; @@ -187,9 +160,64 @@ public String generateKey(BibEntry entry) { } } while (occurrences > 0); - newKey = moddedKey; + key = moddedKey; } - return newKey; + return key; + } + + /** + * Using preferences, replace matches to the provided regex with a string. + * + * @param key the citation key + * @return the citation key where matches to the regex are replaced + */ + private String replaceWithRegex(String key) { + // Remove Regular Expressions while generating Keys + String regex = citationKeyPatternPreferences.getKeyPatternRegex(); + if ((regex != null) && !regex.trim().isEmpty()) { + String replacement = citationKeyPatternPreferences.getKeyPatternReplacement(); + try { + key = key.replaceAll(regex, replacement); + } catch (PatternSyntaxException e) { + LOGGER.warn("There is a syntax error in the regular expression \"{}\" used to generate a citation key", regex, e); + } + } + return key; + } + + private String createCitationKeyFromPattern(BibEntry entry) { + // get the type of entry + EntryType entryType = entry.getType(); + // Get the arrayList corresponding to the type + List citationKeyPattern = citeKeyPattern.getValue(entryType); + if (citationKeyPattern.isEmpty()) { + return ""; + } + return expandBrackets(citationKeyPattern.get(0), expandBracketContent(entry)); + } + + /** + * A helper method to create a {@link Function} that takes a single bracketed expression, expands it, and cleans the key. + * + * @param entry the {@link BibEntry} that a citation key is generated for + * @return a cleaned citation key for the given {@link BibEntry} + */ + private Function expandBracketContent(BibEntry entry) { + Character keywordDelimiter = citationKeyPatternPreferences.getKeywordDelimiter(); + + return (String bracket) -> { + String expandedPattern; + List fieldParts = parseFieldAndModifiers(bracket); + + expandedPattern = removeUnwantedCharacters(getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database), unwantedCharacters); + // check whether there is a modifier on the end such as + // ":lower": + if (fieldParts.size() > 1) { + // apply modifiers: + expandedPattern = applyModifiers(expandedPattern, fieldParts, 1); + } + return cleanKey(expandedPattern, unwantedCharacters); + }; } /** diff --git a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java index f5931a8f1b0..c7899d32c0b 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java @@ -278,4 +278,18 @@ void expandBracketsWithAuthorStartingWithBrackets() { .withField(StandardField.AUTHOR, "Patrik {\\v{S}}pan{\\v{e}}l and Kseniya Dryahina and David Smith"); assertEquals("ŠpanělEtAl", BracketedPattern.expandBrackets("[authEtAl:latex_to_unicode]", null, bibEntry, null)); } + + @Test + void expandBracketsWithModifierContainingRegexCharacterCkass() { + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness:Managing"); + + assertEquals("Wickedness.Managing", BracketedPattern.expandBrackets("[title:regex(\"[:]+\",\".\")]", null, bibEntry, null)); + } + + @Test + void expandBracketsEmptyStringFromEmptyBrackets() { + BibEntry bibEntry = new BibEntry(); + + assertEquals("", BracketedPattern.expandBrackets("[]", null, bibEntry, null)); + } } diff --git a/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java b/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java index e31a0776772..23eb958cb2c 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java @@ -1,6 +1,5 @@ package org.jabref.logic.citationkeypattern; -import java.util.Collections; import java.util.Optional; import org.jabref.logic.importer.ImportFormatPreferences; @@ -80,8 +79,7 @@ static String generateKey(BibEntry entry, String pattern) { } static String generateKey(BibEntry entry, String pattern, BibDatabase database) { - GlobalCitationKeyPattern keyPattern = new GlobalCitationKeyPattern(Collections.emptyList()); - keyPattern.setDefaultValue(pattern); + GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern(pattern); CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences( false, false, @@ -1070,7 +1068,7 @@ void generateKeyWithMinusInCitationStyleOutsideAField() { .withField(StandardField.AUTHOR, AUTHOR_STRING_FIRSTNAME_FULL_LASTNAME_FULL_COUNT_1) .withField(StandardField.YEAR, "2019"); - assertEquals("Newton-2019", generateKey(entry, "[auth]-[year]")); + assertEquals("Newton2019", generateKey(entry, "[auth]-[year]")); } @Test @@ -1078,7 +1076,7 @@ void generateKeyWithWithFirstNCharacters() { BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "Newton, Isaac") .withField(StandardField.YEAR, "2019"); - assertEquals("newt-2019", generateKey(entry, "[auth4:lower]-[year]")); + assertEquals("newt2019", generateKey(entry, "[auth4:lower]-[year]")); } @Test @@ -1101,4 +1099,31 @@ void generateKeyWithNonNormalizedUnicode() { assertEquals("Modele", generateKey(bibEntry, "[veryshorttitle]")); } + + @Test + void generateKeyWithModifierContainingRegexCharacterClass() { + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness Managing"); + + assertEquals("WM", generateKey(bibEntry, "[title:regex(\"[a-z]+\",\"\")]")); + } + + @Test + void generateKeyDoesNotModifyTheKeyWithIncorrectRegexReplacement() { + String pattern = "[title]"; + GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern(pattern); + CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences( + false, + false, + false, + CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A, + "[", // Invalid regexp + "", + DEFAULT_UNWANTED_CHARACTERS, + keyPattern, + ','); + + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness Managing"); + assertEquals("WickednessManaging", + new CitationKeyGenerator(keyPattern, new BibDatabase(), patternPreferences).generateKey(bibEntry)); + } }