Skip to content

Commit

Permalink
Merge parsing of bracketed patterns (#6989)
Browse files Browse the repository at this point in the history
* Add test cases

* Fix test case use of unwanted character

* Add method injection to `expandBrackets`

* Fix logger message

* Extract methods and change parsing

Extracts code from `generateKey` as methods for readability purposes and
 switches out the parsing code for the one in `BracketedPattern`

* Codestyle improvements

* Fix use of illegal character in test case

* Add log output for PatternSyntaxException

* Fix handling unwanted characters between brackets

* Fix Checkstyle

* Add test case for incorrect regex replacement

* Add JavaDoc

* Add test case
  • Loading branch information
k3KAW8Pnf7mkmdSMPHz27 authored Oct 13, 2020
1 parent fdaf25a commit e955c46
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,43 @@ public String expand(BibEntry bibentry, Character keywordDelimiter, BibDatabase
public static String expandBrackets(String pattern, Character keywordDelimiter, BibEntry entry, BibDatabase database) {
Objects.requireNonNull(pattern);
Objects.requireNonNull(entry);
return expandBrackets(pattern, expandBracketContent(keywordDelimiter, entry, database));
}

/**
* Utility method creating a function taking the string representation of the content of a bracketed expression and
* expanding it.
*
* @param keywordDelimiter The keyword delimiter to use
* @param entry The {@link BibEntry} to use for expansion
* @param database The {@link BibDatabase} for field resolving. May be null.
* @return a function accepting a bracketed expression and returning the result of expanding it
*/
private static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
return (String bracket) -> {
String expandedPattern;
List<String> fieldParts = parseFieldAndModifiers(bracket);
// check whether there is a modifier on the end such as
// ":lower":
expandedPattern = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database);
if (fieldParts.size() > 1) {
// apply modifiers:
expandedPattern = applyModifiers(expandedPattern, fieldParts, 1);
}
return expandedPattern;
};
}

/**
* Expands a pattern.
*
* @param pattern The pattern to expand
* @param bracketContentHandler A function taking the string representation of the content of a bracketed pattern
* and expanding it
* @return The expanded pattern. Not null.
*/
public static String expandBrackets(String pattern, Function<String, String> bracketContentHandler) {
Objects.requireNonNull(pattern);
StringBuilder expandedPattern = new StringBuilder();
StringTokenizer parsedPattern = new StringTokenizer(pattern, "\\[]\"", true);

Expand All @@ -181,23 +218,14 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
case "\"" -> appendQuote(expandedPattern, parsedPattern);
case "[" -> {
String fieldMarker = contentBetweenBrackets(parsedPattern, pattern);

List<String> fieldParts = parseFieldMarker(fieldMarker);
// check whether there is a modifier on the end such as
// ":lower":
if (fieldParts.size() <= 1) {
expandedPattern.append(getFieldValue(entry, fieldMarker, keywordDelimiter, database));
} else {
// apply modifiers:
String fieldValue = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database);
expandedPattern.append(applyModifiers(fieldValue, fieldParts, 1));
}
expandedPattern.append(bracketContentHandler.apply(fieldMarker));
}
case "\\" -> {
if (parsedPattern.hasMoreTokens()) {
expandedPattern.append(parsedPattern.nextToken());
} else {
LOGGER.warn("Found a \"\\\" that is not part of an escape sequence");
}
// FIXME: else -> raise exception or log? (S.G.)
}
default -> expandedPattern.append(token);
}
Expand Down Expand Up @@ -1069,7 +1097,7 @@ public static String lastPage(String pages) {
* @param arg The argument string.
* @return An array of strings representing the parts of the marker
*/
protected static List<String> parseFieldMarker(String arg) {
protected static List<String> parseFieldAndModifiers(String arg) {
List<String> parts = new ArrayList<>();
StringBuilder current = new StringBuilder();
boolean escaped = false;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package org.jabref.logic.citationkeypattern;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.PatternSyntaxException;

import org.jabref.model.FieldChange;
import org.jabref.model.database.BibDatabase;
Expand Down Expand Up @@ -71,8 +72,7 @@ static String generateKey(BibEntry entry, String pattern, BibDatabase database)
}

/**
* Computes an appendix to a citation key that could make it unique. We use
* a-z for numbers 0-25, and then aa-az, ba-bz, etc.
* Computes an appendix to a citation key that could make it unique. We use a-z for numbers 0-25, and then aa-az, ba-bz, etc.
*
* @param number The appendix number.
* @return The String to append.
Expand Down Expand Up @@ -107,55 +107,31 @@ public static String cleanKey(String key, String unwantedCharacters) {
return removeUnwantedCharacters(key, unwantedCharacters).replaceAll("\\s", "");
}

/**
* Generate a citation key for the given {@link BibEntry}.
*
* @param entry a {@link BibEntry}
* @return a citation key based on the user's preferences
*/
public String generateKey(BibEntry entry) {
String key;
StringBuilder stringBuilder = new StringBuilder();
try {
// get the type of entry
EntryType entryType = entry.getType();
// Get the arrayList corresponding to the type
List<String> typeList = new ArrayList<>(citeKeyPattern.getValue(entryType));
if (!typeList.isEmpty()) {
typeList.remove(0);
}
boolean field = false;
for (String typeListEntry : typeList) {
if ("[".equals(typeListEntry)) {
field = true;
} else if ("]".equals(typeListEntry)) {
field = false;
} else if (field) {
// check whether there is a modifier on the end such as
// ":lower"
List<String> parts = parseFieldMarker(typeListEntry);
Character delimiter = citationKeyPatternPreferences.getKeywordDelimiter();
String pattern = "[" + parts.get(0) + "]";
String label = removeUnwantedCharacters(expandBrackets(pattern, delimiter, entry, database), unwantedCharacters);
// apply modifier if present
if (parts.size() > 1) {
label = removeUnwantedCharacters(applyModifiers(label, parts, 1), unwantedCharacters);
}
// Remove all illegal characters from the label.
label = cleanKey(label, unwantedCharacters);
stringBuilder.append(label);
} else {
stringBuilder.append(typeListEntry);
}
}
} catch (Exception e) {
LOGGER.warn("Cannot make label", e);
}
Objects.requireNonNull(entry);
String currentKey = entry.getCitationKey().orElse(null);

key = stringBuilder.toString();
String newKey = createCitationKeyFromPattern(entry);
newKey = replaceWithRegex(newKey);
newKey = appendLettersToKey(newKey, currentKey);

// Remove Regular Expressions while generating Keys
String regex = citationKeyPatternPreferences.getKeyPatternRegex();
if ((regex != null) && !regex.trim().isEmpty()) {
String replacement = citationKeyPatternPreferences.getKeyPatternReplacement();
key = key.replaceAll(regex, replacement);
}
return cleanKey(newKey, unwantedCharacters);
}

String oldKey = entry.getCitationKey().orElse(null);
/**
* A letter will be appended to the key based on the user's preferences, either always or to prevent duplicated keys.
*
* @param key the new key
* @param oldKey the old key
* @return a key, if needed, with an appended letter
*/
private String appendLettersToKey(String key, String oldKey) {
long occurrences = database.getNumberOfCitationKeyOccurrences(key);

if (Objects.equals(oldKey, key)) {
Expand All @@ -165,14 +141,11 @@ public String generateKey(BibEntry entry) {
boolean alwaysAddLetter = citationKeyPatternPreferences.getKeySuffix()
== CitationKeyPatternPreferences.KeySuffix.ALWAYS;

boolean firstLetterA = citationKeyPatternPreferences.getKeySuffix()
== CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A;

String newKey;
if (!alwaysAddLetter && (occurrences == 0)) {
newKey = key;
} else {
if (alwaysAddLetter || occurrences != 0) {
// The key is already in use, so we must modify it.
boolean firstLetterA = citationKeyPatternPreferences.getKeySuffix()
== CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A;

int number = !alwaysAddLetter && !firstLetterA ? 1 : 0;
String moddedKey;

Expand All @@ -187,9 +160,64 @@ public String generateKey(BibEntry entry) {
}
} while (occurrences > 0);

newKey = moddedKey;
key = moddedKey;
}
return newKey;
return key;
}

/**
* Using preferences, replace matches to the provided regex with a string.
*
* @param key the citation key
* @return the citation key where matches to the regex are replaced
*/
private String replaceWithRegex(String key) {
// Remove Regular Expressions while generating Keys
String regex = citationKeyPatternPreferences.getKeyPatternRegex();
if ((regex != null) && !regex.trim().isEmpty()) {
String replacement = citationKeyPatternPreferences.getKeyPatternReplacement();
try {
key = key.replaceAll(regex, replacement);
} catch (PatternSyntaxException e) {
LOGGER.warn("There is a syntax error in the regular expression \"{}\" used to generate a citation key", regex, e);
}
}
return key;
}

private String createCitationKeyFromPattern(BibEntry entry) {
// get the type of entry
EntryType entryType = entry.getType();
// Get the arrayList corresponding to the type
List<String> citationKeyPattern = citeKeyPattern.getValue(entryType);
if (citationKeyPattern.isEmpty()) {
return "";
}
return expandBrackets(citationKeyPattern.get(0), expandBracketContent(entry));
}

/**
* A helper method to create a {@link Function} that takes a single bracketed expression, expands it, and cleans the key.
*
* @param entry the {@link BibEntry} that a citation key is generated for
* @return a cleaned citation key for the given {@link BibEntry}
*/
private Function<String, String> expandBracketContent(BibEntry entry) {
Character keywordDelimiter = citationKeyPatternPreferences.getKeywordDelimiter();

return (String bracket) -> {
String expandedPattern;
List<String> fieldParts = parseFieldAndModifiers(bracket);

expandedPattern = removeUnwantedCharacters(getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database), unwantedCharacters);
// check whether there is a modifier on the end such as
// ":lower":
if (fieldParts.size() > 1) {
// apply modifiers:
expandedPattern = applyModifiers(expandedPattern, fieldParts, 1);
}
return cleanKey(expandedPattern, unwantedCharacters);
};
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,4 +278,18 @@ void expandBracketsWithAuthorStartingWithBrackets() {
.withField(StandardField.AUTHOR, "Patrik {\\v{S}}pan{\\v{e}}l and Kseniya Dryahina and David Smith");
assertEquals("ŠpanělEtAl", BracketedPattern.expandBrackets("[authEtAl:latex_to_unicode]", null, bibEntry, null));
}

@Test
void expandBracketsWithModifierContainingRegexCharacterCkass() {
BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness:Managing");

assertEquals("Wickedness.Managing", BracketedPattern.expandBrackets("[title:regex(\"[:]+\",\".\")]", null, bibEntry, null));
}

@Test
void expandBracketsEmptyStringFromEmptyBrackets() {
BibEntry bibEntry = new BibEntry();

assertEquals("", BracketedPattern.expandBrackets("[]", null, bibEntry, null));
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.logic.citationkeypattern;

import java.util.Collections;
import java.util.Optional;

import org.jabref.logic.importer.ImportFormatPreferences;
Expand Down Expand Up @@ -80,8 +79,7 @@ static String generateKey(BibEntry entry, String pattern) {
}

static String generateKey(BibEntry entry, String pattern, BibDatabase database) {
GlobalCitationKeyPattern keyPattern = new GlobalCitationKeyPattern(Collections.emptyList());
keyPattern.setDefaultValue(pattern);
GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern(pattern);
CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences(
false,
false,
Expand Down Expand Up @@ -1070,15 +1068,15 @@ void generateKeyWithMinusInCitationStyleOutsideAField() {
.withField(StandardField.AUTHOR, AUTHOR_STRING_FIRSTNAME_FULL_LASTNAME_FULL_COUNT_1)
.withField(StandardField.YEAR, "2019");

assertEquals("Newton-2019", generateKey(entry, "[auth]-[year]"));
assertEquals("Newton2019", generateKey(entry, "[auth]-[year]"));
}

@Test
void generateKeyWithWithFirstNCharacters() {
BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "Newton, Isaac")
.withField(StandardField.YEAR, "2019");

assertEquals("newt-2019", generateKey(entry, "[auth4:lower]-[year]"));
assertEquals("newt2019", generateKey(entry, "[auth4:lower]-[year]"));
}

@Test
Expand All @@ -1101,4 +1099,31 @@ void generateKeyWithNonNormalizedUnicode() {

assertEquals("Modele", generateKey(bibEntry, "[veryshorttitle]"));
}

@Test
void generateKeyWithModifierContainingRegexCharacterClass() {
BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness Managing");

assertEquals("WM", generateKey(bibEntry, "[title:regex(\"[a-z]+\",\"\")]"));
}

@Test
void generateKeyDoesNotModifyTheKeyWithIncorrectRegexReplacement() {
String pattern = "[title]";
GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern(pattern);
CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences(
false,
false,
false,
CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A,
"[", // Invalid regexp
"",
DEFAULT_UNWANTED_CHARACTERS,
keyPattern,
',');

BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Wickedness Managing");
assertEquals("WickednessManaging",
new CitationKeyGenerator(keyPattern, new BibDatabase(), patternPreferences).generateKey(bibEntry));
}
}

0 comments on commit e955c46

Please sign in to comment.