Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge parsing of bracketed patterns #6989

Merged
merged 13 commits into from
Oct 13, 2020
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,43 @@ public String expand(BibEntry bibentry, Character keywordDelimiter, BibDatabase
public static String expandBrackets(String pattern, Character keywordDelimiter, BibEntry entry, BibDatabase database) {
    // Fail fast on the two mandatory arguments; keywordDelimiter and database are passed through unchecked.
    Objects.requireNonNull(pattern);
    Objects.requireNonNull(entry);
    // Build the per-bracket expander once, then delegate to the handler-based overload.
    Function<String, String> bracketExpander = expandBracketContent(keywordDelimiter, entry, database);
    return expandBrackets(pattern, bracketExpander);
}

/**
* Utility method creating a function taking the string representation of the content of a bracketed expression and
* expanding it.
*
* @param keywordDelimiter The keyword delimiter to use
* @param entry The {@link BibEntry} to use for expansion
* @param database The {@link BibDatabase} for field resolving. May be null.
* @return a function accepting a bracketed expression and returning the result of expanding it
*/
private static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
return (String bracket) -> {
String expandedPattern;
List<String> fieldParts = parseFieldAndModifiers(bracket);
// check whether there is a modifier on the end such as
// ":lower":
expandedPattern = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database);
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
if (fieldParts.size() > 1) {
// apply modifiers:
expandedPattern = applyModifiers(expandedPattern, fieldParts, 1);
}
return expandedPattern;
};
}

/**
* Expands a pattern.
*
* @param pattern The pattern to expand
* @param bracketContentHandler A function taking the string representation of the content of a bracketed pattern
* and expanding it
* @return The expanded pattern. Not null.
*/
public static String expandBrackets(String pattern, Function<String, String> bracketContentHandler) {
Objects.requireNonNull(pattern);
StringBuilder expandedPattern = new StringBuilder();
StringTokenizer parsedPattern = new StringTokenizer(pattern, "\\[]\"", true);

Expand All @@ -181,23 +218,14 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
case "\"" -> appendQuote(expandedPattern, parsedPattern);
case "[" -> {
String fieldMarker = contentBetweenBrackets(parsedPattern, pattern);

List<String> fieldParts = parseFieldMarker(fieldMarker);
// check whether there is a modifier on the end such as
// ":lower":
if (fieldParts.size() <= 1) {
expandedPattern.append(getFieldValue(entry, fieldMarker, keywordDelimiter, database));
} else {
// apply modifiers:
String fieldValue = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database);
expandedPattern.append(applyModifiers(fieldValue, fieldParts, 1));
}
expandedPattern.append(bracketContentHandler.apply(fieldMarker));
}
case "\\" -> {
if (parsedPattern.hasMoreTokens()) {
expandedPattern.append(parsedPattern.nextToken());
} else {
LOGGER.warn("Found a \"\\\" that is not part of an escape sequence");
}
// FIXME: else -> raise exception or log? (S.G.)
}
default -> expandedPattern.append(token);
}
Expand Down Expand Up @@ -1069,7 +1097,7 @@ public static String lastPage(String pages) {
* @param arg The argument string.
* @return An array of strings representing the parts of the marker
*/
protected static List<String> parseFieldMarker(String arg) {
protected static List<String> parseFieldAndModifiers(String arg) {
List<String> parts = new ArrayList<>();
StringBuilder current = new StringBuilder();
boolean escaped = false;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package org.jabref.logic.citationkeypattern;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.PatternSyntaxException;

import org.jabref.model.FieldChange;
import org.jabref.model.database.BibDatabase;
Expand Down Expand Up @@ -71,8 +72,7 @@ static String generateKey(BibEntry entry, String pattern, BibDatabase database)
}

/**
* Computes an appendix to a citation key that could make it unique. We use
* a-z for numbers 0-25, and then aa-az, ba-bz, etc.
* Computes an appendix to a citation key that could make it unique. We use a-z for numbers 0-25, and then aa-az, ba-bz, etc.
*
* @param number The appendix number.
* @return The String to append.
Expand Down Expand Up @@ -107,55 +107,31 @@ public static String cleanKey(String key, String unwantedCharacters) {
return removeUnwantedCharacters(key, unwantedCharacters).replaceAll("\\s", "");
}

/**
* Generate a citation key for the given {@link BibEntry}.
*
* @param entry a {@link BibEntry}
* @return a citation key based on the user's preferences
*/
public String generateKey(BibEntry entry) {
String key;
StringBuilder stringBuilder = new StringBuilder();
try {
// get the type of entry
EntryType entryType = entry.getType();
// Get the arrayList corresponding to the type
List<String> typeList = new ArrayList<>(citeKeyPattern.getValue(entryType));
if (!typeList.isEmpty()) {
typeList.remove(0);
}
boolean field = false;
for (String typeListEntry : typeList) {
if ("[".equals(typeListEntry)) {
field = true;
} else if ("]".equals(typeListEntry)) {
field = false;
} else if (field) {
// check whether there is a modifier on the end such as
// ":lower"
List<String> parts = parseFieldMarker(typeListEntry);
Character delimiter = citationKeyPatternPreferences.getKeywordDelimiter();
String pattern = "[" + parts.get(0) + "]";
String label = removeUnwantedCharacters(expandBrackets(pattern, delimiter, entry, database), unwantedCharacters);
// apply modifier if present
if (parts.size() > 1) {
label = removeUnwantedCharacters(applyModifiers(label, parts, 1), unwantedCharacters);
}
// Remove all illegal characters from the label.
label = cleanKey(label, unwantedCharacters);
stringBuilder.append(label);
} else {
stringBuilder.append(typeListEntry);
}
}
} catch (Exception e) {
LOGGER.warn("Cannot make label", e);
}
Objects.requireNonNull(entry);
String currentKey = entry.getCitationKey().orElse(null);

key = stringBuilder.toString();
String newKey = createCitationKeyFromPattern(entry);
newKey = replaceWithRegex(newKey);
newKey = appendLettersToKey(newKey, currentKey);

// Remove Regular Expressions while generating Keys
String regex = citationKeyPatternPreferences.getKeyPatternRegex();
if ((regex != null) && !regex.trim().isEmpty()) {
String replacement = citationKeyPatternPreferences.getKeyPatternReplacement();
key = key.replaceAll(regex, replacement);
}
return cleanKey(newKey, unwantedCharacters);
}

String oldKey = entry.getCitationKey().orElse(null);
/**
* A letter will be appended to the key based on the user's preferences, either always or to prevent duplicated keys.
*
* @param key the new key
* @param oldKey the old key
* @return a key, if needed, with an appended letter
*/
private String appendLettersToKey(String key, String oldKey) {
long occurrences = database.getNumberOfCitationKeyOccurrences(key);

if (Objects.equals(oldKey, key)) {
Expand All @@ -165,14 +141,11 @@ public String generateKey(BibEntry entry) {
boolean alwaysAddLetter = citationKeyPatternPreferences.getKeySuffix()
== CitationKeyPatternPreferences.KeySuffix.ALWAYS;

boolean firstLetterA = citationKeyPatternPreferences.getKeySuffix()
== CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A;

String newKey;
if (!alwaysAddLetter && (occurrences == 0)) {
newKey = key;
} else {
if (alwaysAddLetter || occurrences != 0) {
// The key is already in use, so we must modify it.
boolean firstLetterA = citationKeyPatternPreferences.getKeySuffix()
== CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A;

int number = !alwaysAddLetter && !firstLetterA ? 1 : 0;
String moddedKey;

Expand All @@ -187,9 +160,64 @@ public String generateKey(BibEntry entry) {
}
} while (occurrences > 0);

newKey = moddedKey;
key = moddedKey;
}
return newKey;
return key;
}

/**
 * Using preferences, replace matches to the provided regex with a string.
 * <p>
 * If no regex is configured (null or only whitespace) the key is returned unchanged. If the configured regex or
 * replacement is invalid, a warning is logged and the key is returned unchanged as well.
 *
 * @param key the citation key
 * @return the citation key where matches to the regex are replaced
 */
private String replaceWithRegex(String key) {
    String regex = citationKeyPatternPreferences.getKeyPatternRegex();
    if ((regex == null) || regex.trim().isEmpty()) {
        // Nothing configured, nothing to replace
        return key;
    }

    String replacement = citationKeyPatternPreferences.getKeyPatternReplacement();
    try {
        return key.replaceAll(regex, replacement);
    } catch (PatternSyntaxException e) {
        LOGGER.warn("There is a syntax error in the regular expression \"{}\" used to generate a citation key", regex, e);
    } catch (IllegalArgumentException | IndexOutOfBoundsException e) {
        // replaceAll rejects malformed replacements (e.g. a reference to a group the regex does not define)
        LOGGER.warn("The replacement \"{}\" cannot be applied to the regular expression \"{}\" used to generate a citation key", replacement, regex, e);
    }
    // Fall back to the unmodified key when the user-provided regex/replacement could not be applied
    return key;
}

/**
 * Expands the citation key pattern registered for the entry's type.
 *
 * @param entry the {@link BibEntry} a key is created for
 * @return the expanded first element of the pattern, or an empty string if the pattern list for the type is empty
 */
private String createCitationKeyFromPattern(BibEntry entry) {
    // Look up the pattern list registered for this entry's type
    List<String> patternForType = citeKeyPattern.getValue(entry.getType());
    if (!patternForType.isEmpty()) {
        // Only the first element is expanded
        return expandBrackets(patternForType.get(0), expandBracketContent(entry));
    }
    return "";
}

/**
 * Creates the {@link Function} used to expand the content of a single bracketed expression while generating a
 * citation key. Unwanted characters are removed from the resolved field value before modifiers are applied, and
 * the final result is passed through {@code cleanKey}.
 *
 * @param entry the {@link BibEntry} that a citation key is generated for
 * @return a function mapping the content of a bracketed expression to its expanded and cleaned value
 */
private Function<String, String> expandBracketContent(BibEntry entry) {
    // The delimiter is read once, when the function is created, not on every invocation
    Character delimiter = citationKeyPatternPreferences.getKeywordDelimiter();

    return (String bracket) -> {
        List<String> parts = parseFieldAndModifiers(bracket);

        String rawValue = getFieldValue(entry, parts.get(0), delimiter, database);
        String withoutUnwanted = removeUnwantedCharacters(rawValue, unwantedCharacters);
        // Anything after the first part is a modifier such as ":lower"
        boolean hasModifiers = parts.size() > 1;
        String expanded = hasModifiers ? applyModifiers(withoutUnwanted, parts, 1) : withoutUnwanted;
        return cleanKey(expanded, unwantedCharacters);
    };
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,4 +278,18 @@ void expandBracketsWithAuthorStartingWithBrackets() {
.withField(StandardField.AUTHOR, "Patrik {\\v{S}}pan{\\v{e}}l and Kseniya Dryahina and David Smith");
assertEquals("ŠpanělEtAl", BracketedPattern.expandBrackets("[authEtAl:latex_to_unicode]", null, bibEntry, null));
}

@Test
void expandBracketsWithModifierContainingRegexCharacterCkass() {
    // The first regex argument is a character class, i.e. square brackets nested inside the bracketed pattern
    BibEntry entryWithColonInTitle = new BibEntry().withField(StandardField.TITLE, "Wickedness:Managing");
    String pattern = "[title:regex(\"[:]+\",\".\")]";

    String expanded = BracketedPattern.expandBrackets(pattern, null, entryWithColonInTitle, null);

    assertEquals("Wickedness.Managing", expanded);
}

@Test
void expandBracketsEmptyStringFromEmptyBrackets() {
    // An entry without any fields and a pattern consisting of empty brackets only
    BibEntry emptyEntry = new BibEntry();

    String expanded = BracketedPattern.expandBrackets("[]", null, emptyEntry, null);

    assertEquals("", expanded);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.logic.citationkeypattern;

import java.util.Collections;
import java.util.Optional;

import org.jabref.logic.importer.ImportFormatPreferences;
Expand Down Expand Up @@ -80,8 +79,7 @@ static String generateKey(BibEntry entry, String pattern) {
}

static String generateKey(BibEntry entry, String pattern, BibDatabase database) {
GlobalCitationKeyPattern keyPattern = new GlobalCitationKeyPattern(Collections.emptyList());
keyPattern.setDefaultValue(pattern);
GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern(pattern);
CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences(
false,
false,
Expand Down Expand Up @@ -1070,15 +1068,15 @@ void generateKeyWithMinusInCitationStyleOutsideAField() {
.withField(StandardField.AUTHOR, AUTHOR_STRING_FIRSTNAME_FULL_LASTNAME_FULL_COUNT_1)
.withField(StandardField.YEAR, "2019");

assertEquals("Newton-2019", generateKey(entry, "[auth]-[year]"));
assertEquals("Newton2019", generateKey(entry, "[auth]-[year]"));
}

@Test
void generateKeyWithWithFirstNCharacters() {
BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "Newton, Isaac")
.withField(StandardField.YEAR, "2019");

assertEquals("newt-2019", generateKey(entry, "[auth4:lower]-[year]"));
assertEquals("newt2019", generateKey(entry, "[auth4:lower]-[year]"));
}

@Test
Expand All @@ -1101,4 +1099,31 @@ void generateKeyWithNonNormalizedUnicode() {

assertEquals("Modele", generateKey(bibEntry, "[veryshorttitle]"));
}

@Test
void generateKeyWithModifierContainingRegexCharacterClass() {
    // The regex modifier deletes every run of lower-case letters
    BibEntry entryWithTitle = new BibEntry().withField(StandardField.TITLE, "Wickedness Managing");
    String pattern = "[title:regex(\"[a-z]+\",\"\")]";

    String generatedKey = generateKey(entryWithTitle, pattern);

    assertEquals("WM", generatedKey);
}

@Test
void generateKeyDoesNotModifyTheKeyWithIncorrectRegexReplacement() {
    GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern("[title]");
    // Preferences whose key-pattern regex cannot be compiled
    CitationKeyPatternPreferences patternPreferences = new CitationKeyPatternPreferences(
            false,
            false,
            false,
            CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A,
            "[", // Invalid regexp
            "",
            DEFAULT_UNWANTED_CHARACTERS,
            keyPattern,
            ',');
    BibEntry entryWithTitle = new BibEntry().withField(StandardField.TITLE, "Wickedness Managing");
    CitationKeyGenerator generator = new CitationKeyGenerator(keyPattern, new BibDatabase(), patternPreferences);

    String generatedKey = generator.generateKey(entryWithTitle);

    // The key is expected to be generated as if no regex replacement had been configured
    assertEquals("WickednessManaging", generatedKey);
}
}