Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not export file field in XMP #11083

Merged
merged 4 commits into from
Mar 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- The keywords field is now displayed as tags. [#10910](https://github.com/JabRef/jabref/pull/10910)
- Citation relations now get more information, and have quick access to view the articles in a browser without adding them to the library [#10869](https://github.com/JabRef/jabref/issues/10869)
- Importer/Exporter for CFF format now supports JabRef `cites` and `related` relationships, as well as all fields from the CFF specification. [#10993](https://github.com/JabRef/jabref/issues/10993)
- The XMP-Exporter no longer writes the content of the `file`-field. [#11083](https://github.com/JabRef/jabref/pull/11083)

### Fixed

Expand Down
98 changes: 52 additions & 46 deletions src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

/**
* Writes given BibEntries into the XMP part of a PDF file.
*
* <p>
* The conversion of a BibEntry to the XMP data (using Dublin Core) is done at
* {@link DublinCoreExtractor#fillDublinCoreSchema()}
*/
Expand All @@ -56,13 +56,13 @@ public XmpUtilWriter(XmpPreferences xmpPreferences) {
/**
* Try to write the given BibTexEntry in the XMP-stream of the given
* PDF-file.
*
* <p>
* Throws an IOException if the file cannot be read or written, so the user
* can remove a lock or cancel the operation.
*
* <p>
* The method will overwrite existing BibTeX-XMP-data, but keep other
* existing metadata.
*
* <p>
* This is a convenience method for writeXMP(File, Collection).
*
* @param file The path to write to.
Expand All @@ -75,18 +75,18 @@ public XmpUtilWriter(XmpPreferences xmpPreferences) {
public void writeXmp(Path file,
BibEntry entry,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {
writeXmp(file, List.of(entry), database);
}

/**
* Writes the information of the bib entry to the dublin core schema using
* a custom extractor.
*
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param database An optional database which the given bibtex entries belong to, which will be used to
* resolve strings. If the database is null the strings will not be resolved.
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param database An optional database which the given bibtex entries belong to, which will be used to
* resolve strings. If the database is null the strings will not be resolved.
*/
private void writeToDCSchema(DublinCoreSchema dcSchema,
BibEntry entry,
Expand All @@ -98,8 +98,8 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,
/**
* Writes the information of the bib entry to the dublin core schema using a custom extractor.
*
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
*/
private void writeToDCSchema(DublinCoreSchema dcSchema,
BibEntry entry) {
Expand All @@ -109,7 +109,7 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,

/**
* Try to write the given BibTexEntries as DublinCore XMP Schemas
*
* <p>
* Existing DublinCore schemas in the document are removed
*
* @param document The pdf document to write to.
Expand All @@ -120,7 +120,7 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,
private void writeDublinCore(PDDocument document,
List<BibEntry> entries,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {

List<BibEntry> resolvedEntries;
if (database == null) {
Expand Down Expand Up @@ -166,10 +166,9 @@ private void writeDublinCore(PDDocument document,
/**
* This method generates an xmp metadata string in dublin core format.
*
* @param entries A list of entries, which are added to the dublin core metadata.
*
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata as a string in dublin core format.
* @param entries A list of entries, which are added to the dublin core metadata.
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata as a string in dublin core format.
*/
private String generateXmpStringWithXmpDeclaration(List<BibEntry> entries) {
XMPMetadata meta = XMPMetadata.createXMPMetadata();
Expand Down Expand Up @@ -198,10 +197,9 @@ private String generateXmpStringWithXmpDeclaration(List<BibEntry> entries) {
* metadata section <?xpacket begin=...>.
* <br/>
*
* @param entries A list of entries, which are added to the dublin core metadata.
*
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata without metadata description as a string in dublin core format.
* @param entries A list of entries, which are added to the dublin core metadata.
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata without metadata description as a string in dublin core format.
*/
public String generateXmpStringWithoutXmpDeclaration(List<BibEntry> entries) {
String xmpContent = generateXmpStringWithXmpDeclaration(entries);
Expand Down Expand Up @@ -237,34 +235,42 @@ private void writeDocumentInformation(PDDocument document,
for (Field field : resolvedEntry.getFields()) {
if (useXmpPrivacyFilter && xmpPreferences.getXmpPrivacyFilter().contains(field)) {
// erase field instead of adding it
if (StandardField.AUTHOR == field) {
di.setAuthor(null);
} else if (StandardField.TITLE == field) {
di.setTitle(null);
} else if (StandardField.KEYWORDS == field) {
di.setKeywords(null);
} else if (StandardField.ABSTRACT == field) {
di.setSubject(null);
} else {
di.setCustomMetadataValue("bibtex/" + field, null);
switch (field) {
case StandardField.AUTHOR ->
di.setAuthor(null);
case StandardField.TITLE ->
di.setTitle(null);
case StandardField.KEYWORDS ->
di.setKeywords(null);
case StandardField.ABSTRACT ->
di.setSubject(null);
case StandardField.FILE -> {
// do not write file field
}
case null, default ->
di.setCustomMetadataValue("bibtex/" + field, null);
}
continue;
}

// LaTeX content is removed from the string for "standard" fields in the PDF
String value = unprotectTermsFormatter.format(resolvedEntry.getField(field).get());
String value = unprotectTermsFormatter.format(resolvedEntry.getField(field).orElse(""));

if (StandardField.AUTHOR == field) {
di.setAuthor(value);
} else if (StandardField.TITLE == field) {
di.setTitle(value);
} else if (StandardField.KEYWORDS == field) {
di.setKeywords(value);
} else if (StandardField.ABSTRACT == field) {
di.setSubject(value);
} else {
switch (field) {
case StandardField.AUTHOR ->
di.setAuthor(value);
case StandardField.TITLE ->
di.setTitle(value);
case StandardField.KEYWORDS ->
di.setKeywords(value);
case StandardField.ABSTRACT ->
di.setSubject(value);
case StandardField.FILE -> {
// do not write file field
}
case null, default ->
resolvedEntry.getField(field).ifPresent(val -> di.setCustomMetadataValue("bibtex/" + field, val));
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
// We hit the case of a PDF-unsupported field --> write it directly
di.setCustomMetadataValue("bibtex/" + field, resolvedEntry.getField(field).get());
}
}
di.setCustomMetadataValue("bibtex/entrytype", resolvedEntry.getType().getDisplayName());
Expand All @@ -273,13 +279,13 @@ private void writeDocumentInformation(PDDocument document,
/**
* Try to write the given BibTexEntry in the XMP-stream of the given
* PDF-file.
*
* <p>
* Throws an IOException if the file cannot be read or written, so the user
* can remove a lock or cancel the operation.
*
* <p>
* The method will overwrite existing BibTeX-XMP-data, but keep other
* existing metadata.
*
* <p>
* The code for using PDFBox is also used at {@link EmbeddedBibFilePdfExporter#embedBibTex(String, Path)}.
*
* @param path The file to write the entries to.
Expand All @@ -292,7 +298,7 @@ private void writeDocumentInformation(PDDocument document,
public void writeXmp(Path path,
List<BibEntry> bibtexEntries,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {
List<BibEntry> resolvedEntries;
if (database == null) {
resolvedEntries = bibtexEntries;
Expand Down
100 changes: 50 additions & 50 deletions src/test/java/org/jabref/logic/exporter/XmpPdfExporterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ class XmpPdfExporterTest {

@TempDir static Path tempDir;

private static BibEntry olly2018 = new BibEntry(StandardEntryType.Article);
private static BibEntry toral2006 = new BibEntry(StandardEntryType.Article);
private static BibEntry vapnik2000 = new BibEntry(StandardEntryType.Article);
private static final BibEntry OLLY_2018 = new BibEntry(StandardEntryType.Article);
private static final BibEntry TORAL_2006 = new BibEntry(StandardEntryType.Article);
private static final BibEntry VAPNIK_2000 = new BibEntry(StandardEntryType.Article);

private XmpPdfExporter exporter;
private PdfXmpImporter importer;
Expand All @@ -61,46 +61,46 @@ class XmpPdfExporterTest {
private FilePreferences filePreferences;

private static void initBibEntries() throws IOException {
olly2018.setCitationKey("Olly2018");
olly2018.setField(StandardField.AUTHOR, "Olly and Johannes");
olly2018.setField(StandardField.TITLE, "Stefan's palace");
olly2018.setField(StandardField.JOURNAL, "Test Journal");
olly2018.setField(StandardField.VOLUME, "1");
olly2018.setField(StandardField.NUMBER, "1");
olly2018.setField(StandardField.PAGES, "1-2");
olly2018.setMonth(Month.MARCH);
olly2018.setField(StandardField.ISSN, "978-123-123");
olly2018.setField(StandardField.NOTE, "NOTE");
olly2018.setField(StandardField.ABSTRACT, "ABSTRACT");
olly2018.setField(StandardField.COMMENT, "COMMENT");
olly2018.setField(StandardField.DOI, "10/3212.3123");
olly2018.setField(StandardField.FILE, ":article_dublinCore.pdf:PDF");
olly2018.setField(StandardField.GROUPS, "NO");
olly2018.setField(StandardField.HOWPUBLISHED, "online");
olly2018.setField(StandardField.KEYWORDS, "k1, k2");
olly2018.setField(StandardField.OWNER, "me");
olly2018.setField(StandardField.REVIEW, "review");
olly2018.setField(StandardField.URL, "https://www.olly2018.edu");
OLLY_2018.setCitationKey("Olly2018");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do withCitationKey and move the initialization to the variable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but moving the initialization to the variable is a bad idea because of how the test works: due to the MethodSources the entries have to be static, so we set the variables to the correct values and reset them again afterwards.

OLLY_2018.setField(StandardField.AUTHOR, "Olly and Johannes");
OLLY_2018.setField(StandardField.TITLE, "Stefan's palace");
OLLY_2018.setField(StandardField.JOURNAL, "Test Journal");
OLLY_2018.setField(StandardField.VOLUME, "1");
OLLY_2018.setField(StandardField.NUMBER, "1");
OLLY_2018.setField(StandardField.PAGES, "1-2");
OLLY_2018.setMonth(Month.MARCH);
OLLY_2018.setField(StandardField.ISSN, "978-123-123");
OLLY_2018.setField(StandardField.NOTE, "NOTE");
OLLY_2018.setField(StandardField.ABSTRACT, "ABSTRACT");
OLLY_2018.setField(StandardField.COMMENT, "COMMENT");
OLLY_2018.setField(StandardField.DOI, "10/3212.3123");
OLLY_2018.setField(StandardField.FILE, ":article_dublinCore.pdf:PDF");
OLLY_2018.setField(StandardField.GROUPS, "NO");
OLLY_2018.setField(StandardField.HOWPUBLISHED, "online");
OLLY_2018.setField(StandardField.KEYWORDS, "k1, k2");
OLLY_2018.setField(StandardField.OWNER, "me");
OLLY_2018.setField(StandardField.REVIEW, "review");
OLLY_2018.setField(StandardField.URL, "https://www.olly2018.edu");

LinkedFile linkedFile = createDefaultLinkedFile("existing.pdf", tempDir);
olly2018.setFiles(List.of(linkedFile));

toral2006.setField(StandardField.AUTHOR, "Toral, Antonio and Munoz, Rafael");
toral2006.setField(StandardField.TITLE, "A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia");
toral2006.setField(StandardField.BOOKTITLE, "Proceedings of EACL");
toral2006.setField(StandardField.PAGES, "56--61");
toral2006.setField(StandardField.EPRINTTYPE, "asdf");
toral2006.setField(StandardField.OWNER, "Ich");
toral2006.setField(StandardField.URL, "www.url.de");

toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));

vapnik2000.setCitationKey("vapnik2000");
vapnik2000.setField(StandardField.TITLE, "The Nature of Statistical Learning Theory");
vapnik2000.setField(StandardField.PUBLISHER, "Springer Science + Business Media");
vapnik2000.setField(StandardField.AUTHOR, "Vapnik, Vladimir N.");
vapnik2000.setField(StandardField.DOI, "10.1007/978-1-4757-3264-1");
vapnik2000.setField(StandardField.OWNER, "Ich");
OLLY_2018.setFiles(List.of(linkedFile));

TORAL_2006.setField(StandardField.AUTHOR, "Toral, Antonio and Munoz, Rafael");
TORAL_2006.setField(StandardField.TITLE, "A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia");
TORAL_2006.setField(StandardField.BOOKTITLE, "Proceedings of EACL");
TORAL_2006.setField(StandardField.PAGES, "56--61");
TORAL_2006.setField(StandardField.EPRINTTYPE, "asdf");
TORAL_2006.setField(StandardField.OWNER, "Ich");
TORAL_2006.setField(StandardField.URL, "www.url.de");

TORAL_2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));

VAPNIK_2000.setCitationKey("vapnik2000");
VAPNIK_2000.setField(StandardField.TITLE, "The Nature of Statistical Learning Theory");
VAPNIK_2000.setField(StandardField.PUBLISHER, "Springer Science + Business Media");
VAPNIK_2000.setField(StandardField.AUTHOR, "Vapnik, Vladimir N.");
VAPNIK_2000.setField(StandardField.DOI, "10.1007/978-1-4757-3264-1");
VAPNIK_2000.setField(StandardField.OWNER, "Ich");
}

/**
Expand All @@ -124,9 +124,9 @@ void setUp() throws IOException {
BibDatabase dataBase = databaseContext.getDatabase();

initBibEntries();
dataBase.insertEntry(olly2018);
dataBase.insertEntry(toral2006);
dataBase.insertEntry(vapnik2000);
dataBase.insertEntry(OLLY_2018);
dataBase.insertEntry(TORAL_2006);
dataBase.insertEntry(VAPNIK_2000);
}

@AfterEach
Expand All @@ -136,28 +136,28 @@ void reset() throws IOException {
entry.clearField(StandardField.FILE);
}
LinkedFile linkedFile = createDefaultLinkedFile("existing.pdf", tempDir);
olly2018.setFiles(List.of(linkedFile));
toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
OLLY_2018.setFiles(List.of(linkedFile));
TORAL_2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithValidPdfFileLinks")
void successfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
assertTrue(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(olly2018), abbreviationRepository));
assertTrue(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(OLLY_2018), abbreviationRepository));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithInvalidPdfFileLinks")
void unsuccessfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
assertFalse(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(olly2018), abbreviationRepository));
assertFalse(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(OLLY_2018), abbreviationRepository));
}

public static Stream<Arguments> provideBibEntriesWithValidPdfFileLinks() {
return Stream.of(Arguments.of(olly2018));
return Stream.of(Arguments.of(OLLY_2018));
}

public static Stream<Arguments> provideBibEntriesWithInvalidPdfFileLinks() {
return Stream.of(Arguments.of(vapnik2000), Arguments.of(toral2006));
return Stream.of(Arguments.of(VAPNIK_2000), Arguments.of(TORAL_2006));
}

@ParameterizedTest
Expand Down
Loading