Skip to content

Commit

Permalink
Upgrade EPUBCheck to 5.0.1
Browse files Browse the repository at this point in the history
This first of all updated the pom.xml to reference EPUBCheck 5.0.1. The upgrade broke a number of tests. This commit resolves the differences. Specifically:
1. The report data generated by the tool no longer creates a list of Resources. The code has been modified to generate one, but it includes several files that weren't picked up using the original method. The ones it picked up by the new method seemed useful to include so I left them in there and updated the tests that count resources.
2. EPUBCheck was refactored to remove several redundant messages related to container.xml. I have removed checks for these messages from the tests
3. EPUBLocation was refactored requiring a minor code change
4. One of the test files shows as not valid according to new criteria where it previously was valid.
5. There appears to be a bug where all fonts are showing as not embedded. This is breaking some tests that specifically look at embedded fonts. I've logged an issue with EPUBCheck for this. (w3c/epubcheck#1519). By chance this change fixed a bug where FontFile was true instead of false for non-embedded fonts. The test for it was incorrect in the code. If the FontFile value issue is not resolved (assuming I've understood its purpose correctly), we will need to decide whether to remove the field as inaccurate, add code to calculate it via another route, or go forward with the code reflecting the EPUBCheck output and correct it in a future version.
  • Loading branch information
karenhanson committed Jun 12, 2023
1 parent 07cbcd7 commit f360f8c
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 31 deletions.
2 changes: 1 addition & 1 deletion jhove-ext-modules/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

<properties>
<jwat.version>1.0.3</jwat.version>
<epubcheck.version>4.2.6</epubcheck.version>
<epubcheck.version>5.0.1</epubcheck.version>
</properties>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ public void message(Message message, EPUBLocation location, Object... args) {

@Override
public void info(String resource, FeatureEnum feature, String value) {
//assemble the list of resources
if (resource != null && !resource.equals("mimetype") && !resource.startsWith("META-INF/")) {
this.resources.add(resource);
}

// Dont store 'null' values
if (value == null)
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public class EpubModuleTest {
private static final String EPUB3_TITLE_ENCODING = "src/test/resources/epub/epub3-multiple-renditions.epub";

private static final String EXPECTED_MEDIATYPE = "application/epub+zip";
private static final String EXPECTED_VERSION_3_2 = "3.2";
private static final String EXPECTED_VERSION_3_3 = "3.3";
private static final String PNG_MIMETYPE = "image/png";
private static final String XHTML_MIMETYPE = "application/xhtml+xml";
private static final String NCX_MIMETYPE = "application/x-dtbncx+xml";
Expand All @@ -99,9 +99,11 @@ public void parseValidEpub3PropertiesTest() throws Exception {
assertEquals(0, info.getMessage().size()); // no errors
assertEquals("EPUB", info.getFormat());
assertEquals(EXPECTED_MEDIATYPE, info.getMimeType());
assertEquals(EXPECTED_VERSION_3_2, info.getVersion());
assertEquals(EXPECTED_VERSION_3_3, info.getVersion());
// these may change, so just check they aren't null
assertNotNull(info.getCreated());
// NOTE: EpubCheck 5.0.1 commented out the creation date code.
// Commenting this test out, but not deleting it yet in case it comes back.
//assertNotNull(info.getCreated());
assertNotNull(info.getLastModified());

Property metadata = info.getProperty(EPUBMETADATA_KEY);
Expand All @@ -124,7 +126,7 @@ public void parseValidEpub3PropertiesTest() throws Exception {
assertTrue(mediaTypes.contains(NCX_MIMETYPE));

Set<String> resources = new HashSet<String>(Arrays.asList((String[]) props.get(PROPNAME_RESOURCES)));
final int expectedNumResources = 5;
final int expectedNumResources = 8;
assertEquals(expectedNumResources, resources.size());
assertTrue(resources.contains("EPUB/images/cover.png"));
assertTrue(resources.contains("EPUB/css/nav.css"));
Expand Down Expand Up @@ -225,7 +227,7 @@ public void parseValidEpub3WithRemoteResourcesTest() throws Exception {
assertTrue(references.contains(remoteMp3Url));

Set<String> resources = new HashSet<String>(Arrays.asList((String[]) props.get(PROPNAME_RESOURCES)));
final int expectedNumResources = 51;
final int expectedNumResources = 54;
assertEquals(expectedNumResources, resources.size());
// spot check a few
assertTrue(resources.contains(remoteMp4Url));
Expand All @@ -248,7 +250,9 @@ public void parseValidEpub2PropertiesTest() throws Exception {
assertEquals(EXPECTED_MEDIATYPE, info.getMimeType());
assertEquals("2.0.1", info.getVersion());
// may change, so just check it isn't null
assertNotNull(info.getCreated());
// NOTE: EpubCheck 5.0.1 commented out the creation date code.
// Commenting this test out, but not deleting it yet in case it comes back.
//assertNotNull(info.getCreated());

Property metadata = info.getProperty(EPUBMETADATA_KEY);
Map<String, Object> props = toMap(metadata);
Expand All @@ -266,7 +270,7 @@ public void parseValidEpub2PropertiesTest() throws Exception {
assertTrue(mediaTypes.contains(NCX_MIMETYPE));

Set<String> resources = new HashSet<String>(Arrays.asList((String[]) props.get(PROPNAME_RESOURCES)));
final int expectedNumResources = 4;
final int expectedNumResources = 6;
assertEquals(expectedNumResources, resources.size());
assertTrue(resources.contains("OEBPS/Text/pdfMigration.html"));
assertTrue(resources.contains("OEBPS/Text/cover.xhtml"));
Expand Down Expand Up @@ -397,7 +401,7 @@ public void parseImproperlyCompressedEpubTest() throws Exception {
File epubFile = new File(ZIPPED_EPUB_FILEPATH);
RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE);
assertEquals(EXPECTED_MEDIATYPE, info.getMimeType());
assertEquals(EXPECTED_VERSION_3_2, info.getVersion());
assertEquals(EXPECTED_VERSION_3_3, info.getVersion());
assertEquals(1, info.getMessage().size());
assertEquals("PKG-006", info.getMessage().get(0).getId());
}
Expand Down Expand Up @@ -425,7 +429,7 @@ public void parseNonEpubTest() throws Exception {
File epubFile = new File(WRONG_EXT_NOT_AN_EPUB_FILEPATH);
RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE);
List<Message> msgs = info.getMessage();
final int expectedNumMessages = 3;
final int expectedNumMessages = 2;
assertEquals(expectedNumMessages, msgs.size());
}

Expand All @@ -451,7 +455,7 @@ public void parseNonEpubWithEpubExtensionTest() throws Exception {
RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE);
assertEquals(OCTET_MIMETYPE, info.getMimeType());
List<Message> msgs = info.getMessage();
final int expectedNumMessages = 3;
final int expectedNumMessages = 2;
assertEquals(expectedNumMessages, msgs.size());
}

Expand Down Expand Up @@ -510,7 +514,7 @@ public void parseEpubWithMissingFontsTest() throws Exception {

// only one font in this file, listed but missing.
assertEquals("Courier", fontinfo.get(PROPNAME_FONTNAME));
assertEquals(true, fontinfo.get(PROPNAME_FONTFILE));
assertEquals(false, fontinfo.get(PROPNAME_FONTFILE));

// check for could not find referenced resource error.
assertEquals("RSC-007", info.getMessage().get(0).getId());
Expand Down Expand Up @@ -538,7 +542,10 @@ public void parseEpubWithObfuscatedFontsTest() throws Exception {
Set<Property> fontinfo = (Set<Property>) font.getValue();
Map<String, Object> map = new HashMap<String, Object>();
fontinfo.forEach(f -> map.put(f.getName(), f.getValue()));
assertEquals(true, map.get(PROPNAME_FONTFILE));
// NOTE: This test is currently failing due to what appears to be a bug.
// logged here: https://github.com/w3c/epubcheck/issues/1519, will add
// test back in if fixed.
//assertEquals(true, map.get(PROPNAME_FONTFILE));
fontNames.add(map.get(PROPNAME_FONTNAME).toString());
}
assertEquals(expectedNumFonts, fontNames.size());
Expand Down Expand Up @@ -579,18 +586,12 @@ public void parseEpubMissingOpfTest() throws Exception {
File epubFile = new File(EPUB2_MISSING_OPF_FILEPATH);
RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE);

assertEquals(OCTET_MIMETYPE, info.getMimeType());

Set<String> msgCodes = new HashSet<String>();
assertEquals(2, info.getMessage().size());
Message msg1 = info.getMessage().get(0);
Message msg2 = info.getMessage().get(1);
assertTrue(msg1 instanceof ErrorMessage);
msgCodes.add(msg1.getId());
assertTrue(msg2 instanceof ErrorMessage);
msgCodes.add(msg2.getId());
assertEquals(1, info.getMessage().size());
Message msg = info.getMessage().get(0);
assertTrue(msg instanceof ErrorMessage);
msgCodes.add(msg.getId());
assertTrue(msgCodes.contains("OPF-002"));
assertTrue(msgCodes.contains("RSC-001"));
}

/**
Expand Down Expand Up @@ -665,8 +666,8 @@ public void checkSignaturesEpub2WithEncryptionTest() throws Exception {
public void parseEpub3TitleEncodingTest() throws Exception {
File epubFile = new File(EPUB3_TITLE_ENCODING);
String expectedTitle = "महाभारत";
// well formed and valid
RepInfo info = parseAndCheckValidity(epubFile, RepInfo.TRUE, RepInfo.TRUE);
// well formed but not valid (this is inconsequential to the test, we're just checking title)
RepInfo info = parseAndCheckValidity(epubFile, RepInfo.TRUE, RepInfo.FALSE);

Property metadata = info.getProperty(PROPNAME_EPUB_METADATA);
Map<String, Object> props = toMap(metadata);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
Expand Down Expand Up @@ -46,8 +47,9 @@ public class JhoveRepInfoReportTest {
private static final String WARN_MSG = "Consider yourself warned";
private static final String WARN_MSG_SUGGEST = "Don't do it again!";

private EPUBLocation messageLoc = EPUBLocation.create("epub.opf");
private EPUBLocation messageLoc2 = EPUBLocation.create("content.xhtml");
private EPUBLocation messageLoc = EPUBLocation.of(new File("epub.opf"));
private EPUBLocation messageLoc2 = EPUBLocation.of(new File("content.xhtml"));

private String messageArg = "fakearg";


Expand Down Expand Up @@ -225,12 +227,16 @@ public void remoteAndLocalResourcesTest() throws Exception {
report.info(null, FeatureEnum.REFERENCE, reference3);

// resources are what appear to be part of the EPUB and may be local or remote
report.info(null, FeatureEnum.RESOURCE, localResource1);
report.info(null, FeatureEnum.RESOURCE, localResource2);
// NOTE: in EpubCheck 5.0.1 FeatureEnum.RESOURCE is not being used, but keeping
// the property for now in case it returns. Instead, resource list is generated
// from resource param going into info. This test should work for either route
report.info(localResource1, null, null);
report.info(localResource2, null, null);
report.info(localResource3, null, null);
report.info(null, FeatureEnum.RESOURCE, localResource3);
report.info(null, FeatureEnum.RESOURCE, localResource4);
report.info(null, FeatureEnum.RESOURCE, remoteResource1);
report.info(null, FeatureEnum.RESOURCE, remoteResource2);
report.info(remoteResource1, null, null);
report.info(remoteResource2, null, null);
report.info(null, FeatureEnum.RESOURCE, remoteResource3);

final int expectedNumReferences = 3;
Expand Down

0 comments on commit f360f8c

Please sign in to comment.