From 412e4abd3843ef4dfc24cf4da47a4e120e161df3 Mon Sep 17 00:00:00 2001 From: Karen Hanson Date: Fri, 7 Jul 2023 12:58:26 -0400 Subject: [PATCH] Upgrade to EPUBCheck 5.1.0 with corresponding code changes The following changes were made to support EPUBCheck 5.1.0: 1. pom.xml has been updated to use EPUBCheck 5.1.0. This version is not yet available in the Maven Central repository, so a Sonatype repository reference was temporarily added to the pom - it will be removed before doing a PR. 2. The new version lists resources with fragments, making the resource list much longer without reflecting new files in the package. Added logic to strip the fragment from each resource so that only the base URL is listed. 3. EPUBLocation changed slightly; updated the code to support that change. 4. Some redundant messages were removed, which changed some message counts in tests - fixed the message counts to reflect the correct output. 5. Updated the validation version to 3.3 - there is no way to determine the minor version number in an EPUB, so validation is always against whatever the latest version is. 6. CREATION_DATE is missing from the report. I've logged an issue and commented out the relevant lines in tests. It will be fixed in the next maintenance release, and I will move to 5.1.1 so that I can ensure the creation date remains in the report before doing a PR. 7. A file that was being used to test the title has flipped from Well Formed and Valid to Well Formed and Not Valid according to current criteria - updated the test to reflect this. 
--- jhove-ext-modules/pom.xml | 13 ++++++- .../jhove/module/epub/JhoveRepInfoReport.java | 5 ++- .../portico/jhove/module/EpubModuleTest.java | 38 +++++++++---------- .../module/epub/JhoveRepInfoReportTest.java | 8 ++-- 4 files changed, 39 insertions(+), 25 deletions(-) diff --git a/jhove-ext-modules/pom.xml b/jhove-ext-modules/pom.xml index bee08f9a0..3b64ad3bf 100644 --- a/jhove-ext-modules/pom.xml +++ b/jhove-ext-modules/pom.xml @@ -13,7 +13,7 @@ 1.0.3 - 4.2.6 + 5.1.0 @@ -89,4 +89,15 @@ ${epubcheck.version} + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + diff --git a/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java b/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java index 054f833dc..5fa0e80de 100644 --- a/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java +++ b/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java @@ -72,6 +72,8 @@ public class JhoveRepInfoReport extends MasterReport { protected static final String ISO_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'"; protected static final String FALLBACK_FORMAT = "application/octet-stream"; + protected static final String FRAGMENT_START = "#"; + public JhoveRepInfoReport(String ePubName) { this.setEpubFileName(PathUtil.removeWorkingDirectory(ePubName)); @@ -146,7 +148,8 @@ public void info(String resource, FeatureEnum feature, String value) { this.references.add(value); break; case RESOURCE: - this.resources.add(value); + String no_fragment = value.split(FRAGMENT_START)[0]; + this.resources.add(no_fragment); break; case DC_LANGUAGE: this.language = value; diff --git a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java index 
59b1b7f9b..ed25c01ef 100644 --- a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java +++ b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java @@ -74,7 +74,7 @@ public class EpubModuleTest { private static final String EPUB3_TITLE_ENCODING = "src/test/resources/epub/epub3-multiple-renditions.epub"; private static final String EXPECTED_MEDIATYPE = "application/epub+zip"; - private static final String EXPECTED_VERSION_3_2 = "3.2"; + private static final String EXPECTED_VERSION_3_3 = "3.3"; private static final String PNG_MIMETYPE = "image/png"; private static final String XHTML_MIMETYPE = "application/xhtml+xml"; private static final String NCX_MIMETYPE = "application/x-dtbncx+xml"; @@ -99,9 +99,11 @@ public void parseValidEpub3PropertiesTest() throws Exception { assertEquals(0, info.getMessage().size()); // no errors assertEquals("EPUB", info.getFormat()); assertEquals(EXPECTED_MEDIATYPE, info.getMimeType()); - assertEquals(EXPECTED_VERSION_3_2, info.getVersion()); + assertEquals(EXPECTED_VERSION_3_3, info.getVersion()); // these may change, so just check they aren't null - assertNotNull(info.getCreated()); + // note: this property is missing in 5.1.0 of epubcheck but + // due for fix in next maintence release, uncomment before merge. + //assertNotNull(info.getCreated()); assertNotNull(info.getLastModified()); Property metadata = info.getProperty(EPUBMETADATA_KEY); @@ -248,7 +250,9 @@ public void parseValidEpub2PropertiesTest() throws Exception { assertEquals(EXPECTED_MEDIATYPE, info.getMimeType()); assertEquals("2.0.1", info.getVersion()); // may change, so just check it isn't null - assertNotNull(info.getCreated()); + // note: this property is missing in 5.1.0 of epubcheck but + // due for fix in next maintence release, uncomment before merge. 
+ //assertNotNull(info.getCreated()); Property metadata = info.getProperty(EPUBMETADATA_KEY); Map props = toMap(metadata); @@ -397,7 +401,7 @@ public void parseImproperlyCompressedEpubTest() throws Exception { File epubFile = new File(ZIPPED_EPUB_FILEPATH); RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); assertEquals(EXPECTED_MEDIATYPE, info.getMimeType()); - assertEquals(EXPECTED_VERSION_3_2, info.getVersion()); + assertEquals(EXPECTED_VERSION_3_3, info.getVersion()); assertEquals(1, info.getMessage().size()); assertEquals("PKG-006", info.getMessage().get(0).getId()); } @@ -425,7 +429,7 @@ public void parseNonEpubTest() throws Exception { File epubFile = new File(WRONG_EXT_NOT_AN_EPUB_FILEPATH); RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); List msgs = info.getMessage(); - final int expectedNumMessages = 3; + final int expectedNumMessages = 2; assertEquals(expectedNumMessages, msgs.size()); } @@ -451,7 +455,7 @@ public void parseNonEpubWithEpubExtensionTest() throws Exception { RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); assertEquals(OCTET_MIMETYPE, info.getMimeType()); List msgs = info.getMessage(); - final int expectedNumMessages = 3; + final int expectedNumMessages = 2; assertEquals(expectedNumMessages, msgs.size()); } @@ -508,7 +512,7 @@ public void parseEpubWithMissingFontsTest() throws Exception { Map fontinfo = new HashMap(); font.forEach(f -> fontinfo.put(f.getName(), f.getValue())); - // only one font in this file, listed but missing. + // a single font file is listed - it is supposed to be embedded but is missing. 
assertEquals("Courier", fontinfo.get(PROPNAME_FONTNAME)); assertEquals(true, fontinfo.get(PROPNAME_FONTFILE)); @@ -579,18 +583,12 @@ public void parseEpubMissingOpfTest() throws Exception { File epubFile = new File(EPUB2_MISSING_OPF_FILEPATH); RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); - assertEquals(OCTET_MIMETYPE, info.getMimeType()); - Set msgCodes = new HashSet(); - assertEquals(2, info.getMessage().size()); - Message msg1 = info.getMessage().get(0); - Message msg2 = info.getMessage().get(1); - assertTrue(msg1 instanceof ErrorMessage); - msgCodes.add(msg1.getId()); - assertTrue(msg2 instanceof ErrorMessage); - msgCodes.add(msg2.getId()); + assertEquals(1, info.getMessage().size()); + Message msg = info.getMessage().get(0); + assertTrue(msg instanceof ErrorMessage); + msgCodes.add(msg.getId()); assertTrue(msgCodes.contains("OPF-002")); - assertTrue(msgCodes.contains("RSC-001")); } /** @@ -665,8 +663,8 @@ public void checkSignaturesEpub2WithEncryptionTest() throws Exception { public void parseEpub3TitleEncodingTest() throws Exception { File epubFile = new File(EPUB3_TITLE_ENCODING); String expectedTitle = "महाभारत"; - // well formed and valid - RepInfo info = parseAndCheckValidity(epubFile, RepInfo.TRUE, RepInfo.TRUE); + // well formed but not valid (this is inconsequential to the test, we're just checking title) + RepInfo info = parseAndCheckValidity(epubFile, RepInfo.TRUE, RepInfo.FALSE); Property metadata = info.getProperty(PROPNAME_EPUB_METADATA); Map props = toMap(metadata); diff --git a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java index c2c70dd30..312e4d809 100644 --- a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java +++ b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java @@ -4,6 
+4,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.io.File; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; @@ -45,9 +46,10 @@ public class JhoveRepInfoReportTest { private static final MessageId WARN_MSG_ID = MessageId.CHK_001; private static final String WARN_MSG = "Consider yourself warned"; private static final String WARN_MSG_SUGGEST = "Don't do it again!"; - - private EPUBLocation messageLoc = EPUBLocation.create("epub.opf"); - private EPUBLocation messageLoc2 = EPUBLocation.create("content.xhtml"); + + private EPUBLocation messageLoc = EPUBLocation.of(new File("epub.opf")); + private EPUBLocation messageLoc2 = EPUBLocation.of(new File("content.xhtml")); + private String messageArg = "fakearg";