diff --git a/readium/navigator/src/main/java/org/readium/r2/navigator/epub/HtmlInjector.kt b/readium/navigator/src/main/java/org/readium/r2/navigator/epub/HtmlInjector.kt index 92faf7169d..ca7ba8a30f 100644 --- a/readium/navigator/src/main/java/org/readium/r2/navigator/epub/HtmlInjector.kt +++ b/readium/navigator/src/main/java/org/readium/r2/navigator/epub/HtmlInjector.kt @@ -15,6 +15,7 @@ import org.readium.r2.shared.publication.services.isProtected import org.readium.r2.shared.util.AbsoluteUrl import org.readium.r2.shared.util.Try import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.data.ReadError import org.readium.r2.shared.util.mediatype.MediaType import org.readium.r2.shared.util.resource.Resource import org.readium.r2.shared.util.resource.TransformingResource @@ -42,7 +43,12 @@ internal fun Resource.injectHtml( val injectables = mutableListOf() if (publication.metadata.presentation.layout == EpubLayout.REFLOWABLE) { - content = css.injectHtml(content) + content = try { + css.injectHtml(content) + } catch (e: Exception) { + return@TransformingResource Try.failure(ReadError.Decoding(e)) + } + injectables.add( script( baseHref.resolve(Url("readium/scripts/readium-reflowable.js")!!) diff --git a/readium/shared/src/main/java/org/readium/r2/shared/extensions/String.kt b/readium/shared/src/main/java/org/readium/r2/shared/extensions/String.kt index baa7f5ab14..77bf8d82be 100644 --- a/readium/shared/src/main/java/org/readium/r2/shared/extensions/String.kt +++ b/readium/shared/src/main/java/org/readium/r2/shared/extensions/String.kt @@ -101,3 +101,9 @@ internal fun String.percentEncodedPath(): String = */ internal fun String.percentEncodedQuery(): String = Uri.encode(this, "$+,/?:=@") + +/** + * Returns whether the String receiver contains only printable ASCII characters. 
+ */ +internal fun String.isPrintableAscii(): Boolean = + all { it.code in 0x20..0x7E /* 0x7F is DEL, a control character, so it is not printable */ } diff --git a/readium/shared/src/main/java/org/readium/r2/shared/publication/Locator.kt b/readium/shared/src/main/java/org/readium/r2/shared/publication/Locator.kt index b32bb5dbee..4a4b2c05b5 100644 --- a/readium/shared/src/main/java/org/readium/r2/shared/publication/Locator.kt +++ b/readium/shared/src/main/java/org/readium/r2/shared/publication/Locator.kt @@ -19,6 +19,7 @@ import org.readium.r2.shared.JSONable import org.readium.r2.shared.extensions.* import org.readium.r2.shared.toJSON import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromLegacyHref import org.readium.r2.shared.util.logging.WarningLogger import org.readium.r2.shared.util.logging.log import org.readium.r2.shared.util.mediatype.MediaType diff --git a/readium/shared/src/main/java/org/readium/r2/shared/publication/epub/EpubEncryptionParser.kt b/readium/shared/src/main/java/org/readium/r2/shared/publication/epub/EpubEncryptionParser.kt index 5e208554ce..458c515be1 100644 --- a/readium/shared/src/main/java/org/readium/r2/shared/publication/epub/EpubEncryptionParser.kt +++ b/readium/shared/src/main/java/org/readium/r2/shared/publication/epub/EpubEncryptionParser.kt @@ -10,6 +10,7 @@ import org.readium.r2.shared.InternalReadiumApi import org.readium.r2.shared.publication.encryption.Encryption import org.readium.r2.shared.publication.protection.ContentProtection import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.xml.ElementNode @InternalReadiumApi @@ -29,7 +30,7 @@ public object EpubEncryptionParser { private fun parseEncryptedData(node: ElementNode): Pair? 
{ val resourceURI = node.getFirst("CipherData", Namespaces.ENC) ?.getFirst("CipherReference", Namespaces.ENC)?.getAttr("URI") - ?.let { Url(it) ?: Url.fromDecodedPath(it) } + ?.let { Url.fromEpubHref(it) } ?: return null val retrievalMethod = node.getFirst("KeyInfo", Namespaces.SIG) ?.getFirst("RetrievalMethod", Namespaces.SIG)?.getAttr("URI") diff --git a/readium/shared/src/main/java/org/readium/r2/shared/util/Url.kt b/readium/shared/src/main/java/org/readium/r2/shared/util/Url.kt index 6d880631fe..0452446a9b 100644 --- a/readium/shared/src/main/java/org/readium/r2/shared/util/Url.kt +++ b/readium/shared/src/main/java/org/readium/r2/shared/util/Url.kt @@ -15,6 +15,7 @@ import java.net.URL import kotlinx.parcelize.Parcelize import org.readium.r2.shared.DelicateReadiumApi import org.readium.r2.shared.InternalReadiumApi +import org.readium.r2.shared.extensions.isPrintableAscii import org.readium.r2.shared.extensions.percentEncodedPath import org.readium.r2.shared.extensions.tryOrNull @@ -43,19 +44,6 @@ public sealed class Url : Parcelable { return invoke(Uri.parse(url)) } - /** - * Creates an [Url] from a legacy HREF. - * - * For example, if it is a relative path such as `/dir/my chapter.html`, it will be - * converted to the valid relative URL `dir/my%20chapter.html`. - * - * Only use this API when you are upgrading to Readium 3.x and migrating the HREFs stored in - * your database. See the 3.0 migration guide for more information. - */ - @DelicateReadiumApi - public fun fromLegacyHref(href: String): Url? = - AbsoluteUrl(href) ?: fromDecodedPath(href.removePrefix("/")) - internal operator fun invoke(uri: Uri): Url? = if (uri.isAbsolute) { AbsoluteUrl(uri) @@ -306,6 +294,32 @@ public class RelativeUrl private constructor(override val uri: Uri) : Url() { } } +/** + * Creates an [Url] from a legacy HREF. + * + * For example, if it is a relative path such as `/dir/my chapter.html`, it will be + * converted to the valid relative URL `dir/my%20chapter.html`. 
+ * + * Only use this API when you are upgrading to Readium 3.x and migrating the HREFs stored in + * your database. See the 3.0 migration guide for more information. + */ +@DelicateReadiumApi +public fun Url.Companion.fromLegacyHref(href: String): Url? = + AbsoluteUrl(href) ?: Url.fromDecodedPath(href.removePrefix("/")) + +/** + * According to the EPUB specification, the HREFs in the EPUB package must be valid URLs (so + * percent-encoded). Unfortunately, many EPUBs don't follow this rule, and use invalid HREFs such + * as `my chapter.html` or `/dir/my chapter.html`. + * + * As a workaround, we assume the HREFs are valid percent-encoded URLs, and fall back to decoded paths + * if we can't parse the URL. The result is `null` only if the decoded-path fallback also yields no URL. + */ +@InternalReadiumApi +public fun Url.Companion.fromEpubHref(href: String): Url? { + return (Url(href) ?: Url.fromDecodedPath(href)) +} + public fun File.toUrl(): AbsoluteUrl = checkNotNull(AbsoluteUrl(Uri.fromFile(this))) @@ -353,8 +367,9 @@ private fun Uri.addFileAuthority(): Uri = } private fun String.isValidUrl(): Boolean = - // Uri.parse doesn't really validate the URL, it could contain invalid characters. - isNotBlank() && tryOrNull { URI(this) } != null + // Uri.parse doesn't really validate the URL, it could contain invalid characters, so we use + // URI. However, URI allows some non-ASCII characters. + isNotBlank() && isPrintableAscii() && tryOrNull { URI(this) } != null @JvmInline public value class FileExtension( diff --git a/readium/shared/src/test/java/org/readium/r2/shared/extensions/StringTest.kt b/readium/shared/src/test/java/org/readium/r2/shared/extensions/StringTest.kt new file mode 100644 index 0000000000..c3501dd7b7 --- /dev/null +++ b/readium/shared/src/test/java/org/readium/r2/shared/extensions/StringTest.kt @@ -0,0 +1,22 @@ +/* + * Copyright 2024 Readium Foundation. All rights reserved. + * Use of this source code is governed by the BSD-style license + * available in the top-level LICENSE file of the project. 
+ */ + +package org.readium.r2.shared.extensions + +import kotlin.test.assertFalse +import kotlin.test.assertTrue +import org.junit.Test + +class StringTest { + + @Test + fun `checks if a string is made of printable ASCII characters`() { + assertTrue("".isPrintableAscii()) /* empty string: vacuously true, there is no character to reject */ + assertTrue(" foo/@bar".isPrintableAscii()) /* starts with a space (0x20), the lowest accepted code */ + assertFalse("école".isPrintableAscii()) /* 'é' (0xE9) lies above the ASCII range */ + assertFalse("\u0001 non printable".isPrintableAscii()) /* U+0001 is a control character below 0x20 */ + } +} diff --git a/readium/shared/src/test/java/org/readium/r2/shared/util/UrlTest.kt b/readium/shared/src/test/java/org/readium/r2/shared/util/UrlTest.kt index d887abc74b..ed214fba32 100644 --- a/readium/shared/src/test/java/org/readium/r2/shared/util/UrlTest.kt +++ b/readium/shared/src/test/java/org/readium/r2/shared/util/UrlTest.kt @@ -23,12 +23,15 @@ class UrlTest { assertNull(Url("")) assertNull(Url(" ")) assertNull(Url("invalid character")) + assertNull(Url("école")) assertNull(AbsoluteUrl(" ")) assertNull(AbsoluteUrl("invalid character")) + assertNull(AbsoluteUrl("école")) assertNull(RelativeUrl(" ")) assertNull(RelativeUrl("invalid character")) + assertNull(RelativeUrl("école")) } @Test diff --git a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/EpubParser.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/EpubParser.kt index fe54dae184..3129dd9617 100644 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/EpubParser.kt +++ b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/EpubParser.kt @@ -27,6 +27,7 @@ import org.readium.r2.shared.util.data.decodeXml import org.readium.r2.shared.util.data.readDecodeOrElse import org.readium.r2.shared.util.data.readDecodeOrNull import org.readium.r2.shared.util.format.EpubSpecification +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.getOrElse import org.readium.r2.shared.util.logging.WarningLogger import org.readium.r2.shared.util.mediatype.MediaType @@ -35,7 +36,6 @@ import 
org.readium.r2.shared.util.resource.TransformingContainer import org.readium.r2.shared.util.use import org.readium.r2.shared.util.xml.ElementNode import org.readium.r2.streamer.parser.PublicationParser -import org.readium.r2.streamer.parser.epub.extensions.fromEpubHref /** * Parses a Publication from an EPUB publication. diff --git a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/MetadataParser.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/MetadataParser.kt index dd00ed2680..a42a1865ea 100644 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/MetadataParser.kt +++ b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/MetadataParser.kt @@ -8,9 +8,9 @@ package org.readium.r2.streamer.parser.epub import org.readium.r2.shared.publication.Href import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.mediatype.MediaType import org.readium.r2.shared.util.xml.ElementNode -import org.readium.r2.streamer.parser.epub.extensions.fromEpubHref internal class MetadataParser( private val prefixMap: Map diff --git a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NavigationDocumentParser.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NavigationDocumentParser.kt index aa1e51054d..6bfb25050a 100644 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NavigationDocumentParser.kt +++ b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NavigationDocumentParser.kt @@ -8,8 +8,8 @@ package org.readium.r2.streamer.parser.epub import org.readium.r2.shared.publication.Link import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.xml.ElementNode -import org.readium.r2.streamer.parser.epub.extensions.fromEpubHref internal object NavigationDocumentParser { diff --git 
a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NcxParser.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NcxParser.kt index a0db885df4..d71e3232a2 100644 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NcxParser.kt +++ b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NcxParser.kt @@ -8,8 +8,8 @@ package org.readium.r2.streamer.parser.epub import org.readium.r2.shared.publication.Link import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.xml.ElementNode -import org.readium.r2.streamer.parser.epub.extensions.fromEpubHref internal object NcxParser { diff --git a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/PackageDocument.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/PackageDocument.kt index 0e3afb4dd7..3dac8a7cb1 100644 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/PackageDocument.kt +++ b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/PackageDocument.kt @@ -8,8 +8,8 @@ package org.readium.r2.streamer.parser.epub import org.readium.r2.shared.publication.ReadingProgression import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.xml.ElementNode -import org.readium.r2.streamer.parser.epub.extensions.fromEpubHref internal data class PackageDocument( val path: Url, diff --git a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/SmilParser.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/SmilParser.kt index 460d8b8007..d5022197f4 100644 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/SmilParser.kt +++ b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/SmilParser.kt @@ -9,8 +9,8 @@ package org.readium.r2.streamer.parser.epub import org.readium.r2.shared.MediaOverlayNode import 
org.readium.r2.shared.MediaOverlays import org.readium.r2.shared.util.Url +import org.readium.r2.shared.util.fromEpubHref import org.readium.r2.shared.util.xml.ElementNode -import org.readium.r2.streamer.parser.epub.extensions.fromEpubHref internal object SmilParser { /* According to https://www.w3.org/publishing/epub3/epub-mediaoverlays.html#sec-overlays-content-conf diff --git a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/extensions/UrlExt.kt b/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/extensions/UrlExt.kt deleted file mode 100644 index 4280496e76..0000000000 --- a/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/extensions/UrlExt.kt +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright 2023 Readium Foundation. All rights reserved. - * Use of this source code is governed by the BSD-style license - * available in the top-level LICENSE file of the project. - */ - -package org.readium.r2.streamer.parser.epub.extensions - -import org.readium.r2.shared.util.Url - -/** - * According to the EPUB specification, the HREFs in the EPUB package must be valid URLs (so - * percent-encoded). Unfortunately, many EPUBs don't follow this rule, and use invalid HREFs such - * as `my chapter.html` or `/dir/my chapter.html`. - * - * As a workaround, we assume the HREFs are valid percent-encoded URLs, and fallback to decoded paths - * if we can't parse the URL. - */ -internal fun Url.Companion.fromEpubHref(href: String): Url? = - Url(href) ?: fromDecodedPath(href)