Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unescape URLs in EPUB 2 NCX and EPUB 3 navigation documents #98

Merged
merged 1 commit into from
May 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,22 @@ public class Epub2NcxReaderTests
</ncx>
""";

// NCX fixture with a single navPoint whose 'content' element has a percent-encoded 'src' attribute
// ("chapter%31.html", where "%31" is the URI escape sequence for the character "1").
private const string NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE = """
<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/">
<head />
<docTitle />
<navMap>
<navPoint id="navpoint-1">
<navLabel>
<text>Chapter 1</text>
</navLabel>
<content src="chapter%31.html" />
</navPoint>
</navMap>
</ncx>
""";

private const string NCX_FILE_WITHOUT_CONTENT_SRC_ATTRIBUTE = """
<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/">
Expand Down Expand Up @@ -877,6 +893,42 @@ public async void ReadEpub2NcxAsyncWithoutNavLabelTextTest()
await TestFailingReadOperation(NCX_FILE_WITHOUT_NAVLABEL_TEXT_ELEMENT);
}

// Feeds the reader an NCX file whose 'content' element has src="chapter%31.html" and expects
// the resulting Epub2NcxContent to carry the unescaped source "chapter1.html".
[Fact(DisplayName = "Reading an NCX file with a URI-escaped 'src' attribute in a 'content' XML element should succeed")]
public async void ReadEpub2NcxAsyncWithEscapedContentSrcTest()
{
    // Build the expected object graph bottom-up, starting from the innermost elements.
    Epub2NcxNavigationLabel chapterLabel = new(text: "Chapter 1");
    Epub2NcxContent unescapedContent = new(source: "chapter1.html");
    Epub2NcxNavigationPoint chapterNavigationPoint = new
    (
        id: "navpoint-1",
        navigationLabels: new List<Epub2NcxNavigationLabel> { chapterLabel },
        content: unescapedContent
    );
    Epub2NcxNavigationMap navigationMap = new
    (
        items: new List<Epub2NcxNavigationPoint> { chapterNavigationPoint }
    );
    Epub2Ncx expectedNcx = new
    (
        filePath: NCX_FILE_PATH,
        head: new Epub2NcxHead(),
        docTitle: null,
        docAuthors: null,
        navMap: navigationMap,
        pageList: null,
        navLists: null
    );

    await TestSuccessfulReadOperation(NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE, expectedNcx);
}

[Fact(DisplayName = "ReadEpub2NcxAsync should throw Epub2NcxException if a 'content' XML element has no 'src' attribute")]
public async void ReadEpub2NcxAsyncWithoutContentSrcTest()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,20 @@ public class Epub3NavDocumentReaderTests
</html>
""";

// EPUB 3 navigation document fixture with a single TOC entry whose anchor has a percent-encoded
// 'href' attribute ("chapter%31.html", where "%31" is the URI escape sequence for the character "1").
private const string NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT = """
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<body>
<nav epub:type="toc">
<ol>
<li>
<a href="chapter%31.html">Chapter 1</a>
</li>
</ol>
</nav>
</body>
</html>
""";

private static EpubPackage MinimalEpubPackageWithNav =>
new
(
Expand Down Expand Up @@ -402,6 +416,37 @@ public async void ReadEpub3NavDocumentAsyncWithEmptyLiElement()
await TestFailingReadOperation(NAV_FILE_WITH_EMPTY_LI_ELEMENT);
}

// Feeds the reader a navigation document whose anchor has href="chapter%31.html" and expects
// the resulting Epub3NavAnchor to carry the unescaped href "chapter1.html".
[Fact(DisplayName = "Reading a NAV file with a URI-escaped 'href' attribute in an 'a' XML element should succeed")]
public async void ReadEpub3NavDocumentAsyncWithEscapedAHrefTest()
{
    // Build the expected object graph bottom-up, starting from the anchor.
    Epub3NavAnchor unescapedAnchor = new
    (
        href: "chapter1.html",
        text: "Chapter 1"
    );
    Epub3NavLi chapterListItem = new(anchor: unescapedAnchor);
    Epub3NavOl chapterList = new
    (
        lis: new List<Epub3NavLi> { chapterListItem }
    );
    Epub3Nav tocNav = new
    (
        type: Epub3StructuralSemanticsProperty.TOC,
        ol: chapterList
    );
    Epub3NavDocument expectedNavDocument = new
    (
        filePath: NAV_FILE_PATH,
        navs: new List<Epub3Nav> { tocNav }
    );

    await TestSuccessfulReadOperation(NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT, expectedNavDocument);
}

private static async Task TestSuccessfulReadOperation(string navFileContent, Epub3NavDocument expectedEpub3NavDocument, EpubReaderOptions? epubReaderOptions = null)
{
TestZipFile testZipFile = CreateTestZipFileWithNavFile(navFileContent);
Expand Down
38 changes: 38 additions & 0 deletions Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,17 @@ public class PackageReaderTests
</package>
""";

// OPF package fixture with a single manifest item whose 'href' attribute is percent-encoded
// ("chapter%31.html", where "%31" is the URI escape sequence for the character "1").
private const string OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM = $"""
<?xml version='1.0' encoding='UTF-8'?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
<metadata />
<manifest>
<item id="item-1" href="chapter%31.html" media-type="application/xhtml+xml" />
</manifest>
<spine />
</package>
""";

private const string OPF_FILE_WITHOUT_HREF_IN_MANIFEST_ITEM = $"""
<?xml version='1.0' encoding='UTF-8'?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
Expand Down Expand Up @@ -743,6 +754,33 @@ public async void ReadPackageWithoutManifestItemIdWithSkippingInvalidManifestIte
await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITHOUT_ID_IN_MANIFEST_ITEM, MinimalEpub3Package);
}

// Feeds the reader an OPF package whose manifest item has href="chapter%31.html" and expects
// the resulting EpubManifestItem to carry the unescaped href "chapter1.html".
[Fact(DisplayName = "Read an OPF package with a URI-escaped 'href' attribute in a manifest item XML node should succeed")]
public async void ReadPackageWithEscapedManifestItemHrefTest()
{
    // Build the expected object graph bottom-up, starting from the manifest item.
    EpubManifestItem unescapedItem = new
    (
        id: "item-1",
        href: "chapter1.html",
        mediaType: "application/xhtml+xml"
    );
    EpubManifest expectedManifest = new
    (
        id: null,
        items: new List<EpubManifestItem> { unescapedItem }
    );
    EpubPackage expectedEpubPackage = new
    (
        uniqueIdentifier: null,
        epubVersion: EpubVersion.EPUB_3,
        metadata: new EpubMetadata(),
        manifest: expectedManifest,
        spine: new EpubSpine(),
        guide: null
    );

    // NOTE(review): this goes through the "skipping invalid manifest items" helper even though the
    // fixture's only manifest item appears complete — confirm this helper is the intended one.
    await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM, expectedEpubPackage);
}

[Fact(DisplayName = "Trying to read OPF package without 'href' attribute in a manifest item XML node should fail with EpubPackageException")]
public async void ReadPackageWithoutManifestItemHrefTest()
{
Expand Down
2 changes: 1 addition & 1 deletion Source/VersOne.Epub/Readers/Epub2NcxReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ private static Epub2NcxContent ReadNavigationContent(XElement navigationContentN
id = attributeValue;
break;
case "src":
source = attributeValue;
source = Uri.UnescapeDataString(attributeValue);
break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ private static Epub3NavAnchor ReadEpub3NavAnchor(XElement epub3NavAnchorNode)
switch (navAnchorNodeAttribute.GetLowerCaseLocalName())
{
case "href":
href = attributeValue;
href = Uri.UnescapeDataString(attributeValue);
break;
case "title":
title = attributeValue;
Expand Down
4 changes: 2 additions & 2 deletions Source/VersOne.Epub/Readers/NavigationReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ private static List<EpubNavigationItemRef> GetNavigationItems(EpubSchema epubSch
Epub2NcxNavigationLabel? firstNavigationLabel = navigationPoint.NavigationLabels.FirstOrDefault() ??
throw new Epub2NcxException($"Incorrect EPUB 2 NCX: navigation point \"{navigationPoint.Id}\" should contain at least one navigation label.");
string title = firstNavigationLabel.Text;
string source = Uri.UnescapeDataString(navigationPoint.Content.Source);
string source = navigationPoint.Content.Source;
if (!ContentPathUtils.IsLocalPath(source))
{
throw new Epub2NcxException($"Incorrect EPUB 2 NCX: content source \"{source}\" cannot be a remote resource.");
Expand Down Expand Up @@ -100,7 +100,7 @@ private static List<EpubNavigationItemRef> GetNavigationItems(EpubSchema epubSch
List<EpubNavigationItemRef> nestedItems = GetNavigationItems(epubSchema, epubContentRef, epub3NavLi.ChildOl, epub3NavigationBaseDirectoryPath);
if (navAnchor.Href != null)
{
string href = Uri.UnescapeDataString(navAnchor.Href);
string href = navAnchor.Href;
if (!ContentPathUtils.IsLocalPath(href))
{
throw new Epub3NavException($"Incorrect EPUB 3 navigation document: anchor href \"{href}\" cannot be a remote resource.");
Expand Down