Skip to content

Commit

Permalink
feat: extend proper URL checking to more places (like CSS)
Browse files Browse the repository at this point in the history
This commit:
- extracts the URL checking logic from `BaseURLHandler` (SAX handler)
  to its own independent class `URLChecker`
- applies the same URL-checking logic to CSS as was used in XML, using
  the new `URLChecker`
- applies the URL-checking logic to the few places where it wasn't the
  case already (in XML handlers)
  • Loading branch information
rdeltour committed Nov 27, 2022
1 parent 5b5391f commit a3c736d
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 198 deletions.
39 changes: 13 additions & 26 deletions src/main/java/com/adobe/epubcheck/css/CSSHandler.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package com.adobe.epubcheck.css;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -17,6 +19,7 @@
import org.idpf.epubcheck.util.css.CssGrammar.CssSelector;
import org.idpf.epubcheck.util.css.CssGrammar.CssURI;
import org.idpf.epubcheck.util.css.CssLocation;
import org.w3c.epubcheck.url.URLChecker;

import com.adobe.epubcheck.api.EPUBLocation;
import com.adobe.epubcheck.api.Report;
Expand All @@ -34,7 +37,6 @@
import com.google.common.base.CharMatcher;
import com.google.common.collect.Sets;

import io.mola.galimatias.GalimatiasParseException;
import io.mola.galimatias.URL;

public class CSSHandler implements CssContentHandler, CssErrorHandler
Expand All @@ -47,6 +49,10 @@ public class CSSHandler implements CssContentHandler, CssErrorHandler
int startingColumnNumber = 0;
static final CharMatcher SPACE_AND_QUOTES = CharMatcher.anyOf(" \t\n\r\f\"'").precomputed();

// map to store parsed URLs
Map<String, URL> parsedURLs = new HashMap<>();
final URLChecker urlChecker;

// vars for font-face info
String fontFamily;
String fontStyle;
Expand All @@ -66,6 +72,7 @@ public CSSHandler(ValidationContext context)
this.xrefChecker = context.xrefChecker.orNull();
this.report = context.report;
this.version = context.version;
this.urlChecker = new URLChecker(context);
}

private EPUBLocation getCorrectedEPUBLocation(int lineNumber, int columnNumber, String details)
Expand Down Expand Up @@ -314,20 +321,7 @@ else if (propertyName.equals("src"))
{
if (construct.getType() == CssConstruct.Type.URI)
{
fontURI = ((CssURI) construct).toUriString();

// TODO implement more URL checks (like in BaseURLHandler)
URL fontURL = null;
try
{
fontURL = context.url.resolve(fontURI);
} catch (GalimatiasParseException e)
{
report.message(MessageId.RSC_020,
getCorrectedEPUBLocation(declaration.getLocation().getLine(),
declaration.getLocation().getColumn(), declaration.toCssString()),
fontURI, e.getLocalizedMessage());
}
URL fontURL = parsedURLs.get(((CssURI) construct).toUriString());
if (fontURL != null)
{
// check font mimetypes
Expand All @@ -348,7 +342,7 @@ else if (version == EPUBVersion.VERSION_3)
report.message(MessageId.CSS_007,
getCorrectedEPUBLocation(declaration.getLocation().getLine(),
declaration.getLocation().getColumn(), declaration.toCssString()),
fontURI, fontMimeType);
fontURL, fontMimeType);
}
}

Expand Down Expand Up @@ -388,17 +382,10 @@ private void resolveAndRegister(String uriString, int line, int col, String cssC
// we ignore this case
if (!uriString.startsWith("#"))
{
// Check the URL once and store the parsed URL for later reference
URL url = urlChecker.checkURL(uriString, getCorrectedEPUBLocation(line, col, cssContext));
parsedURLs.put(uriString, url);

// TODO implement more URL checks (like in BaseURLHandler)
URL url = null;
try
{
url = context.url.resolve(uriString);
} catch (GalimatiasParseException e)
{
report.message(MessageId.RSC_020, getCorrectedEPUBLocation(line, col, cssContext),
uriString, e.getLocalizedMessage());
}
if (url != null)
{
xrefChecker.registerReference(url, type, getCorrectedEPUBLocation(line, col, cssContext));
Expand Down
26 changes: 7 additions & 19 deletions src/main/java/com/adobe/epubcheck/ocf/OCFContainerFileHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,12 @@ else if (fullPath.trim().isEmpty())
return;
}

try
// Parse the rootfile URL
URL rootfileURL = checkURL(fullPath);
if (rootfileURL != null)
{
// Parse the rootfile URL
URL rootfileURL = URL.parse(baseURL(), fullPath);

// Register the parsed rootfile entry to the data model
state.addRootfile(mediaType, rootfileURL);

} catch (GalimatiasParseException e)
{
// FIXME 2022 - test this is reported
report.message(MessageId.RSC_020, location(), fullPath);
return;
}
}

Expand All @@ -107,19 +100,14 @@ private void processMappingDoc()
&& !Strings.nullToEmpty(href).trim().isEmpty())
{

try
// Parse the href attribute against the container root URL
URL mappingDocURL = checkURL(href);
if (mappingDocURL != null)
{
// Parse the href attribute against the container root URL
URL mappingDocURL = URL.parse(baseURL(), href);

// Register the parsed mapping document entry to the data model
state.addMappingDocument(mappingDocURL);
} catch (GalimatiasParseException e)
{
// FIXME 2022 - test this is reported
report.message(MessageId.RSC_020, location(), href);
return;
}

}
}

Expand Down
16 changes: 2 additions & 14 deletions src/main/java/com/adobe/epubcheck/opf/OPFHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;

import io.mola.galimatias.GalimatiasParseException;
import io.mola.galimatias.URL;

public class OPFHandler extends XMLHandler
Expand Down Expand Up @@ -304,22 +303,11 @@ else if (name.equals("reference"))
String href = e.getAttribute("href");
if (href != null && context.xrefChecker.isPresent())
{

// FIXME next test URL string is conforming, better test remote URLs
if (href.matches("^[^:/?#]+://.*"))
{
URL url = checkURL(href);
if (context.isRemote(url)) {
report.info(path, FeatureEnum.REFERENCE, href);
}

URL url;
try
{
url = baseURL().resolve(href);
} catch (GalimatiasParseException e1)
{
report.message(MessageId.RSC_020, location(), href);
return;
}
try
{
context.xrefChecker.get().registerReference(url, XRefChecker.Type.GENERIC, location());
Expand Down
27 changes: 8 additions & 19 deletions src/main/java/com/adobe/epubcheck/opf/OPFHandler30.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@
import static com.adobe.epubcheck.vocab.PackageVocabs.META_VOCAB;
import static com.adobe.epubcheck.vocab.PackageVocabs.META_VOCAB_URI;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Deque;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.w3c.epubcheck.url.URLUtils;

import com.adobe.epubcheck.api.EPUBLocation;
import com.adobe.epubcheck.api.QuietReport;
import com.adobe.epubcheck.messages.MessageId;
Expand Down Expand Up @@ -403,22 +403,22 @@ private List<String> processCollectionRole(String roleAtt)
ImmutableList.Builder<String> rolesBuilder = ImmutableList.builder();
for (String role : TOKENIZER.split(Strings.nullToEmpty(roleAtt)))
{
if (role.matches("^[^:/?#]+://.*"))
if (URLUtils.isAbsoluteURLString(role))
{
// Role is an absolute IRI
// check that the host component doesn't contain 'idpf.org'
try
{
URI uri = new URI(role);
if (uri.getHost() != null && uri.getHost().contains("idpf.org"))
URL url = URL.parse(role);
if (url.authority() != null && url.authority().contains("idpf.org"))
{
report.message(MessageId.OPF_069, location(), role);
}
else
{
rolesBuilder.add(role);
}
} catch (URISyntaxException e)
} catch (GalimatiasParseException e)
{
report.message(MessageId.OPF_070, location(), role);
}
Expand Down Expand Up @@ -448,22 +448,11 @@ private void processLink()
if (href != null)
{ // check by schema

// FIXME next test URL string is conforming, better test remote URLs
if (href.matches("^[^:/?#]+://.*"))
{
URL url = checkURL(href);
if (context.isRemote(url)) {
report.info(path, FeatureEnum.REFERENCE, href);
}

URL url;
try
{
url = baseURL().resolve(href);
} catch (GalimatiasParseException e1)
{
report.message(MessageId.RSC_020, location(), href);
return;
}

if (context.xrefChecker.isPresent())
{
context.xrefChecker.get().registerReference(url, Type.LINK, location());
Expand Down
Loading

0 comments on commit a3c736d

Please sign in to comment.