From 89ff4ff6bd4f5f56277037b4f6215ae2b85d9117 Mon Sep 17 00:00:00 2001 From: Dan Fickle Date: Tue, 5 Apr 2016 17:49:23 +1000 Subject: [PATCH] For #18 - Further work on the pluggable URI resolver and cache. Also, explanation in readme. --- README.md | 33 +++++++++++++++++++ .../openhtmltopdf/extend/FSUriResolver.java | 31 ++++++++++++----- .../openhtmltopdf/swing/NaiveUserAgent.java | 20 ++++++++--- .../pdfboxout/PdfBoxRenderer.java | 18 +++++++--- .../pdfboxout/PdfBoxUserAgent.java | 3 -- .../pdfboxout/PdfRendererBuilder.java | 27 ++++++++++++++- 6 files changed, 110 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index f9d6f5567..2d345b96f 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,37 @@ as simple as adding the following code: ```` Then use ````builder.useHttpStreamImplementation(new OkHttpStreamFactory())````. +CACHE BETWEEN RUNS - INTRODUCED IN RC3 +======= +By default, Open HTML to PDF should not cache anything between runs. However, it allows the user to plug in an external cache. It should +be noted that the URI received by the cache is already resolved (see below). Here is a simple external cache: +````java + public static class SimpleCache implements FSCache { + private final Map cache = new HashMap<>(); + + @Override + public Object get(FSCacheKey cacheKey) { + Object obj = cache.get(cacheKey); + System.out.println("Requesting: " + cacheKey.getUri() + " of type: " + cacheKey.getClazz().getName() + ", got it: " + (obj != null)); + return obj; + } + + @Override + public void put(FSCacheKey cacheKey, Object obj) { + System.out.println("Putting: " + cacheKey.getUri() + " of type: " + cacheKey.getClazz().getName()); + cache.put(cacheKey, obj); + } + } +```` +Of course, you may want to customize your cache by inspecting the URI or class name contained in the cache key. Once you have a cache, you can set it +on the builder with ````builder.useCache(cache)````. 
+ +URI RESOLVER - INTRODUCED IN RC3 +======= +By default, the code attempts to resolve relative URIs by using the document URI as a base URI. Absolute URIs are returned unchanged. If you wish to plug in your +own resolver, you can. This can not only resolve relative URIs but also resolve URIs in a private address space or even reject a URI. To use an external resolver, +implement ````FSUriResolver```` and use it with ````builder.useUriResolver(new MyResolver())````. + LOGGING ======= Three options are provided by Open HTML to PDF. The default is to use java.util.logging. If you prefer to output using log4j or slf4j, adapters are provided: @@ -224,6 +255,8 @@ CHANGELOG head - 0.0.1-RC3-SNAPSHOT ======== ++ [Added support for plugging in an external URI resolver](https://github.com/danfickle/openhtmltopdf/issues/18) ++ [Added support for plugging in an external cache](https://github.com/danfickle/openhtmltopdf/issues/18) + [Added support for font fallback for Java2D](https://github.com/danfickle/openhtmltopdf/issues/10) Thanks @willamette + [Fixed crash issue when document contained CDATA sections](https://github.com/danfickle/openhtmltopdf/issues/16) Thanks @hiddendog diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSUriResolver.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSUriResolver.java index ac23e8bc4..8a883f047 100644 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSUriResolver.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSUriResolver.java @@ -1,14 +1,27 @@ package com.openhtmltopdf.extend; - -/** - * Used to find a uri that may be relative to the BaseURL. - * The returned value will always only be used via methods in the same - * implementation of this interface, therefore may be a private uri-space. - * - * @param uri an absolute or relative (to baseURL) uri to be resolved. - * @return the full uri in uri-spaces known to the current implementation. 
- */ public interface FSUriResolver { + + /** + * Used to find a uri that may be relative to the BaseURL. + * The returned value will always only be used via methods in the same + * implementation of this interface, therefore may be a private uri-space. + * + * @param uri an absolute or relative (to baseURL) uri to be resolved. + * @return the full uri in uri-spaces known to the current implementation. + */ public String resolveURI(String uri); + + /** + * Does not need to be a correct URL, only an identifier that the + * implementation can resolve. + * + * @param uri A URL against which relative references can be resolved. + */ + public void setBaseURL(String uri); + + /** + * @return the base uri, possibly in the implementation's private uri-space + */ + public String getBaseURL(); } diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java index e52fc044c..b6e4907cf 100644 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java @@ -76,7 +76,6 @@ public class NaiveUserAgent implements UserAgentCallback, DocumentListener { protected final LinkedHashMap _imageCache = new LinkedHashMap(); private final FSUriResolver DEFAULT_URI_RESOLVER = new DefaultUriResolver(this); - private String _baseURL; protected HttpStreamFactory _streamFactory = new DefaultHttpStreamFactory(); protected FSCache _externalCache = new NullFSCache(false); protected FSUriResolver _resolver = DEFAULT_URI_RESOLVER; @@ -420,12 +419,13 @@ public boolean isVisited(String uri) { */ @Override public void setBaseURL(String url) { - _baseURL = url; + _resolver.setBaseURL(url); } public static class DefaultUriResolver implements FSUriResolver { private final NaiveUserAgent _agent; + private String _baseURI; private DefaultUriResolver(NaiveUserAgent agent) { this._agent = agent; @@ -477,17 
+477,27 @@ public String resolveURI(String uri) { if (!result.isAbsolute()) { XRLog.load(uri + " is not a URL; may be relative. Testing using parent URL " - + _agent._baseURL); + + _agent.getBaseURL()); result = new URI(_agent.getBaseURL()).resolve(result); } ret = result.toString(); } catch (URISyntaxException e) { XRLog.exception("The default NaiveUserAgent cannot resolve the URL " - + uri + " with base URL " + _agent._baseURL); + + uri + " with base URL " + _agent.getBaseURL()); } return ret; } + + @Override + public void setBaseURL(String uri) { + this._baseURI = uri; + } + + @Override + public String getBaseURL() { + return this._baseURI; + } } /** @@ -495,7 +505,7 @@ public String resolveURI(String uri) { */ @Override public String getBaseURL() { - return _baseURL; + return _resolver.getBaseURL(); } @Override diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java index 9a56f0951..21de4da0c 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java @@ -58,6 +58,8 @@ import com.openhtmltopdf.bidi.SimpleBidiReorderer; import com.openhtmltopdf.context.StyleReference; import com.openhtmltopdf.css.style.CalculatedStyle; +import com.openhtmltopdf.extend.FSCache; +import com.openhtmltopdf.extend.FSUriResolver; import com.openhtmltopdf.extend.HttpStreamFactory; import com.openhtmltopdf.extend.NamespaceHandler; import com.openhtmltopdf.extend.UserInterface; @@ -101,10 +103,10 @@ public class PdfBoxRenderer { private OutputStream _os; public PdfBoxRenderer(boolean testMode) { - this(DEFAULT_DOTS_PER_POINT, DEFAULT_DOTS_PER_PIXEL, true, testMode, null); + this(DEFAULT_DOTS_PER_POINT, DEFAULT_DOTS_PER_PIXEL, true, testMode, null, null, null); } - public PdfBoxRenderer(float dotsPerPoint, int dotsPerPixel, boolean useSubsets, 
boolean testMode, HttpStreamFactory factory) { + public PdfBoxRenderer(float dotsPerPoint, int dotsPerPixel, boolean useSubsets, boolean testMode, HttpStreamFactory factory, FSUriResolver _resolver, FSCache _cache) { _pdfDoc = new PDDocument(); _dotsPerPoint = dotsPerPoint; @@ -113,10 +115,18 @@ public PdfBoxRenderer(float dotsPerPoint, int dotsPerPixel, boolean useSubsets, _outputDevice.setWriter(_pdfDoc); PdfBoxUserAgent userAgent = new PdfBoxUserAgent(_outputDevice); + if (factory != null) { userAgent.setHttpStreamFactory(factory); } + if (_resolver != null) { + userAgent.setUriResolver(_resolver); + } + + if (_cache != null) { + userAgent.setExternalCache(_cache); + } _sharedContext = new SharedContext(); _sharedContext.setUserAgentCallback(userAgent); @@ -141,8 +151,8 @@ public PdfBoxRenderer(boolean textDirection, boolean testMode, boolean useSubsets, HttpStreamFactory httpStreamFactory, BidiSplitterFactory splitterFactory, BidiReorderer reorderer, String html, Document document, String baseUri, String uri, File file, - OutputStream os) { - this(DEFAULT_DOTS_PER_POINT, DEFAULT_DOTS_PER_PIXEL, useSubsets, testMode, httpStreamFactory); + OutputStream os, FSUriResolver _resolver, FSCache _cache) { + this(DEFAULT_DOTS_PER_POINT, DEFAULT_DOTS_PER_PIXEL, useSubsets, testMode, httpStreamFactory, _resolver, _cache); if (splitterFactory != null) { this.setBidiSplitter(splitterFactory); diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java index 690c9c724..37d13bf02 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java @@ -26,7 +26,6 @@ import java.net.URI; import java.util.Locale; -import com.openhtmltopdf.extend.FSImage; import com.openhtmltopdf.layout.SharedContext; import 
com.openhtmltopdf.resource.ImageResource; import com.openhtmltopdf.swing.FSCacheKey; @@ -35,8 +34,6 @@ import com.openhtmltopdf.util.XRLog; public class PdfBoxUserAgent extends NaiveUserAgent { - private static final int IMAGE_CACHE_CAPACITY = 32; - private SharedContext _sharedContext; private final PdfBoxOutputDevice _outputDevice; diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java index ac4adaf3c..7016e06c3 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java @@ -7,6 +7,8 @@ import com.openhtmltopdf.bidi.BidiReorderer; import com.openhtmltopdf.bidi.BidiSplitterFactory; +import com.openhtmltopdf.extend.FSCache; +import com.openhtmltopdf.extend.FSUriResolver; import com.openhtmltopdf.extend.HttpStreamFactory; import com.openhtmltopdf.swing.NaiveUserAgent; @@ -26,6 +28,8 @@ public static enum TextDirection { RTL, LTR; } private String _uri; private File _file; private OutputStream _os; + private FSUriResolver _resolver; + private FSCache _cache; /** * Run the XHTML/XML to PDF conversion and output to an output stream set by toStream. @@ -42,7 +46,7 @@ public void run() throws Exception { * @return */ public PdfBoxRenderer buildPdfRenderer() { - return new PdfBoxRenderer(_textDirection, _testMode, _useSubsets, _httpStreamFactory, _splitter, _reorderer, _html, _document, _baseUri, _uri, _file, _os); + return new PdfBoxRenderer(_textDirection, _testMode, _useSubsets, _httpStreamFactory, _splitter, _reorderer, _html, _document, _baseUri, _uri, _file, _os, _resolver, _cache); } /** @@ -86,6 +90,27 @@ public PdfRendererBuilder useHttpStreamImplementation(HttpStreamFactory factory) return this; } + /** + * Provides a uri resolver to resolve relative uris or private uri schemes. 
+ * @param resolver + * @return + */ + public PdfRendererBuilder useUriResolver(FSUriResolver resolver) { + this._resolver = resolver; + return this; + } + + /** + * Provides an external cache which can choose to cache items between runs, + * such as fonts or logo images. + * @param cache + * @return + */ + public PdfRendererBuilder useCache(FSCache cache) { + this._cache = cache; + return this; + } + /** * Provides a text splitter to split text into directional runs. Does nothing by default. * @param splitter