From f1597aafff8778a3268975f0bbb989bed7cf1d85 Mon Sep 17 00:00:00 2001 From: Dan Fickle Date: Wed, 29 Jun 2016 22:57:21 +1000 Subject: [PATCH] For #28 - Let user specify custom text transformers. Text transformers are currently toUpper, toTitle and toLower. --- README.md | 2 + .../extend/FSTextTransformer.java | 8 ++ .../openhtmltopdf/layout/SharedContext.java | 29 +++++ .../com/openhtmltopdf/layout/TextUtil.java | 121 ++++++++++-------- .../demo/browser/BrowserPanel.java | 4 +- .../testcases/TestcaseRunner.java | 4 +- .../pdfboxout/PdfBoxRenderer.java | 116 +++++++++++++---- .../pdfboxout/PdfRendererBuilder.java | 58 ++++++++- .../bidi/support/ICUTransformers.java | 49 +++++++ 9 files changed, 298 insertions(+), 93 deletions(-) create mode 100644 openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSTextTransformer.java create mode 100644 openhtmltopdf-rtl-support/src/main/java/com/openhtmltopdf/bidi/support/ICUTransformers.java diff --git a/README.md b/README.md index 592867010..b6628bbca 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,8 @@ CHANGELOG head - 0.0.1-RC5-SNAPSHOT ======== ++ BREAKING CHANGE: Changed bi-directional method names in builder to be more consistent. ++ [Add method to builder to specify custom text transformers](https://github.com/danfickle/openhtmltopdf/issues/28) + [Add method to builder to specify a custom line breaker](https://github.com/danfickle/openhtmltopdf/issues/25) Thanks @Magotchi 0.0.1-RC4 diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSTextTransformer.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSTextTransformer.java new file mode 100644 index 000000000..acb78a438 --- /dev/null +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/FSTextTransformer.java @@ -0,0 +1,8 @@ +package com.openhtmltopdf.extend; + +/** + * Transforms text, such as making it title case. + */ +public interface FSTextTransformer { + public String transform(String in); +} diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/SharedContext.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/SharedContext.java index f964c3f01..5746b37ac 100644 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/SharedContext.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/SharedContext.java @@ -39,6 +39,7 @@ import com.openhtmltopdf.css.value.FontSpecification; import com.openhtmltopdf.extend.FSCanvas; import com.openhtmltopdf.extend.FSTextBreaker; +import com.openhtmltopdf.extend.FSTextTransformer; import com.openhtmltopdf.extend.FontContext; import com.openhtmltopdf.extend.FontResolver; import com.openhtmltopdf.extend.NamespaceHandler; @@ -130,6 +131,10 @@ public class SharedContext { private String replacementText = "#"; private FSTextBreaker lineBreaker = new UrlAwareLineBreakIterator(BreakIterator.getLineInstance(Locale.US)); + + private FSTextTransformer _unicodeToLowerTransformer = new TextUtil.DefaultToLowerTransformer(Locale.US); + private FSTextTransformer _unicodeToUpperTransformer = new TextUtil.DefaultToUpperTransformer(Locale.US); + private FSTextTransformer _unicodeToTitleTransformer = new TextUtil.DefaultToTitleTransformer(); public SharedContext() { } @@ -661,6 +666,30 @@ public void registerWithThread() { public void removeFromThread() { ThreadCtx.get().setSharedContext(null); } + + public FSTextTransformer getUnicodeToLowerTransformer() { + return this._unicodeToLowerTransformer; + } + + public FSTextTransformer getUnicodeToUpperTransformer() { + return this._unicodeToUpperTransformer; + } + + public FSTextTransformer getUnicodeToTitleTransformer() { + return this._unicodeToTitleTransformer; + } + + public void setUnicodeToLowerTransformer(FSTextTransformer tr) { + this._unicodeToLowerTransformer = tr; + } + + public void setUnicodeToUpperTransformer(FSTextTransformer tr) { + this._unicodeToUpperTransformer = tr; + } + + public void setUnicodeToTitleTransformer(FSTextTransformer tr) { + this._unicodeToTitleTransformer = tr; + } } /* diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/TextUtil.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/TextUtil.java index b56accee9..890391af4 100755 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/TextUtil.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/TextUtil.java @@ -19,9 +19,13 @@ */ package com.openhtmltopdf.layout; +import java.util.Locale; + import com.openhtmltopdf.css.constants.CSSName; import com.openhtmltopdf.css.constants.IdentValue; import com.openhtmltopdf.css.style.CalculatedStyle; +import com.openhtmltopdf.extend.FSTextTransformer; +import com.openhtmltopdf.util.ThreadCtx; import com.openhtmltopdf.util.Uu; @@ -31,28 +35,80 @@ * @author empty */ public class TextUtil { + public static class DefaultToUpperTransformer implements FSTextTransformer { + private final Locale lc; + + public DefaultToUpperTransformer(Locale lc) { + this.lc = lc; + } + + @Override + public String transform(String in) { + return in.toUpperCase(lc); + } + } + + public static class DefaultToLowerTransformer implements FSTextTransformer { + private final Locale lc; + + public DefaultToLowerTransformer(Locale lc) { + this.lc = lc; + } + + @Override + public String transform(String in) { + return in.toLowerCase(lc); + } + } + + /** + * A best effort implementation of title casing. Use the implementation in the rtl-support + * module for better results. + */ + public static class DefaultToTitleTransformer implements FSTextTransformer { + public DefaultToTitleTransformer() { } + + @Override + public String transform(String in) { + StringBuilder out = new StringBuilder(in.length()); + boolean makeTitle = true; + + for (int i = 0; i < in.length(); ) { + int cp = in.codePointAt(i); + + if (Character.isLetter(cp) && makeTitle) { + out.appendCodePoint(Character.toTitleCase(cp)); + makeTitle = false; + } else if (Character.isWhitespace(cp) || Character.isSpaceChar(cp)) { + out.appendCodePoint(cp); + makeTitle = true; + } else { + out.appendCodePoint(cp); + } + + i += Character.charCount(cp); + } + + return out.toString(); + } + } - /** - * Description of the Method - * - * @param text PARAM - * @param style - * @return Returns - */ public static String transformText( String text, CalculatedStyle style ) { IdentValue transform = style.getIdent( CSSName.TEXT_TRANSFORM ); + SharedContext ctx = ThreadCtx.get().sharedContext(); + if ( transform == IdentValue.LOWERCASE ) { - text = text.toLowerCase(); + text = ctx.getUnicodeToLowerTransformer().transform(text); } if ( transform == IdentValue.UPPERCASE ) { - text = text.toUpperCase(); + text = ctx.getUnicodeToUpperTransformer().transform(text); } if ( transform == IdentValue.CAPITALIZE ) { - text = capitalizeWords( text ); + text = ctx.getUnicodeToTitleTransformer().transform(text); } IdentValue fontVariant = style.getIdent( CSSName.FONT_VARIANT ); if ( fontVariant == IdentValue.SMALL_CAPS ) { - text = text.toUpperCase(); + text = ctx.getUnicodeToUpperTransformer().transform(text); } return text; } @@ -125,49 +181,6 @@ public static boolean isFirstLetterSeparatorChar( char c ) { return false; } } - - - /** - * Description of the Method - * - * @param text PARAM - * @return Returns - */ - private static String capitalizeWords( String text ) { - //Uu.p("start = -"+text+"-"); - if ( text.length() == 0 ) { - return text; - } - - StringBuffer sb = new StringBuffer(); - //Uu.p("text = -" + text + "-"); - - // do first letter - //Uu.p("first = " + text.substring(0,1)); - boolean cap = true; - for ( int i = 0; i < text.length(); i++ ) { - String ch = text.substring( i, i + 1 ); - //Uu.p("ch = " + ch + " cap = " + cap); - - - if ( cap ) { - sb.append( ch.toUpperCase() ); - } else { - sb.append( ch ); - } - cap = false; - if ( ch.equals( " " ) ) { - cap = true; - } - } - - //Uu.p("final = -"+sb.toString()+"-"); - if ( sb.toString().length() != text.length() ) { - Uu.p( "error! to strings arent the same length = -" + sb.toString() + "-" + text + "-" ); - } - return sb.toString(); - } - } /* diff --git a/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/demo/browser/BrowserPanel.java b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/demo/browser/BrowserPanel.java index 5d4bb8895..455cc6c03 100755 --- a/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/demo/browser/BrowserPanel.java +++ b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/demo/browser/BrowserPanel.java @@ -384,9 +384,9 @@ public void exportToPdfBox( String path ) os = new FileOutputStream(path); try { PdfRendererBuilder builder = new PdfRendererBuilder(); - builder.useBidiSplitter(new ICUBidiSplitter.ICUBidiSplitterFactory()); + builder.useUnicodeBidiSplitter(new ICUBidiSplitter.ICUBidiSplitterFactory()); builder.defaultTextDirection(TextDirection.LTR); - builder.useBidiReorderer(new ICUBidiReorderer()); + builder.useUnicodeBidiReorderer(new ICUBidiReorderer()); builder.withUri(manager.getBaseURL()); builder.toStream(os); builder.run(); diff --git a/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/testcases/TestcaseRunner.java b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/testcases/TestcaseRunner.java index 76ba74216..d8b062409 100644 --- a/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/testcases/TestcaseRunner.java +++ b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/testcases/TestcaseRunner.java @@ -41,8 +41,8 @@ public static void runTestCase(String testCaseFile) throws Exception { try { PdfRendererBuilder builder = new PdfRendererBuilder(); - builder.useBidiSplitter(new ICUBidiSplitter.ICUBidiSplitterFactory()); - builder.useBidiReorderer(new ICUBidiReorderer()); + builder.useUnicodeBidiSplitter(new ICUBidiSplitter.ICUBidiSplitterFactory()); + builder.useUnicodeBidiReorderer(new ICUBidiReorderer()); builder.defaultTextDirection(TextDirection.LTR); builder.withHtmlContent(html, TestcaseRunner.class.getResource("/testcases/").toString()); builder.toStream(outputStream); diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java index e5a700a2d..40f6bbfed 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java @@ -61,6 +61,7 @@ import com.openhtmltopdf.css.style.CalculatedStyle; import com.openhtmltopdf.extend.FSCache; import com.openhtmltopdf.extend.FSTextBreaker; +import com.openhtmltopdf.extend.FSTextTransformer; import com.openhtmltopdf.extend.FSUriResolver; import com.openhtmltopdf.extend.HttpStreamFactory; import com.openhtmltopdf.extend.NamespaceHandler; @@ -176,16 +177,63 @@ public PdfBoxRenderer(float dotsPerPoint, int dotsPerPixel, boolean useSubsets, _sharedContext.setInteractive(false); } + static class UnicodeImplementation { + final BidiReorderer reorderer; + final BidiSplitterFactory splitterFactory; + final FSTextBreaker lineBreaker; + final FSTextTransformer toLowerTransformer; + final FSTextTransformer toUpperTransformer; + final FSTextTransformer toTitleTransformer; + final boolean textDirection; + + UnicodeImplementation(BidiReorderer reorderer, BidiSplitterFactory splitterFactory, + FSTextBreaker lineBreaker, FSTextTransformer toLower, FSTextTransformer toUpper, + FSTextTransformer toTitle, boolean textDirection) { + this.reorderer = reorderer; + this.splitterFactory = splitterFactory; + this.lineBreaker = lineBreaker; + this.toLowerTransformer = toLower; + this.toUpperTransformer = toUpper; + this.toTitleTransformer = toTitle; + this.textDirection = textDirection; + } + } + + static class PageDimensions { + final Float w; + final Float h; + final boolean isSizeInches; + + PageDimensions(Float w, Float h, boolean isSizeInches) { + this.w = w; + this.h = h; + this.isSizeInches = isSizeInches; + } + } + + static class BaseDocument { + final String html; + final Document document; + final File file; + final String uri; + final String baseUri; + + BaseDocument(String baseUri, String html, Document document, File file, String uri) { + this.html = html; + this.document = document; + this.file = file; + this.uri = uri; + this.baseUri = baseUri; + } + } + /** - * Do not use this method. It is constantly changing as options are added to the builder. - * @param lineBreaker + * This method is constantly changing as options are added to the builder. */ - public PdfBoxRenderer(boolean textDirection, boolean testMode, - boolean useSubsets, HttpStreamFactory httpStreamFactory, - BidiSplitterFactory splitterFactory, BidiReorderer reorderer, String html, - Document document, String baseUri, String uri, File file, - OutputStream os, FSUriResolver _resolver, FSCache _cache, SVGDrawer svgImpl, - Float pageWidth, Float pageHeight, boolean isPageSizeInches, float pdfVersion, String replacementText, FSTextBreaker lineBreaker) { + PdfBoxRenderer(BaseDocument doc, UnicodeImplementation unicode, + boolean useSubsets, HttpStreamFactory httpStreamFactory, + OutputStream os, FSUriResolver resolver, FSCache cache, SVGDrawer svgImpl, + PageDimensions pageSize, float pdfVersion, String replacementText, boolean testMode) { _pdfDoc = new PDDocument(); _pdfDoc.setVersion(pdfVersion); @@ -202,12 +250,12 @@ public PdfBoxRenderer(boolean textDirection, boolean testMode, userAgent.setHttpStreamFactory(httpStreamFactory); } - if (_resolver != null) { - userAgent.setUriResolver(_resolver); + if (resolver != null) { + userAgent.setUriResolver(resolver); } - if (_cache != null) { - userAgent.setExternalCache(_cache); + if (cache != null) { + userAgent.setExternalCache(cache); } _sharedContext = new SharedContext(); @@ -230,39 +278,51 @@ public PdfBoxRenderer(boolean textDirection, boolean testMode, _sharedContext.setPrint(true); _sharedContext.setInteractive(false); - this.getSharedContext().setDefaultPageSize(pageWidth, pageHeight, isPageSizeInches); + this.getSharedContext().setDefaultPageSize(pageSize.w, pageSize.h, pageSize.isSizeInches); if (replacementText != null) { this.getSharedContext().setReplacementText(replacementText); } - if (splitterFactory != null) { - this._splitterFactory = splitterFactory; + if (unicode.splitterFactory != null) { + this._splitterFactory = unicode.splitterFactory; } - if (reorderer != null) { - this._reorderer = reorderer; + if (unicode.reorderer != null) { + this._reorderer = unicode.reorderer; this._outputDevice.setBidiReorderer(_reorderer); } - if (lineBreaker != null) { - _sharedContext.setLineBreaker(lineBreaker); + if (unicode.lineBreaker != null) { + _sharedContext.setLineBreaker(unicode.lineBreaker); + } + + if (unicode.toLowerTransformer != null) { + _sharedContext.setUnicodeToLowerTransformer(unicode.toLowerTransformer); + } + + if (unicode.toUpperTransformer != null) { + _sharedContext.setUnicodeToUpperTransformer(unicode.toUpperTransformer); + } + + if (unicode.toTitleTransformer != null) { + _sharedContext.setUnicodeToTitleTransformer(unicode.toTitleTransformer); } - this._defaultTextDirection = textDirection ? BidiSplitter.RTL : BidiSplitter.LTR; + this._defaultTextDirection = unicode.textDirection ? BidiSplitter.RTL : BidiSplitter.LTR; - if (html != null) { - this.setDocumentFromStringP(html, baseUri); + if (doc.html != null) { + this.setDocumentFromStringP(doc.html, doc.baseUri); } - else if (document != null) { - this.setDocumentP(document, baseUri); + else if (doc.document != null) { + this.setDocumentP(doc.document, doc.baseUri); } - else if (uri != null) { - this.setDocumentP(uri); + else if (doc.uri != null) { + this.setDocumentP(doc.uri); } - else if (file != null) { + else if (doc.file != null) { try { - this.setDocumentP(file); + this.setDocumentP(doc.file); } catch (IOException e) { XRLog.exception("Problem trying to read input XHTML file", e); throw new RuntimeException("File IO problem", e); diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java index 710bf3df7..b656625ad 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java @@ -9,9 +9,13 @@ import com.openhtmltopdf.bidi.BidiSplitterFactory; import com.openhtmltopdf.extend.FSCache; import com.openhtmltopdf.extend.FSTextBreaker; +import com.openhtmltopdf.extend.FSTextTransformer; import com.openhtmltopdf.extend.FSUriResolver; import com.openhtmltopdf.extend.HttpStreamFactory; import com.openhtmltopdf.extend.SVGDrawer; +import com.openhtmltopdf.pdfboxout.PdfBoxRenderer.BaseDocument; +import com.openhtmltopdf.pdfboxout.PdfBoxRenderer.PageDimensions; +import com.openhtmltopdf.pdfboxout.PdfBoxRenderer.UnicodeImplementation; public class PdfRendererBuilder { @@ -43,6 +47,9 @@ public static enum PageSizeUnits { MM, INCHES }; private float _pdfVersion = 1.7f; private String _replacementText; private FSTextBreaker _lineBreaker; + private FSTextTransformer _unicodeToUpperTransformer; + private FSTextTransformer _unicodeToLowerTransformer; + private FSTextTransformer _unicodeToTitleTransformer; /** * Run the XHTML/XML to PDF conversion and output to an output stream set by toStream. @@ -66,10 +73,14 @@ public void run() throws Exception { * @return */ public PdfBoxRenderer buildPdfRenderer() { - return new PdfBoxRenderer(_textDirection, _testMode, _useSubsets, _httpStreamFactory, _splitter, _reorderer, - _html, _document, _baseUri, _uri, _file, _os, _resolver, _cache, _svgImpl, - _pageWidth, _pageHeight, _isPageSizeInches, _pdfVersion, _replacementText, - _lineBreaker); + UnicodeImplementation unicode = new UnicodeImplementation(_reorderer, _splitter, _lineBreaker, + _unicodeToLowerTransformer, _unicodeToUpperTransformer, _unicodeToTitleTransformer, _textDirection); + + PageDimensions pageSize = new PageDimensions(_pageWidth, _pageHeight, _isPageSizeInches); + + BaseDocument doc = new BaseDocument(_baseUri, _html, _document, _file, _uri); + + return new PdfBoxRenderer(doc, unicode, _useSubsets, _httpStreamFactory, _os, _resolver, _cache, _svgImpl, pageSize, _pdfVersion, _replacementText, _testMode); } /** @@ -139,7 +150,7 @@ public PdfRendererBuilder useCache(FSCache cache) { * @param splitter * @return */ - public PdfRendererBuilder useBidiSplitter(BidiSplitterFactory splitter) { + public PdfRendererBuilder useUnicodeBidiSplitter(BidiSplitterFactory splitter) { this._splitter = splitter; return this; } @@ -149,7 +160,7 @@ public PdfRendererBuilder useBidiSplitter(BidiSplitterFactory splitter) { * @param reorderer * @return */ - public PdfRendererBuilder useBidiReorderer(BidiReorderer reorderer) { + public PdfRendererBuilder useUnicodeBidiReorderer(BidiReorderer reorderer) { this._reorderer = reorderer; return this; } @@ -269,8 +280,41 @@ public PdfRendererBuilder useReplacementText(String replacement) { * @param breaker * @return */ - public PdfRendererBuilder useLineBreaker(FSTextBreaker breaker) { + public PdfRendererBuilder useUnicodeLineBreaker(FSTextBreaker breaker) { this._lineBreaker = breaker; return this; } + + /** + * Specify a transformer to use to upper case strings. + * By default String::toUpperCase(Locale.US) is used. + * @param tr + * @return + */ + public PdfRendererBuilder useUnicodeToUpperTransformer(FSTextTransformer tr) { + this._unicodeToUpperTransformer = tr; + return this; + } + + /** + * Specify a transformer to use to lower case strings. + * By default String::toLowerCase(Locale.US) is used. + * @param tr + * @return + */ + public PdfRendererBuilder useUnicodeToLowerTransformer(FSTextTransformer tr) { + this._unicodeToLowerTransformer = tr; + return this; + } + + /** + * Specify a transformer to title case strings. + * By default a best effort implementation (non locale aware) is used. + * @param tr + * @return + */ + public PdfRendererBuilder useUnicodeToTitleTransformer(FSTextTransformer tr) { + this._unicodeToTitleTransformer = tr; + return this; + } } diff --git a/openhtmltopdf-rtl-support/src/main/java/com/openhtmltopdf/bidi/support/ICUTransformers.java b/openhtmltopdf-rtl-support/src/main/java/com/openhtmltopdf/bidi/support/ICUTransformers.java new file mode 100644 index 000000000..b6f65a5e6 --- /dev/null +++ b/openhtmltopdf-rtl-support/src/main/java/com/openhtmltopdf/bidi/support/ICUTransformers.java @@ -0,0 +1,49 @@ +package com.openhtmltopdf.bidi.support; + +import java.util.Locale; + +import com.ibm.icu.lang.UCharacter; +import com.openhtmltopdf.extend.FSTextTransformer; + +public class ICUTransformers { + private ICUTransformers() { } + + public static class ICUToLowerTransformer implements FSTextTransformer { + private final Locale lc; + + public ICUToLowerTransformer(Locale lc) { + this.lc = lc; + } + + @Override + public String transform(String in) { + return UCharacter.toLowerCase(lc, in); + } + } + + public static class ICUToUpperTransformer implements FSTextTransformer { + private final Locale lc; + + public ICUToUpperTransformer(Locale lc) { + this.lc = lc; + } + + @Override + public String transform(String in) { + return UCharacter.toUpperCase(lc, in); + } + } + + public static class ICUToTitleTransformer implements FSTextTransformer { + private final Locale lc; + + public ICUToTitleTransformer(Locale lc) { + this.lc = lc; + } + + @Override + public String transform(String in) { + return UCharacter.toTitleCase(lc, in, null, UCharacter.TITLECASE_NO_LOWERCASE); + } + } +}