Skip to content

Commit 9cf9bf8

Browse files
tomas-sexenianiroquetaggallotti
authored
Initial PDFBox HTML support (#748)
* First wave of security fixes Issue: 102725 * Second wave of security fixes Alerts 236, 235, 234, 233, 231, 230, 229, 228, 227 and 226 * Fix "Poor Error Handling Return Inside Finally" cases reported by Fortify Issue: 102800 * Third wave of security fixes Fixes 225, 224, 223, 221, 220, 219, 218, 217, 216, 215, 214 and 213 * Use beta HttpClient.java code * Fixed code scanning alerts in proposed fixes * Initial PDFBox HTML support * Flatten multilist and restore line and rendering mode after heading rendering * Code cleanup, ready to push * Fixes to heading and image rendering and text positioning --------- Co-authored-by: iroqueta <iroqueta@genexus.com> Co-authored-by: Gonzalo <gonzalogallotti@gmail.com>
1 parent 9dd7c0b commit 9cf9bf8

File tree

2 files changed

+246
-3
lines changed

2 files changed

+246
-3
lines changed

java/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@
122122
<version>2.0.27</version>
123123
</dependency>
124124
<dependency>
125+
<groupId>org.jsoup</groupId>
126+
<artifactId>jsoup</artifactId>
127+
<version>1.16.1</version>
128+
</dependency>
129+
<dependency>
125130
<groupId>com.google.zxing</groupId>
126131
<artifactId>core</artifactId>
127132
<version>3.5.1</version>

java/src/main/java/com/genexus/reports/PDFReportPDFBox.java

Lines changed: 241 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import java.io.*;
66
import java.net.URL;
77
import java.util.ArrayList;
8+
import java.util.HashSet;
89
import java.util.List;
10+
import java.util.Set;
911
import java.util.concurrent.ConcurrentHashMap;
1012

1113
import com.genexus.CommonUtil;
@@ -19,6 +21,7 @@
1921
import com.google.zxing.BarcodeFormat;
2022
import com.google.zxing.common.BitMatrix;
2123
import com.google.zxing.oned.Code128Writer;
24+
2225
import org.apache.pdfbox.cos.*;
2326
import org.apache.pdfbox.io.IOUtils;
2427
import org.apache.pdfbox.pdmodel.*;
@@ -32,6 +35,14 @@
3235
import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences;
3336
import org.apache.pdfbox.util.Matrix;
3437

38+
import org.jsoup.Jsoup;
39+
import org.jsoup.nodes.Document;
40+
import org.jsoup.nodes.Element;
41+
import org.jsoup.nodes.Node;
42+
import org.jsoup.select.Elements;
43+
44+
import javax.imageio.IIOException;
45+
3546
public class PDFReportPDFBox extends GXReportPDFCommons{
3647
private PDRectangle pageSize;
3748
private PDFont baseFont;
@@ -49,6 +60,10 @@ public class PDFReportPDFBox extends GXReportPDFCommons{
4960
public int runDirection = 0;
5061
private int page;
5162

63+
private final float DEFAULT_PDFBOX_LEADING = 1.2f;
64+
65+
private Set<String> supportedHTMLTags = new HashSet<>();
66+
5267
static {
5368
log = org.apache.logging.log4j.LogManager.getLogger(PDFReportPDFBox.class);
5469
}
@@ -594,7 +609,36 @@ else if (valign == PDFReportPDFBox.VerticalAlign.BOTTOM.value())
594609
boolean autoResize = (align & 256) == 256;
595610

596611
if (htmlformat == 1) {
597-
log.info("As for now, HTML printing is not supported while generating reports using PDFBox");
612+
log.debug("WARNING: HTML rendering is not natively supported by PDFBOX 2.0.27. Handcrafted support is provided but it is not intended to cover all possible use cases");
613+
try {
614+
bottomAux = (float)convertScale(bottom);
615+
topAux = (float)convertScale(top);
616+
float drawingPageHeight = this.pageSize.getUpperRightY() - topMargin - bottomMargin;
617+
618+
float llx = leftAux + leftMargin;
619+
float lly = drawingPageHeight - bottomAux;
620+
float urx = rightAux + leftMargin;
621+
float ury = drawingPageHeight - topAux;
622+
623+
// Define the rectangle where the content will be displayed
624+
PDRectangle htmlRectangle = new PDRectangle();
625+
htmlRectangle.setLowerLeftX(llx);
626+
htmlRectangle.setLowerLeftY(lly);
627+
htmlRectangle.setUpperRightX(urx);
628+
htmlRectangle.setUpperRightY(ury);
629+
SpaceHandler spaceHandler = new SpaceHandler(htmlRectangle.getUpperRightY(), htmlRectangle.getHeight());
630+
631+
loadSupportedHTMLTags();
632+
633+
Document document = Jsoup.parse(sTxt);
634+
Elements allElements = document.getAllElements();
635+
for (Element element : allElements)
636+
if (this.supportedHTMLTags.contains(element.normalName()))
637+
processHTMLElement(cb, htmlRectangle, spaceHandler, element);
638+
639+
} catch (Exception e) {
640+
log.error("GxDrawText failed to print HTML text : ", e);
641+
}
598642
}
599643
else
600644
if (barcodeType != null){
@@ -801,11 +845,204 @@ else if (valign == PDFReportPDFBox.VerticalAlign.BOTTOM.value())
801845
}
802846
}
803847
}
804-
} catch (IOException ioe){
848+
} catch (Exception ioe){
805849
log.error("GxDrawText failed: ", ioe);
806850
}
807851
}
808852

853+
private void loadSupportedHTMLTags(){
854+
this.supportedHTMLTags.add("p");
855+
this.supportedHTMLTags.add("ol");
856+
this.supportedHTMLTags.add("ul");
857+
this.supportedHTMLTags.add("div");
858+
this.supportedHTMLTags.add("h1");
859+
this.supportedHTMLTags.add("h2");
860+
this.supportedHTMLTags.add("h3");
861+
this.supportedHTMLTags.add("h4");
862+
this.supportedHTMLTags.add("img");
863+
this.supportedHTMLTags.add("a");
864+
}
865+
866+
private void processHTMLElement(PDPageContentStream cb, PDRectangle htmlRectangle, SpaceHandler spaceHandler, Element blockElement) throws Exception{
867+
this.fontBold = false;
868+
String tagName = blockElement.normalName();
869+
PDFont htmlFont = PDType1Font.TIMES_ROMAN;
870+
871+
if (tagName.equals("div") || tagName.equals("span")) {
872+
for (Node child : blockElement.childNodes())
873+
if (child instanceof Element)
874+
processHTMLElement(cb, htmlRectangle, spaceHandler, (Element) child);
875+
}
876+
877+
if (spaceHandler.getAvailableSpace() <= 0){
878+
log.error("You ran out of available space while rendering HTML");
879+
return;
880+
}
881+
882+
float lineHeight = (PDType1Font.TIMES_ROMAN.getFontDescriptor().getFontBoundingBox().getHeight() / 1000 * fontSize) * DEFAULT_PDFBOX_LEADING;
883+
float leading = (float)(Double.valueOf(props.getGeneralProperty(Const.LEADING)).doubleValue());
884+
885+
float llx = htmlRectangle.getLowerLeftX();
886+
float lly = htmlRectangle.getLowerLeftY();
887+
float urx = htmlRectangle.getUpperRightX();
888+
889+
float fontSize = 16f; // Default font size for the HTML <p> tag
890+
cb.setFont(htmlFont, 16f);
891+
if (tagName.equals("h1")){
892+
cb.setFont(htmlFont, 32f);
893+
fontSize = 32f;
894+
tagName = "h";
895+
} else if (tagName.equals("h2")){
896+
cb.setFont(htmlFont, 24f);
897+
fontSize = 24f;
898+
tagName = "h";
899+
} else if (tagName.equals("h3")){
900+
cb.setFont(htmlFont, 18.72f);
901+
fontSize = 18.72f;
902+
tagName = "h";
903+
} else if (tagName.equals("h4")){
904+
cb.setFont(htmlFont, 16f);
905+
fontSize = 16.5f;
906+
tagName = "h";
907+
}
908+
909+
//fontsize / 2 is subtracted from the current Y position so that the rendered item fits within the specified rectangle in the canvas. This is because
910+
//PDFBox renders text from left to right and bottom to top
911+
spaceHandler.setCurrentYPosition(spaceHandler.getCurrentYPosition() - fontSize / 2);
912+
913+
if (tagName.equals("h")){
914+
this.fontBold = true;
915+
float lines = renderHTMLContent(cb, blockElement.text(), fontSize, llx, lly, urx, spaceHandler.getCurrentYPosition());
916+
float totalTextHeight = lineHeight * lines * DEFAULT_PDFBOX_LEADING * leading;
917+
spaceHandler.setCurrentYPosition(spaceHandler.getCurrentYPosition() - totalTextHeight);
918+
} else if (tagName.equals("p")) {
919+
float lines = renderHTMLContent(cb, blockElement.text(), fontSize, llx, lly, urx, spaceHandler.getCurrentYPosition());
920+
float totalTextHeight = lineHeight * lines * DEFAULT_PDFBOX_LEADING * leading;
921+
spaceHandler.setCurrentYPosition(spaceHandler.getCurrentYPosition() - totalTextHeight);
922+
} else if (tagName.equals("ul") || tagName.equals("ol")){
923+
int i = 0;
924+
for (Element listItem : blockElement.select("li")){
925+
String text = (tagName.equals("ul")) ? "• " + listItem.text() : i + ". " + listItem.text();
926+
i++;
927+
float lines = renderHTMLContent(cb, text, fontSize, llx, lly, urx, spaceHandler.getCurrentYPosition());
928+
float totalTextHeight = lineHeight * lines * DEFAULT_PDFBOX_LEADING;
929+
spaceHandler.setCurrentYPosition(spaceHandler.getCurrentYPosition() - totalTextHeight);
930+
}
931+
} else if (tagName.equals("a")){
932+
cb.setNonStrokingColor(new Color(0, 0, 255));
933+
float lines = renderHTMLContent(cb, blockElement.attr("href"), fontSize, llx, lly, urx, spaceHandler.getCurrentYPosition());
934+
float totalTextHeight = lineHeight * lines * DEFAULT_PDFBOX_LEADING * leading;
935+
spaceHandler.setCurrentYPosition(spaceHandler.getCurrentYPosition() - totalTextHeight);
936+
cb.setStrokingColor(new Color(0, 0, 0));
937+
} else if (tagName.equals("img")){
938+
String bitmap = blockElement.attr("src");
939+
float height = blockElement.attr("height") != "" ? Float.parseFloat(blockElement.attr("height")) : 0;
940+
float width = blockElement.attr("width") != "" ? Float.parseFloat(blockElement.attr("width")) : 0;
941+
942+
PDImageXObject image;
943+
944+
try {
945+
if (!NativeFunctions.isWindows() && new File(bitmap).isAbsolute() && bitmap.startsWith(httpContext.getStaticContentBase()))
946+
bitmap = bitmap.replace(httpContext.getStaticContentBase(), "");
947+
if (!new File(bitmap).isAbsolute() && !bitmap.toLowerCase().startsWith("http:") && !bitmap.toLowerCase().startsWith("https:")) {
948+
if (bitmap.startsWith(httpContext.getStaticContentBase()))
949+
bitmap = bitmap.replace(httpContext.getStaticContentBase(), "");
950+
image = PDImageXObject.createFromFile(defaultRelativePrepend + bitmap,document);
951+
if(image == null) {
952+
bitmap = webAppDir + bitmap;
953+
image = PDImageXObject.createFromFile(bitmap,document);
954+
}
955+
else
956+
bitmap = defaultRelativePrepend + bitmap;
957+
}
958+
else
959+
image = PDImageXObject.createFromFile(bitmap,document);
960+
} catch(java.lang.IllegalArgumentException | FileNotFoundException |IIOException e) {
961+
URL url= new java.net.URL(bitmap);
962+
image = PDImageXObject.createFromByteArray(document, IOUtils.toByteArray(url.openStream()),bitmap);
963+
}
964+
if (height == 0) height = image.getHeight();
965+
if (width == 0) width = image.getWidth();
966+
cb.drawImage(image, llx, spaceHandler.getCurrentYPosition() - height, width, height);
967+
spaceHandler.setCurrentYPosition(spaceHandler.getCurrentYPosition() - height - 10f);
968+
}
969+
970+
float availableSpace = spaceHandler.getCurrentYPosition() - lly;
971+
spaceHandler.setAvailableSpace(availableSpace);
972+
}
973+
974+
private class SpaceHandler {
975+
float currentYPosition;
976+
float availableSpace;
977+
978+
public SpaceHandler(float currentYPosition, float availableSpace) {
979+
this.currentYPosition = currentYPosition;
980+
this.availableSpace = availableSpace;
981+
}
982+
983+
public float getCurrentYPosition() {
984+
return currentYPosition;
985+
}
986+
987+
public void setCurrentYPosition(float currentYPosition) {
988+
this.currentYPosition = currentYPosition;
989+
}
990+
991+
public float getAvailableSpace() {
992+
return availableSpace;
993+
}
994+
995+
public void setAvailableSpace(float availableSpace) {
996+
this.availableSpace = availableSpace;
997+
}
998+
}
999+
1000+
private float renderHTMLContent(PDPageContentStream contentStream, String text, float fontSize, float llx, float lly, float urx, float ury) {
1001+
try {
1002+
PDFont defaultHTMLFont = PDType1Font.TIMES_ROMAN;
1003+
List<String> lines = new ArrayList<>();
1004+
String[] words = text.split(" ");
1005+
StringBuilder currentLine = new StringBuilder();
1006+
for (String word : words) {
1007+
float currentLineWidth = defaultHTMLFont.getStringWidth(currentLine + " " + word) / 1000 * fontSize;
1008+
if (currentLineWidth < urx - llx) {
1009+
if (currentLine.length() > 0) {
1010+
currentLine.append(" ");
1011+
}
1012+
currentLine.append(word);
1013+
} else {
1014+
lines.add(currentLine.toString());
1015+
currentLine.setLength(0);
1016+
currentLine.append(word);
1017+
}
1018+
}
1019+
lines.add(currentLine.toString());
1020+
1021+
float leading = lines.size() == 1 ? fontSize : DEFAULT_PDFBOX_LEADING * fontSize;
1022+
float startY = ury;
1023+
1024+
if (fontSize > 16f){
1025+
contentStream.setLineWidth(fontSize * 0.05f);
1026+
contentStream.setRenderingMode(RenderingMode.FILL_STROKE);
1027+
}
1028+
contentStream.beginText();
1029+
float lineHeight = (defaultHTMLFont.getFontDescriptor().getFontBoundingBox().getUpperRightY() - defaultHTMLFont.getFontDescriptor().getFontBoundingBox().getLowerLeftY())/ 1000 * fontSize;
1030+
contentStream.newLineAtOffset(llx, startY);
1031+
for (String line : lines) {
1032+
contentStream.showText(line);
1033+
startY = startY - leading - lineHeight;
1034+
contentStream.newLineAtOffset(0, startY);
1035+
}
1036+
contentStream.endText();
1037+
contentStream.setLineWidth(1f); // Default line width for PDFBox 2.0.27
1038+
contentStream.setRenderingMode(RenderingMode.FILL); // Default text rendering mode for PDFBox 2.0.27
1039+
return lines.size();
1040+
} catch (IOException ioe) {
1041+
log.error("failed to draw wrapped text: ", ioe);
1042+
return -1;
1043+
}
1044+
}
1045+
8091046
private void resolveTextStyling(PDPageContentStream contentStream, String text, float x, float y, boolean isWrapped){
8101047
try {
8111048
if (this.fontBold && this.fontItalic){
@@ -836,6 +1073,7 @@ private void resolveTextStyling(PDPageContentStream contentStream, String text,
8361073
contentStream.endText();
8371074
contentStream.setLineWidth(1f); // Default line width for PDFBox 2.0.27
8381075
contentStream.setRenderingMode(RenderingMode.FILL); // Default text rendering mode for PDFBox 2.0.27
1076+
contentStream.moveTo(x,y);
8391077
} catch (IOException ioe) {
8401078
log.error("failed to apply text styling: ", ioe);
8411079
}
@@ -861,7 +1099,7 @@ private void showWrappedTextAligned(PDPageContentStream contentStream, PDFont fo
8611099
}
8621100
lines.add(currentLine.toString());
8631101

864-
float leading = lines.size() == 1 ? fontSize : 1.2f * fontSize;
1102+
float leading = lines.size() == 1 ? fontSize : DEFAULT_PDFBOX_LEADING * fontSize;
8651103
float totalTextHeight = fontSize * lines.size() + leading * (lines.size() - 1);
8661104
float startY = lines.size() == 1 ? lly + (ury - lly - totalTextHeight) / 2 : lly + (ury - lly - totalTextHeight) / 2 + (lines.size() - 1) * (fontSize + leading) + font.getFontDescriptor().getDescent() / 1000 * fontSize;
8671105

0 commit comments

Comments
 (0)