diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java
index 9e6349d..685a097 100644
--- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java
+++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java
@@ -1,5 +1,6 @@
package org.asciidoctor;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.asciidoctor.util.RougeColors;
import org.asciidoctor.util.pdf.ColorsProcessor;
import org.asciidoctor.util.pdf.ImageProcessor;
@@ -55,6 +56,7 @@ public void pdf_source_code_should_be_highlighted() throws IOException {
ColorsProcessor colorsProcessor = new ColorsProcessor("program", "System.out.println", "printHello", "HelloWorld", "
", "else", "Math.sqrt");
colorsProcessor.parse(outputFile1.getAbsolutePath());
Map> colors = colorsProcessor.getColors();
+
assertThat(colors.get("program").get(0), equalTo(RougeColors.GREY));
assertThat(colors.get("System.out.println").get(0), equalTo(RougeColors.LIGHT_BLUE));
assertThat(colors.get("printHello").get(0), equalTo(RougeColors.DARK_BLUE));
@@ -101,9 +103,8 @@ public void pdf_text_should_be_hyphenated_english() throws IOException {
private void removeFileIfItExists(File file) throws IOException {
if (file.exists()) {
if (!file.delete()) {
- throw new IOException("can't delete file");
+ throw new IOException("Can't delete file");
}
}
}
-
}
diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java
index 3855411..236c941 100644
--- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java
+++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java
@@ -7,7 +7,7 @@
*
* @author abelsromero
*/
-public class RougeColors {
+public final class RougeColors {
public static final Color GREEN = new Color(0,136,0);
diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java
index d382bde..76bacd0 100644
--- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java
+++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java
@@ -1,22 +1,23 @@
package org.asciidoctor.util.pdf;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.contentstream.operator.color.*;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.ResourceLoader;
-import org.apache.pdfbox.util.TextPosition;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.pdfbox.text.TextPosition;
import java.awt.*;
-import java.io.IOException;
-import java.util.*;
+import java.io.*;
import java.util.List;
+import java.util.*;
/**
* Parses a PDF document looking for certain words, if found it stores the
* associated colors.
- *
+ *
* Note: currently stores the color of the last character in fact.
+ * Based on https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java?revision=1904918&view=markup
*
* @author abelsromero
*/
@@ -29,46 +30,29 @@ public class ColorsProcessor extends PDFTextStripper {
/**
* List of words mapped to the different colors in which they appear
*/
- private Map> colors = new HashMap>();
+ private Map> colors = new HashMap<>();
- /**
- * Constructor
- *
- * @param words Words to look for into the document
- *
- * @throws java.io.IOException If there is an error loading text stripper properties.
- */
- public ColorsProcessor(String... words) throws IOException {
- super(ResourceLoader.loadProperties(
- "pdfbox/resources/ColorsProcessor.properties", true));
- super.setSortByPosition(true);
+ /**
+ * Creates a processor that looks for the given words.
+ *
+ * Registers the stroking and non-stroking colour operators (colour space,
+ * CMYK, RGB, gray and generic colour setters) on this stripper and enables
+ * sorting by position.
+ * NOTE(review): presumably the operators are required so that the graphics
+ * state read in processTextPosition carries the current colour; confirm
+ * against the PDFBox PrintTextColors example this class is based on.
+ *
+ * @param words words to look for in the document
+ */
+ public ColorsProcessor(String... words) {
+ addOperator(new SetStrokingColorSpace(this));
+ addOperator(new SetNonStrokingColorSpace(this));
+ addOperator(new SetStrokingDeviceCMYKColor(this));
+ addOperator(new SetNonStrokingDeviceCMYKColor(this));
+ addOperator(new SetNonStrokingDeviceRGBColor(this));
+ addOperator(new SetStrokingDeviceRGBColor(this));
+ addOperator(new SetNonStrokingDeviceGrayColor(this));
+ addOperator(new SetStrokingDeviceGrayColor(this));
+ addOperator(new SetStrokingColor(this));
+ addOperator(new SetStrokingColorN(this));
+ addOperator(new SetNonStrokingColor(this));
+ addOperator(new SetNonStrokingColorN(this));
+ setSortByPosition(true);
this.words = Arrays.asList(words);
}
- /**
- * Parses a document extracting the colors for the specified words in
- * the constructor
- *
- * @param filename PDF document path
- */
- public void parse (String filename) throws IOException {
- PDDocument document = null;
- try {
- document = PDDocument.load(filename, false);
- List allPages = document.getDocumentCatalog().getAllPages();
- for( int i=0; i
* Note: \00A0: non break space
*/
private static final List TERMINALS = Arrays.asList(" ", "\n", "\t", "(", ")", "\u00A0");
/**
* Processes text events.
- *
+ *
* Stores characters in a buffer until a terminal symbol is found
* (e.g. space), then treats the characters stored as a single word.
*
* @param text The text to be processed
*/
@Override
- protected void processTextPosition( TextPosition text ) {
- String chars = text.getCharacter();
+ protected void processTextPosition(TextPosition text) {
+// super.processTextPosition(text);
+
+ String chars = text.toString();
// Some line breaks do not enter here, I ignore why
if (TERMINALS.contains(chars)) {
String word = charsBuffer.toString();
if (words.contains(word)) {
- addColor(charsBuffer.toString(), previousColor);
+ registerColor(charsBuffer.toString(), previousColor);
}
charsBuffer = new StringBuffer();
} else {
charsBuffer.append(chars);
previousText = text;
- try {
- previousColor = getGraphicsState().getNonStrokingColor().getJavaColor();
- } catch (IOException e) {
- e.printStackTrace();
- }
+ previousColor = getGraphicsState().getNonStrokingColor();
}
-
}
- /**
- * Adds a color mapping to the colors attribute
- *
- * @param word Word to add
- * @param color Color of the word
- */
- private void addColor(String word, Color color) {
+ private void registerColor(String word, PDColor color) {
List values = colors.get(word);
if (values == null) {
- List aux = new ArrayList();
- aux.add(color);
+ List aux = new ArrayList<>();
+ aux.add(toRGB(color));
colors.put(word, aux);
} else {
- values.add(color);
+ values.add(toRGB(color));
}
}
- /**
- * Returns the words and their colors after parsing a file
- *
- * @return List of found images
- */
+    /**
+     * Converts a PDFBox colour into an AWT {@link Color}.
+     *
+     * Each component is treated as a fraction in the range 0..1 and scaled to
+     * 0..255, rounding to the nearest integer. The result is clamped so that a
+     * full-intensity component (1.0) can never exceed 255, which {@link Color}
+     * would reject with an {@link IllegalArgumentException}. A colour with
+     * fewer than three components (e.g. gray) is expanded to three equal
+     * channels.
+     *
+     * Rough conversion, but enough for our tests: no colour-space conversion
+     * is performed, so for colours with more than three components only the
+     * first three are used.
+     *
+     * @param pdColor colour taken from the PDF graphics state
+     * @return the corresponding RGB colour
+     */
+    private Color toRGB(PDColor pdColor) {
+        float[] components = pdColor.getComponents();
+        float red = components[0];
+        // Gray colours carry a single component; reuse it for every channel.
+        float green = components.length >= 3 ? components[1] : red;
+        float blue = components.length >= 3 ? components[2] : red;
+        return new Color(toChannel(red), toChannel(green), toChannel(blue));
+    }
+
+    /**
+     * Scales a 0..1 colour fraction to an 8-bit channel value, clamped to 0..255.
+     */
+    private static int toChannel(float fraction) {
+        return Math.max(0, Math.min(255, Math.round(255 * fraction)));
+    }
+
public Map> getColors() {
return colors;
}
-
}
diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java
index e8a5d55..d7fdcf7 100644
--- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java
+++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java
@@ -5,76 +5,42 @@
*
* @author abelsromero
*/
-public class Image {
+final class Image {
// Page where the image is localed
- private int page;
-
+ private final int page;
// Position inside the page
- private float xPosition;
- private float yPosition;
-
+ private final float xPosition;
+ private final float yPosition;
// size in pixels
- private int originalWidth;
- private int originalHeight;
+ private final int originalWidth;
+ private final int originalHeight;
- // size in pixels
- private int renderedWidth;
- private int renderedHeight;
+    /**
+     * Creates the description of one image found in a PDF document.
+     *
+     * @param page           page where the image is located
+     * @param xPosition      x coordinate of the position inside the page
+     * @param yPosition      y coordinate of the position inside the page
+     * @param originalWidth  image width in pixels
+     * @param originalHeight image height in pixels
+     */
+    Image(int page, float xPosition, float yPosition, int originalWidth, int originalHeight) {
+        // Size first, then placement; the assignments are independent of each other.
+        this.originalWidth = originalWidth;
+        this.originalHeight = originalHeight;
+        this.xPosition = xPosition;
+        this.yPosition = yPosition;
+        this.page = page;
+    }
public int getPage() {
return page;
}
- public void setPage(int page) {
- this.page = page;
- }
-
- public float getXPosition() {
+ public float getxPosition() {
return xPosition;
}
- public void setXPosition(float xPosition) {
- this.xPosition = xPosition;
- }
-
- public float getYPosition() {
+ public float getyPosition() {
return yPosition;
}
- public void setYPosition(float yPosition) {
- this.yPosition = yPosition;
- }
-
public int getOriginalWidth() {
return originalWidth;
}
- public void setOriginalWidth(int originalWidth) {
- this.originalWidth = originalWidth;
- }
-
public int getOriginalHeight() {
return originalHeight;
}
-
- public void setOriginalHeight(int originalHeight) {
- this.originalHeight = originalHeight;
- }
-
- public int getRenderedWidth() {
- return renderedWidth;
- }
-
- public void setRenderedWidth(int renderedWidth) {
- this.renderedWidth = renderedWidth;
- }
-
- public int getRenderedHeight() {
- return renderedHeight;
- }
-
- public void setRenderedHeight(int renderedHeight) {
- this.renderedHeight = renderedHeight;
- }
}
diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java
index 42a052a..9ac8d61 100644
--- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java
+++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java
@@ -1,150 +1,72 @@
package org.asciidoctor.util.pdf;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.contentstream.PDFStreamEngine;
+import org.apache.pdfbox.contentstream.operator.DrawObject;
+import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
+import org.apache.pdfbox.contentstream.operator.state.*;
+import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.graphics.PDGraphicsState;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
+import org.apache.pdfbox.pdmodel.PDPageTree;
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;
-import org.apache.pdfbox.util.PDFOperator;
-import org.apache.pdfbox.util.PDFStreamEngine;
-import org.apache.pdfbox.util.ResourceLoader;
+import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
/**
* Processes a PDF document to extract the metadata of the contained images.
- *
- * Based on https://svn.apache.org/viewvc/pdfbox/tags/1.8.9/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java
- *
- * @author abelsromero
+ *
+ * Based on https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java?revision=1904918&view=markup
*/
public class ImageProcessor extends PDFStreamEngine {
-
- private static final String INVOKE_OPERATOR = "Do";
private int currentPage = 0;
- private List images = new ArrayList<>();
+ private final List images = new ArrayList<>();
- /**
- * Default constructor
- *
- * @throws IOException If there is an error loading text stripper properties.
- */
- public ImageProcessor() throws IOException {
- super(ResourceLoader.loadProperties(
- "org/apache/pdfbox/resources/PDFTextStripper.properties", true));
+ /**
+ * Creates a processor with the content-stream operators it handles
+ * registered: matrix concatenation, object drawing, graphics-state
+ * parameters, graphics-state save/restore and matrix setting.
+ * NOTE(review): presumably these keep the current transformation matrix
+ * read in processOperator up to date; confirm against the PDFBox
+ * PrintImageLocations example this class is based on.
+ */
+ public ImageProcessor() {
+ addOperator(new Concatenate(this));
+ addOperator(new DrawObject(this));
+ addOperator(new SetGraphicsStateParameters(this));
+ addOperator(new Save(this));
+ addOperator(new Restore(this));
+ addOperator(new SetMatrix(this));
}
- /**
- * Parses a document extracting the images
- *
- * @param filename PDF document path
- */
public void parse(String filename) throws IOException {
- PDDocument document = null;
- try {
- document = PDDocument.load(filename, false);
- List allPages = document.getDocumentCatalog().getAllPages();
- for( int i=0; i xobjects = getResources().getXObjects();
- PDXObject xobject = xobjects.get( objectName.getName() );
-
- if (xobject instanceof PDXObjectImage) {
- PDXObjectImage image = (PDXObjectImage)xobject;
- int imageWidth = image.getWidth();
- int imageHeight = image.getHeight();
- PDPage page = getCurrentPage();
- double pageHeight = page.getMediaBox().getHeight();
+ @Override
+ protected void processOperator(Operator operator, List operands) throws IOException {
- Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
- float yScaling = ctmNew.getYScale();
- float angle = (float)Math.acos(ctmNew.getValue(0, 0)/ctmNew.getXScale());
- if (ctmNew.getValue(0, 1) < 0 && ctmNew.getValue(1, 0) > 0) {
- angle = (-1)*angle;
- }
- ctmNew.setValue(2, 1, (float)(pageHeight - ctmNew.getYPosition() - Math.cos(angle)*yScaling));
- ctmNew.setValue(2, 0, (float)(ctmNew.getXPosition() - Math.sin(angle)*yScaling));
- // because of the moved 0,0-reference, we have to shear in the opposite direction
- ctmNew.setValue(0, 1, (-1)*ctmNew.getValue(0, 1));
- ctmNew.setValue(1, 0, (-1)*ctmNew.getValue(1, 0));
+ if (OperatorName.DRAW_OBJECT.equals(operator.getName())) {
+ COSName objectName = (COSName) operands.get(0);
+ PDXObject xobject = getResources().getXObject(objectName);
- Image im = new Image();
- im.setPage(currentPage);
- im.setXPosition(ctmNew.getXPosition());
- im.setYPosition(ctmNew.getYPosition());
- im.setOriginalWidth(imageWidth);
- im.setOriginalHeight(imageHeight);
- im.setRenderedWidth(Math.round(ctmNew.getXScale()));
- im.setRenderedHeight(Math.round(ctmNew.getYScale()));
+ if (xobject instanceof PDImageXObject) {
+ final PDImageXObject pdImage = (PDImageXObject) xobject;
+ final Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
- images.add(im);
- } else if (xobject instanceof PDXObjectForm) {
- // save the graphics state
- getGraphicsStack().push( (PDGraphicsState)getGraphicsState().clone() );
- PDPage page = getCurrentPage();
-
- PDXObjectForm form = (PDXObjectForm)xobject;
- COSStream invoke = (COSStream)form.getCOSObject();
- PDResources pdResources = form.getResources();
- if(pdResources == null)
- {
- pdResources = page.findResources();
- }
- // if there is an optional form matrix, we have to
- // map the form space to the user space
- Matrix matrix = form.getMatrix();
- if (matrix != null)
- {
- Matrix xobjectCTM = matrix.multiply( getGraphicsState().getCurrentTransformationMatrix());
- getGraphicsState().setCurrentTransformationMatrix(xobjectCTM);
- }
- processSubStream( page, pdResources, invoke );
-
- // restore the graphics state
- setGraphicsState( getGraphicsStack().pop() );
+ final Image image = new Image(currentPage, ctmNew.getTranslateX(), ctmNew.getTranslateY(), pdImage.getWidth(), pdImage.getHeight());
+ images.add(image);
}
} else {
- super.processOperator( operator, arguments );
+ super.processOperator(operator, operands);
}
}
- /**
- * Returns the list of found images after parsing a file.
- *
- * @return List of found images
- */
public List getImages() {
return images;
}
diff --git a/asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties b/asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties
deleted file mode 100644
index 664ce9a..0000000
--- a/asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties
+++ /dev/null
@@ -1,36 +0,0 @@
-b=org.apache.pdfbox.util.operator.pagedrawer.CloseFillNonZeroAndStrokePath
-b*=org.apache.pdfbox.util.operator.pagedrawer.CloseFillEvenOddAndStrokePath
-BT=org.apache.pdfbox.util.operator.BeginText
-cm=org.apache.pdfbox.util.operator.Concatenate
-CS=org.apache.pdfbox.util.operator.SetStrokingColorSpace
-cs=org.apache.pdfbox.util.operator.SetNonStrokingColorSpace
-ET=org.apache.pdfbox.util.operator.EndText
-G=org.apache.pdfbox.util.operator.SetStrokingGrayColor
-g=org.apache.pdfbox.util.operator.SetNonStrokingGrayColor
-gs=org.apache.pdfbox.util.operator.SetGraphicsStateParameters
-K=org.apache.pdfbox.util.operator.SetStrokingCMYKColor
-k=org.apache.pdfbox.util.operator.SetNonStrokingCMYKColor
-q=org.apache.pdfbox.util.operator.GSave
-Q=org.apache.pdfbox.util.operator.GRestore
-RG=org.apache.pdfbox.util.operator.SetStrokingRGBColor
-rg=org.apache.pdfbox.util.operator.SetNonStrokingRGBColor
-s=org.apache.pdfbox.util.operator.CloseAndStrokePath
-SC=org.apache.pdfbox.util.operator.SetStrokingColor
-sc=org.apache.pdfbox.util.operator.SetNonStrokingColor
-SCN=org.apache.pdfbox.util.operator.SetStrokingColor
-scn=org.apache.pdfbox.util.operator.SetNonStrokingColor
-T*=org.apache.pdfbox.util.operator.NextLine
-Tc=org.apache.pdfbox.util.operator.SetCharSpacing
-Td=org.apache.pdfbox.util.operator.MoveText
-TD=org.apache.pdfbox.util.operator.MoveTextSetLeading
-Tf=org.apache.pdfbox.util.operator.SetTextFont
-Tj=org.apache.pdfbox.util.operator.ShowText
-TJ=org.apache.pdfbox.util.operator.ShowTextGlyph
-TL=org.apache.pdfbox.util.operator.SetTextLeading
-Tm=org.apache.pdfbox.util.operator.SetMatrix
-Tr=org.apache.pdfbox.util.operator.SetTextRenderingMode
-Ts=org.apache.pdfbox.util.operator.SetTextRise
-Tw=org.apache.pdfbox.util.operator.SetWordSpacing
-Tz=org.apache.pdfbox.util.operator.SetHorizontalTextScaling
-\'=org.apache.pdfbox.util.operator.MoveAndShow
-\"=org.apache.pdfbox.util.operator.SetMoveAndShow
diff --git a/build.gradle b/build.gradle
index ee56857..1107f3c 100644
--- a/build.gradle
+++ b/build.gradle
@@ -34,7 +34,7 @@ ext {
arquillianVersion = '1.1.10.Final'
arquillianSpockVersion = '1.0.0.Beta3'
jrubyVersion = '9.4.0.0'
- pdfboxVersion = '1.8.17'
+ pdfboxVersion = '3.0.0'
junitVersion = '4.13.2'
hamcrestVersion = '2.2'