Skip to content

Commit

Permalink
Merge pull request #196 from Frooodle/compress
Browse files Browse the repository at this point in the history
Compress and cert sign
  • Loading branch information
Frooodle authored May 21, 2023
2 parents 8e4cdb7 + 7ac6e9d commit b44e036
Show file tree
Hide file tree
Showing 31 changed files with 945 additions and 216 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build jbig2enc in a separate stage
FROM frooodle/stirling-pdf-base:beta2
FROM frooodle/stirling-pdf-base:beta3

# Create scripts folder and copy local scripts
RUN mkdir /scripts
Expand Down
27 changes: 1 addition & 26 deletions DockerfileBase
Original file line number Diff line number Diff line change
@@ -1,27 +1,3 @@
# Build jbig2enc in a separate stage
FROM debian:bullseye-slim as jbig2enc_builder

RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
automake \
autoconf \
libtool \
libleptonica-dev \
pkg-config \
ca-certificates \
zlib1g-dev \
make \
g++

RUN git clone https://github.com/agl/jbig2enc && \
cd jbig2enc && \
./autogen.sh && \
./configure && \
make && \
make install


# Main stage
FROM openjdk:17-jdk-slim AS base
RUN apt-get update && \
Expand Down Expand Up @@ -58,5 +34,4 @@ RUN apt-get update && \

# Final stage: Copy necessary files from the previous stage
FROM base
COPY --from=python-packages /usr/local /usr/local
COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2
COPY --from=python-packages /usr/local /usr/local
7 changes: 4 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ plugins {
}

group = 'stirling.software'
version = '0.8.3'
version = '0.9.0'
sourceCompatibility = '17'

repositories {
Expand All @@ -24,8 +24,9 @@ dependencies {

//general PDF
implementation 'org.apache.pdfbox:pdfbox:2.0.28'


implementation 'org.bouncycastle:bcprov-jdk15on:1.70'
implementation 'org.bouncycastle:bcpkix-jdk15on:1.70'
implementation 'com.itextpdf:itext7-core:7.2.5'
implementation 'org.springframework.boot:spring-boot-starter-actuator'
implementation 'io.micrometer:micrometer-core'

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/stirling/software/SPDF/config/Beans.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,5 @@ public LocaleResolver localeResolver() {
slr.setDefaultLocale(defaultLocale);
return slr;
}

}
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package stirling.software.SPDF.config;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.slf4j.Logger;
@Service
public class EndpointConfiguration {
private static final Logger logger = LoggerFactory.getLogger(EndpointConfiguration.class);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
package stirling.software.SPDF.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.stereotype.Component;
import org.springframework.web.filter.OncePerRequestFilter;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.config.MeterFilter;
import io.micrometer.core.instrument.config.MeterFilterReply;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,11 @@
import java.io.IOException;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.stereotype.Component;
import org.springframework.web.filter.OncePerRequestFilter;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.config.MeterFilter;
import io.micrometer.core.instrument.config.MeterFilterReply;
import jakarta.servlet.FilterChain;
import jakarta.servlet.ServletException;
import jakarta.servlet.http.HttpServletRequest;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
package stirling.software.SPDF.controller.api.other;

import java.io.IOException;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity;
Expand All @@ -17,67 +30,190 @@

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Schema;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import io.swagger.v3.oas.annotations.media.Schema;

@RestController
public class CompressController {

private static final Logger logger = LoggerFactory.getLogger(CompressController.class);

@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation(
summary = "Optimize PDF file",
description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters."
)
@Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters.")
public ResponseEntity<byte[]> optimizePdf(
@RequestPart(required = true, value = "fileInput")
@Parameter(description = "The input PDF file to be optimized.", required = true)
MultipartFile inputFile,
@RequestParam("optimizeLevel")
@Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
schema = @Schema(allowableValues = {"0", "1", "2", "3"}), example = "1")
int optimizeLevel,
@RequestParam(name = "fastWebView", required = false)
@Parameter(description = "If true, optimize the PDF for fast web view. This increases the file size by about 25%.", example = "false")
Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false)
@Parameter(description = "If true, apply lossy JB2 compression to the PDF file.", example = "false")
Boolean jbig2Lossy)
throws IOException, InterruptedException {
@RequestPart(value = "fileInput") @Parameter(description = "The input PDF file to be optimized.", required = true) MultipartFile inputFile,
@RequestParam(required = false, value = "optimizeLevel") @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", schema = @Schema(allowableValues = {
"1", "2", "3", "4", "5" })) Integer optimizeLevel,
@RequestParam(value = "expectedOutputSize", required = false) @Parameter(description = "The expected output size, e.g. '100MB', '25KB', etc.", required = false) String expectedOutputSizeString)
throws Exception {

if(expectedOutputSizeString == null && optimizeLevel == null) {
throw new Exception("Both expected output size and optimize level are not specified");
}

Long expectedOutputSize = 0L;
boolean autoMode = false;
if (expectedOutputSizeString != null && expectedOutputSizeString.length() > 1 ) {
expectedOutputSize = PdfUtils.convertSizeToBytes(expectedOutputSizeString);
autoMode = true;
}

// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());

long inputFileSize = Files.size(tempInputFile);

// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");

// Prepare the OCRmyPDF command
List<String> command = new ArrayList<>();
command.add("ocrmypdf");
command.add("--skip-text");
command.add("--tesseract-timeout=0");
command.add("--optimize");
command.add(String.valueOf(optimizeLevel));
command.add("--output-type");
command.add("pdf");

if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
command.add("--fast-web-view");
command.add(String.valueOf(fastWebViewSize));
// Determine initial optimization level based on expected size reduction, only if in autoMode
if(autoMode) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
if (sizeReductionRatio > 0.7) {
optimizeLevel = 1;
} else if (sizeReductionRatio > 0.5) {
optimizeLevel = 2;
} else if (sizeReductionRatio > 0.35) {
optimizeLevel = 3;
} else {
optimizeLevel = 3;
}
}

if (jbig2Lossy != null && jbig2Lossy) {
command.add("--jbig2-lossy");
boolean sizeMet = false;
while (!sizeMet && optimizeLevel <= 4) {
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.4");

switch (optimizeLevel) {
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/ebook");
break;
case 4:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
}

command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());

int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);

// Finish immediately when not in auto mode, or once the output is within the expected size
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
// Increase optimization level for next iteration
optimizeLevel++;
if(autoMode && optimizeLevel > 3) {
System.out.println("Skipping level 4 due to bad results in auto mode");
sizeMet = true;
} else if(optimizeLevel == 5) {

} else {
System.out.println("Increasing ghostscript optimisation level to " + optimizeLevel);
}
}
}

command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());

int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);


if (expectedOutputSize != null && autoMode) {
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = PDDocument.load(new File(tempOutputFile.toString()))) {
long previousFileSize = 0;
double scaleFactor = 1.0;
while (true) {
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();

for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;

// Get the image in BufferedImage format
BufferedImage bufferedImage = image.getImage();

// Calculate the new dimensions
int newWidth = (int)(bufferedImage.getWidth() * scaleFactor);
int newHeight = (int)(bufferedImage.getHeight() * scaleFactor);

// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
continue;
}

// Otherwise, proceed with the scaling
Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);

// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);

// Compress the scaled image
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();

// Convert compressed image back to PDImageXObject
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString());

// Replace the image in the resources with the compressed version
res.put(name, compressedImage);
}
}
}

// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());

long currentSize = Files.size(tempOutputFile);
// Check if the overall PDF size is still larger than expectedOutputSize
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor

System.out.println("Current file size: " + FileUtils.byteCountToDisplaySize(currentSize));
System.out.println("Current scale factor: " + scaleFactor);

// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to 0
if(scaleFactor < 0.2 || previousFileSize == currentSize){
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality, lowest size recommended is " + FileUtils.byteCountToDisplaySize(currentSize) + ", " + currentSize + " bytes");
}
previousFileSize = currentSize;
} else {
// The file is small enough, break the loop
break;
}
}

}


}
}

// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
Expand Down
Loading

0 comments on commit b44e036

Please sign in to comment.