Skip to content

Commit

Permalink
Fix for issue #564, with latest versions of javacpp, leptonica, tesse…
Browse files Browse the repository at this point in the history
…ract

	- javacpp set to 1.5.8
	- leptonica set to 1.82.0
	- tesseract set to 5.2.0
	- TesseractOCR and TesseractOrder classes updated to new API
  • Loading branch information
hbitteur committed May 5, 2023
1 parent f29f417 commit d5b1e05
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 62 deletions.
6 changes: 3 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ ext.programVersion = "$project.version"
ext.companyName = "$programName Ltd."
ext.companyId = "${programName}Ltd"

ext.jcppVersion = '1.5.6'
ext.leptVersion = '1.81.1'
ext.tessVersion = '4.1.1'
ext.jcppVersion = '1.5.8'
ext.leptVersion = '1.82.0'
ext.tessVersion = '5.2.0'

// this code is required in order to adapt values of os.name and os.arch to the
// conventions used by Javacpp's dependencies
Expand Down
40 changes: 30 additions & 10 deletions src/main/org/audiveris/omr/text/tesseract/TesseractOCR.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import org.audiveris.omr.text.OCR;
import org.audiveris.omr.text.TextLine;

import org.bytedeco.tesseract.StringGenericVector;
import org.bytedeco.tesseract.StringVector;
import org.bytedeco.tesseract.TessBaseAPI;
import org.bytedeco.tesseract.global.tesseract;

Expand All @@ -52,7 +52,7 @@
/**
* Class <code>TesseractOCR</code> is an OCR service built on Google Tesseract engine.
* <p>
* It relies on <b>tesseract3</b> C++ program, accessed through a <b>JavaCPP</b>-based bridge.
* It relies on <b>tesseract-ocr</b> C++ program, accessed through a <b>JavaCPP</b>-based bridge.
*
* @author Hervé Bitteur
*/
Expand All @@ -68,9 +68,16 @@ public class TesseractOCR
/** Latin encoder, to check character validity. (not used yet) */
private static final CharsetEncoder encoder = Charset.forName("iso-8859-1").newEncoder();

/** Specific name of folder where Tesseract language files are located. */
private static final String TESSDATA = "tessdata";

/** System environment variable pointing to TESSDATA location. */
private static final String TESSDATA_PREFIX = "TESSDATA_PREFIX";

/** Warning message when OCR folder cannot be found. */
private static final String ocrNotFoundMsg = "Tesseract data could not be found. "
+ "Try setting the TESSDATA_PREFIX environment variable to the parent folder of \"tessdata\".";
+ "Try setting " + TESSDATA_PREFIX + " environment variable to point to " + TESSDATA
+ " folder.";

//~ Instance fields ----------------------------------------------------------------------------

Expand All @@ -97,11 +104,15 @@ private TesseractOCR ()
//---------------//
// findOcrFolder //
//---------------//
/**
* Look for Tesseract TESSDATA folder, according to environment.
*
* @return TESSDATA folder found, perhaps null
*/
private Path findOcrFolder ()
{
// First, try to use TESSDATA_PREFIX environment variable
// which might denote a Tesseract installation
final String TESSDATA_PREFIX = "TESSDATA_PREFIX";
final String tessPrefix = System.getenv(TESSDATA_PREFIX);

if (tessPrefix != null) {
Expand Down Expand Up @@ -147,6 +158,11 @@ private Path findOcrFolder ()
//--------------//
// getLanguages //
//--------------//
/**
* Report the set of languages currently available for OCR.
*
* @return the set of available languages, perhaps empty.
*/
@Override
public Set<String> getLanguages ()
{
Expand All @@ -158,11 +174,11 @@ public Set<String> getLanguages ()
final TessBaseAPI api = new TessBaseAPI();

if (api.Init(ocrFolder.toString(), "eng") == 0) {
final StringGenericVector languages = new StringGenericVector();
final StringVector languages = new StringVector();
api.GetAvailableLanguagesAsVector(languages);

while (!languages.empty()) {
set.add(languages.pop_back().string().getString());
set.add(languages.pop_back().getString());
}
} else {
logger.warn("Error in loading Tesseract languages");
Expand Down Expand Up @@ -196,7 +212,7 @@ public double getMinConfidence ()
/**
* Map the OCR layout mode to Tesseract segmentation mode.
*
* @param layoutMode the desired OCR layout mode
* @param layoutMode the desired OCR layout mode (MULTI_BLOCK or SINGLE_BLOCK)
* @return the corresponding Tesseract segmentation mode
*/
private int getMode (LayoutMode layoutMode)
Expand Down Expand Up @@ -303,10 +319,16 @@ public List<TextLine> recognize (Sheet sheet,
//------------------//
// scanOcrLocations //
//------------------//
/**
* Scan the provided sequence of locations for a TESSDATA folder.
*
* @param locations the locations to scan
* @return the first suitable location or null
*/
private Path scanOcrLocations (String[] locations)
{
for (String loc : locations) {
final Path path = Paths.get(loc).resolve("tessdata");
final Path path = Paths.get(loc).resolve(TESSDATA);

if (Files.exists(path)) {
return path;
Expand Down Expand Up @@ -341,7 +363,6 @@ public static TesseractOCR getInstance ()
private static class Constants
extends ConstantSet
{

private final Constant.Boolean useOCR = new Constant.Boolean(
true,
"Should we use the OCR feature?");
Expand All @@ -366,7 +387,6 @@ private static class Constants
//---------------//
/**
 * Holder of the shared <code>TesseractOCR</code> instance.
 * <p>
 * The instance is created by the static initializer of this nested class, hence only when
 * this class itself gets initialized by the JVM.
 */
private static class LazySingleton
{

static final TesseractOCR INSTANCE = new TesseractOCR();
}
}
122 changes: 73 additions & 49 deletions src/main/org/audiveris/omr/text/tesseract/TesseractOrder.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,18 @@
import org.audiveris.omr.text.TextLine;
import org.audiveris.omr.text.TextWord;

import org.bytedeco.javacpp.*;
import org.bytedeco.javacpp.BoolPointer;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.IntPointer;
import org.bytedeco.leptonica.PIX;
import static org.bytedeco.leptonica.global.lept.*;
import org.bytedeco.tesseract.PageIterator;
import static org.bytedeco.leptonica.global.leptonica.pixDestroy;
import static org.bytedeco.leptonica.global.leptonica.pixReadMemTiff;
import org.bytedeco.tesseract.ResultIterator;
import org.bytedeco.tesseract.TessBaseAPI;
import static org.bytedeco.tesseract.global.tesseract.*;
import static org.bytedeco.tesseract.global.tesseract.OEM_TESSERACT_ONLY;
import static org.bytedeco.tesseract.global.tesseract.RIL_SYMBOL;
import static org.bytedeco.tesseract.global.tesseract.RIL_TEXTLINE;
import static org.bytedeco.tesseract.global.tesseract.RIL_WORD;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -150,7 +155,7 @@ public TesseractOrder (Sheet sheet,
this.segMode = segMode;

// Build a PIX from the image provided
ByteBuffer buf = toTiffBuffer(bufferedImage);
final ByteBuffer buf = toTiffBuffer(bufferedImage);
buf.position(0);
image = pixReadMemTiff(buf, buf.capacity(), 0);

Expand Down Expand Up @@ -184,38 +189,58 @@ private List<TextLine> finish (List<TextLine> lines)
return lines;
}

//-------------//
// getBaseline //
//-------------//
/**
* Report the baseline of the provided OCR'd item.
*
* @param rit iterator on results structure
* @param level desired level (word)
* @return item baseline as a Line2D or null
*/
private Line2D getBaseline (ResultIterator rit,
int level)
{
IntPointer x1 = new IntPointer(0);
IntPointer y1 = new IntPointer(0);
IntPointer x2 = new IntPointer(0);
IntPointer y2 = new IntPointer(0);
final IntPointer x1 = new IntPointer(0);
final IntPointer y1 = new IntPointer(0);
final IntPointer x2 = new IntPointer(0);
final IntPointer y2 = new IntPointer(0);

if (rit.Baseline(level, x1, y1, x2, y2)) {
return new Line2D.Double(x1.get(), y1.get(), x2.get(), y2.get());
} else {
return null;
}

return null;
}

private Rectangle getBoundingBox (PageIterator it,
//----------------//
// getBoundingBox //
//----------------//
/**
* Report the bounding box of the provided OCR'd item.
*
* @param it iterator on results structure
* @param level desired level (word or char/symbol)
* @return item bounding box as a Rectangle or null
*/
private Rectangle getBoundingBox (ResultIterator it,
int level)
{
IntPointer left = new IntPointer(0);
IntPointer top = new IntPointer(0);
IntPointer right = new IntPointer(0);
IntPointer bottom = new IntPointer(0);
final IntPointer left = new IntPointer(0);
final IntPointer top = new IntPointer(0);
final IntPointer right = new IntPointer(0);
final IntPointer bottom = new IntPointer(0);

if (it.BoundingBox(level, left, top, right, bottom)) {
return new Rectangle(
left.get(),
top.get(),
right.get() - left.get(),
bottom.get() - top.get());
} else {
return null;
}

return null;
}

//---------//
Expand All @@ -229,18 +254,16 @@ private Rectangle getBoundingBox (PageIterator it,
*/
private FontInfo getFont (ResultIterator rit)
{
BoolPointer is_bold = new BoolPointer(0);
BoolPointer is_italic = new BoolPointer(0);
BoolPointer is_underlined = new BoolPointer(0);
BoolPointer is_monospace = new BoolPointer(0);
BoolPointer is_serif = new BoolPointer(0);
BoolPointer is_smallcaps = new BoolPointer(0);
IntPointer pointSize = new IntPointer(0);
IntPointer font_id = new IntPointer(0);

String fontName = null;

BytePointer bp = rit.WordFontAttributes(
final BoolPointer is_bold = new BoolPointer(0);
final BoolPointer is_italic = new BoolPointer(0);
final BoolPointer is_underlined = new BoolPointer(0);
final BoolPointer is_monospace = new BoolPointer(0);
final BoolPointer is_serif = new BoolPointer(0);
final BoolPointer is_smallcaps = new BoolPointer(0);
final IntPointer pointSize = new IntPointer(0);
final IntPointer font_id = new IntPointer(0);

final BytePointer bp = rit.WordFontAttributes(
is_bold,
is_italic,
is_underlined,
Expand All @@ -250,24 +273,25 @@ private FontInfo getFont (ResultIterator rit)
pointSize,
font_id);

// don't try to decode fontName from null bytepointer!
if (bp != null) {
fontName = bp.getString();
if (bp == null) {
return null;
}

if (fontName != null) {
return new FontInfo(
is_bold.get(),
is_italic.get(),
is_underlined.get(),
is_monospace.get(),
is_serif.get(),
is_smallcaps.get(),
pointSize.get(),
fontName);
} else {
final String fontName = bp.getString();

if (fontName == null) {
return null;
}

return new FontInfo(
is_bold.get(),
is_italic.get(),
is_underlined.get(),
is_monospace.get(),
is_serif.get(),
is_smallcaps.get(),
pointSize.get(),
fontName);
}

//----------//
Expand Down Expand Up @@ -445,18 +469,18 @@ public List<TextLine> process ()
private ByteBuffer toTiffBuffer (BufferedImage image)
throws IOException
{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
final ByteArrayOutputStream baos = new ByteArrayOutputStream();

try (ImageOutputStream ios = ImageIO.createImageOutputStream(baos)) {
ImageWriter writer = ImageIO.getImageWritersByFormatName("tiff").next();
final ImageWriter writer = ImageIO.getImageWritersByFormatName("tiff").next();
writer.setOutput(ios);
writer.write(image);
} catch (IOException ex) {
logger.warn("Could not write image", ex);
}

ByteBuffer buf = ByteBuffer.allocate(baos.size());
byte[] bytes = baos.toByteArray();
final ByteBuffer buf = ByteBuffer.allocate(baos.size());
final byte[] bytes = baos.toByteArray();
buf.put(bytes);

// Should we keep a local copy of this buffer on disk?
Expand Down Expand Up @@ -498,7 +522,7 @@ private void wordAddChars (TextWord word,
if (len == 1) {
word.addChar(new TextChar(bounds, value)); // Normal case
} else {
double meanCharWidth = (double) bounds.width / len;
final double meanCharWidth = (double) bounds.width / len;

for (int i = 0; i < len; i++) {
Rectangle cb = new Rectangle2D.Double(
Expand Down

0 comments on commit d5b1e05

Please sign in to comment.