Skip to content

Commit

Permalink
backport from 4.00: training
Browse files Browse the repository at this point in the history
  • Loading branch information
zdenop committed Dec 4, 2016
1 parent 53152e4 commit 7099358
Show file tree
Hide file tree
Showing 15 changed files with 357 additions and 121 deletions.
12 changes: 9 additions & 3 deletions training/boxchar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ void BoxChar::PrepareToWrite(vector<BoxChar*>* boxes) {
if (rtl_rules) {
ReorderRTLText(boxes);
}
tprintf("Rtl = %d ,vertical=%d\n", rtl_rules, vertical_rules);
}

// Inserts newline (tab) characters into the vector at newline positions.
Expand Down Expand Up @@ -291,20 +290,27 @@ const int kMaxLineLength = 1024;
/* static */
void BoxChar::WriteTesseractBoxFile(const string& filename, int height,
const vector<BoxChar*>& boxes) {
string output = GetTesseractBoxStr(height, boxes);
File::WriteStringToFileOrDie(output, filename);
}

/* static */
// Builds the tesseract box-file text for boxes. Each box contributes one line:
//   ch_ x (height - y - h) (x + w) (height - y) page_
// where x, y, w, h come from the box_ member and height is the image height
// used to convert the y values to tesseract coordinates.
// Returns an empty string, after printing an error, as soon as a box with a
// NULL box_ is met (the message indicates PrepareToWrite must be called
// first).
string BoxChar::GetTesseractBoxStr(int height, const vector<BoxChar*>& boxes) {
  string output;
  char buffer[kMaxLineLength];
  for (size_t i = 0; i < boxes.size(); ++i) {
    const Box* box = boxes[i]->box_;
    if (box == NULL) {
      tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
      return "";
    }
    // Only the five integers go through the fixed-size buffer: they always
    // fit, whereas ch_ has no length bound and snprintf would report (not
    // write) the untruncated length, making the append below overrun buffer.
    const int nbytes =
        snprintf(buffer, kMaxLineLength, " %d %d %d %d %d\n", box->x,
                 height - box->y - box->h, box->x + box->w, height - box->y,
                 boxes[i]->page_);
    // c_str() keeps the "%s" semantics of stopping at the first NUL.
    output.append(boxes[i]->ch_.c_str());
    // Never trust a negative (error) or over-long (truncated) count.
    if (nbytes > 0 && nbytes < kMaxLineLength) {
      output.append(buffer, nbytes);
    }
  }
  return output;
}

} // namespace tesseract
3 changes: 3 additions & 0 deletions training/boxchar.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ class BoxChar {
// is needed to convert to tesseract coordinates.
static void WriteTesseractBoxFile(const string& name, int height,
const vector<BoxChar*>& boxes);
// Gets the tesseract box file as a string from the vector of boxes.
// The image height is needed to convert to tesseract coordinates.
static string GetTesseractBoxStr(int height, const vector<BoxChar*>& boxes);

private:
string ch_;
Expand Down
42 changes: 23 additions & 19 deletions training/cntraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ int main (
Private Function Prototypes
----------------------------------------------------------------------------*/

void WriteNormProtos (const char *Directory, LIST LabeledProtoList,
CLUSTERER *Clusterer);
void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
const FEATURE_DESC_STRUCT *feature_desc);

/*
PARAMDESC *ConvertToPARAMDESC(
Expand Down Expand Up @@ -160,13 +160,18 @@ int main(int argc, char *argv[]) {
// reduce the min samples:
// Config.MinSamples = 0.5 / num_fonts;
pCharList = CharList;
// The norm protos will count the source protos, so we keep them here in
// freeable_protos, so they can be freed later.
GenericVector<LIST> freeable_protos;
iterate(pCharList) {
//Cluster
if (Clusterer)
FreeClusterer(Clusterer);
CharSample = (LABELEDLIST)first_node(pCharList);
Clusterer =
SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE);
if (Clusterer == NULL) { // To avoid a SIGSEGV
fprintf(stderr, "Error: NULL clusterer!\n");
return 1;
}
float SavedMinSamples = Config.MinSamples;
// To disable the tendency to produce a single cluster for all fonts,
// make MagicSamples an impossible to achieve number:
Expand All @@ -185,21 +190,21 @@ int main(int argc, char *argv[]) {
}
Config.MinSamples = SavedMinSamples;
AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
freeable_protos.push_back(ProtoList);
FreeClusterer(Clusterer);
}
FreeTrainingSamples(CharList);
if (Clusterer == NULL) { // To avoid a SIGSEGV
fprintf(stderr, "Error: NULL clusterer!\n");
return 1;
}
WriteNormProtos(FLAGS_D.c_str(), NormProtoList, Clusterer);
int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE);
WriteNormProtos(FLAGS_D.c_str(), NormProtoList,
FeatureDefs.FeatureDesc[desc_index]);
FreeNormProtoList(NormProtoList);
FreeProtoList(&ProtoList);
FreeClusterer(Clusterer);
for (int i = 0; i < freeable_protos.size(); ++i) {
FreeProtoList(&freeable_protos[i]);
}
printf ("\n");
return 0;
} // main


/*----------------------------------------------------------------------------
Private Code
----------------------------------------------------------------------------*/
Expand All @@ -211,14 +216,13 @@ int main(int argc, char *argv[]) {
* of the samples.
* @param Directory directory to place sample files into
* @param LabeledProtoList List of labeled protos
* @param Clusterer The CLUSTERER to use
* @param feature_desc Description of the features
* @return none
* @note Exceptions: none
* @note History: Fri Aug 18 16:17:06 1989, DSJ, Created.
*/
void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
CLUSTERER *Clusterer)
{
void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
const FEATURE_DESC_STRUCT *feature_desc) {
FILE *File;
STRING Filename;
LABELEDLIST LabeledProto;
Expand All @@ -233,8 +237,8 @@ void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
Filename += "normproto";
printf ("\nWriting %s ...", Filename.string());
File = Efopen (Filename.string(), "wb");
fprintf(File, "%0d\n", Clusterer->SampleSize);
WriteParamDesc(File, Clusterer->SampleSize,Clusterer->ParamDesc);
fprintf(File, "%0d\n", feature_desc->NumParams);
WriteParamDesc(File, feature_desc->NumParams, feature_desc->ParamDesc);
iterate(LabeledProtoList)
{
LabeledProto = (LABELEDLIST) first_node (LabeledProtoList);
Expand All @@ -249,7 +253,7 @@ void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
exit(1);
}
fprintf(File, "\n%s %d\n", LabeledProto->Label, N);
WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, true, false);
WriteProtos(File, feature_desc->NumParams, LabeledProto->List, true, false);
}
fclose (File);

Expand Down
10 changes: 6 additions & 4 deletions training/commontraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@

using tesseract::CCUtil;
using tesseract::IntFeatureSpace;
using tesseract::FontInfo;
using tesseract::ParamUtils;
using tesseract::ShapeTable;

Expand Down Expand Up @@ -453,6 +452,7 @@ void FreeTrainingSamples(LIST CharList) {
FEATURE_SET FeatureSet;
LIST FeatureList;

LIST nodes = CharList;
iterate(CharList) { /* iterate through all of the fonts */
char_sample = (LABELEDLIST) first_node(CharList);
FeatureList = char_sample->List;
Expand All @@ -462,7 +462,7 @@ void FreeTrainingSamples(LIST CharList) {
}
FreeLabeledList(char_sample);
}
destroy(CharList);
destroy(nodes);
} /* FreeTrainingSamples */

/*---------------------------------------------------------------------------*/
Expand Down Expand Up @@ -728,14 +728,15 @@ MERGE_CLASS NewLabeledClass(const char* Label) {
// Frees everything owned by ClassList: for each MERGE_CLASS entry its Label
// (free), its Class (FreeClass) and the entry itself (delete), followed by
// the list cells.
void FreeLabeledClassList(LIST ClassList) {
  MERGE_CLASS MergeClass;

  // Remember the head of the list: iterate() presumably advances ClassList
  // itself, so after the loop it no longer refers to the first cell and
  // cannot be used to destroy the cells.
  LIST nodes = ClassList;
  iterate(ClassList) /* iterate through all of the fonts */
  {
    MergeClass = (MERGE_CLASS) first_node (ClassList);
    free (MergeClass->Label);
    FreeClass(MergeClass->Class);
    delete MergeClass;
  }
  destroy(nodes);

} /* FreeLabeledClassList */

Expand Down Expand Up @@ -825,12 +826,13 @@ void FreeNormProtoList(LIST CharList)
{
LABELEDLIST char_sample;

LIST nodes = CharList;
iterate(CharList) /* iterate through all of the fonts */
{
char_sample = (LABELEDLIST) first_node (CharList);
FreeLabeledList (char_sample);
}
destroy(CharList);
destroy(nodes);

} // FreeNormProtoList

Expand Down
163 changes: 163 additions & 0 deletions training/degradeimage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,36 @@

#include <stdlib.h>
#include "allheaders.h" // from leptonica
#include "genericvector.h"
#include "helpers.h" // For TRand.
#include "rect.h"

namespace tesseract {

// A randomized perspective distortion can be applied to synthetic input.
// The perspective distortion comes from leptonica, which uses 2 sets of 4
// corners to determine the distortion. There are random values for each of
// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
// defined in terms of a single shear value. This reduces the degrees of
// freedom enough to make the distortion more realistic than it would otherwise
// be if all 8 coordinates could move independently.
// One additional factor is used for the color of the pixels that don't exist
// in the source image.
// Name for each of the randomizing factors.
enum FactorNames {
  FN_INCOLOR = 0,  // Color of the pixels that don't exist in the source image.
  FN_Y0 = 1,       // Random y factors y0..y3.
  FN_Y1 = 2,
  FN_Y2 = 3,
  FN_Y3 = 4,
  FN_X0 = 5,       // Random x factors x0 and x1.
  FN_X1 = 6,
  FN_SHEAR = 7,    // Single shear value that defines the other two x values:
                   //   x2 = x1 - shear
                   //   x3 = x0 + shear
  FN_NUM_FACTORS = 8  // Number of factors above; usable as an array size.
};

// Rotation is +/- kRotationRange radians.
const float kRotationRange = 0.02f;
// Number of grey levels to shift by for each exposure step.
Expand Down Expand Up @@ -144,4 +170,141 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
return input;
}

// Creates and returns a Pix distorted by various means according to the bool
// flags. Each enabled distortion except perspective is additionally gated by
// a draw from randomizer, so it is only applied some of the time.
// If boxes is not NULL, the boxes are resized/positioned according to
// any spatial distortion and also by the integer reduction factor
// box_reduction so they will match what the network will output.
// Returns NULL on error, in which case boxes is left untouched. The returned
// Pix must be pixDestroyed.
Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
                         bool white_noise, bool smooth_noise, bool blur,
                         int box_reduction, TRand* randomizer,
                         GenericVector<TBOX>* boxes) {
  // Work on a copy so the caller's pix is never modified.
  Pix* distorted = pixCopy(NULL, const_cast<Pix*>(pix));
  if (distorted == NULL) return NULL;
  // Things to do to synthetic training data.
  if (invert && randomizer->SignedRand(1.0) < 0)
    pixInvert(distorted, distorted);
  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
    // TODO(rays) Cook noise in a more thread-safe manner than rand().
    // Attempt to make the sequences reproducible.
    srand(randomizer->IntRand());
    Pix* pixn = pixAddGaussianNoise(distorted, 8.0);
    pixDestroy(&distorted);
    // Bail out instead of carrying a NULL image through the later steps.
    if (pixn == NULL) return NULL;
    if (smooth_noise) {
      distorted = pixBlockconv(pixn, 1, 1);
      pixDestroy(&pixn);
      if (distorted == NULL) return NULL;
    } else {
      distorted = pixn;
    }
  }
  if (blur && randomizer->SignedRand(1.0) > 0.0) {
    Pix* blurred = pixBlockconv(distorted, 1, 1);
    pixDestroy(&distorted);
    if (blurred == NULL) return NULL;
    distorted = blurred;
  }
  // Width and height are passed as 0 because they are taken from the pix.
  if (perspective)
    GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
  if (boxes != NULL) {
    for (int b = 0; b < boxes->size(); ++b) {
      (*boxes)[b].scale(1.0f / box_reduction);
      // Scaling down can collapse a box; force a non-positive width to 1.
      if ((*boxes)[b].width() <= 0)
        (*boxes)[b].set_right((*boxes)[b].left() + 1);
    }
  }
  return distorted;
}

// Distorts anything that has a non-null pointer with the same pseudo-random
// perspective distortion. Width and height only need to be set if there
// is no pix. If there is a pix, then they will be taken from there.
// On success *pix is replaced by the transformed image (the old one is
// destroyed) and each box is replaced by the bounding box of its transformed
// corners. If the image transformation fails, an error is printed and both
// *pix and boxes are left unchanged.
void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
                                   Pix** pix, GenericVector<TBOX>* boxes) {
  const bool have_pix = pix != NULL && *pix != NULL;
  if (have_pix) {
    width = pixGetWidth(*pix);
    height = pixGetHeight(*pix);
  }
  // Both coefficient arrays are allocated by ProjectiveCoeffs and owned here.
  float* im_coeffs = NULL;
  float* box_coeffs = NULL;
  l_int32 incolor =
      ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
  if (have_pix) {
    // Transform the image.
    Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
    if (transformed == NULL) {
      tprintf("Projective transformation failed!!\n");
      // Release the coefficients on the error path as well.
      free(im_coeffs);
      free(box_coeffs);
      return;
    }
    pixDestroy(pix);
    *pix = transformed;
  }
  // Without coefficients the point transform cannot fill in its outputs, so
  // skip the boxes rather than build them from uninitialized coordinates.
  if (boxes != NULL && box_coeffs != NULL) {
    // Transform the boxes.
    for (int b = 0; b < boxes->size(); ++b) {
      int x1, y1, x2, y2;
      const TBOX& box = (*boxes)[b];
      // Box y values are flipped with (height - y) on the way in and out of
      // the point transform.
      // First diagonal: (left, top) and (right, bottom).
      projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
                               &y1);
      projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
                               &x2, &y2);
      TBOX new_box1(x1, height - y2, x2, height - y1);
      // Second diagonal: (left, bottom) and (right, top).
      projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
                               &x1, &y1);
      projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
                               &y2);
      TBOX new_box2(x1, height - y1, x2, height - y2);
      // The union of both diagonals covers all four transformed corners.
      (*boxes)[b] = new_box1.bounding_union(new_box2);
    }
  }
  free(im_coeffs);
  free(box_coeffs);
}

// Computes the coefficients of a randomized projective transformation.
// The image transform requires the backward transformation coefficients, and
// the box transform the forward coefficients.
// *im_coeffs and *box_coeffs are filled in by getProjectiveXformCoeffs; the
// caller in this file releases them with free().
// Returns the incolor arg to pixProjective.
int ProjectiveCoeffs(int width, int height, TRand* randomizer,
                     float** im_coeffs, float** box_coeffs) {
  // Setup "from" points: the four corners of the width x height rectangle.
  Pta* src_pts = ptaCreate(4);
  ptaAddPt(src_pts, 0.0f, 0.0f);
  ptaAddPt(src_pts, width, 0.0f);
  ptaAddPt(src_pts, width, height);
  ptaAddPt(src_pts, 0.0f, height);
  // Extract factors from pseudo-random sequence.
  float factors[FN_NUM_FACTORS];
  float shear = 0.0f;  // Shear is signed.
  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
    // Everything is squared to make wild values rarer.
    if (i == FN_SHEAR) {
      // Shear is signed.
      shear = randomizer->SignedRand(0.5 / 3.0);
      shear = shear >= 0.0 ? shear * shear : -shear * shear;
      // Keep the sheared points within the original rectangle.
      // FN_X0 and FN_X1 precede FN_SHEAR, so both are already set here.
      if (shear < -factors[FN_X0]) shear = -factors[FN_X0];
      if (shear > factors[FN_X1]) shear = factors[FN_X1];
      factors[i] = shear;
    } else if (i != FN_INCOLOR) {
      factors[i] = fabs(randomizer->SignedRand(1.0));
      if (i <= FN_Y3)
        factors[i] *= 5.0 / 8.0;
      else
        factors[i] *= 0.5;
      factors[i] *= factors[i];
    }
  }
  // The loop above skips FN_INCOLOR, which used to leave it uninitialized
  // although it decides the return value. It is drawn here, after the loop,
  // so the geometric factors still consume the same random values as before.
  // Not squared: presumably uniform in [0, 1], giving an even white/black
  // choice below - TODO confirm against TRand::SignedRand.
  factors[FN_INCOLOR] = fabs(randomizer->SignedRand(1.0));
  // Setup "to" points.
  Pta* dest_pts = ptaCreate(4);
  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
           (1 - factors[FN_Y2]) * height);
  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
           (1 - factors[FN_Y3]) * height);
  // dest -> src for the image, src -> dest for the boxes.
  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
  ptaDestroy(&src_pts);
  ptaDestroy(&dest_pts);
  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
}

} // namespace tesseract
Loading

0 comments on commit 7099358

Please sign in to comment.