From a29fc3536304b97744a44fa9c8fb956fbf95359c Mon Sep 17 00:00:00 2001
From: Olly
Date: Wed, 1 Jan 2025 23:26:29 +0000
Subject: [PATCH] PixelOCR

---
 .../script/imports/simba.import_pixelocr.pas  | 123 ++++
 Source/script/simba.script_imports.pas        |   4 +-
 Source/simba.image.pas                        |  22 +-
 Source/simba.pixelocr.pas                     | 589 ++++++++++++++++++
 Tests/pixelocr.simba                          | 166 ++++++
 5 files changed, 900 insertions(+), 4 deletions(-)
 create mode 100644 Source/script/imports/simba.import_pixelocr.pas
 create mode 100644 Source/simba.pixelocr.pas
 create mode 100644 Tests/pixelocr.simba

diff --git a/Source/script/imports/simba.import_pixelocr.pas b/Source/script/imports/simba.import_pixelocr.pas
new file mode 100644
index 000000000..a8bd36b48
--- /dev/null
+++ b/Source/script/imports/simba.import_pixelocr.pas
@@ -0,0 +1,123 @@
+unit simba.import_pixelocr;
+
+{$i simba.inc}
+
+interface
+
+uses
+  Classes, SysUtils,
+  simba.base, simba.script;
+
+procedure ImportPixelOCR(Script: TSimbaScript);
+
+implementation
+
+uses
+  lptypes, lpvartypes,
+  simba.pixelocr,
+  simba.image;
+
+type
+  PPixelFont = ^TPixelFont;
+  PPixelOCR = ^TPixelOCR;
+
+// Lape wrappers. Params^[0] is the TPixelOCR the method is called on; the remaining entries are the script arguments in declaration order.
+procedure _LapePixelOCR_LoadFont(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV
+begin
+  PPixelFont(Result)^ := PPixelOCR(Params^[0])^.LoadFont(PString(Params^[1])^, PInteger(Params^[2])^);
+end;
+
+procedure _LapePixelOCR_TextToTPA(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV
+begin
+  PPointArray(Result)^ := PPixelOCR(Params^[0])^.TextToTPA(PPixelFont(Params^[1])^, PString(Params^[2])^);
+end;
+
+procedure _LapePixelOCR_Locate(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV
+begin
+  PSingle(Result)^ := PPixelOCR(Params^[0])^.Locate(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PString(Params^[3])^);
+end;
+
+procedure _LapePixelOCR_Recognize1(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV
+begin
+  PString(Result)^ := PPixelOCR(Params^[0])^.Recognize(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PPoint(Params^[3])^);
+end;
+
+procedure _LapePixelOCR_Recognize2(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV
+begin
+  PString(Result)^ := PPixelOCR(Params^[0])^.Recognize(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PBox(Params^[3])^);
+end;
+
+procedure _LapePixelOCR_RecognizeLines(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV
+begin
+  PStringArray(Result)^ := PPixelOCR(Params^[0])^.RecognizeLines(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PBox(Params^[3])^);
+end;
+
+// Registers the TPixelFont, TPixelOCRMatch and TPixelOCR records and the TPixelOCR methods with the script compiler.
+procedure ImportPixelOCR(Script: TSimbaScript);
+begin
+  with Script.Compiler do
+  begin
+    // The field order of these script-side records must mirror the native records in
+    // simba.pixelocr: the SizeOf checks below cannot detect same-sized fields being swapped.
+    addGlobalType([
+      'record',
+      ' Glyphs: array of record',
+      ' Value: Char;',
+      ' Width: Integer;',
+      ' Height: Integer;',
+      '',
+      ' points: TPointArray;',
+      ' shadow: TPointArray;',
+      ' background: TPointArray;',
+      ' backgrounddiv2: Integer;',
+      '',
+      ' InnerBounds: TBox;',
+      ' end;',
+      '',
+      ' SpaceWidth: Integer;',
+      ' MaxGlyphWidth: Integer;',
+      ' MaxGlyphHeight: Integer;',
+      'end;'],
+      'TPixelFont'
+    );
+
+    addGlobalType([
+      'record',
+      ' Text: String;',
+      ' Hits: Integer;',
+      ' Bounds: TBox;',
+      'end;'],
+      'TPixelOCRMatch'
+    );
+
+    addGlobalType([
+      'record',
+      ' Tolerance: Single;',
+      ' ShadowTolerance: Single;',
+      ' Whitelist: set of Char;',
+      ' MaxWalk: Integer;',
+      ' MaxLen: Integer;',
+      ' Matches: array of TPixelOCRMatch;',
+      'end'],
+      'TPixelOCR'
+    );
+
+    if (getGlobalType('TPixelFont').Size <> SizeOf(TPixelFont)) then
+      SimbaException('TPixelFont import is wrong');
+    if (getGlobalType('TPixelOCRMatch').Size <> SizeOf(TPixelOCRMatch)) then
+      SimbaException('TPixelOCRMatch import is wrong');
+    if (getGlobalType('TPixelOCR').Size <> SizeOf(TPixelOCR)) then
+      SimbaException('TPixelOCR import is wrong');
+
+    addGlobalFunc('function TPixelOCR.LoadFont(dir: String; SpaceWidth: Integer): TPixelFont;', @_LapePixelOCR_LoadFont);
+    addGlobalFunc('function TPixelOCR.TextToTPA(font: TPixelFont; Text: String): TPointArray;', @_LapePixelOCR_TextToTPA);
+    addGlobalFunc('function TPixelOCR.Locate(img: TImage; font: TPixelFont; text: String): Single;', @_LapePixelOCR_Locate);
+
+    addGlobalFunc('function TPixelOCR.Recognize(img: TImage; font: TPixelFont; p: TPoint): String; overload;', @_LapePixelOCR_Recognize1);
+    addGlobalFunc('function TPixelOCR.Recognize(img: TImage; font: TPixelFont; bounds: TBox): String; overload;', @_LapePixelOCR_Recognize2);
+    addGlobalFunc('function TPixelOCR.RecognizeLines(img: TImage; font: TPixelFont; bounds: TBox): TStringArray;', @_LapePixelOCR_RecognizeLines);
+  end;
+end;
+
+end.
+
diff --git a/Source/script/simba.script_imports.pas b/Source/script/simba.script_imports.pas
index ea860cf24..6eaa9c0b1 100644
--- a/Source/script/simba.script_imports.pas
+++ b/Source/script/simba.script_imports.pas
@@ -35,11 +35,10 @@ implementation
   simba.import_encoding, simba.import_file, simba.import_process, simba.import_target,
   simba.import_math, simba.import_misc, simba.import_slacktree, simba.import_kdtree, simba.import_string,
   simba.import_random, simba.import_debugimage, simba.import_web, simba.import_threading,
-  simba.import_async,
+  simba.import_async, simba.import_pixelocr,
 
   // Simba shapes
   simba.import_quad, simba.import_triangle, simba.import_box, simba.import_point, simba.import_circle,
-
   // Simba classes
   simba.import_image, simba.import_externalcanvas, simba.import_dtm, simba.import_matchtemplate,
 
@@ -89,6 +88,7 @@ procedure AddSimbaImports(Script: TSimbaScript);
   ImportExternalCanvas(Script);
   ImportMatchTemplate(Script);
   ImportJSON(Script);
+  ImportPixelOCR(Script);
 
   ImportDateTime(Script);
   ImportEncoding(Script);
diff --git a/Source/simba.image.pas b/Source/simba.image.pas
index 687fb7ce2..58b06606d 100644
--- a/Source/simba.image.pas
+++ b/Source/simba.image.pas
@@ -134,6 +134,8 @@ TSimbaImage = class(TSimbaBaseClass)
     procedure Pad(Amount: Integer);
     procedure Offset(X, Y: Integer);
 
+    function isBinary: Boolean;
+
     function GetPixels(Points: TPointArray): TColorArray;
     procedure SetPixels(Points: TPointArray; Color: TColor); overload;
     procedure SetPixels(Points: TPointArray; Colors: TColorArray); overload;
@@ -258,8 +260,8 @@ TSimbaImage = class(TSimbaBaseClass)
     procedure FromLazBitmap(LazBitmap: TBitmap);
 
     // Basic finders, use Target.SetTarget(img) for all
-    function FindColor(Color: TColor; Tolerance: Single): TPointArray;
-    function FindImage(Image: TSimbaImage; Tolerance: Single): TPoint;
+    function FindColor(Color: TColor; Tolerance: Single = 0): TPointArray;
+    function FindImage(Image: TSimbaImage; Tolerance: Single = 0): TPoint;
   end;
 
   PSimbaImage = ^TSimbaImage;
@@ -1749,6 +1751,22 @@ function TSimbaImage.Blur(Algo: EImageBlurAlgo; Radius: Integer): TSimbaImage;
   end;
 end;
 
+// Returns True when every pixel is pure black (0,0,0) or pure white (255,255,255); alpha is not inspected.
+function TSimbaImage.isBinary: Boolean;
+var
+  Ptr: PColorBGRA;
+  Upper: PtrUInt;
+begin
+  Ptr := FData;
+  Upper := PtrUInt(FData) + FDataSize;
+  // "and" binds tighter than "or", so the colour test is parenthesised as a whole; otherwise a
+  // white pixel keeps the loop running even once Ptr has reached Upper.
+  while (PtrUInt(Ptr) < Upper) and (((Ptr^.R = 0) and (Ptr^.G = 0) and (Ptr^.B = 0)) or ((Ptr^.R = 255) and (Ptr^.G = 255) and (Ptr^.B = 255))) do
+    Inc(Ptr);
+
+  Result := PtrUInt(Ptr) = Upper;
+end;
+
 function TSimbaImage.GetPixels(Points: TPointArray): TColorArray;
 var
   I: Integer;
diff --git a/Source/simba.pixelocr.pas b/Source/simba.pixelocr.pas
new file mode 100644
index 000000000..79a7c0251
--- /dev/null
+++ b/Source/simba.pixelocr.pas
@@ -0,0 +1,589 @@
+{
+  Author: Raymond van Venetië and Merlijn Wajer
+  Project: Simba (https://github.com/MerlijnWajer/Simba)
+  License: GNU General Public License (https://www.gnu.org/licenses/gpl-3.0)
+}
+unit simba.pixelocr;
+
+{$i simba.inc}
+
+interface
+
+uses
+  Classes, SysUtils,
+  simba.base,
+  simba.image;
+
+type
+  TPixelFontGlyph = record
+    Value: Char;
+    Width: Integer;
+    Height: Integer;
+
+    Points: TPointArray; // glyph pixels; X is normalised to start at 0 by LoadFont, Y is left as loaded
+    Shadow: TPointArray;
+    Background: TPointArray; // pixels of the glyph box expanded by 1 that are neither points nor shadow
+    BackgroundDiv2: Integer; // Length(Background) div 2
+
+    InnerBounds: TBox; // bounds of Points + Shadow as loaded, before the X normalisation
+  end;
+  PPixelFontGlyph = ^TPixelFontGlyph;
+
+  TPixelFont = record
+    Glyphs: array of TPixelFontGlyph;
+    SpaceWidth: Integer;
+    MaxGlyphWidth: Integer;
+    MaxGlyphHeight: Integer;
+  end;
+  PPixelFont = ^TPixelFont;
+
+  TPixelOCRMatch = record
+    Text: String;
+    Hits: Integer;
+    Bounds: TBox;
+  end;
+
+  TPixelOCR = record
+    Tolerance: Single; // colour tolerance passed to IsSimilar (percent of the maximum RGB distance)
+    ShadowTolerance: Single; // channel threshold passed to IsShadow
+
+    Whitelist: set of Char; // when not empty, only these characters are added to the result
+    MaxWalk: Integer; // stop after this many consecutive columns without a match, 0 = unlimited
+    MaxLen: Integer; // stop once this many characters were read, 0 = unlimited
+    Matches: array of TPixelOCRMatch; // set by Locate, Recognize and RecognizeLines
+  private
+    procedure _TextToTPA(font: TPixelFont; Text: String; out Points, Background: TPointArray);
+
+    function _RecognizeX(const img: TSimbaImage; const font: PPixelFont; x,y: Integer; isBinary: Boolean): TPixelOCRMatch;
+    function _RecognizeXY(const img: TSimbaImage; const font: PPixelFont; x,y, height: Integer; isBinary: Boolean): TPixelOCRMatch;
+  public
+    function LoadFont(dir: String; SpaceWidth: Integer): TPixelFont;
+    function TextToTPA(font: TPixelFont; Text: String): TPointArray;
+    function Locate(img: TSimbaImage; font: TPixelFont; text: String): Single;
+
+    function Recognize(img: TSimbaImage; font: TPixelFont; p: TPoint): String; overload;
+    function Recognize(img: TSimbaImage; font: TPixelFont; bounds: TBox): String; overload;
+    function RecognizeLines(img: TSimbaImage; font: TPixelFont; bounds: TBox): TStringArray;
+  end;
+
+const
+  ALPHA_NUM_SYMBOLS = ['a'..'z', 'A'..'Z', '0'..'9', '%', '&', '#', '$', '[', ']', '{', '}', '@', '!', '?'];
+
+implementation
+
+uses
+  simba.containers,
+  simba.vartype_string,
+  simba.vartype_box,
+  simba.vartype_pointarray,
+  simba.fs,
+  simba.nativeinterface;
+
+type
+  TSimilarColorCache = record
+    Color1, Color2: TColorBGRA;
+    IsSimilar: Boolean;
+  end;
+
+// True when the pixel at (X, Y) is within Tol percent of Color2, measured as RGB euclidean distance.
+// Cache is currently unused: the caching code below is commented out.
+function IsSimilar(const Image: TSimbaImage; const X, Y: Integer; const Color2: TColorBGRA; const Tol: Single; var Cache: TSimilarColorCache): Boolean; //inline;
+const
+  MAX_DISTANCE_RGB = Single(441.672955930064); // Sqrt(Sqr(255) + Sqr(255) + Sqr(255))
+begin
+  with Image.Data[Y * Image.Width + X] do
+    Result := (Sqrt(Sqr(R - Color2.R) + Sqr(G - Color2.G) + Sqr(B - Color2.B)) / MAX_DISTANCE_RGB * 100) <= Tol;
+
+  //if (Color1.EqualsIgnoreAlpha(Cache.Color1)) and (Color2.EqualsIgnoreAlpha(Cache.Color2)) then
+  //  Result := Cache.IsSimilar
+  //else
+  //begin
+  //  Result := (Sqrt(Sqr(Color1.R-Color2.R) + Sqr(Color1.G-Color2.G) + Sqr(Color1.B-Color2.B)) / MAX_DISTANCE_RGB * 100) <= Tol;
+  //
+  //  Cache.Color1 := Color1;
+  //  Cache.Color2 := Color2;
+  //  Cache.IsSimilar := Result;
+  //end;
+end;
+
+// True when the pixel at (X, Y) is dark enough to be a shadow: R and G at most tol, B at most tol + 5.
+function IsShadow(const Image: TSimbaImage; const X, Y: Integer; const tol: Single): Boolean; inline;
+begin
+  with Image.Data[Y * Image.Width + X] do
+    Result := (R <= tol) and (G <= tol) and (B <= tol + 5); // allow a little more in the blue channel only
+end;
+
+// Returns a pointer to the glyph whose Value is c; calls SimbaException when the font has no such glyph.
+function getGlyph(const Font: PPixelFont; const c: Char): PPixelFontGlyph; inline;
+var
+  i: Integer;
+begin
+  for i := 0 to High(Font^.Glyphs) do
+    if (Font^.Glyphs[i].Value = c) then
+      Exit(@Font^.Glyphs[i]);
+
+  SimbaException('Character %s does not exist in the font', [c]);
+  Result := nil;
+end;
+
+// True when Text contains at least one character from ALPHA_NUM_SYMBOLS.
+function ContainsAlphaNumSym(const Text: string): Boolean; inline;
+var
+  I: Integer;
+begin
+  for I := 1 to Length(Text) do
+    if Text[I] in ALPHA_NUM_SYMBOLS then
+      Exit(True);
+
+  Result := False;
+end;
+
+// Lays out Text left to right, advancing by each glyph's Width, and returns the combined glyph
+// points and the combined glyph background points.
+procedure TPixelOCR._TextToTPA(font: TPixelFont; Text: String; out Points, Background: TPointArray);
+var
+  I, X: Integer;
+  Glyph: PPixelFontGlyph;
+  PointBuf, BackgroundBuf: TSimbaPointBuffer;
+begin
+  X := 0;
+  for I := 1 to Length(Text) do
+  begin
+    Glyph := getGlyph(@Font, Text[I]);
+
+    PointBuf.Add(Glyph^.points.Offset(X,0));
+    BackgroundBuf.Add(Glyph^.background.Offset(X,0));
+
+    X += Glyph^.Width;
+  end;
+
+  Points := PointBuf.ToArray(False);
+  Background := BackgroundBuf.ToArray(False);
+end;
+
+// Reads one row of text: walks right from (x, y), at each column taking the glyph with the most hits.
+// Stops at the image edge, after MaxWalk consecutive columns without a match (0 = unlimited), or once
+// MaxLen characters were read (0 = unlimited).
+function TPixelOCR._RecognizeX(const img: TSimbaImage; const font: PPixelFont; x,y: Integer; isBinary: Boolean): TPixelOCRMatch;
+var
+  ColorCache: TSimilarColorCache;
+
+  // Scores Glyph at (X, Y) for glyphs without a shadow: every point must match and at most
+  // BackgroundDiv2 background pixels may look like text. Returns the hit count, or -1 on mismatch.
+  function CompareUsingBackground(const Glyph: PPixelFontGlyph; const X, Y: Integer): Integer; inline;
+  var
+    FirstPixel: TColorBGRA;
+    I, Misses: Integer;
+  begin
+    case isBinary of
+      True:
+        begin
+          for I := 0 to High(Glyph^.points) do
+            with Img.Data[(Glyph^.points[I].Y + Y) * Img.Width + (Glyph^.points[I].X + X)] do
+              if (B <> 255) or (G <> 255) or (R <> 255) then
+                Exit(-1);
+
+          Misses := 0;
+          for I := 0 to High(Glyph^.background) do
+            with Img.Data[(Glyph^.background[I].Y + Y) * Img.Width + (Glyph^.background[I].X + X)] do
+              if (B <> 0) or (G <> 0) or (R <> 0) then
+              begin
+                Inc(Misses);
+                if (Misses > Glyph^.backgrounddiv2) then
+                  Exit(-1);
+              end;
+        end;
+
+      False:
+        begin
+          FirstPixel := Img.Data[(Glyph^.points[0].Y + Y) * Img.Width + (Glyph^.points[0].X + X)];
+          for I := 1 to High(Glyph^.points) do
+            if not IsSimilar(img, Glyph^.points[I].X + X, Glyph^.points[I].Y + Y, FirstPixel, Tolerance, ColorCache) then
+              Exit(-1);
+
+          Misses := 0;
+          for I := 0 to High(Glyph^.background) do
+            if IsSimilar(img, Glyph^.background[i].X + X, Glyph^.background[i].Y + Y, FirstPixel, Tolerance, ColorCache) then
+            begin
+              Inc(Misses);
+              if (Misses > Glyph^.backgrounddiv2) then
+                Exit(-1);
+            end;
+        end;
+    end;
+
+    Result := Length(Glyph^.points) + (Length(Glyph^.background) - Misses);
+  end;
+
+  // Scores Glyph at (X, Y) for glyphs with a shadow: every point and every shadow pixel must match. Returns the hit count, or -1 on mismatch.
+  function CompareUsingShadow(const Glyph: PPixelFontGlyph; const X, Y: Integer): Integer; inline;
+  var
+    FirstPixel: TColorBGRA;
+    I: Integer;
+  begin
+    case isBinary of
+      True:
+        begin
+          for I := 0 to High(Glyph^.points) do
+            with Img.Data[(Glyph^.points[I].Y + Y) * Img.Width + (Glyph^.points[I].X + X)] do
+              if (B <> 255) or (G <> 255) or (R <> 255) then
+                Exit(-1);
+
+          for I := 0 to High(Glyph^.Shadow) do
+            with Img.Data[(Glyph^.Shadow[I].Y + Y) * Img.Width + (Glyph^.Shadow[I].X + X)] do
+              if (B <> 0) or (G <> 0) or (R <> 0) then
+                Exit(-1);
+        end;
+
+      False:
+        begin
+          // if the first pixel is a dark'ish color its a non starter
+          if IsShadow(Img, Glyph^.points[0].X + X, Glyph^.points[0].Y + Y, Self.shadowTolerance * 2) then
+            Exit(-1);
+
+          // check all shadows are shadows
+          for i:=0 to High(Glyph^.shadow) do
+            if not IsShadow(Img, Glyph^.shadow[i].X + X, Glyph^.shadow[i].Y + Y, self.shadowTolerance) then
+              Exit(-1);
+
+          // Always use first pixel to compare against
+          FirstPixel := Img.Data[(Glyph^.points[0].Y + Y) * Img.Width + (Glyph^.points[0].X + X)];
+          // check all other points match the first pixel
+          for i:=1 to High(Glyph^.points) do
+            if not IsSimilar(Img, Glyph^.Points[i].X + X, Glyph^.Points[i].Y + Y, FirstPixel, Tolerance, ColorCache) then
+              Exit(-1);
+        end;
+    end;
+
+    Result := Length(Glyph^.points) + Length(Glyph^.shadow);
+  end;
+
+var
+  Space: Integer;
+  Hits, BestHits, MaxY: Integer;
+  Lo, Hi: PPixelFontGlyph;
+  Glyph, BestGlyph: PPixelFontGlyph;
+begin
+  Result := Default(TPixelOCRMatch);
+  Result.Bounds.X1 := $FFFFFF;
+  Result.Bounds.Y1 := $FFFFFF;
+  Space := 0;
+
+  if (x < 0) then x := 0;
+  if (y < 0) then y := 0;
+
+  Lo := @Font^.Glyphs[0];
+  Hi := @Font^.Glyphs[High(Font^.Glyphs)];
+
+  while (x < img.Width) and ((MaxWalk = 0) or (Space < MaxWalk)) do
+  begin
+    BestHits := 0;
+
+    Glyph := Lo;
+    while (PtrUInt(Glyph) <= PtrUInt(Hi)) do
+    begin
+      // Only compare when the lowest row that will be read lies inside the image. LoadFont normalises
+      // only the X coordinates, so that row is y + InnerBounds.Y2, plus one more row for the expanded
+      // background when the glyph has no shadow. Glyph^.Height alone underestimates it.
+      // NOTE(review): background points also extend one pixel left of and above the glyph; that is not guarded here.
+      if (Glyph^.shadow <> nil) then
+        MaxY := y + Glyph^.InnerBounds.Y2
+      else
+        MaxY := y + Glyph^.InnerBounds.Y2 + 1;
+
+      if (Glyph^.points <> nil) and (Glyph^.Width + X < Img.Width) and (MaxY < Img.Height) then
+      begin
+        if (Glyph^.shadow <> nil) then
+          Hits := CompareUsingShadow(Glyph, x, y)
+        else
+          Hits := CompareUsingBackground(Glyph, x, y);
+
+        if (Hits > BestHits) then
+        begin
+          BestGlyph := Glyph;
+          BestHits := Hits;
+        end;
+      end;
+
+      Inc(Glyph);
+    end;
+
+    if (BestHits > 0) then
+    begin
+      if ((WhiteList = []) or (BestGlyph^.Value in Self.Whitelist)) then
+      begin
+        if (Result.Text <> '') and (Space >= Font^.SpaceWidth) then
+          Result.Text += ' ';
+
+        with Result.Bounds do
+        begin
+          X1 := Min(X1, X);
+          Y1 := Min(Y1, Y + BestGlyph^.InnerBounds.Y1);
+          X2 := Max(X2, X + BestGlyph^.Width);
+          Y2 := Max(Y2, Y + BestGlyph^.InnerBounds.Y2);
+        end;
+
+        Result.Text += BestGlyph^.Value;
+        Result.Hits += BestHits;
+
+        // MaxLen = 0 means unlimited; otherwise stop as soon as MaxLen characters were read
+        if (MaxLen > 0) and (Length(Result.Text) >= MaxLen) then
+          Exit;
+      end;
+
+      Space := 0;
+      x += BestGlyph^.Width;
+    end else
+    begin
+      Space += 1;
+      x += 1;
+    end;
+  end;
+end;
+
+// Runs _RecognizeX on each of the `height` rows starting at y and returns the match with the most hits.
+function TPixelOCR._RecognizeXY(const img: TSimbaImage; const font: PPixelFont; x,y, height: Integer; isBinary: Boolean): TPixelOCRMatch;
+var
+  stop: Integer;
+  Match: TPixelOCRMatch;
+begin
+  Result := Default(TPixelOCRMatch);
+
+  stop := y+height;
+  while (y < stop) do
+  begin
+    Match := Self._RecognizeX(img, font, x, y, isBinary);
+    if (Match.Hits > Result.Hits) then
+      Result := Match;
+
+    y += 1;
+  end;
+end;
+
+// Loads a font from the .bmp/.png files in dir whose file names are the character codes 32..126.
+// $FFFFFF pixels are the glyph, $0000FF pixels its shadow; character 32 only contributes its size.
+// Returns a font without glyphs when dir contains no files.
+function TPixelOCR.LoadFont(dir: String; SpaceWidth: Integer): TPixelFont;
+var
+  img: TSimbaImage;
+  files: TStringArray;
+  i: Integer;
+  Character: String;
+  Glyph: TPixelFontGlyph;
+  B: TBox;
+begin
+  Result := Default(TPixelFont);
+  Result.SpaceWidth := SpaceWidth;
+
+  Files := TSimbaDir.DirList(dir);
+  if (Length(Files) = 0) then
+    Exit;
+
+  img := TSimbaImage.Create();
+  try
+    for i := 0 to High(Files) do
+    begin
+      if not TSimbaPath.PathHasExt(Files[I], ['.bmp', '.png']) then
+        Continue;
+
+      Character := TSimbaPath.PathExtractNameWithoutExt(Files[I]);
+      if Character.IsNumeric and (Character.ToInteger in [32..126]) then
+      begin
+        img.Load(Files[I]);
+
+        Glyph := Default(TPixelFontGlyph);
+        Glyph.Value := Char(Character.ToInteger);
+        if (Glyph.Value = #32) then
+        begin
+          Glyph.Width := Img.Width;
+          Glyph.Height := Img.Height;
+        end else
+        begin
+          Glyph.Points := Img.FindColor($FFFFFF);
+          Glyph.Shadow := Img.FindColor($0000FF);
+          Glyph.InnerBounds := TPointArray(Glyph.Points + Glyph.Shadow).Bounds;
+
+          B := Glyph.Points.Bounds;
+          if (B.X1 > 0) then
+          begin
+            Glyph.Points := Glyph.points.Offset(-B.X1, 0);
+            Glyph.shadow := Glyph.shadow.Offset(-B.X1, 0);
+          end;
+
+          B := TPointArray(Glyph.Points + Glyph.Shadow).Bounds;
+
+          Glyph.Background := TPointArray(Glyph.Points + Glyph.Shadow).Invert(B.Expand(1));
+          Glyph.backgrounddiv2 := Length(Glyph.background) div 2;
+          Glyph.Width := B.Width;
+          Glyph.Height := B.Height;
+
+          Result.MaxGlyphWidth := Max(Result.MaxGlyphWidth, Glyph.Width);
+          Result.MaxGlyphHeight := Max(Result.MaxGlyphHeight, Glyph.Height);
+        end;
+
+        Result.Glyphs += [Glyph]; // glyphs are appended as loaded, so the array is not preallocated
+      end;
+    end;
+  finally
+    img.Free();
+  end;
+end;
+
+// Returns the glyph points of Text laid out with font; the background points are discarded.
+function TPixelOCR.TextToTPA(font: TPixelFont; Text: String): TPointArray;
+var
+  _: TPointArray;
+begin
+  _TextToTPA(Font, Text, Result, _);
+end;
+
+// Searches img for Text rendered in font and returns the best match ratio in 0..1, where 1 means
+// no background pixel resembled the text colour. The best match is stored in Matches.
+// Returns 0 and leaves Matches untouched when the text has no background points.
+function TPixelOCR.Locate(img: TSimbaImage; font: TPixelFont; text: String): Single;
+var
+  X, Y: Integer;
+  SearchWidth, SearchHeight: Integer;
+  Bad, I: Integer;
+  Match: Single;
+  BestMatch: TPixelOCRMatch;
+  First: TColorBGRA;
+  points,background: TPointArray;
+  cache: TSimilarColorCache;
+label
+  NotFound, Finished;
+begin
+  Result := 0;
+  BestMatch := Default(TPixelOCRMatch); // Bounds must be defined even when no position matches
+
+  _TextToTPA(font, text, points, background);
+  if Length(background) = 0 then
+    Exit;
+
+  with background.Bounds do
+  begin
+    background := background.Offset(-X1, -Y1);
+    points := points.Offset(-X1, -Y1);
+    SearchWidth := (img.Width - Width) - 1;
+    SearchHeight := (img.Height - Height) - 1;
+  end;
+
+  cache := Default(TSimilarColorCache);
+  cache.IsSimilar := True;
+
+  for Y := 0 to SearchHeight do
+    for X := 0 to SearchWidth do
+    begin
+      First := Img.Data[(Y + points[0].Y) * Img.Width + (X + points[0].X)];
+      for i := 1 to High(points) do
+        if (not IsSimilar(Img, X + points[i].x, Y + points[i].y, First, tolerance, cache)) then
+          goto NotFound;
+
+      Bad := 0;
+      for i := 0 to High(background) do
+        if IsSimilar(Img, X + background[i].x, Y + background[i].y, First, tolerance, cache) then
+          Inc(Bad);
+
+      Match := 1 - (Bad / Length(Background));
+      if (Match > Result) then
+      begin
+        Result := Match;
+
+        BestMatch.Bounds.X1 := X;
+        BestMatch.Bounds.Y1 := Y;
+
+        if (Result = 1) then
+          goto Finished;
+      end;
+
+      NotFound:
+    end;
+  Finished:
+
+  BestMatch.Hits := Round(Result * 100);
+  BestMatch.Text := Text;
+  BestMatch.Bounds.X2 := BestMatch.Bounds.X1 + points.Bounds.Width;
+  BestMatch.Bounds.Y2 := BestMatch.Bounds.Y1 + points.Bounds.Height;
+
+  Matches := [BestMatch];
+end;
+
+function TPixelOCR.Recognize(img: TSimbaImage; font: TPixelFont; p: TPoint): String; overload; // OCRs the single row starting at p; the match is stored in Matches[0]
+begin
+  if (Length(Font.Glyphs) = 0) then
+    SimbaException('Font is empty');
+
+  Matches := [Self._RecognizeX(img, @font, p.x, p.y, img.isBinary)];
+  Result := Matches[0].Text;
+end;
+
+function TPixelOCR.Recognize(img: TSimbaImage; font: TPixelFont; bounds: TBox): String; overload; // tries bounds.Height rows from (bounds.X1, bounds.Y1) and keeps the row with the most hits
+begin
+  if (Length(Font.Glyphs) = 0) then
+    SimbaException('Font is empty');
+
+  Matches := [Self._RecognizeXY(img, @font, bounds.X1, bounds.Y1, bounds.Height, img.isBinary)];
+  Result := Matches[0].Text;
+end;
+
+function TPixelOCR.RecognizeLines(img: TSimbaImage; font: TPixelFont; bounds: TBox): TStringArray; // scans bounds top to bottom; returns one string per accepted line and stores the matches in Matches
+var
+  isBinary: Boolean;
+
+  function MaybeRecognize(X, Y: Integer; out Match: TPixelOCRMatch): Boolean; // True when the text read from (X, Y) contains an ALPHA_NUM_SYMBOLS character
+  var
+    Temp: TPixelOCR;
+  begin
+    Result := False;
+
+    // use a copy here since we change these properties
+    Temp := Self;
+    Temp.Whitelist := ALPHA_NUM_SYMBOLS;
+    Temp.MaxLen := 1; // the probe only needs a single character
+    Temp.MaxWalk := 0;
+
+    // Find something on a row that isn't a small character
+    Match := Temp._RecognizeX(Img, @Font, X, Y, isBinary);
+    if (Match.Hits > 0) then
+    begin
+      // OCR the row and some extra rows
+      Temp.Whitelist := Self.Whitelist;
+      Temp.MaxWalk := 0;
+      Temp.MaxLen := 0;
+
+      Match := Temp._RecognizeXY(img, @font, X, Y, Font.MaxGlyphHeight div 2, isBinary);
+      // Ensure that actual text was extracted, not just a symbol mess of short or small character symbols.
+      if ContainsAlphaNumSym(Match.Text) then
+        Result := True;
+    end;
+  end;
+
+var
+  Match: TPixelOCRMatch;
+begin
+  if (Length(Font.Glyphs) = 0) then
+    SimbaException('Font is empty');
+
+  Result := [];
+  Matches := [];
+
+  isBinary := img.isBinary; // computed once and reused for every row
+
+  while (Bounds.Y1 < Bounds.Y2) do
+  begin
+    if MaybeRecognize(Bounds.X1, Bounds.Y1, Match) then
+    begin
+      Result += [Match.Text];
+      Matches += [Match];
+
+      // Now we can confidently skip this search line by a jump, but we dont skip fully in case of close/overlapping text
+      // So we divide the texts max glyph height by 2 and subtract that from the lower end of the found bounds.
+      Bounds.Y1 := Max(Bounds.Y1, Match.Bounds.Y2 - (Font.MaxGlyphHeight div 2));
+    end;
+
+    Bounds.Y1 += 1;
+  end;
+end;
+
+end.
+
diff --git a/Tests/pixelocr.simba b/Tests/pixelocr.simba
new file mode 100644
index 000000000..42f727f47
--- /dev/null
+++ b/Tests/pixelocr.simba
@@ -0,0 +1,166 @@
+{$assertions on}
+
+const
+  TEST_FONTNAME = 'Arial';
+  TEST_FONTSIZE = 25;
+
+procedure GenerateFont(dir: String; FontName: String; FontSize: Integer; shadow: Boolean); // renders characters 32..126 and saves each as <dir><code>.bmp
+
+  procedure addShadow(img: TImage); // NOTE(review): never called by GenerateFont, so no shadow pixels are written - confirm whether that is intended
+  var
+    p: TPoint;
+  begin
+    for p in img.FindColor($FFFFFF).Offset([1,1]) do
+      if img.Pixel[p.x, p.y] = 0 then
+        img.Pixel[p.x, p.y] := $FF;
+  end;
+
+var
+  img: TImage;
+  s: String; // NOTE(review): unused
+  i: integer;
+  size: TPoint;
+begin
+  DirCreate(dir);
+
+  img := TImage.Create(50,50);
+
+  img.FontName := FontName;
+  img.FontSize := FontSize;
+  img.DrawColor := $FFFFFF;
+
+  for i := 32 to 126 do
+  begin
+    size := img.TextSize(Char(i));
+    if shadow then
+      size.x += 1; // one extra column, presumably room for the shadow offset
+    img.SetSize(size.X, size.y);
+    img.Clear();
+    img.DrawText(Char(i), [0,0]);
+    img.Save(dir + ToStr(i) + '.bmp', True);
+  end;
+
+  img.Free();
+end;
+
+procedure Test_Static; // single-row Recognize on text drawn over a gradient background
+
+  function GenerateImage: TImage;
+  var
+    tpa: TPointArray;
+    atpa: T2DPointArray;
+    i,x,y: Integer;
+  begin
+    Result := TImage.Create(250,50);
+    Result.FontName := TEST_FONTNAME;
+    Result.FontSize := TEST_FONTSIZE;
+    Result.DrawColor := $FFFFFF;
+    Result.DrawText('hello world xyz', [5,5]);
+
+    tpa := Result.FindColor($FFFFFF);
+    atpa := tpa.Cluster(1, 10).SortByX(True);
+    tpa := [];
+    for i := 0 to High(atpa) do
+    begin
+      tpa += atpa[i];
+      atpa := atpa.Offset([0,0]); // NOTE(review): offsetting by [0,0] looks like a no-op - confirm
+    end;
+
+    for x := 0 to Result.Width-1 do
+      for y := 0 to Result.Height-1 do
+        Result.Pixel[x,y] := x; // background colour value equals the column index
+
+    Result.DrawColor := $FFFFFF;
+    for i := 0 to High(tpa) do
+    begin
+      Result.DrawAlpha := 255-Random(25); // redraw each text pixel with a slightly random alpha
+      Result.DrawTPA([tpa[i]]);
+    end;
+  end;
+
+var
+  ocr: TPixelOCR;
+  font: TPixelFont;
+  img: TImage;
+begin
+  img := GenerateImage();
+  font := ocr.LoadFont(SimbaEnv.TempPath + 'pixelfont/', 5);
+  ocr.Tolerance := 10;
+  Assert(ocr.Recognize(img, font, [5,5]) = 'hello world xyz');
+  img.Free();
+end;
+
+procedure Test_StaticShadow; // single-row Recognize on text with dark pixels placed at offset [1,1]
+
+  function GenerateImage: TImage;
+  var
+    tpa: TPointArray; // NOTE(review): unused
+    atpa: T2DPointArray; // NOTE(review): unused
+    x,y: Integer;
+    p: TPoint;
+  begin
+    Result := TImage.Create(250,50);
+    Result.FontName := TEST_FONTNAME;
+    Result.FontSize := TEST_FONTSIZE;
+    Result.DrawColor := $FFFFFF;
+    Result.DrawText('hello world', [5,5]);
+    for p in Result.FindColor($FFFFFF).Offset([1, 1]) do
+      if Result.Pixel[p.x, p.y] = 0 then
+        Result.Pixel[p.x, p.y] := 1 + TColorRGB([Random(10),Random(10),Random(10)]).ToColor; // the "1 +" keeps shadow pixels non-zero so the fill below skips them
+
+    for x := 0 to Result.Width-1 do
+      for y := 0 to Result.Height-1 do
+        if (Result.Pixel[x,y] = 0) then
+          Result.Pixel[x,y] := x;
+  end;
+
+var
+  ocr: TPixelOCR;
+  font: TPixelFont;
+  img: TImage;
+begin
+  img := GenerateImage();
+  font := ocr.LoadFont(SimbaEnv.TempPath + 'pixelfontshadow/', 8);
+  ocr.Tolerance := 30;
+  ocr.ShadowTolerance := 30;
+  Assert(ocr.Recognize(img, font, [5,5]) = 'hello world');
+  img.Free();
+end;
+
+procedure Test_MultiLine; // RecognizeLines over the whole image, four separate lines of text
+
+  function GenerateImage: TImage;
+  var
+    x,y: Integer; // NOTE(review): unused
+  begin
+    Result := TImage.Create(250,250);
+    Result.FontName := TEST_FONTNAME;
+    Result.FontSize := TEST_FONTSIZE;
+    Result.DrawColor := $FFFFFF;
+    Result.DrawText('hello world', [5,5]);
+    Result.DrawText('abc xyz', [5,30]);
+    Result.DrawText('a', [5,55]);
+    Result.DrawText('zz', [80,75]);
+  end;
+
+var
+  ocr: TPixelOCR;
+  font: TPixelFont;
+  img: TImage;
+begin
+  img := GenerateImage();
+  font := ocr.LoadFont(SimbaEnv.TempPath + 'pixelfont/', 8);
+  Assert(ocr.RecognizeLines(img, font, [0,0,img.Width-1,img.Height-1]).Equals(['hello world', 'abc xyz', 'a', 'zz']));
+  img.Free();
+end;
+
+begin
+  RandSeed := 123; // fixed seed so the Random() calls in the image generators are repeatable
+
+  GenerateFont(SimbaEnv.TempPath + 'pixelfont/', TEST_FONTNAME, TEST_FONTSIZE, False);
+  GenerateFont(SimbaEnv.TempPath + 'pixelfontshadow/', TEST_FONTNAME, TEST_FONTSIZE, True);
+
+  Test_Static();
+  Test_StaticShadow();
+  Test_MultiLine();
+end.