-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
+ class
PaddleOcrRecognizer.cs
to introduce Sdcb.PaddleSharp
which is based on the native `PaddleInference` that is faster than requesting the remote `PaddleServing` or `HubServing` server via HTTP + config `ImageOcrPipeline.PaddleOcr.ModelPath` for method `PaddleOcrRecognizer.ctor()` @ appsettings.json + implicit casting `RotatedRect->TextBox` and `Point2f->Coordinate` for method `PaddleOcrRecognitionResult.FromPaddleSharp()` @ PaddleOcrResponse.cs + generate flipped and flopped variants for each image to be recognized @ `ImageOcrPipelineWorker.DoWork()` @ crawler
- Loading branch information
Showing
8 changed files
with
115 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
using OpenCvSharp; | ||
using Sdcb.PaddleInference; | ||
using Sdcb.PaddleOCR; | ||
using Sdcb.PaddleOCR.Models; | ||
using Sdcb.PaddleOCR.Models.Online; | ||
|
||
namespace tbm.Crawler.ImagePipeline.Ocr; | ||
|
||
/// <summary>
/// Runs OCR over OpenCV matrices in-process, using one <see cref="PaddleOcrAll"/> model per script
/// (<c>zh-Hans</c>, <c>zh-Hant</c>, <c>ja</c> and <c>en</c>).
/// </summary>
/// <remarks>
/// Until <see cref="InitializeModels"/> has completed no model is loaded,
/// so <see cref="RecognizeImageMatrices"/> and <see cref="DetectImageMatrices"/> yield nothing.
/// The instance owns the loaded models and releases them in <see cref="Dispose"/>.
/// </remarks>
public class PaddleOcrRecognizer : IDisposable
{
    // Loaded models keyed by the script tag they recognize, replaced as a whole by InitializeModels().
    private Dictionary<string, PaddleOcrAll> _modelsKeyByScript = new();

    /// <summary>
    /// Points <see cref="Settings.GlobalModelDirectory"/> at the directory configured under
    /// <c>ImageOcrPipeline:PaddleOcr:ModelPath</c>, falling back to <c>./PaddleOcrModels</c>.
    /// </summary>
    /// <param name="config">Application configuration to read the model path from.</param>
    /// <remarks>This writes a static setting, so it affects every user of <see cref="Settings"/> in the process.</remarks>
    public PaddleOcrRecognizer(IConfiguration config) => Settings.GlobalModelDirectory =
        config.GetSection("ImageOcrPipeline").GetSection("PaddleOcr")
            .GetValue("ModelPath", "./PaddleOcrModels") ?? "./PaddleOcrModels";

    /// <summary>
    /// Disposes every loaded model and forgets them, so calling this more than once is harmless
    /// and later recognition or detection calls yield nothing instead of touching disposed models.
    /// </summary>
    public void Dispose()
    {
        DisposeModels(_modelsKeyByScript);
        _modelsKeyByScript.Clear();
        // The class is not sealed: keep a finalizer declared by a derived type from running after an explicit dispose.
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Downloads the model files for every supported script and builds the corresponding
    /// <see cref="PaddleOcrAll"/> instances, replacing (and disposing) any previously loaded ones.
    /// </summary>
    /// <param name="stoppingToken">Token passed to every model download.</param>
    /// <returns>A task that completes once all models have been created.</returns>
    /// <remarks>
    /// When any download or model construction fails, the models created so far by this call
    /// are disposed, the previously loaded models stay in place and the exception is rethrown.
    /// </remarks>
    public async Task InitializeModels(CancellationToken stoppingToken)
    {
        static PaddleOcrAll Create(FullOcrModel model) =>
            new(model, PaddleDevice.Mkldnn())
            {
                AllowRotateDetection = true,
                Enable180Classification = true
            };

        // Build into a local dictionary first so a failure half way through never leaks
        // the models already created and never leaves the field partially populated.
        var models = new Dictionary<string, PaddleOcrAll>();
        try
        {
            models["zh-Hans"] = Create(await OnlineFullModels.ChineseV3.DownloadAsync(stoppingToken));
            models["zh-Hant"] = Create(await OnlineFullModels.TranditionalChinseV3.DownloadAsync(stoppingToken));
            // There is no predefined Japanese full model being used here: it is composed
            // from separate detection, classification and recognition models.
            models["ja"] = Create(await new OnlineFullModels(
                OnlineDetectionModel.MultiLanguageV3,
                OnlineClassificationModel.ChineseMobileV2,
                LocalDictOnlineRecognizationModel.JapanV3
            ).DownloadAsync(stoppingToken));
            models["en"] = Create(await OnlineFullModels.EnglishV3.DownloadAsync(stoppingToken));
        }
        catch
        {
            DisposeModels(models);
            throw;
        }

        var previousModels = _modelsKeyByScript;
        _modelsKeyByScript = models;
        // Release the models of an earlier initialization only after the new ones are in place.
        DisposeModels(previousModels);
    }

    /// <summary>
    /// Runs every loaded model over every given image matrix and flattens the results
    /// produced by <see cref="PaddleOcrRecognitionResult.FromPaddleSharp"/>.
    /// </summary>
    /// <param name="matricesKeyByImageId">Image matrices keyed by their image id.</param>
    /// <returns>
    /// A lazily evaluated sequence: the models only run while it is being enumerated,
    /// and enumerating it again runs them again.
    /// </returns>
    public IEnumerable<PaddleOcrRecognitionResult> RecognizeImageMatrices(Dictionary<string, Mat> matricesKeyByImageId) =>
        matricesKeyByImageId.SelectMany(matrix => _modelsKeyByScript.SelectMany(model =>
            PaddleOcrRecognitionResult.FromPaddleSharp(matrix.Key, model.Key, model.Value.Run(matrix.Value))));

    /// <summary>
    /// Runs only the detector of every loaded model over every given image matrix
    /// and pairs each detected rectangle with the id of the image it was found in.
    /// </summary>
    /// <param name="matricesKeyByImageId">Image matrices keyed by their image id.</param>
    /// <returns>
    /// A lazily evaluated sequence with one entry per detected rectangle, per model and per image;
    /// the script of the model that produced a rectangle is not part of the result.
    /// </returns>
    public IEnumerable<PaddleOcrRequester.DetectionResult> DetectImageMatrices(Dictionary<string, Mat> matricesKeyByImageId) =>
        matricesKeyByImageId.SelectMany(matrix => _modelsKeyByScript.SelectMany(model =>
            model.Value.Detector.Run(matrix.Value).Select(rect => new PaddleOcrRequester.DetectionResult(matrix.Key, rect))));

    /// <summary>Disposes every model held by the given dictionary without modifying the dictionary itself.</summary>
    private static void DisposeModels(Dictionary<string, PaddleOcrAll> models)
    {
        foreach (var model in models.Values)
        {
            model.Dispose();
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters