@@ -106,7 +106,7 @@ import os
106
106
import asyncio
107
107
import sys
108
108
109
- from anyparser_core import Anyparser, AnyparserOption, OcrLanguage, OCRPreset
109
+ from anyparser_core import Anyparser, AnyparserOption, OcrLanguage, OcrPreset
110
110
111
111
single_file = " docs/document.png"
112
112
@@ -116,7 +116,7 @@ options = AnyparserOption(
116
116
model = " ocr" ,
117
117
format = " markdown" ,
118
118
ocr_language = [OcrLanguage.JAPANESE ],
119
- ocr_preset = OCRPreset .SCAN ,
119
+ ocr_preset = OcrPreset .SCAN ,
120
120
)
121
121
122
122
parser = Anyparser(options)
@@ -226,7 +226,7 @@ The `Anyparser` class utilizes the `AnyparserOption` dataclass for flexible conf
226
226
from dataclasses import dataclass
227
227
from typing import List, Literal, Optional, Union
228
228
229
- from anyparser_core import OcrLanguage, OCRPreset
229
+ from anyparser_core import OcrLanguage, OcrPreset
230
230
231
231
@dataclass
232
232
class AnyparserOption :
@@ -255,7 +255,7 @@ class AnyparserOption:
255
255
256
256
# OCR Configuration
257
257
ocr_language: Optional[List[OcrLanguage]] = None # Languages for OCR processing
258
- ocr_preset: Optional[OCRPreset ] = None # Preset configuration for OCR
258
+ ocr_preset: Optional[OcrPreset ] = None # Preset configuration for OCR
259
259
260
260
# Crawler Configuration
261
261
max_depth: Optional[int ] = None # Maximum crawl depth
@@ -278,7 +278,7 @@ class AnyparserOption:
278
278
| ` files ` | ` Optional[Union[str, List[str]]] ` | ` None ` | Input files to process |
279
279
| ` url ` | ` Optional[str] ` | ` None ` | URL for crawler model |
280
280
| ` ocr_language ` | ` Optional[List[OcrLanguage]] ` | ` None ` | Languages for OCR processing |
281
- | ` ocr_preset ` | ` Optional[OCRPreset ] ` | ` None ` | Preset configuration for OCR |
281
+ | ` ocr_preset ` | ` Optional[OcrPreset] ` | ` None ` | Preset configuration for OCR |
282
282
| ` max_depth ` | ` Optional[int] ` | ` None ` | Maximum crawl depth for crawler model |
283
283
| ` max_executions ` | ` Optional[int] ` | ` None ` | Maximum number of pages to crawl |
284
284
| ` strategy ` | ` Optional[str] ` | ` None ` | Crawling strategy: ` "LIFO" ` or ` "FIFO" ` |
@@ -288,19 +288,19 @@ class AnyparserOption:
288
288
289
289
The following OCR presets are available for optimized document processing:
290
290
291
- - ` OCRPreset .DOCUMENT` - General document processing
292
- - ` OCRPreset .HANDWRITING` - Handwritten text recognition
293
- - ` OCRPreset .SCAN` - Scanned document processing
294
- - ` OCRPreset .RECEIPT` - Receipt processing
295
- - ` OCRPreset .MAGAZINE` - Magazine/article processing
296
- - ` OCRPreset .INVOICE` - Invoice processing
297
- - ` OCRPreset .BUSINESS_CARD` - Business card processing
298
- - ` OCRPreset .PASSPORT` - Passport document processing
299
- - ` OCRPreset .DRIVER_LICENSE` - Driver's license processing
300
- - ` OCRPreset .IDENTITY_CARD` - ID card processing
301
- - ` OCRPreset .LICENSE_PLATE` - License plate recognition
302
- - ` OCRPreset .MEDICAL_REPORT` - Medical document processing
303
- - ` OCRPreset .BANK_STATEMENT` - Bank statement processing
291
+ - `OcrPreset.DOCUMENT` - General document processing
292
+ - `OcrPreset.HANDWRITING` - Handwritten text recognition
293
+ - `OcrPreset.SCAN` - Scanned document processing
294
+ - `OcrPreset.RECEIPT` - Receipt processing
295
+ - `OcrPreset.MAGAZINE` - Magazine/article processing
296
+ - `OcrPreset.INVOICE` - Invoice processing
297
+ - `OcrPreset.BUSINESS_CARD` - Business card processing
298
+ - `OcrPreset.PASSPORT` - Passport document processing
299
+ - `OcrPreset.DRIVER_LICENSE` - Driver's license processing
300
+ - `OcrPreset.IDENTITY_CARD` - ID card processing
301
+ - `OcrPreset.LICENSE_PLATE` - License plate recognition
302
+ - `OcrPreset.MEDICAL_REPORT` - Medical document processing
303
+ - `OcrPreset.BANK_STATEMENT` - Bank statement processing
304
304
305
305
**Model Types for AI Data Pipelines:**
306
306
0 commit comments