diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..a12a61d 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -67,6 +67,12 @@ is_flag=True, help="If set, will plot intermediary files and images", ) +@click.option( + "--extract_only_images/--disable-extracting_only_images", + "-eoi/-noeoi", + is_flag=True, + help="If a directory is given, only images in documents will be cropped and saved there and the other processing will not be done", +) @click.option( "--allow-enhancement/--no-allow-enhancement", "-ae/-noae", @@ -148,6 +154,7 @@ def main( save_layout, save_deskewed, save_all, + extract_only_images, save_page, enable_plotting, allow_enhancement, @@ -175,12 +182,16 @@ def main( if textline_light and not light_version: print('Error: You used -tll to enable light textline detection but -light is not enabled') sys.exit(1) + if extract_only_images and not ( save_images and enable_plotting): + print('Error: You used -eoi to enable extract images only mode but did not enable plotting with -ep and providing an output directory with -si') + sys.exit(1) eynollah = Eynollah( image_filename=image, dir_out=out, dir_in=dir_in, dir_models=model, dir_of_cropped_images=save_images, + extract_only_images=extract_only_images, dir_of_layout=save_layout, dir_of_deskewed=save_deskewed, dir_of_all=save_all, diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0c11327..deb178f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -148,6 +148,7 @@ def __init__( dir_out=None, dir_in=None, dir_of_cropped_images=None, + extract_only_images=False, dir_of_layout=None, dir_of_deskewed=None, dir_of_all=None, @@ -195,7 +196,7 @@ def __init__( self.allow_scaling = allow_scaling self.headers_off = headers_off self.light_version = light_version - self.extract_only_images = True + self.extract_only_images = extract_only_images self.ignore_page_extraction = ignore_page_extraction self.pcgts = pcgts if not dir_in: @@ -2953,9 +2954,6 @@ def run(self): Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - self.extract_only_images = True - t0_tot = time.time()