Merge remote-tracking branch 'origin/refactor' into refactoring-2024-08
kba committed Aug 24, 2024
2 parents b954a55 + 1469dd5, commit 8ec9fc6
Showing 44 changed files with 2,390 additions and 492 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -17,11 +17,12 @@
* Detection of reading order (left-to-right or right-to-left)
* Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML)
* [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface
+* [Examples](https://github.com/qurator-spk/eynollah/wiki#examples)

:warning: Development is currently focused on achieving the best possible quality of results for a wide variety of historical documents and therefore processing can be very slow. We aim to improve this, but contributions are welcome.

## Installation
-Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported.
+Python versions `3.8-3.11` with Tensorflow versions `<2.16` on Linux are currently supported.

For (limited) GPU support the CUDA toolkit needs to be installed.

@@ -38,17 +39,17 @@
```sh
git clone git@github.com:qurator-spk/eynollah.git
cd eynollah; pip install -e .
```

-Alternatively, you can run `make install` or `make install-dev` for editable installation.
+Alternatively, run `make install` or `make install-dev` for editable installation.

## Models
Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).

## Train
🚧 **Work in progress**

-In case you want to train your own model, have a look at [`sbb_pixelwise_segmentation`](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
+In case you want to train your own model, have a look at [`train`](https://github.com/qurator-spk/eynollah/tree/main/eynollah/eynollah/train).

-## Usage
+## Use
The command-line interface can be called like this:

```sh
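# Illustrative invocation only: the concrete example is collapsed in this diff
# view, so the flags shown here are assumptions based on the options above;
# consult `eynollah --help` for the authoritative list.
eynollah -i <image file> -o <output directory> -m <model directory>
```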

@@ -82,7 +83,6 @@
If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals).
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.

#### Use as OCR-D processor
-🚧 **Work in progress**

Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
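
A minimal sketch of such a call, assuming the processor is registered as `ocrd-eynollah-segment` and that models have been downloaded locally; the file group names and model path are placeholders:

```sh
# Run eynollah on an OCR-D workspace (illustrative file groups and model path)
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models /path/to/models_eynollah
```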

@@ -104,7 +104,7 @@
uses the original (RGB) image despite any binarization that may have occurred in previous processing steps.
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).

## How to cite
-If you find this tool useful in your work, please consider citing our paper:
+If you find this useful in your work, please consider citing our paper:

```bibtex
@inproceedings{hip23rezanezhad,
  ...
}
```
File renamed without changes.
1 change: 1 addition & 0 deletions eynollah/__init__.py
@@ -0,0 +1 @@
__import__("pkg_resources").declare_namespace(__name__)
File renamed without changes.
5 changes: 3 additions & 2 deletions qurator/eynollah/cli.py → eynollah/eynollah/cli.py
@@ -1,8 +1,8 @@
import sys
import click
from ocrd_utils import getLogger, initLogging, setOverrideLogLevel
-from qurator.eynollah.eynollah import Eynollah
-from qurator.eynollah.utils.dirs import EynollahDirs
+from eynollah.eynollah.eynollah import Eynollah
+from eynollah.eynollah.utils.dirs import EynollahDirs


@click.command()
@@ -11,6 +11,7 @@
"-i",
help="image filename",
type=click.Path(exists=True, dir_okay=False),
# required=True,
)
@click.option(
"--out",
File renamed without changes.
File renamed without changes.
@@ -2,10 +2,12 @@
from click import command
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor


@command()
@ocrd_cli_options
def main(*args, **kwargs):
    return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)


if __name__ == '__main__':
    main()
40 changes: 26 additions & 14 deletions qurator/eynollah/plot.py → eynollah/eynollah/plot.py
@@ -10,6 +10,7 @@
from .utils.resize import resize_image
from .utils.dirs import EynollahDirs


class EynollahPlotter():
"""
Class collecting all the plotting and image writing methods
@@ -34,13 +35,15 @@ def save_plot_of_layout_main(self, text_regions_p, image_page):
if self.dirs.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
-pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
+pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(40, 40))
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dirs.dir_of_layout, self.image_filename_stem + "_layout_main.png"))

@@ -49,7 +52,7 @@ def save_plot_of_layout_main_all(self, text_regions_p, image_page):
if self.dirs.dir_of_all is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
-pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
+pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
@@ … @@
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png"))

@@ -72,7 +77,9 @@ def save_plot_of_layout(self, text_regions_p, image_page):
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dirs.dir_of_layout, self.image_filename_stem + "_layout.png"))

@@ -89,7 +96,9 @@ def save_plot_of_layout_all(self, text_regions_p, image_page):
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem + "_layout_and_page.png"))

@@ -105,7 +114,9 @@ def save_plot_of_textlines(self, textline_mask_tot_ea, image_page):
plt.subplot(1, 2, 2)
im = plt.imshow(textline_mask_tot_ea[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem + "_textline_and_page.png"))

@@ -130,11 +141,12 @@ def save_plot_of_textline_density(self, img_patch_org):
plt.rcParams['font.size']='50'
plt.subplot(1,2,1)
plt.imshow(img_patch_org)
-plt.subplot(1,2,2)
-plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8)
-plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
-plt.ylabel('Height',fontsize=60)
-plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
+plt.subplot(1, 2, 2)
+plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),
+         np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))), linewidth=8)
+plt.xlabel('Density of textline prediction in direction of X axis', fontsize=60)
+plt.ylabel('Height', fontsize=60)
+plt.yticks([0, len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
plt.gca().invert_yaxis()
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem+'_density_of_textline.png'))

@@ -157,9 +169,9 @@ def write_images_into_directory(self, img_contours, image_page):
box = [x, y, w, h]
croped_page, page_coord = crop_image_inside_box(box, image_page)

-croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
+croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y),
+                           int(croped_page.shape[1] / self.scale_x))

path = os.path.join(self.dirs.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
cv2.imwrite(path, croped_page)
index += 1

@@ -7,6 +7,7 @@

from .eynollah import Eynollah


class EynollahProcessor(Processor):

@property
67 changes: 67 additions & 0 deletions eynollah/eynollah/train/README.md
@@ -0,0 +1,67 @@
# Pixelwise Segmentation
> Pixelwise segmentation for document images
## Introduction
This repository contains the source code for training an encoder model for document image segmentation.

## Installation
Either clone the repository via `git clone https://github.com/qurator-spk/sbb_pixelwise_segmentation.git` or download and unpack the [ZIP](https://github.com/qurator-spk/sbb_pixelwise_segmentation/archive/master.zip).

### Pretrained encoder
Download our pretrained weights and add them to a ``pretrained_model`` folder:
https://qurator-data.de/sbb_pixelwise_segmentation/pretrained_encoder/
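
For example, one way to fetch the weights with `wget` (a sketch; substitute the actual file name from the listing above):

```sh
# Create the folder expected by the training config and download the weights into it
mkdir -p pretrained_model
wget -P pretrained_model "https://qurator-data.de/sbb_pixelwise_segmentation/pretrained_encoder/<weights file>"
```
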
## Usage

### Train
To train a model, run: ``python train.py with config_params.json``
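
Training appears to be driven by [Sacred](https://github.com/IDSIA/sacred) (note the `python train.py with …` form above and the `sacred` import in the code below), so individual values can also be overridden on the command line using standard Sacred syntax; a sketch:

```sh
# Override single parameters without editing config_params.json
python train.py with config_params.json n_epochs=5 n_batch=4
```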

### Ground truth format
Labels for each pixel are identified by a number, so in a
binary case ``n_classes`` should be set to ``2`` and the labels should
be ``0`` and ``1`` for each class and pixel.

In the multiclass case, set ``n_classes`` to the number of classes
and produce per-pixel labels with values from ``0, 1, 2, ..., n_classes-1``.
The labels must be PNG files.
Our labels are 3-channel PNG images, but only the information in the first channel is used.
If you have an image label with height and width of 10, in a binary case the first channel should look like this:

Label: [ [1, 0, 0, 1, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
...,
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ]

This means that you have an image of shape `10*10*3` in which `pixel[0,0]` belongs
to class `1` and `pixel[0,1]` belongs to class `0`.

A small sample of training data for a binarization experiment can be found here: [Training data sample](https://qurator-data.de/~vahid.rezanezhad/binarization_training_data_sample/), which contains ``images`` and ``labels`` folders.
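
A minimal sketch of writing such a label file with NumPy and Pillow (the helper name is hypothetical):

```python
import numpy as np
from PIL import Image

def write_label_png(mask, path):
    # mask: 2-D uint8 array of class ids in 0..n_classes-1
    label = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8)
    label[:, :, 0] = mask  # only the first channel is read during training
    Image.fromarray(label).save(path)

# 10x10 binary example like the matrix shown above
write_label_png(np.random.randint(0, 2, (10, 10)).astype(np.uint8), "label.png")
```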

### Training, evaluation and output
The train and evaluation folders should contain sub-folders of images and labels.
The output folder should be an empty folder to which the output model will be written.

### Parameter configuration
* patches: If you want to break input images into smaller patches (the input size of the model), set this parameter to ``true``. If the model should see the whole image at once, as in page extraction, set it to ``false``.
* n_batch: Batch size used at each iteration.
* n_classes: Number of classes. In the case of binary classification this should be 2.
* n_epochs: Number of epochs.
* input_height: Height of the model's input.
* input_width: Width of the model's input.
* weight_decay: Weight decay of the l2 regularization of the model layers.
* augmentation: To apply any kind of augmentation, this parameter must first be set to ``true``.
* flip_aug: If ``true``, different types of flips will be applied to the image. The flip types are given with "flip_index" in the train.py file.
* blur_aug: If ``true``, different types of blurring will be applied to the image. The blur types are given with "blur_k" in the train.py file.
* scaling: If ``true``, scaling will be applied to the image. The scales are given with "scales" in the train.py file.
* rotation_not_90: If ``true``, rotations other than 90 degrees will be applied to the image. The rotation angles are given with "thetha" in the train.py file.
* rotation: If ``true``, 90 degree rotation will be applied to the image.
* binarization: If ``true``, Otsu thresholding will be applied to augment the input data with binarized images.
* scaling_bluring: If ``true``, a combination of scaling and blurring will be applied to the image.
* scaling_binarization: If ``true``, a combination of scaling and binarization will be applied to the image.
* scaling_flip: If ``true``, a combination of scaling and flipping will be applied to the image.
* continue_training: If ``true``, training continues from an already trained model. In that case, provide the directory of the trained model with "dir_of_start_model" and the index used for naming the models; for example, if you have already trained for 3 epochs, your last index is 2, and to continue from model_1.h5 you can set "index_start" to 3 so that new models are named starting from index 3 (see the sketch after this list).
* weighted_loss: If ``true``, weighted categorical_crossentropy is applied as the loss function. Be careful: if this is set to ``true``, the parameter "is_loss_soft_dice" should be ``false``.
* data_is_provided: If the input data has already been prepared, set this to ``true`` and make sure the train and eval data are in "dir_output". Otherwise, raw training data is resized, augmented, and then written to the train and eval sub-directories in "dir_output".
* dir_train: Directory of the raw "images" and "labels" (dir_train must contain two sub-directories named images and labels), i.e. data that has not yet been resized or augmented for training. When the tool runs, this raw data is transformed to the size the model needs and written to the train and eval directories in "dir_output", each of which contains "images" and "labels" sub-directories.
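
As an illustration of the continue_training options mentioned above, resuming after three completed epochs might look like this in the config (paths are placeholders; only the relevant keys are shown):

```json
{
  "continue_training": true,
  "index_start": 3,
  "dir_of_start_model": "/path/to/previous/output"
}
```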


1 change: 1 addition & 0 deletions eynollah/eynollah/train/__init__.py
@@ -0,0 +1 @@

@@ -0,0 +1,29 @@
import os
import sys
import tensorflow as tf
import keras, warnings
from keras.optimizers import *
from sacred import Experiment
from models import *
from utils import *
from metrics import *


def configuration():
    # Let TensorFlow allocate GPU memory on demand instead of reserving it all upfront.
    gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
    session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))


if __name__ == '__main__':
    # Rebuild the architecture, load previously trained weights, and re-save the
    # model, e.g. to make the .h5 file readable under another Python/Keras version.
    n_classes = 2
    input_height = 224
    input_width = 448
    weight_decay = 1e-6
    pretraining = False
    dir_of_weights = 'model_bin_sbb_ens.h5'

    # configuration()

    model = resnet50_unet(n_classes, input_height, input_width, weight_decay, pretraining)
    model.load_weights(dir_of_weights)
    model.save('./name_in_another_python_version.h5')
30 changes: 30 additions & 0 deletions eynollah/eynollah/train/config_params.json
@@ -0,0 +1,30 @@
{
    "n_classes" : 3,
    "n_epochs" : 2,
    "input_height" : 448,
    "input_width" : 672,
    "weight_decay" : 1e-6,
    "n_batch" : 2,
    "learning_rate": 1e-4,
    "patches" : true,
    "pretraining" : true,
    "augmentation" : false,
    "flip_aug" : false,
    "blur_aug" : false,
    "scaling" : true,
    "binarization" : false,
    "scaling_bluring" : false,
    "scaling_binarization" : false,
    "scaling_flip" : false,
    "rotation": false,
    "rotation_not_90": false,
    "continue_training": false,
    "index_start": 0,
    "dir_of_start_model": " ",
    "weighted_loss": false,
    "is_loss_soft_dice": false,
    "data_is_provided": false,
    "dir_train": "/path/to/training/files/train",
    "dir_eval": "/path/to/training/files/eval",
    "dir_output": "/path/to/training/files/output"
}