Merge remote-tracking branch 'origin/refactor' into refactoring-2024-08
kba committed Aug 24, 2024
2 parents b954a55 + 1469dd5, commit 8ec9fc6
Showing 44 changed files with 2,390 additions and 492 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -17,11 +17,12 @@
* Detection of reading order (left-to-right or right-to-left)
* Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML)
* [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface
+* [Examples](https://github.com/qurator-spk/eynollah/wiki#examples)

:warning: Development is currently focused on achieving the best possible quality of results for a wide variety of historical documents and therefore processing can be very slow. We aim to improve this, but contributions are welcome.

## Installation
-Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported.
+Python versions `3.8-3.11` with Tensorflow versions `<2.16` on Linux are currently supported.

For (limited) GPU support the CUDA toolkit needs to be installed.

@@ -38,17 +39,17 @@
```sh
git clone git@github.com:qurator-spk/eynollah.git
cd eynollah; pip install -e .
```

-Alternatively, you can run `make install` or `make install-dev` for editable installation.
+Alternatively, run `make install` or `make install-dev` for editable installation.

## Models
Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).

## Train
🚧 **Work in progress**

-In case you want to train your own model, have a look at [`sbb_pixelwise_segmentation`](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
+In case you want to train your own model, have a look at [`train`](https://github.com/qurator-spk/eynollah/tree/main/eynollah/eynollah/train).

-## Usage
+## Use
The command-line interface can be called like this:

```sh
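# Illustrative invocation only: the concrete example is collapsed in this diff
# view, so the flags shown here are assumptions based on the options above;
# consult `eynollah --help` for the authoritative list.
eynollah -i <image file> -o <output directory> -m <model directory>
```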

@@ -82,7 +83,6 @@
If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals).
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.

#### Use as OCR-D processor
-🚧 **Work in progress**

Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
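
A minimal sketch of such a call, assuming the processor is registered as `ocrd-eynollah-segment` and that models have been downloaded locally; the file group names and model path are placeholders:

```sh
# Run eynollah on an OCR-D workspace (illustrative file groups and model path)
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models /path/to/models_eynollah
```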

@@ -104,7 +104,7 @@
uses the original (RGB) image despite any binarization that may have occurred in previous processing steps.
Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).

## How to cite
-If you find this tool useful in your work, please consider citing our paper:
+If you find this useful in your work, please consider citing our paper:

```bibtex
@inproceedings{hip23rezanezhad,
  ...
}
```
File renamed without changes.
1 change: 1 addition & 0 deletions eynollah/__init__.py
@@ -0,0 +1 @@
__import__("pkg_resources").declare_namespace(__name__)
File renamed without changes.
5 changes: 3 additions & 2 deletions qurator/eynollah/cli.py → eynollah/eynollah/cli.py
@@ -1,8 +1,8 @@
import sys
import click
from ocrd_utils import getLogger, initLogging, setOverrideLogLevel
-from qurator.eynollah.eynollah import Eynollah
-from qurator.eynollah.utils.dirs import EynollahDirs
+from eynollah.eynollah.eynollah import Eynollah
+from eynollah.eynollah.utils.dirs import EynollahDirs


@click.command()
@@ -11,6 +11,7 @@
"-i",
help="image filename",
type=click.Path(exists=True, dir_okay=False),
# required=True,
)
@click.option(
"--out",
File renamed without changes.
File renamed without changes.
@@ -2,10 +2,12 @@
from click import command
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor


@command()
@ocrd_cli_options
def main(*args, **kwargs):
    return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)


if __name__ == '__main__':
    main()
40 changes: 26 additions & 14 deletions qurator/eynollah/plot.py → eynollah/eynollah/plot.py
@@ -10,6 +10,7 @@
from .utils.resize import resize_image
from .utils.dirs import EynollahDirs


class EynollahPlotter():
"""
Class collecting all the plotting and image writing methods
@@ -34,13 +35,15 @@ def save_plot_of_layout_main(self, text_regions_p, image_page):
if self.dirs.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
-pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
+pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(40, 40))
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dirs.dir_of_layout, self.image_filename_stem + "_layout_main.png"))

@@ -49,7 +52,7 @@ def save_plot_of_layout_main_all(self, text_regions_p, image_page):
if self.dirs.dir_of_all is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
-pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
+pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
@@ … @@
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png"))

@@ -72,7 +77,9 @@ def save_plot_of_layout(self, text_regions_p, image_page):
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dirs.dir_of_layout, self.image_filename_stem + "_layout.png"))

@@ -89,7 +96,9 @@ def save_plot_of_layout_all(self, text_regions_p, image_page):
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem + "_layout_and_page.png"))

@@ -105,7 +114,9 @@ def save_plot_of_textlines(self, textline_mask_tot_ea, image_page):
plt.subplot(1, 2, 2)
im = plt.imshow(textline_mask_tot_ea[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
-patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
+patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]],
+                          label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in
+           values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem + "_textline_and_page.png"))

@@ -130,11 +141,12 @@ def save_plot_of_textline_density(self, img_patch_org):
plt.rcParams['font.size']='50'
plt.subplot(1,2,1)
plt.imshow(img_patch_org)
-plt.subplot(1,2,2)
-plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8)
-plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
-plt.ylabel('Height',fontsize=60)
-plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
+plt.subplot(1, 2, 2)
+plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),
+         np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))), linewidth=8)
+plt.xlabel('Density of textline prediction in direction of X axis', fontsize=60)
+plt.ylabel('Height', fontsize=60)
+plt.yticks([0, len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
plt.gca().invert_yaxis()
plt.savefig(os.path.join(self.dirs.dir_of_all, self.image_filename_stem+'_density_of_textline.png'))

@@ -157,9 +169,9 @@ def write_images_into_directory(self, img_contours, image_page):
box = [x, y, w, h]
croped_page, page_coord = crop_image_inside_box(box, image_page)

-croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
+croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y),
+                           int(croped_page.shape[1] / self.scale_x))

path = os.path.join(self.dirs.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
cv2.imwrite(path, croped_page)
index += 1

@@ -7,6 +7,7 @@

from .eynollah import Eynollah


class EynollahProcessor(Processor):

@property
67 changes: 67 additions & 0 deletions eynollah/eynollah/train/README.md
@@ -0,0 +1,67 @@
# Pixelwise Segmentation
> Pixelwise segmentation for document images
## Introduction
This repository contains the source code for training an encoder model for document image segmentation.

## Installation
Either clone the repository via `git clone https://github.com/qurator-spk/sbb_pixelwise_segmentation.git` or download and unpack the [ZIP](https://github.com/qurator-spk/sbb_pixelwise_segmentation/archive/master.zip).

### Pretrained encoder
Download our pretrained weights and add them to a ``pretrained_model`` folder:
https://qurator-data.de/sbb_pixelwise_segmentation/pretrained_encoder/
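
For example, one way to fetch the weights with `wget` (a sketch; substitute the actual file name from the listing above):

```sh
# Create the folder expected by the training config and download the weights into it
mkdir -p pretrained_model
wget -P pretrained_model "https://qurator-data.de/sbb_pixelwise_segmentation/pretrained_encoder/<weights file>"
```
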
## Usage

### Train
To train a model, run: ``python train.py with config_params.json``
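
Training appears to be driven by [Sacred](https://github.com/IDSIA/sacred) (note the `python train.py with …` form above and the `sacred` import in the code below), so individual values can also be overridden on the command line using standard Sacred syntax; a sketch:

```sh
# Override single parameters without editing config_params.json
python train.py with config_params.json n_epochs=5 n_batch=4
```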

### Ground truth format
Labels for each pixel are identified by a number, so in a
binary case ``n_classes`` should be set to ``2`` and the labels should
be ``0`` and ``1`` for each class and pixel.

In the multiclass case, set ``n_classes`` to the number of classes
and produce per-pixel labels with values from ``0, 1, 2, ..., n_classes-1``.
The labels must be PNG files.
Our labels are 3-channel PNG images, but only the information in the first channel is used.
If you have an image label with height and width of 10, in a binary case the first channel should look like this:

Label: [ [1, 0, 0, 1, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
...,
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ]

This means that you have an image of shape `10*10*3` in which `pixel[0,0]` belongs
to class `1` and `pixel[0,1]` belongs to class `0`.

A small sample of training data for a binarization experiment can be found here: [Training data sample](https://qurator-data.de/~vahid.rezanezhad/binarization_training_data_sample/), which contains ``images`` and ``labels`` folders.
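
A minimal sketch of writing such a label file with NumPy and Pillow (the helper name is hypothetical):

```python
import numpy as np
from PIL import Image

def write_label_png(mask, path):
    # mask: 2-D uint8 array of class ids in 0..n_classes-1
    label = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8)
    label[:, :, 0] = mask  # only the first channel is read during training
    Image.fromarray(label).save(path)

# 10x10 binary example like the matrix shown above
write_label_png(np.random.randint(0, 2, (10, 10)).astype(np.uint8), "label.png")
```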

### Training, evaluation and output
The train and evaluation folders should contain sub-folders of images and labels.
The output folder should be an empty folder to which the output model will be written.

### Parameter configuration
* patches: If you want to break input images into smaller patches (the input size of the model), set this parameter to ``true``. If the model should see the whole image at once, as in page extraction, set it to ``false``.
* n_batch: Batch size used at each iteration.
* n_classes: Number of classes. In the case of binary classification this should be 2.
* n_epochs: Number of epochs.
* input_height: Height of the model's input.
* input_width: Width of the model's input.
* weight_decay: Weight decay of the l2 regularization of the model layers.
* augmentation: To apply any kind of augmentation, this parameter must first be set to ``true``.
* flip_aug: If ``true``, different types of flips will be applied to the image. The flip types are given with "flip_index" in the train.py file.
* blur_aug: If ``true``, different types of blurring will be applied to the image. The blur types are given with "blur_k" in the train.py file.
* scaling: If ``true``, scaling will be applied to the image. The scales are given with "scales" in the train.py file.
* rotation_not_90: If ``true``, rotations other than 90 degrees will be applied to the image. The rotation angles are given with "thetha" in the train.py file.
* rotation: If ``true``, 90 degree rotation will be applied to the image.
* binarization: If ``true``, Otsu thresholding will be applied to augment the input data with binarized images.
* scaling_bluring: If ``true``, a combination of scaling and blurring will be applied to the image.
* scaling_binarization: If ``true``, a combination of scaling and binarization will be applied to the image.
* scaling_flip: If ``true``, a combination of scaling and flipping will be applied to the image.
* continue_training: If ``true``, training continues from an already trained model. In that case, provide the directory of the trained model with "dir_of_start_model" and the index used for naming the models; for example, if you have already trained for 3 epochs, your last index is 2, and to continue from model_1.h5 you can set "index_start" to 3 so that new models are named starting from index 3 (see the sketch after this list).
* weighted_loss: If ``true``, weighted categorical_crossentropy is applied as the loss function. Be careful: if this is set to ``true``, the parameter "is_loss_soft_dice" should be ``false``.
* data_is_provided: If the input data has already been prepared, set this to ``true`` and make sure the train and eval data are in "dir_output". Otherwise, raw training data is resized, augmented, and then written to the train and eval sub-directories in "dir_output".
* dir_train: Directory of the raw "images" and "labels" (dir_train must contain two sub-directories named images and labels), i.e. data that has not yet been resized or augmented for training. When the tool runs, this raw data is transformed to the size the model needs and written to the train and eval directories in "dir_output", each of which contains "images" and "labels" sub-directories.
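
As an illustration of the continue_training options mentioned above, resuming after three completed epochs might look like this in the config (paths are placeholders; only the relevant keys are shown):

```json
{
  "continue_training": true,
  "index_start": 3,
  "dir_of_start_model": "/path/to/previous/output"
}
```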


1 change: 1 addition & 0 deletions eynollah/eynollah/train/__init__.py
@@ -0,0 +1 @@

@@ -0,0 +1,29 @@
import os
import sys
import tensorflow as tf
import keras, warnings
from keras.optimizers import *
from sacred import Experiment
from models import *
from utils import *
from metrics import *


def configuration():
    # Let TensorFlow allocate GPU memory on demand instead of reserving it all upfront.
    gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
    session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))


if __name__ == '__main__':
    # Rebuild the architecture, load previously trained weights, and re-save the
    # model, e.g. to make the .h5 file readable under another Python/Keras version.
    n_classes = 2
    input_height = 224
    input_width = 448
    weight_decay = 1e-6
    pretraining = False
    dir_of_weights = 'model_bin_sbb_ens.h5'

    # configuration()

    model = resnet50_unet(n_classes, input_height, input_width, weight_decay, pretraining)
    model.load_weights(dir_of_weights)
    model.save('./name_in_another_python_version.h5')
30 changes: 30 additions & 0 deletions eynollah/eynollah/train/config_params.json
@@ -0,0 +1,30 @@
{
    "n_classes" : 3,
    "n_epochs" : 2,
    "input_height" : 448,
    "input_width" : 672,
    "weight_decay" : 1e-6,
    "n_batch" : 2,
    "learning_rate": 1e-4,
    "patches" : true,
    "pretraining" : true,
    "augmentation" : false,
    "flip_aug" : false,
    "blur_aug" : false,
    "scaling" : true,
    "binarization" : false,
    "scaling_bluring" : false,
    "scaling_binarization" : false,
    "scaling_flip" : false,
    "rotation": false,
    "rotation_not_90": false,
    "continue_training": false,
    "index_start": 0,
    "dir_of_start_model": " ",
    "weighted_loss": false,
    "is_loss_soft_dice": false,
    "data_is_provided": false,
    "dir_train": "/path/to/training/files/train",
    "dir_eval": "/path/to/training/files/eval",
    "dir_output": "/path/to/training/files/output"
}