-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/refactor' into refactoring-2024-08
- Loading branch information
Showing
44 changed files
with
2,390 additions
and
492 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Declare this package as a namespace package through pkg_resources.declare_namespace.
__import__("pkg_resources").declare_namespace(__name__) |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
|
||
from .eynollah import Eynollah | ||
|
||
|
||
class EynollahProcessor(Processor): | ||
|
||
@property | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Pixelwise Segmentation | ||
> Pixelwise segmentation for document images | ||
## Introduction | ||
This repository contains the source code for training an encoder model for document image segmentation. | ||
|
||
## Installation | ||
Either clone the repository via `git clone https://github.com/qurator-spk/sbb_pixelwise_segmentation.git` or download and unpack the [ZIP](https://github.com/qurator-spk/sbb_pixelwise_segmentation/archive/master.zip). | ||
|
||
### Pretrained encoder | ||
Download our pretrained weights and add them to a ``pretrained_model`` folder: | ||
https://qurator-data.de/sbb_pixelwise_segmentation/pretrained_encoder/ | ||
## Usage | ||
|
||
### Train | ||
To train a model, run: ``python train.py with config_params.json`` | ||
|
||
### Ground truth format | ||
Labels for each pixel are identified by a number. So if you have a | ||
binary case, ``n_classes`` should be set to ``2`` and labels should | ||
be ``0`` and ``1`` for each class and pixel. | ||
|
||
In the case of multiclass, just set ``n_classes`` to the number of classes | ||
you have and then produce per-pixel labels with values from ``0, 1, 2, ..., n_classes-1``. | ||
The labels format should be png. | ||
Our labels are 3-channel PNG images, but only the information in the first channel is used. | ||
If you have an image label with height and width of 10, for a binary case the first channel should look like this: | ||
|
||
Label: [ [1, 0, 0, 1, 1, 0, 0, 1, 0, 0], | ||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], | ||
..., | ||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], | ||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ] | ||
|
||
This means that you have an image of size `10*10*3` and `pixel[0,0]` belongs | ||
to class `1` and `pixel[0,1]` belongs to class `0`. | ||
|
||
A small sample of training data for a binarization experiment can be found here: [Training data sample](https://qurator-data.de/~vahid.rezanezhad/binarization_training_data_sample/), which contains images and labels folders. | ||
|
||
### Training, evaluation and output | ||
The train and evaluation folders should contain subfolders of images and labels. | ||
The output folder should be an empty folder where the output model will be written to. | ||
|
||
### Parameter configuration | ||
* patches: If you want to break input images into smaller patches (input size of the model) you need to set this parameter to ``true``. In the case that the model should see the image once, like page extraction, patches should be set to ``false``. | ||
* n_batch: Number of batches at each iteration. | ||
* n_classes: Number of classes. In the case of binary classification this should be 2. | ||
* n_epochs: Number of epochs. | ||
* input_height: This indicates the height of model's input. | ||
* input_width: This indicates the width of model's input. | ||
* weight_decay: Weight decay of l2 regularization of model layers. | ||
* augmentation: If you want to apply any kind of augmentation, this parameter must first be set to ``true``. | ||
* flip_aug: If ``true``, different types of flip will be applied to the image. The types of flip are given by "flip_index" in the train.py file. | ||
* blur_aug: If ``true``, different types of blurring will be applied to the image. The types of blurring are given by "blur_k" in the train.py file. | ||
* scaling: If ``true``, scaling will be applied on image. Scale of scaling is given with "scales" in train.py file. | ||
* rotation_not_90: If ``true``, rotation (not 90 degree) will be applied to the image. Rotation angles are given by "thetha" in the train.py file. | ||
* rotation: If ``true``, 90 degree rotation will be applied on image. | ||
* binarization: If ``true``, Otsu thresholding will be applied to augment the input data with binarized images. | ||
* scaling_bluring: If ``true``, combination of scaling and blurring will be applied on image. | ||
* scaling_binarization: If ``true``, combination of scaling and binarization will be applied on image. | ||
* scaling_flip: If ``true``, combination of scaling and flip will be applied on image. | ||
* continue_training: If ``true``, it means that you have already trained a model and you would like to continue the training. So it is needed to provide the dir of trained model with "dir_of_start_model" and index for naming the models. For example if you have already trained for 3 epochs then your last index is 2 and if you want to continue from model_1.h5, you can set "index_start" to 3 to start naming model with index 3. | ||
* weighted_loss: If ``true``, a weighted categorical_crossentropy is used as the loss function. Be careful: if you set this to ``true``, the parameter "is_loss_soft_dice" should be ``false``. | ||
* data_is_provided: If you have already provided the input data you can set this to ``true``. Be sure that the train and eval data are in "dir_output", because when training data is provided for the first time it is resized and augmented and then written to the train and eval sub-directories of "dir_output". | ||
* dir_train: This is the directory of "images" and "labels" (dir_train should include two subdirectories with names of images and labels ) for raw images and labels. Namely they are not prepared (not resized and not augmented) yet for training the model. When we run this tool these raw data will be transformed to suitable size needed for the model and they will be written in "dir_output" in train and eval directories. Each of train and eval include "images" and "labels" sub-directories. | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
29 changes: 29 additions & 0 deletions
29
eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import os | ||
import sys | ||
import tensorflow as tf | ||
import keras, warnings | ||
from keras.optimizers import * | ||
from sacred import Experiment | ||
from models import * | ||
from utils import * | ||
from metrics import * | ||
|
||
|
||
def configuration():
    """Open a TensorFlow v1-compat session configured with ``allow_growth=True``.

    Builds ``tf.compat.v1.GPUOptions(allow_growth=True)``, wraps it in a
    ``tf.compat.v1.ConfigProto`` and creates a ``tf.compat.v1.Session`` from
    that config.

    Returns:
        The newly created ``tf.compat.v1.Session``. The session used to be
        bound to a local name and dropped, leaving callers no handle to use
        or close it; callers that ignore the return value are unaffected.
    """
    gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
    config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
    # Hand the session back instead of discarding it so its lifetime is
    # under the caller's control.
    return tf.compat.v1.Session(config=config)
|
||
|
||
if __name__ == '__main__':
    # Settings passed positionally to resnet50_unet below.
    n_classes, input_height, input_width = 2, 224, 448
    weight_decay, pretraining = 1e-6, False

    # Weights file that is loaded into the freshly built model.
    dir_of_weights = 'model_bin_sbb_ens.h5'

    # configuration()

    # Build the network, load the stored weights into it, then write the
    # complete model back out under a new file name.
    model = resnet50_unet(
        n_classes,
        input_height,
        input_width,
        weight_decay,
        pretraining,
    )
    model.load_weights(dir_of_weights)
    model.save('./name_in_another_python_version.h5')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"n_classes" : 3, | ||
"n_epochs" : 2, | ||
"input_height" : 448, | ||
"input_width" : 672, | ||
"weight_decay" : 1e-6, | ||
"n_batch" : 2, | ||
"learning_rate": 1e-4, | ||
"patches" : true, | ||
"pretraining" : true, | ||
"augmentation" : false, | ||
"flip_aug" : false, | ||
"blur_aug" : false, | ||
"scaling" : true, | ||
"binarization" : false, | ||
"scaling_bluring" : false, | ||
"scaling_binarization" : false, | ||
"scaling_flip" : false, | ||
"rotation": false, | ||
"rotation_not_90": false, | ||
"continue_training": false, | ||
"index_start": 0, | ||
"dir_of_start_model": " ", | ||
"weighted_loss": false, | ||
"is_loss_soft_dice": false, | ||
"data_is_provided": false, | ||
"dir_train": "/path/to/training/files/train", | ||
"dir_eval": "/path/to/training/files/eval", | ||
"dir_output": "/path/to/training/files/output" | ||
} |
Oops, something went wrong.