# data_loader.py
import cv2
import numpy as np
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in modern scikit-learn

num_class = 280  # number of unique characters
dim = (128, 32)  # target image dimension (width, height) for cv2.resize
maxlen = 32  # the maximum string length of the GT
def load_dataset():
    '''
    This function loads the training and test datasets (there are three different test sets)
    and returns the following arrays. You may download the images from
    http://www.dfki.uni-kl.de/~belay/ and store them in the same directory.
    train_imagei --> training text-line images
    train_texi --> ground truth (GT) of the training data
    test_imagep --> test set of printed text-line images with a Power Geez font
    test_imagepg --> test set of synthetic text-line images with a Power Geez font
    test_imagevg --> test set of synthetic text-line images with a Visual Geez font
    test_textp --> GT for the printed text-line images with a Power Geez font
    test_textpg --> GT for the synthetic text-line images with a Power Geez font
    test_textvg --> GT for the synthetic text-line images with a Visual Geez font
    We recommend running this code on the full dataset directly only if your computer has >= 32 GB of RAM.
    Otherwise, you need to write your own data-generator code (a minimal sketch is given after this function).
    To check how it works, you can use the given sample text-line images.
    '''
    train_imagei = np.load('./sample_dataset/X_trainp_pg_vg.npy')
    train_texi = np.load('./sample_dataset/y_trainp_pg_vg.npy')
    test_imagep = np.load('./sample_dataset/x_testp.npy')
    test_textp = np.load('./sample_dataset/y_testp.npy')
    test_imagepg = np.load('./sample_dataset/X_test_pg.npy')
    test_textpg = np.load('./sample_dataset/y_test_pg.npy')
    test_imagevg = np.load('./sample_dataset/X_test_vg.npy')
    test_textvg = np.load('./sample_dataset/y_test_vg.npy')
    return train_imagei, train_texi, test_imagep, test_textp, test_imagepg, test_textpg, test_imagevg, test_textvg
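# The docstring above defers the data-generator code for low-RAM machines.
# Below is a minimal, hypothetical sketch of such a generator using
# keras.utils.Sequence; it is NOT part of the original pipeline, and the
# batch layout it yields is an assumption based on preprocess_traindata().
from keras.utils import Sequence

class TextLineGenerator(Sequence):
    '''Yields batches of resized text-line images and their GT label sequences.'''
    def __init__(self, images, labels, batch_size=32):
        self.images = images          # iterable of 2D text-line images
        self.labels = labels          # array of encoded GT label sequences
        self.batch_size = batch_size

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.images) / self.batch_size))

    def __getitem__(self, idx):
        lo = idx * self.batch_size
        hi = lo + self.batch_size
        # resize each line image, then add the trailing channel dimension
        batch = np.array([im_resize(im) for im in self.images[lo:hi]])
        x = batch.reshape(batch.shape[0], batch.shape[1], batch.shape[2], 1)
        y = self.labels[lo:hi]
        return x, y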
# the following two functions preprocess the train and test sets separately, just for simplicity
def preprocess_traindata():
    '''
    Reshapes each 2D text-line image (h, w) into the 3D format (h, w, 1).
    This function also randomly splits the data into training and validation
    sets and computes the lists of input/label lengths for both splits.
    '''
    im_train = load_dataset()
    train_imagei = im_train[0]
    train_texi = im_train[1]
    im_train = []
    for i in train_imagei:
        im_train.append(im_resize(i))
    im_train = np.array(im_train)
    train_image, val_image, train_tex, val_tex = train_test_split(im_train, train_texi, test_size=0.07)
    X_train = train_image.reshape(train_image.shape[0], train_image.shape[1], train_image.shape[2], 1)  # [sample_size, 32, 128, 1]
    X_val = val_image.reshape(val_image.shape[0], val_image.shape[1], val_image.shape[2], 1)
    y_train = train_tex
    y_val = val_tex
    nb_train = len(X_train)
    nb_val = len(X_val)
    # create the lists of input lengths;
    # the +31 is a pad that makes the input length match the output length of the LSTMs
    x_train_len = np.asarray([len(X_train[i]) + 31 for i in range(nb_train)])
    x_val_len = np.asarray([len(X_val[i]) + 31 for i in range(nb_val)])
    y_train_len = np.asarray([len(y_train[i]) for i in range(nb_train)])
    y_val_len = np.asarray([len(y_val[i]) for i in range(nb_val)])
    training_img = np.array(X_train)
    train_input_length = np.array(x_train_len)
    train_label_length = np.array(y_train_len)
    valid_img = np.array(X_val)
    valid_input_length = np.array(x_val_len)
    valid_label_length = np.array(y_val_len)
    return training_img, y_train, train_input_length, train_label_length, valid_img, y_val, valid_input_length, valid_label_length
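# A hypothetical sketch of how the eight arrays returned above are typically
# wired into a Keras model trained with a CTC Lambda layer. The input/output
# names ('the_input', 'the_labels', 'input_length', 'label_length', 'ctc')
# are assumptions, not names taken from the original model code.
def train_with_ctc(model, batch_size=32, epochs=5):
    '''Hypothetical helper: feeds preprocess_traindata() output into model.fit().'''
    (training_img, y_train, train_input_length, train_label_length,
     valid_img, y_val, valid_input_length, valid_label_length) = preprocess_traindata()
    model.fit(
        x={'the_input': training_img,
           'the_labels': y_train,
           'input_length': train_input_length,
           'label_length': train_label_length},
        y={'ctc': np.zeros(len(training_img))},  # dummy target; the loss comes from the CTC layer
        validation_data=(
            {'the_input': valid_img,
             'the_labels': y_val,
             'input_length': valid_input_length,
             'label_length': valid_label_length},
            {'ctc': np.zeros(len(valid_img))}),
        batch_size=batch_size, epochs=epochs)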
def preprocess_testdata():
    '''
    This function preprocesses the test samples.
    input: 2D test images
    output: 3D image format (h, w, 1)
    '''
    im_test = load_dataset()
    test_imagep = im_test[2]
    test_imagepg = im_test[4]
    test_imagevg = im_test[6]
    y_testp = im_test[3]
    y_testpg = im_test[5]
    y_testvg = im_test[7]
    im_testp = []
    for i in test_imagep:
        im_testp.append(im_resize(i))
    im_testpg = []
    for i in test_imagepg:
        im_testpg.append(im_resize(i))
    im_testvg = []
    for i in test_imagevg:
        im_testvg.append(im_resize(i))
    im_testp = np.array(im_testp)
    im_testpg = np.array(im_testpg)
    im_testvg = np.array(im_testvg)
    X_testp = im_testp.reshape(im_testp.shape[0], im_testp.shape[1], im_testp.shape[2], 1)
    X_testpg = im_testpg.reshape(im_testpg.shape[0], im_testpg.shape[1], im_testpg.shape[2], 1)
    X_testvg = im_testvg.reshape(im_testvg.shape[0], im_testvg.shape[1], im_testvg.shape[2], 1)
    return X_testp, X_testpg, X_testvg, y_testp, y_testpg, y_testvg
def im_resize(input_image):
    '''
    Transposes and resizes a text-line image to dim; the resize step is
    optional if the input already has the target size.
    '''
    f = cv2.transpose(input_image)  # images in the original dataset are stored transposed
    return cv2.resize(f, dim)
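# A quick, hypothetical usage example for im_resize(); the sample image path
# is an assumption (any grayscale text-line image from sample_dataset would do).
def _demo_resize(path='./sample_dataset/sample_line.png'):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is not None:
        resized = im_resize(img)
        print('resized shape:', resized.shape)  # expected (32, 128) given dim=(128, 32)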
'''
all sets of text-line images and GT
'''
train = preprocess_traindata()
x_train = train[0]
y_train = train[1]
x_train_length = train[2]
y_train_length = train[3]
x_val = train[4]
y_val = train[5]
x_val_length = train[6]
y_val_length = train[7]
test = preprocess_testdata()
x_testp = test[0]
y_testp = test[3]
x_testpg = test[1]  # was test[2], which is the Visual Geez test set, not Power Geez
y_testpg = test[4]
x_testvg = test[2]
y_testvg = test[5]
print("data loading is completed")