pickle_dataset.py
import argparse
import cv2
import imghdr
import math
import numpy as np
import os
import pickle
'''
The Extended CK+ dataset comprises images, emotion labels, and related data.
The images directory contains one directory per subject, and most subject
directories contain one or more sequence directories, each holding a set of
images. The images show an actor's face, progressing from a neutral
expression to the peak of a particular emotion.
The emotion-label directory mirrors the structure and naming convention of
the images directory, but each sequence directory holds a text file instead
of images. The text file contains an emotion label (a number).
Emotion label for the corresponding emotional expression:
0:'neutral', 1:'anger', 2:'contempt', 3:'disgust', 4:'fear', 5:'happy', 6:'sadness', 7:'surprise'
'''
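# A sketch of the expected on-disk layout, inferred from the walk logic
# below (subject and sequence names are illustrative):
#
#   <dataset_path>/S005/001/S005_001_00000001.png ... S005_001_00000011.png
#   <label_path>/S005/001/<label file>.txt   (first line parses as a float)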
if __name__ != '__main__':
    raise ImportError('Should be run as a script')
# Combine the two stock formatters so --help shows argument defaults while
# preserving the epilog's line breaks.
class HelpFormatter(argparse.ArgumentDefaultsHelpFormatter,
                    argparse.RawDescriptionHelpFormatter):
    pass


parser = argparse.ArgumentParser(
    description='Serializes the CK+ dataset into pickle format.',
    formatter_class=HelpFormatter,
    epilog='''
Examples:
  python %(prog)s /home/user/datasets/ck_dataset /home/user/datasets/ck_labels --crop
  python %(prog)s /home/user/datasets/ck_dataset /home/user/datasets/ck_labels --outfile ck_dataset.pickle
  python %(prog)s /home/user/datasets/ck_dataset /home/user/datasets/ck_labels --training 70 --validation 20 --test 10
  python %(prog)s /home/user/datasets/ck_dataset /home/user/datasets/ck_labels -t 70 -v 20 -test 10 -o fd.pickle
Note: training, validation, and testing sizes must sum to 100.
'''
)
parser.add_argument('dataset_path', help='Absolute path of the Extended CK+ dataset images')
parser.add_argument('label_path', help='Absolute path of the Extended CK+ dataset emotion labels')
parser.add_argument('-o', '--outfile', default='ck_dataset.pickle',
                    help='Name of the output pickle file')
parser.add_argument('-t', '--training', dest='training_size', type=int, default=80,
                    help='Percent of the dataset to use for training')
parser.add_argument('-v', '--validation', dest='validation_size', type=int,
                    default=10, help='Percent of the dataset to use for validation')
parser.add_argument('-test', '--test', dest='testing_size', type=int, default=10,
                    help='Percent of the dataset to use for testing')
parser.add_argument('--crop', dest='detect_face', action='store_true',
                    help='Detect, crop, and save the face in the image')
parser.add_argument('--resize', nargs=2, type=int, default=[100, 100],
                    help='Resize images to particular dimensions (w x h)')
# Unpack by attribute rather than relying on the dict ordering of vars().
args = parser.parse_args()
dataset_path, label_path = args.dataset_path, args.label_path
outfile, detect_face, resize = args.outfile, args.detect_face, args.resize
training_size, validation_size, testing_size = args.training_size, args.validation_size, args.testing_size
if training_size + validation_size + testing_size != 100:
    parser.error('Training, validation, and testing sizes must sum to 100')
if not os.path.exists(dataset_path):
    raise IOError('No such file or directory', dataset_path)
if not os.path.exists(label_path):
    raise IOError('No such file or directory', label_path)
# Convert the percentages to fractions and the resize pair to a (w, h) tuple.
training_size, validation_size, testing_size = training_size / 100, validation_size / 100, testing_size / 100
resize = tuple(resize)
# Haar cascade for face detection; the XML file must be available at this
# path (it ships with OpenCV, e.g. under cv2.data.haarcascades).
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# One list of flattened images per emotion label (0-7).
ck_dataset = [[], [], [], [], [], [], [], []]
def load_img(imgpath):
    img = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
    if detect_face:
        # Relax the scale factor until a face is found; OpenCV requires
        # scaleFactor > 1, so give up before it reaches 1.0.
        scale_factor = 1.25
        faces = face_cascade.detectMultiScale(img, scale_factor, 5)
        while not len(faces):
            scale_factor -= 0.05
            if scale_factor <= 1:
                print('No face detected:', imgpath)
                return None
            faces = face_cascade.detectMultiScale(img, scale_factor, 5)
        x, y, w, h = faces[0]  # crop to the first detected face
        return cv2.resize(img[y:y + h, x:x + w], resize, interpolation=cv2.INTER_AREA)
    return cv2.resize(img, resize, interpolation=cv2.INTER_AREA)
def load_img_data(files_path, label):
    # Roughly the first 30% of frames in a sequence count as neutral.
    threshold = math.floor((len(files_path) - 1) * 0.3)
    for file in files_path:
        if imghdr.what(file) == 'png':  # make sure the file is a .png image
            img = load_img(file)
            if img is None:
                continue
            img = img.flatten()
            # Normalize to [0, 1] for sigmoid neurons.
            img = (img / max(img)).astype(np.float32)
            # Image names look like 'S005_001_00000002.png'; the slice
            # extracts the frame index ('00000002').
            if int(file[-12:-4]) <= threshold:
                ck_dataset[0].append(img)
            else:
                ck_dataset[label].append(img)
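# Worked example (hypothetical sequence of 11 frames): threshold =
# floor((11 - 1) * 0.3) = 3, so frames 00000001-00000003 are stored as
# neutral (label 0) and the rest under the sequence's emotion label.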
def load_emotion_labels(emotion_label_path):
    labels = dict()
    for root, dirs, files in os.walk(emotion_label_path):
        if dirs or not files:
            continue  # only leaf directories containing a label file
        # Key each sequence by 'subject/sequence', e.g. 'S005/001'.
        seq_id = os.sep.join(root.split(os.sep)[-2:])
        for file in files:
            f_name = os.path.join(root, file)
            with open(f_name, 'r') as f:
                labels[seq_id] = int(float(f.readline()))
    return labels
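# Note: int(float(...)) above also accepts labels written in scientific
# notation; e.g. a label file whose first line reads '3.0000000e+00'
# (a plausible on-disk format, not verified here) yields label 3.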
def load_extended_CK(img_path, emotion_label_path):
    emotion_labels = load_emotion_labels(emotion_label_path)
    print("\nProcessing: ")
    for root, dirs, files in os.walk(img_path):
        if dirs:
            print(root)
            continue
        files_path = [os.path.join(root, file) for file in files]
        seq_id = os.sep.join(root.split(os.sep)[-2:])
        if seq_id in emotion_labels:
            load_img_data(files_path, emotion_labels[seq_id])
def serialize_extended_CK(img_path, emotion_label_path):
    load_extended_CK(img_path, emotion_label_path)
    print("\nSerializing: ")
    training_data = []
    validation_data = []
    test_data = []
    training_label = []
    validation_label = []
    test_label = []
    # Split every emotion class with the same training/validation/test ratios.
    for y, x in enumerate(ck_dataset):
        i1 = math.ceil(len(x) * training_size)
        i2 = math.floor(len(x) * (training_size + validation_size))
        training_data.append(x[0:i1])
        validation_data.append(x[i1:i2])
        test_data.append(x[i2:len(x)])
        # Generate the corresponding label column for each split.
        training_label.append(y * np.ones(shape=(len(x[0:i1]), 1), dtype=np.int8))
        validation_label.append(y * np.ones(shape=(len(x[i1:i2]), 1), dtype=np.int8))
        test_label.append(y * np.ones(shape=(len(x[i2:len(x)]), 1), dtype=np.int8))
    # np.vstack turns each per-class list of arrays into a single 2-D array.
    training_data, validation_data, test_data = np.vstack(training_data), np.vstack(validation_data), np.vstack(test_data)
    training_label, validation_label, test_label = np.vstack(training_label), np.vstack(validation_label), np.vstack(test_label)
    with open(outfile, 'wb') as f:
        pickle.dump({
            "training_data": [training_data, training_label],
            "validation_data": [validation_data, validation_label],
            "test_data": [test_data, test_label],
            "img_dim": {"width": resize[0], "height": resize[1]}
        }, f, protocol=pickle.HIGHEST_PROTOCOL)
serialize_extended_CK(dataset_path, label_path)
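
# A minimal sketch of reading the pickle back (key names mirror the dict
# written above; 'ck_dataset.pickle' is the default --outfile):
#
#   with open('ck_dataset.pickle', 'rb') as f:
#       data = pickle.load(f)
#   X_train, y_train = data['training_data']
#   X_val, y_val = data['validation_data']
#   X_test, y_test = data['test_data']
#   w, h = data['img_dim']['width'], data['img_dim']['height']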