prepare_training.py
import numpy as np
from tensorflow.keras.utils import to_categorical
def spotting_pseudo_labeling(dataset_name, final_samples, final_dataset_spotting, k_p):
    pseudo_y = []  # Frame-level pseudo-labels for spotting (0 or 1)
    video_count = 0
    for subject_index, subject in enumerate(final_samples):
        for video_index, video in enumerate(subject):
            samples_arr = []
            if len(video) == 0:
                # No ground-truth samples in this video: every frame is labelled 0
                pseudo_y.append([0 for _ in range(len(final_dataset_spotting[video_count]))])
            else:
                pseudo_y_each = [0] * len(final_dataset_spotting[video_count])
                for ME_index, ME in enumerate(video):
                    samples_arr.append(np.arange(ME[0], ME[2] + 1))  # Frames from onset to offset
                for arr_index, ground_truth_arr in enumerate(samples_arr):
                    for index in range(len(pseudo_y_each)):
                        pseudo_arr = np.arange(index, index + k_p)  # Sliding window of k_p frames
                        # Heaviside step function: if IoU > 0 then y = 1, else y = 0
                        if pseudo_y_each[index] < len(np.intersect1d(pseudo_arr, ground_truth_arr)) / len(np.union1d(pseudo_arr, ground_truth_arr)):
                            pseudo_y_each[index] = 1
                pseudo_y.append(pseudo_y_each)
            video_count += 1
    # Flatten all videos into one frame-level label list
    pseudo_y = [y for x in pseudo_y for y in x]
    print(dataset_name, 'Total frames:', len(pseudo_y))
    return pseudo_y
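# Illustrative sketch (not part of the original pipeline): a tiny synthetic call to
# spotting_pseudo_labeling. The sample interval, frame count, and k_p below are
# assumptions chosen only to show how the IoU-based labelling behaves.
def _demo_spotting_pseudo_labeling():
    demo_samples = [[[[3, 5, 7]]]]  # One subject, one video, one [onset, apex, offset] sample
    demo_frames = [[np.zeros((42, 42, 3), dtype=np.float32) for _ in range(12)]]  # 12 placeholder frames
    demo_y = spotting_pseudo_labeling('demo', demo_samples, demo_frames, k_p=3)
    # Every frame whose k_p-frame window overlaps [3, 7] is labelled 1, the rest 0
    print(demo_y)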
def recognition_label(dataset_name, emotion_class, final_samples, final_emotions, final_dataset_spotting, final_dataset_recognition, pseudo_y):
    # Spot all peaks above threshold p (long videos) or only the highest peak (short videos)
    if dataset_name == 'CASME_sq' or dataset_name == 'SAMMLV':
        spot_multiple = True
    else:
        spot_multiple = False
    # Determine the emotions used for evaluation
    if dataset_name == 'CASME_sq' or dataset_name == 'SAMMLV':
        emotion_list = ['negative', 'positive', 'surprise']
    else:
        emotion_list = ['repression', 'anger', 'contempt', 'disgust', 'fear', 'sadness', 'negative', 'happiness', 'positive', 'surprise', 'others', 'other']
    X1 = []  # Recognition validation features
    y1 = []  # Recognition validation labels
    X2 = []  # Recognition training features
    y2 = []  # Recognition training labels
    label_videos = [videos for subjects in final_samples for videos in subjects]     # [onset, apex, offset] samples per video
    emotion_videos = [videos for subjects in final_emotions for videos in subjects]  # Emotion labels per video
    video_count = 0
    for video_index in range(len(final_dataset_spotting)):
        for sample_index in range(len(label_videos[video_index])):
            if emotion_videos[video_index][sample_index] in emotion_list:
                image_index = label_videos[video_index][sample_index][0]  # Onset frame index of this sample
                X1.append(final_dataset_spotting[video_index][image_index])
                y1.append(emotion_videos[video_index][sample_index])
                X2.append(final_dataset_recognition[video_count])
                video_count += 1
    y1 = np.array(y1)
    # Pseudo-label recognition: map emotion names to class indices
    if dataset_name == 'CASME2':  # Five classes: disgust, happiness, others, surprise, repression -> 0..4
        y1 = [0 if ele == 'disgust' else ele for ele in y1]
        y1 = [1 if ele == 'happiness' else ele for ele in y1]
        y1 = [2 if ele == 'others' else ele for ele in y1]
        y1 = [3 if ele == 'surprise' else ele for ele in y1]
        y1 = [4 if ele == 'repression' else ele for ele in y1]
    else:  # Three classes: negative, positive, surprise -> 0..2
        y1 = [0 if ele == 'negative' else ele for ele in y1]
        y1 = [1 if ele == 'positive' else ele for ele in y1]
        y1 = [2 if ele == 'surprise' else ele for ele in y1]
    all_images = [frame for video in final_dataset_spotting for frame in video]
    X = features_split(all_images)   # Spotting training features
    y = np.array(pseudo_y)           # Spotting training labels
    X1 = features_split(X1)          # Recognition validation features
    y1 = to_categorical(y1)          # Recognition validation labels (one-hot)
    X2 = features_split(X2)          # Recognition training features
    y2 = y1                          # Recognition training labels (same samples as validation)
    return spot_multiple, X, y, X1, y1, X2, y2, emotion_list
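# Illustrative sketch (assumption, not part of the original training flow): building the
# spotting and recognition label sets for one toy video. The dataset name 'demo', the
# emotion 'positive', and all shapes below are placeholders.
def _demo_recognition_label():
    frames = [np.zeros((42, 42, 3), dtype=np.float32) for _ in range(10)]
    demo_spotting = [frames]                                      # One video of 10 frames
    demo_recognition = [np.zeros((42, 42, 3), dtype=np.float32)]  # One recognition sample
    demo_samples = [[[[2, 4, 6]]]]                                # One [onset, apex, offset]
    demo_emotions = [[['positive']]]
    demo_pseudo_y = spotting_pseudo_labeling('demo', demo_samples, demo_spotting, k_p=3)
    spot_multiple, X, y, X1, y1, X2, y2, emotion_list = recognition_label(
        'demo', 3, demo_samples, demo_emotions, demo_spotting, demo_recognition, demo_pseudo_y)
    print(spot_multiple, len(X), y.shape, y1.shape)  # e.g. False 10 (10,) (1, 2)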
# Split the three feature channels for channel-wise learning
def features_split(X):
    X_copy = X.copy()
    for index in range(len(X_copy)):
        u = np.array(X_copy[index][:, :, 0].reshape(42, 42, 1))   # Channel 0: u
        v = np.array(X_copy[index][:, :, 1].reshape(42, 42, 1))   # Channel 1: v
        os = np.array(X_copy[index][:, :, 2].reshape(42, 42, 1))  # Channel 2: os
        X_copy[index] = [u, v, os]
    return X_copy
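# Minimal sketch of features_split on a single dummy 42x42x3 feature map; the random
# input here is purely illustrative.
def _demo_features_split():
    sample = np.random.rand(42, 42, 3).astype(np.float32)
    u, v, os_channel = features_split([sample])[0]
    assert u.shape == v.shape == os_channel.shape == (42, 42, 1)  # One (42, 42, 1) map per channel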
def loso_split(X, y, X1, y1, X2, y2, final_subjects, final_samples, final_dataset_spotting, final_emotions, emotion_list):
    # Assign a subject index to every frame/sample for splitting the dataset by subject
    groupsLabel = y.copy()   # For spotting (one entry per frame)
    groupsLabel1 = []        # For recognition (one entry per sample)
    prevIndex = 0
    countVideos = 0
    videos_len = []
    # Total frames of each video
    for video_index in range(len(final_dataset_spotting)):
        videos_len.append(len(final_dataset_spotting[video_index]))
    print('Frame Index for each subject (Spotting):-')
    for subject_index in range(len(final_samples)):
        countVideos += len(final_samples[subject_index])
        index = sum(videos_len[:countVideos])
        groupsLabel[prevIndex:index] = subject_index
        print('Subject', final_subjects[subject_index], ':', prevIndex, '->', index)
        prevIndex = index
    # Sample index range of each subject (recognition)
    print('\nFrame Index for each subject (Recognition):-')
    for subject_index in range(len(final_samples)):
        start_index = len(groupsLabel1)
        for video_index in range(len(final_samples[subject_index])):
            for sample_index in range(len(final_samples[subject_index][video_index])):
                if final_emotions[subject_index][video_index][sample_index] in emotion_list:
                    groupsLabel1.append(subject_index)
        if len(groupsLabel1) > start_index:
            print('Subject', final_subjects[subject_index], ':', start_index, '->', len(groupsLabel1) - 1)
        else:
            print('Subject', final_subjects[subject_index], ':', 'Not available')
    print('\nTotal X:', len(X), '| Total y:', len(y), '| Total X1:', len(X1), '| Total y1:', len(y1), '| Total X2:', len(X2), '| Total y2:', len(y2))
    return groupsLabel, groupsLabel1
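# Sketch of how the returned group labels could drive a leave-one-subject-out split;
# scikit-learn's LeaveOneGroupOut is an assumption here, not necessarily what the
# training script itself uses.
def _demo_loso_groups():
    from sklearn.model_selection import LeaveOneGroupOut
    groups = np.array([0, 0, 0, 1, 1, 2, 2, 2])  # Frame/sample -> subject index (as in groupsLabel)
    X_dummy = np.zeros((len(groups), 1))
    for train_idx, test_idx in LeaveOneGroupOut().split(X_dummy, groups=groups):
        print('Held-out subject:', np.unique(groups[test_idx]), '| train size:', len(train_idx))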