-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeatures_extractor.py
152 lines (122 loc) · 4.74 KB
/
features_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import numpy as np
import os
from extractor import Extractor
import cv2
from videolibs import crop_center_square
from tensorflow import keras
import pandas as pd
# Root data directory (Windows-style path; raw string keeps the backslashes literal).
DATA_PATH = r"data\\"
# Length of the feature vector produced per frame — presumably the Extractor's
# pooled CNN output size (2048 matches InceptionV3); confirm against extractor.py.
NUM_FEATURES = 2048
# Maximum number of frames sampled/kept per video; shorter videos are padded.
MAX_SEQ_LENGTH = 20
# Side length frames are resized to.
# NOTE(review): InceptionV3-style extractors expect 299x299 — 229 looks like a
# transposition typo for 299; confirm against the Extractor's input shape.
IMG_SIZE = 229
"""
trick: Convert frame from BGR to RGB
image = image[:, :, [2,1,0]]
"""
def load_video(path, max_frames, resize):
    """Read up to *max_frames* frames from the video file at *path*.

    Every decoded frame is center-cropped, resized to *resize*
    (an OpenCV (width, height) pair), and reordered from BGR to RGB.

    Returns a numpy array stacking the collected frames.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        while True:
            ok, img = capture.read()
            if not ok:
                # No more frames (end of stream or read failure).
                break
            img = crop_center_square(img)
            img = cv2.resize(img, resize)
            # Swap the channel axis: OpenCV's BGR -> RGB.
            collected.append(img[:, :, [2, 1, 0]])
            if len(collected) == max_frames:
                break
    finally:
        # Always release the capture handle, even if decoding raised.
        capture.release()
    return np.array(collected)
"""
Finally, we can put all the pieces together to create our data processing utility.
Note: the Ellipsis object (three dots ...) selects all remaining dimensions when
accessing and slicing multidimensional NumPy arrays.
.numpy(): converts a Tensor to a NumPy array.
"""
def prepare_all_videos(df, root_dir, label_processor, img_size, max_seq_length, nb_features, dataset):
    """Extract per-frame CNN features and padding masks for every video in *df*.

    Parameters
    ----------
    df : pandas.DataFrame with "video_name" and "tag" columns.
    root_dir : directory containing the video files named in "video_name".
    label_processor : Keras StringLookup mapping tag strings to integer ids.
    img_size : side length each frame is resized to before feature extraction.
    max_seq_length : number of timesteps kept per video (shorter videos padded).
    nb_features : dimensionality of the extractor's per-frame feature vector.
    dataset : prefix ("train"/"test") for the saved .npy files.

    Side effects: writes <dataset>-frame_features.npy, <dataset>-frame_mask.npy
    and <dataset>-labels.npy under data/tmp/. Returns None.
    """
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["tag"].values
    # StringLookup expects a 2-D batch; [..., None] adds the trailing axis.
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` flags real timesteps (True) vs padding (False); these two
    # arrays are what the downstream sequence model consumes.
    frame_masks = np.zeros(shape=(num_samples, max_seq_length), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, max_seq_length, nb_features), dtype="float32"
    )

    # CNN feature extractor applied to each individual frame.
    model = Extractor(image_shape=(img_size, img_size, 3))

    for idx, path in enumerate(video_paths):
        # Gather the video's frames and add a batch dimension of 1.
        frames = load_video(
            path=os.path.join(root_dir, path),
            max_frames=max_seq_length,
            resize=(img_size, img_size),
        )
        frames = frames[None, ...]

        # Placeholders for the current video's mask and features.
        temp_frame_mask = np.zeros(shape=(1, max_seq_length), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, max_seq_length, nb_features), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            length = min(max_seq_length, batch.shape[0])
            for j in range(length):
                temp_frame_features[i, j, :] = model.extract(batch[None, j, :])
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked (padding)

        # squeeze() drops the singleton batch dimension before storing.
        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    # Persist the sequences. Pass file *paths* (not open handles) to np.save so
    # the files are closed deterministically — the previous version opened
    # handles that were never closed. Create the target directory if absent.
    save_dir = os.path.join("data", "tmp")
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, dataset + "-frame_features.npy"), frame_features)
    np.save(os.path.join(save_dir, dataset + "-frame_mask.npy"), frame_masks)
    np.save(os.path.join(save_dir, dataset + "-labels.npy"), labels)
    return
"""
The labels of the videos are strings. Neural networks do not understand string values,
so they must be converted to some numerical form before they are fed to the model. Here
we will use the [`StringLookup`](https://keras.io/api/layers/preprocessing_layers/categorical/string_lookup)
layer to encode the class labels as integers.
"""
def get_class_one_hot(vocabs):
    """Return a StringLookup layer mapping each label string in *vocabs*
    to an integer index over the sorted unique labels (no OOV slot)."""
    unique_labels = np.unique(vocabs)
    lookup = keras.layers.StringLookup(
        num_oov_indices=0,
        vocabulary=unique_labels,
    )
    return lookup
def main():
    """Load the train/test CSV splits and run feature extraction on the
    training videos (the test-set pass is currently commented out)."""
    train_df = pd.read_csv(DATA_PATH + "train.csv")
    test_df = pd.read_csv(DATA_PATH + "test.csv")

    print(f"Total videos for training: {len(train_df)}")
    print(f"Total videos for testing: {len(test_df)}")

    # Vocabulary is built from the training tags only.
    label_processor = get_class_one_hot(train_df["tag"].values)

    prepare_all_videos(
        df=train_df,
        root_dir=DATA_PATH + "train",
        label_processor=label_processor,
        img_size=IMG_SIZE,
        max_seq_length=MAX_SEQ_LENGTH,
        nb_features=NUM_FEATURES,
        dataset="train",
    )
    # Test-split extraction, disabled for now:
    # prepare_all_videos(
    #     df = test_df,
    #     root_dir= DATA_PATH + "test",
    #     label_processor= label_processor,
    #     img_size= IMG_SIZE,
    #     max_seq_length= MAX_SEQ_LENGTH,
    #     nb_features= NUM_FEATURES,
    #     dataset="test"
    # )
    return
# Run the feature-extraction pipeline when executed as a script.
if __name__ == '__main__':
    main()