c3d_extract.py
import os
import numpy as np
from c3d import *
from utils.visualization_util import *
## USE THIS SCRIPT IF YOU'VE PUT THE VIDEOS YOU WANT TO USE INTO THE INPUT FOLDER
# NOTE: cfg (configuration) and params (parameters) used below are expected to be
#       provided by the wildcard imports above.
# PATHS: cfg.input_folder, cfg.C3D_path, cfg.C3D_info_path
# AIM: extract n features from each video, then merge them into 32 features and save them as a .txt file
#      (an illustrative sketch of such a merging step follows below).
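# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption, NOT the project's actual implementation):
# the `interpolate` helper used in run_c3d below comes from the wildcard
# imports above and is not shown in this file. One simple way to merge an
# (n, 4096) feature matrix into a fixed (features_per_bag, 4096) bag is to
# average the clip features that fall into each of the 32 temporal segments:
def _sketch_interpolate(features, features_per_bag=32):
    features = np.asarray(features)
    n = features.shape[0]
    # One segment of clips per output feature, spread evenly over the video.
    edges = np.linspace(0, n, features_per_bag + 1)
    bag = []
    for seg in range(features_per_bag):
        start = int(edges[seg])
        end = max(int(edges[seg + 1]), start + 1)  # every segment sees >= 1 clip
        bag.append(features[start:min(end, n)].mean(axis=0))
    return np.stack(bag)
# ---------------------------------------------------------------------------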
def run_c3d():
    '''
    Starting from videos, extract features using the C3D model pre-trained on the Sports-1M dataset and save them as .txt files.

    How?
    Each video is divided into n clips of 16 frames, where n = int(np.round(total_number_of_frames/16))
    (an illustrative sketch of such a splitting step is given at the end of this file).
    Each clip is given as input to the C3D model, which returns an array of 4096 floats. This array is 1 feature.
    The (n, 4096) features are then merged to end up with (32, 4096) features per video.

    Paths
    --------------
    1. cfg.input_folder = folder containing all the videos whose features need to be extracted
    2. cfg.C3D_path = folder where the extracted features are saved
    3. cfg.C3D_info_path = folder where num_frames_clips_<category>.txt is saved
    '''
    num_clips_frames = []
    for filename in os.listdir(cfg.input_folder):
        if filename.endswith('.mp4'):
            video_name = os.path.join(cfg.input_folder, filename)
            name = os.path.basename(video_name).split('.')[0]

            # Read video: create n clips of 16 frames
            video_clips, num_frames = get_video_clips(video_name)
            print("Number of clips in the video : ", len(video_clips))
            num_clips_frames.append((video_name, num_frames, len(video_clips)))

            # Initialize the C3D model (created per video here; it could also be
            # built once, before the loop)
            feature_extractor = c3d_feature_extractor()

            # Extract features
            rgb_features = []
            for i, clip in enumerate(video_clips):
                clip = np.array(clip)
                if len(clip) < params.frame_count:
                    # Skip a trailing clip that is shorter than 16 frames
                    continue

                clip = preprocess_input(clip)
                # Predict fc6 output using C3D weights
                rgb_feature = feature_extractor.predict(clip)[0]
                rgb_features.append(rgb_feature)

                print("Processed clip : {} / {}".format(i + 1, len(video_clips)))

            # Bag features: from n to 32 features per video
            rgb_features = np.array(rgb_features)
            rgb_feature_bag = interpolate(rgb_features, params.features_per_bag)

            # Save each bag of features as .txt
            save_path = os.path.join(cfg.C3D_path, name + '.txt')
            np.savetxt(save_path, rgb_feature_bag)

    # Save as .txt the number of frames and clips in each video. The category
    # name used in the file name is recovered from the first processed video:
    # digits and the extension are dropped, and the final [:-1] removes one
    # trailing letter (presumably the 'x' left over from an '_x264' suffix).
    cat_0 = num_clips_frames[0][0].split('/')[-1].strip('mp4')
    cat = ''.join(filter(str.isalpha, cat_0))[:-1]
    with open(os.path.join(cfg.C3D_info_path, 'num_frames_clips_' + cat + '.txt'), 'w') as f:
        print(num_clips_frames, file=f)


if __name__ == '__main__':
    run_c3d()
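
# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption, NOT the project's actual implementation):
# `get_video_clips`, used in run_c3d above, is provided by the wildcard imports
# at the top of the file. A minimal version consistent with the docstring of
# run_c3d (split the video into consecutive 16-frame clips, with
# n = int(np.round(total_frames / 16))) could look like this:
def _sketch_get_video_clips(video_path, frames_per_clip=16):
    import cv2  # hypothetical dependency used only by this sketch
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frames.append(frame)
    cap.release()
    num_frames = len(frames)
    num_clips = int(np.round(num_frames / frames_per_clip))
    clips = [frames[c * frames_per_clip:(c + 1) * frames_per_clip]
             for c in range(num_clips)]
    return clips, num_frames
# ---------------------------------------------------------------------------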