-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathleave_some_subject_out.py
92 lines (71 loc) · 3.01 KB
/
leave_some_subject_out.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
from shutil import copyfile, rmtree
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split
from utils.utils import create_folder
# Constants
# All generated and source data lives under one top-level folder.
_DATASETS_ROOT = 'Datasets'

# Output folder holding one sub-folder per cross-validation fold
FOLDS_FOLDER_PATH = os.path.join(_DATASETS_ROOT, 'Folds')
# Input folder; the images are read from its 'Images' sub-folder
DATASETS_FOLDER_PATH = os.path.join(_DATASETS_ROOT, 'DatasetFaces')
# Output folder for the images held out for calibration
CALIBRATION_FOLDER_PATH = os.path.join(_DATASETS_ROOT, 'Calibration')

# Fraction of the images held out for calibration (0 disables the split)
CALIBRATION_SIZE = 0.2
# Number of cross-validation folds to generate
N_FOLDS = 10
def copy_files(src_files, dst_folder, src_folder=None):
    """
    Copy the given files from the source folder into a destination folder.

    Each file keeps its name; only the containing folder changes.

    Parameters:
        src_files: iterable of file names, relative to ``src_folder``
            (a list or a pandas Series of names both work).
        dst_folder: folder that receives the copies. It is not created
            here, so it is expected to exist already.
        src_folder: folder the files are read from. When omitted, the
            'Images' folder inside DATASETS_FOLDER_PATH is used.

    Returns:
        None.
    """
    # Resolve the default lazily so an explicit src_folder never depends on
    # the module-level constant.
    if src_folder is None:
        src_folder = os.path.join(DATASETS_FOLDER_PATH, 'Images')

    for src_file in src_files:
        src = os.path.join(src_folder, src_file)
        dst = os.path.join(dst_folder, src_file)
        copyfile(src, dst)
# Start from empty output folders: remove whatever a previous run left behind
# and recreate each folder, folds first and calibration second.
for output_folder in (FOLDS_FOLDER_PATH, CALIBRATION_FOLDER_PATH):
    if os.path.exists(output_folder):
        rmtree(output_folder)

    create_folder(output_folder)
# Load the per-image metadata
dataframe = pd.read_csv('iCOPE+UNIFESP_data.csv')

# Rows whose face detection did not come back empty
has_face = dataframe['face_coordinates'] != "[]"
# Rows labelled as either pain or no pain
has_valid_class = dataframe['class'].isin(['pain', 'nopain'])
# Rows that belong to one of the selected datasets
from_selected_dataset = dataframe['dataset'].isin(['iCOPE', 'UNIFESP'])

# Keep only the rows that satisfy all three conditions
dataframe = dataframe[has_face & has_valid_class & from_selected_dataset]
if CALIBRATION_SIZE > 0:
    # Hold out a class-stratified share of the images for calibration.
    # Only the held-out ("test") side of the split is needed here.
    class_labels = dataframe["class"]
    _, calib_files, _, calib_labels = train_test_split(
        dataframe["new_file_name"],
        class_labels,
        test_size=CALIBRATION_SIZE,
        random_state=42,
        stratify=class_labels,
    )

    # The calibration images get their own folder
    copy_files(calib_files, CALIBRATION_FOLDER_PATH)

    # Remove the calibration rows so they are not used in any fold
    dataframe = dataframe.drop(index=calib_labels.index)
# Gather unique subjects together with the dataset each one comes from.
# Both lists are taken from the SAME de-duplicated frame so that
# datasets[i] is the dataset of unique_subjects[i]. Building the subjects
# from a set() would give them an arbitrary order that does not match the
# stratification labels (and that can change between runs).
# NOTE(review): assumes every subject belongs to a single dataset; the first
# row seen for a subject decides its dataset - confirm against the CSV.
subjects_frame = dataframe.drop_duplicates(subset=['new_subject'])
unique_subjects = list(subjects_frame['new_subject'])
datasets = subjects_frame['dataset'].values

# train_subjects[k] / test_subjects[k] hold the subjects of fold k
train_subjects = []
test_subjects = []

# The StratifiedKFold is used to split the data between subjects but also
# consider their original datasets, achieving balance between datasets
skf = StratifiedKFold(n_splits=N_FOLDS)
for train_index, test_index in skf.split(unique_subjects, datasets):
    # split() yields positional indices; map them back to subject ids
    train_subjects.append([unique_subjects[i] for i in train_index])
    test_subjects.append([unique_subjects[i] for i in test_index])
# Materialise every fold on disk as <folds folder>/<fold number>/{Train,Test}
for fold, (fold_train_subjects, fold_test_subjects) in enumerate(
        zip(train_subjects, test_subjects)):
    # Folder of this fold, named after its number
    fold_path = os.path.join(FOLDS_FOLDER_PATH, str(fold))
    create_folder(fold_path)

    # Train and test sub-folders
    train_path = os.path.join(fold_path, "Train")
    create_folder(train_path)
    test_path = os.path.join(fold_path, "Test")
    create_folder(test_path)

    # Images of the subjects assigned to the training side of this fold
    in_train = dataframe['new_subject'].isin(fold_train_subjects)
    copy_files(dataframe[in_train]['new_file_name'], train_path)

    # Images of the subjects assigned to the testing side of this fold
    in_test = dataframe['new_subject'].isin(fold_test_subjects)
    copy_files(dataframe[in_test]['new_file_name'], test_path)