# create_annotationstxt.py
import cv2
import sys
import os
import csv
# import pandas as pd
# from PIL import Image
import shutil
import random
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold

def split_txt():
    # Load the dataset from a text file
    with open('annotations_filtered_peak_all.txt', 'r') as file:
        data = file.readlines()

    # Assuming each line has a label at the end (e.g., 'data... class_id')
    labels = [line.split()[-1] for line in data]

    # Split the data into train, validation, and test sets with equal class distribution
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, labels, test_size=0.2, random_state=42, stratify=labels)
    val_data, test_data, val_labels, test_labels = train_test_split(
        test_data, test_labels, test_size=0.5, random_state=42, stratify=test_labels)

    # Define file names for the splits
    train_file = 'train.txt'
    val_file = 'val.txt'
    test_file = 'test.txt'

    # Write the data to separate files
    with open(train_file, 'w') as file:
        file.writelines(train_data)
    with open(val_file, 'w') as file:
        file.writelines(val_data)
    with open(test_file, 'w') as file:
        file.writelines(test_data)

    print(f'Dataset has been split and saved into {train_file}, {val_file}, and {test_file}.')


# split_txt()

# Create five stratified folds from the given annotation file and save them as
# fold_1_train.txt, fold_1_test.txt, fold_2_train.txt, fold_2_test.txt, and so on.
def createfolds(txt_file_path):
    # Step 1: Load the data
    with open(txt_file_path, 'r') as file:
        lines = file.readlines()

    data = []
    labels = []
    for line in lines:
        label = line.strip()[-1]   # Assuming the label is the last character
        text = line.strip()[:-1]   # Assuming the text is everything except the last character
        labels.append(label)
        data.append(text)

    # Step 2: Stratified split
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)  # 5-fold split
    for fold, (train_idx, test_idx) in enumerate(skf.split(data, labels)):
        train_data = [data[i] for i in train_idx]
        train_labels = [labels[i] for i in train_idx]
        test_data = [data[i] for i in test_idx]
        test_labels = [labels[i] for i in test_idx]

        # Step 3: Save the folds
        with open(f'fold_{fold+1}_train.txt', 'w') as train_file:
            for label, text in zip(train_labels, train_data):
                train_file.write(f'{text}{label}\n')
        with open(f'fold_{fold+1}_test.txt', 'w') as test_file:
            for label, text in zip(test_labels, test_data):
                test_file.write(f'{text}{label}\n')


createfolds('annotations_filtered_peak_2.txt')
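
# A minimal verification sketch, assuming the default fold file names written by
# createfolds() above (fold_1_train.txt, fold_1_test.txt, ...): it prints the
# per-class label counts in each fold so the stratification can be checked.
def print_fold_label_counts(n_splits=5):
    from collections import Counter
    for fold in range(1, n_splits + 1):
        for split in ('train', 'test'):
            with open(f'fold_{fold}_{split}.txt', 'r') as f:
                counts = Counter(line.strip()[-1] for line in f if line.strip())
            print(f'fold {fold} {split}: {dict(counts)}')

# print_fold_label_counts()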

def create_annotxt():
    bl = 'BL'
    one = 'PA1'
    two = 'PA2'
    three = 'PA3'
    four = 'PA4'
    root_dir = '/home/livia/work/Biovid/PartB/biovid_classes'

    # Loop through all the subdirectories in the source directory
    file = open("annotations_filtered_peak_all.txt", "w")
    for sub_dir in os.listdir(root_dir):
        if sub_dir.endswith('.txt'):
            continue
        sub_dir_path = os.path.join(root_dir, sub_dir)
        videos_list = os.listdir(sub_dir_path)
        for file_dir in videos_list:
            sub_video_path = os.path.join(sub_dir_path, file_dir)
            # Count the number of images in each folder
            count = 0
            for sub_video in os.listdir(sub_video_path):
                count = count + 1
            if count == 75:
                if sub_dir in ('0', '1', '2', '3', '4'):
                    # write_file = os.path.join(sub_dir, file_dir) + " " + '1' + " " + str(count) + " " + str(sub_dir)
                    class_label = sub_dir
                    write_file = os.path.join(sub_dir, file_dir) + " " + '50' + " " + "70" + " " + class_label  # only peak
                    file.write(write_file + "\n")
    file.close()
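
# For reference, each line written by create_annotxt() has the form
#   <class_dir>/<video_folder> <start_frame> <end_frame> <class_label>
# e.g. (with a hypothetical video folder name): "2/video_folder_01 50 70 2".
# A minimal parsing sketch under that assumption; split_txt() above reads the
# label back with line.split()[-1] and createfolds() takes the last character of
# the stripped line, so both expect a single-character label at the end.
def parse_annotation_line(line):
    path, start_frame, end_frame, class_label = line.strip().split()
    return path, int(start_frame), int(end_frame), int(class_label)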

# def count_labels(file_path):
#     with open(file_path, 'r') as file:
#         lines = file.readlines()
#     labels = [line.strip().split()[-1] for line in lines]
#     label_counts = {}
#     for label in labels:
#         if label in label_counts:
#             label_counts[label] += 1
#         else:
#             label_counts[label] = 1
#     return label_counts
#
# # File paths for the train, validation, and test sets
# train_file = 'train.txt'
# val_file = 'val.txt'
# test_file = 'test.txt'
#
# # Count labels in each file
# train_label_counts = count_labels(train_file)
# val_label_counts = count_labels(val_file)
# test_label_counts = count_labels(test_file)
#
# # Print label counts
# print(f"Label counts in train set: {train_label_counts}")
# print(f"Label counts in validation set: {val_label_counts}")
# print(f"Label counts in test set: {test_label_counts}")

# Code to write each file and folder name in a directory to a text file
# import os
# import sys
#
# # Open a directory
# path = "/var/www/html/"
# dirs = os.listdir(path)
#
# # This would print all the files and directories
# for file in dirs:
#     print(file)
#     f = open("demofile2.txt", "a")
#     f.write(file + "\n")
#     f.close()