-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocessing.py
160 lines (133 loc) · 4.61 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
from multiprocessing import freeze_support
from concurrent import futures
from itertools import repeat
from urllib.parse import unquote
import pandas as pd
from colorama import Fore
from line_profiler_pycharm import profile
from tqdm import tqdm
from audio_process import AudioConverter
from utils import yes_or_no, print_results, get_paths, is_dir, get_filename
# Directory that receives the full-length WAV conversions; main() also uses
# it as the input directory for chorus extraction.
WAV_PATH = "/Volumes/vault0/dataset3/wav-22khz"
# Directory that receives the extracted chorus clips.
CHORUS_PATH = "/Volumes/vault0/dataset3/chorus-22khz"
# When False, export_track skips tracks whose WAV file already exists.
WAV_OVERWRITE = False
# When False, _extract_chorus skips inputs whose chorus clip already exists.
CHORUS_OVERWRITE = False
# max_workers for the ProcessPoolExecutor used by both stages.
N_PROCESS = 10
# Sample rate passed to AudioConverter.export for every exported file.
SAMPLE_RATE = 22050
def export_track(data):
    """Export one audio track to a WAV file.

    Takes a single tuple argument so it can be used directly with
    ``Executor.map`` over ``zip(tracks, repeat(out_path))``.

    :param data: tuple of (track info dict, export directory); the dict is
        read for its ``"Location"`` and ``"Track_ID"`` entries
    :type data: tuple
    :return: ``-1`` if the target file already exists and ``WAV_OVERWRITE``
        is off, ``1`` if the export succeeded, otherwise the source path of
        the track whose conversion raised an error
    :rtype: int or str
    """
    track, path = data
    # "Location" is a percent-encoded file URL: drop the scheme, then decode.
    track_path = unquote(track["Location"].replace("file://", ""))
    export_path = os.path.join(path, f"{track['Track_ID']}.wav")

    # Skip work that was already done unless overwriting was requested.
    if not WAV_OVERWRITE and os.path.exists(export_path):
        return -1

    try:
        converter = AudioConverter(track_path, export_path)
        converter.export(format="wav", sample_rate=SAMPLE_RATE)
    except Exception:
        # Report the failing source path to the caller instead of aborting
        # the whole batch. KeyboardInterrupt/SystemExit are deliberately not
        # caught so the worker can still be interrupted.
        return track_path
    return 1
def convert_wav(tracks, out_path):
    """Convert every track to WAV using a pool of worker processes.

    :param tracks: sized collection of track info dicts, each handed to
        ``export_track``
    :param out_path: directory to export into; when ``is_dir`` rejects it,
        a ``prep`` directory under the current working directory is used
    :return: tuple of (number of tracks, list of source paths that failed
        to convert, number of tracks skipped because the output existed)
    :rtype: tuple
    """
    # TODO: create option to save full wav file or not
    if not is_dir(out_path):
        print("Using default path...")
        out_path = os.path.join(os.getcwd(), "prep")
        # The fallback directory may already exist from an earlier run;
        # plain os.mkdir would raise FileExistsError in that case.
        os.makedirs(out_path, exist_ok=True)

    error_list = []
    skip_cnt = 0
    target_cnt = len(tracks)

    print()
    print(f"Start converting {target_cnt} files...")
    with futures.ProcessPoolExecutor(max_workers=N_PROCESS) as exe:
        # exe.map yields results in input order; tqdm wraps the iterator to
        # show progress as results are consumed.
        results = tqdm(
            exe.map(export_track, zip(tracks, repeat(out_path))),
            total=target_cnt,
        )
        for result in results:
            if result == -1:
                skip_cnt += 1
            elif result != 1:
                # Anything other than the -1/1 codes is a failing path.
                error_list.append(result)
    return target_cnt, error_list, skip_cnt
# @profile
def _extract_chorus(data):
    """Cut, normalize and export the chorus part of one audio file.

    Takes a single tuple argument so it can be used directly with
    ``Executor.map``.

    :param data: tuple of (input file path, destination directory)
    :type data: tuple
    :return: ``-1`` if the output already exists and ``CHORUS_OVERWRITE`` is
        off; ``True`` if the clip was exported and ``detect_chorus``
        reported success; ``False`` if the clip was exported but
        ``detect_chorus`` reported failure; the input path if any step
        raised an error
    :rtype: int, bool or str
    """
    input_path, dest_path = data
    export_path = os.path.join(dest_path, get_filename(input_path) + ".wav")

    # skip duplicate if option is on
    if not CHORUS_OVERWRITE and os.path.exists(export_path):
        return -1

    try:
        converter = AudioConverter(input_path, export_path)
        chorus_sec, success = converter.detect_chorus()
        # NOTE(review): 30 is presumably the clip length in seconds - confirm
        # against AudioConverter.cut_audio.
        converter.cut_audio(chorus_sec, 30)
        converter.normalize()
        converter.export(format="wav", sample_rate=SAMPLE_RATE)
        # The clip is exported either way; the flag only tells the caller
        # whether chorus detection itself succeeded.
        return bool(success)
    except Exception:
        # Report the failing input to the caller instead of aborting the
        # batch. KeyboardInterrupt/SystemExit are deliberately not caught.
        return input_path
# @profile
def extract_chorus(in_path, out_path):
    """Extract chorus clips for every file under ``in_path`` in parallel.

    :param in_path: location handed to ``get_paths`` to list the input files
    :param out_path: directory for the clips; if it does not exist the user
        is asked whether to create it, and ``/tmp`` is used when they decline
    :return: tuple of (number of input files, list of input paths that
        raised an error, number skipped because the output existed, number
        exported although chorus detection reported failure)
    :rtype: tuple
    """
    if not os.path.isdir(out_path):
        print(Fore.RED + "chorus extract path is not exist. ", end="")
        create = yes_or_no("Would you like to make one?")
        # Restore the default colour; otherwise everything printed later
        # (including the progress bar) stays red.
        print(Fore.RESET, end="")
        if create:
            # makedirs also creates missing parent directories.
            os.makedirs(out_path, exist_ok=True)
        else:
            print("Using default path...")
            out_path = "/tmp"

    track_paths = get_paths(in_path)
    target_cnt = len(track_paths)
    error_list = []
    skipped_cnt = failed_cnt = 0

    print(f"Start extracting chorus of {target_cnt} files...")
    # To debug in a single process, replace exe.map with the builtin map.
    with futures.ProcessPoolExecutor(max_workers=N_PROCESS) as exe:
        results = tqdm(
            exe.map(_extract_chorus, zip(track_paths, repeat(out_path))),
            total=target_cnt,
        )
        for result in results:
            if result == -1:
                skipped_cnt += 1
            elif result is False:
                failed_cnt += 1
            elif result is not True:
                # Neither a status code nor a bool: this is a failing path.
                error_list.append(result)
    return target_cnt, error_list, skipped_cnt, failed_cnt
def main():
    """Run both preprocessing stages: WAV conversion, then chorus extraction."""
    # Stage 1: load the track table and convert each record to WAV.
    csv_path = os.path.join("./result", "itdb_tracks.csv")
    track_table = pd.read_csv(csv_path)
    tracks = track_table.to_dict("records")
    wav_total, wav_failed, wav_skipped = convert_wav(tracks, WAV_PATH)
    print_results(wav_total, wav_failed, wav_skipped)

    # Stage 2: normalize and cut the 30-second chorus part of each WAV.
    chorus_total, chorus_errors, chorus_skipped, undetected = extract_chorus(
        WAV_PATH, CHORUS_PATH
    )
    print_results(chorus_total, chorus_errors, chorus_skipped)
    if undetected > 0:
        print(f"{undetected} items failed to estimate, used default time (60s)")
# Script entry point. The guard also keeps worker processes that re-import
# this module from re-running main().
if __name__ == "__main__":
    # Per the multiprocessing docs, freeze_support() is needed only for
    # frozen Windows executables and has no effect otherwise.
    freeze_support()
    main()