# audio_tokenizer_auditok.py
############################################################################################################
# AIM : Script to chunk audio files from path or youtube URL using auditok
# USAGE : python3 audio_tokenizer_auditok.py -url "https://www.youtube.com/watch?v=sXs4LZQhTio"
# python3 audio_tokenizer_auditok.py -filepath "/home/aswin/abc.wav"
############################################################################################################
import sys
import argparse
import uuid
import youtube_dl
import os
import subprocess
import auditok
import csv
import pafy
import os
from pytube import YouTube
msg = "auditok Audio Tokenizer"

# Command-line interface: the input is either a YouTube URL or a local file.
parser = argparse.ArgumentParser(description=msg)
parser.add_argument(
    "-url",
    "--url",
    help="youtube URL",
)
parser.add_argument(
    "-filepath",
    "--filepath",
    help="file path in local",
)
parser.add_argument(
    "-desc",
    "--desc",
    help="file description",
)
args = parser.parse_args()

# At least one input source is mandatory; stop before doing any work otherwise.
if all(source is None for source in (args.url, args.filepath)):
    sys.exit("ERROR : either enter URL or Path")

# Tunable values.
# Root folder that receives the chunk folders and the metadata CSV.
PATH = 'AUDITOK_CHUNKS/'
# Minimum duration of a valid audio event, in seconds.
MIN_DUR = 5
# Maximum duration of an event.
MAX_DUR = 45
# Maximum duration of tolerated continuous silence within an event.
MAX_SILENCE = 0.050
# Default detection threshold.
DEF_ENERGY_THRESHOLD = 50
def download_youtubeaudio(url):
    """Download the audio of a YouTube video and convert it to the fixed format.

    The first audio stream reported by pytube is downloaded into the current
    working directory, renamed to a random temporary name and re-encoded by
    ffmpeg (``-ar 16000 -ac 1 -bits_per_raw_sample 16 -vn``). The temporary
    download is always removed, whether or not the conversion succeeded.

    Args:
        url: URL of the YouTube video.

    Returns:
        The name of the converted ``.wav`` file (relative to the current
        working directory), or ``None`` when the download or the conversion
        failed. The error is printed in that case.
    """
    filepath = str(uuid.uuid4()) + ".wav"      # raw download, removed below
    output_file = str(uuid.uuid1()) + ".wav"   # converted file handed to the caller
    try:
        # NOTE: a youtube_dl based download (FFmpegExtractAudio post-processor)
        # was used here before but was slow; pytube is used instead.
        stream = YouTube(url).streams.filter(type="audio").first()
        if stream is None:
            raise ValueError("no audio stream found for {}".format(url))
        stream.download()
        os.rename(stream.default_filename, filepath)

        # Process the audio file to the fixed format. The command is passed as
        # an argument list (no shell) and a non-zero exit status is treated as
        # a failure instead of returning the name of a file that may not exist.
        subprocess.check_call([
            "ffmpeg", "-loglevel", "error", "-y",
            "-i", filepath,
            "-ar", "16000",
            "-ac", "1",
            "-bits_per_raw_sample", "16",
            "-vn", output_file,
        ])
        return output_file
    except Exception as e:
        print(e)
        # Do not leave a partially written conversion result behind.
        if os.path.exists(output_file):
            os.remove(output_file)
        return None
    finally:
        if os.path.exists(filepath):
            os.remove(filepath)
#process audio file to fixed format
def audio_formatter(filepath):
    """Convert a local audio/video file to the fixed format used for chunking.

    ffmpeg is run with ``-ar 16000 -ac 1 -bits_per_raw_sample 16 -vn`` and
    writes its result to a randomly named ``.wav`` file in the current working
    directory. The command is passed as an argument list without a shell, so
    paths containing spaces or shell metacharacters are handled literally.

    Args:
        filepath: Path of the input file.

    Returns:
        The name of the output ``.wav`` file. The name is returned even when
        ffmpeg could not be started or exited with an error; such failures are
        reported on stderr.
    """
    output_file = str(uuid.uuid1()) + ".wav"
    command = [
        "ffmpeg", "-loglevel", "error", "-y",
        "-i", filepath,
        "-ar", "16000",
        "-ac", "1",
        "-bits_per_raw_sample", "16",
        "-vn", output_file,
    ]
    try:
        status = subprocess.call(command)
    except OSError as err:
        # Raised when the ffmpeg executable is missing or cannot be executed.
        print("ERROR : could not run ffmpeg : {}".format(err), file=sys.stderr)
    else:
        if status != 0:
            print(
                "ERROR : ffmpeg exited with status {} for {}".format(status, filepath),
                file=sys.stderr,
            )
    return output_file
#core logic of auditok
def audio_tokenize(var_threshold):
    """Split the module-level ``audio_file`` into audio regions with auditok.

    Args:
        var_threshold: Value passed to ``auditok.split`` as ``energy_threshold``.

    Returns:
        Whatever ``auditok.split`` returns for ``audio_file`` with the
        module-level ``MIN_DUR``, ``MAX_DUR`` and ``MAX_SILENCE`` settings.
    """
    split_options = {
        "min_dur": MIN_DUR,
        "max_dur": MAX_DUR,
        "max_silence": MAX_SILENCE,
        "energy_threshold": var_threshold,
    }
    return auditok.split(audio_file, **split_options)
# Resolve the input audio. When both options are passed the URL takes
# precedence, so the audio that gets chunked always matches the metadata row
# written further down (which also prefers the URL).
if args.url:
    print("Passed inputs : ")
    print("----------------")
    url = args.url
    video = pafy.new(url)
    print("Input URL : " , url)
    print("Input TITLE : " , video.title)
    audio_file = download_youtubeaudio(url)
    if audio_file is None:
        # download_youtubeaudio prints its own error and returns None on failure.
        sys.exit("ERROR : could not fetch audio from URL")
elif args.filepath:
    print("Passed inputs : ")
    print("----------------")
    print("Input filepath : " + args.filepath)
    audio_file = audio_formatter(args.filepath)

try:
    # Generate chunks using the default threshold. The result is materialised
    # into a list because auditok.split yields regions lazily: a consumed
    # result can be neither counted and saved, nor saved and printed.
    max_yield_threshold = DEF_ENERGY_THRESHOLD
    audio_regions = list(audio_tokenize(max_yield_threshold))
    max_chunk = len(audio_regions)

    # Iterate to find a better yielding threshold. On ties the later (higher)
    # threshold wins, hence ">=".
    for threshold in range(20, 80):
        candidate_regions = list(audio_tokenize(threshold))
        if len(candidate_regions) >= max_chunk:
            max_chunk = len(candidate_regions)
            audio_regions = candidate_regions
            max_yield_threshold = threshold
    print("max threshold : ", max_yield_threshold)

    # Each run gets its own folder, named after a fresh UUID.
    videouuid = str(uuid.uuid4())
    foldername = videouuid
    if args.url:
        row_contents = [args.url , videouuid , video.title , video.duration]
    else:
        row_contents = [args.filepath , videouuid , " " , ""]
    print(row_contents)

    savepath = PATH + foldername + "/"
    os.makedirs(savepath)

    # Save audio chunks, avoiding chunks whose duration is exactly max_dur.
    # Durations are floats, so compare with a small tolerance instead of "!=".
    duration_tolerance = 1e-6
    chunk_name = os.path.join(
        savepath, videouuid + "_region_{meta.start:.3f}-{meta.end:.3f}.wav"
    )
    counter = 0
    for region in audio_regions:
        duration = region.meta.end - region.meta.start
        if abs(duration - MAX_DUR) > duration_tolerance:
            region.save(chunk_name)
            counter = counter + 1

    for item in audio_regions:
        print(item)

    row_contents.append(counter)

    # Save metadata to csv. newline='' lets the csv module control line
    # endings, which avoids blank rows on platforms that translate newlines.
    with open(PATH + 'url_details.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row_contents)
    print(row_contents)
except Exception as e:
    print(e)