Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ jobs:
- uses: actions/checkout@v2
- uses: psf/black@stable
with:
options: "--check --diff --color"
version: "21.10b0"
options: "--check --diff --color --extend-exclude '/hyp_utils\\/kaldi/'"
version: "21.10b0"
276 changes: 167 additions & 109 deletions egs/chime5_spkdet/v1/local/make_musan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,125 +7,183 @@

import os, sys


def process_music_annotations(path):
    """Parse a music ``ANNOTATIONS`` file.

    Each non-blank line must hold at least four whitespace-separated fields:
    utterance ID, genres, a vocals flag and a musician ID. Only the utterance
    ID and the vocals flag are used; blank lines are skipped.

    Args:
        path: Path of the annotations file to read.

    Returns:
        A tuple ``(utt2spk, utt2vocals)``. ``utt2spk`` maps every utterance ID
        to itself, and ``utt2vocals`` maps it to ``True`` when the vocals
        field is exactly ``"Y"``.

    Raises:
        ValueError: If a non-blank line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    # The context manager guarantees the handle is closed even on a parse error.
    with open(path, "r") as annotations:
        for line in annotations:
            fields = line.split()
            if not fields:
                continue
            utt, _genres, vocals, _musician = fields[:4]
            # For this application, the musician ID isn't important, so each
            # utterance acts as its own speaker.
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals


def prepare_music(root_dir, fs, use_vocals):
    """Build the utt2spk and wav.scp text for the ``music`` subdirectory.

    Walks ``<root_dir>/music``, recording every ``.wav`` file and merging the
    contents of every file named ``ANNOTATIONS``. Only annotated utterances
    are considered; an annotated utterance without a wav file is reported on
    stdout and counted as missing.

    Args:
        root_dir: Directory that contains the ``music`` subdirectory.
        fs: Sampling-rate selector. ``8`` makes the sox command resample to
            8k; any other value resamples to 16k.
        use_vocals: When false, utterances annotated as having vocals are
            left out of the output.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated entries:
        ``"<utt> <utt>"`` lines and ``"<utt> sox -t wav <path> -r <rate> -t
        wav - |"`` lines respectively.
    """
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    music_dir = os.path.join(root_dir, "music")
    for root, _dirs, files in os.walk(music_dir):
        for name in files:
            file_path = os.path.join(root, name)
            if name.endswith(".wav"):
                utt2wav[name.replace(".wav", "")] = file_path
            elif name == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)

    # The target rate is loop-invariant, so pick it once.
    rate = "8k" if fs == 8 else "16k"
    # Collect lines and join once instead of repeated string concatenation.
    utt2spk_lines = []
    utt2wav_lines = []
    num_good_files = 0
    num_bad_files = 0
    for utt, has_vocals in utt2vocals.items():
        if utt not in utt2wav:
            print("Missing file", utt)
            num_bad_files += 1
            continue
        # Utterances filtered out for vocals are neither good nor bad.
        if use_vocals or not has_vocals:
            utt2spk_lines.append(utt + " " + utt2spk[utt] + "\n")
            utt2wav_lines.append(
                utt + " sox -t wav " + utt2wav[utt] + " -r " + rate + " -t wav - |\n"
            )
            num_good_files += 1
    print(
        "In music directory, processed",
        num_good_files,
        "files;",
        num_bad_files,
        "had missing wav data",
    )
    return "".join(utt2spk_lines), "".join(utt2wav_lines)


def prepare_speech(root_dir, fs):
    """Build the utt2spk and wav.scp text for the ``speech`` subdirectory.

    Walks ``<root_dir>/speech`` and emits one entry per ``.wav`` file. The
    utterance ID is the file name with ``.wav`` removed, and each utterance
    is its own speaker.

    Args:
        root_dir: Directory that contains the ``speech`` subdirectory.
        fs: Sampling-rate selector. ``8`` makes the sox command resample to
            8k; any other value resamples to 16k.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated entries:
        ``"<utt> <utt>"`` lines and ``"<utt> sox -t wav <path> -r <rate> -t
        wav - |"`` lines respectively.
    """
    utt2wav = {}
    speech_dir = os.path.join(root_dir, "speech")
    for root, _dirs, files in os.walk(speech_dir):
        for name in files:
            if name.endswith(".wav"):
                utt2wav[name.replace(".wav", "")] = os.path.join(root, name)

    # The target rate is loop-invariant, so pick it once.
    rate = "8k" if fs == 8 else "16k"
    # Join once instead of repeated string concatenation.
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " sox -t wav " + path + " -r " + rate + " -t wav - |\n"
        for utt, path in utt2wav.items()
    )
    # Every utterance comes from a wav file found on disk, so none can be
    # missing; the zero is kept so the summary line reads as before.
    print(
        "In speech directory, processed",
        len(utt2wav),
        "files;",
        0,
        "had missing wav data",
    )
    return utt2spk_str, utt2wav_str


def prepare_noise(root_dir, fs):
    """Build the utt2spk and wav.scp text for the ``noise`` subdirectory.

    Walks ``<root_dir>/noise`` and emits one entry per ``.wav`` file. The
    utterance ID is the file name with ``.wav`` removed, and each utterance
    is its own speaker.

    Args:
        root_dir: Directory that contains the ``noise`` subdirectory.
        fs: Sampling-rate selector. ``8`` makes the sox command resample to
            8k; any other value resamples to 16k.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated entries:
        ``"<utt> <utt>"`` lines and ``"<utt> sox -t wav <path> -r <rate> -t
        wav - |"`` lines respectively.
    """
    utt2wav = {}
    noise_dir = os.path.join(root_dir, "noise")
    for root, _dirs, files in os.walk(noise_dir):
        for name in files:
            if name.endswith(".wav"):
                utt2wav[name.replace(".wav", "")] = os.path.join(root, name)

    # The target rate is loop-invariant, so pick it once.
    rate = "8k" if fs == 8 else "16k"
    # Join once instead of repeated string concatenation.
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " sox -t wav " + path + " -r " + rate + " -t wav - |\n"
        for utt, path in utt2wav.items()
    )
    # Every utterance comes from a wav file found on disk, so none can be
    # missing; the zero is kept so the summary line reads as before.
    print(
        "In noise directory, processed",
        len(utt2wav),
        "files;",
        0,
        "had missing wav data",
    )
    return utt2spk_str, utt2wav_str


def main():
    """Write ``wav.scp`` and ``utt2spk`` for the speech, music and noise data.

    Reads four positional command-line arguments from ``sys.argv``:
    the input directory, the sampling-rate selector (parsed as an int), the
    output directory, and a vocals flag (``"Y"`` keeps music with vocals).
    Entries are written in the order speech, music, noise.
    """
    in_dir = sys.argv[1]
    fs = int(sys.argv[2])
    out_dir = sys.argv[3]
    use_vocals = sys.argv[4] == "Y"
    utt2spk_music, utt2wav_music = prepare_music(in_dir, fs, use_vocals)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir, fs)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir, fs)
    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
    # Context managers make sure both files are flushed and closed.
    with open(os.path.join(out_dir, "wav.scp"), "w") as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), "w") as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


# Run the data preparation once, and only when executed as a script.
if __name__ == "__main__":
    main()
58 changes: 32 additions & 26 deletions egs/chime5_spkdet/v1/local/score_dcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,48 +20,54 @@

def score_dcf(key_file, score_file, output_path):
    """Compute EER and DCF values for a score file and write a summary line.

    Loads the trial key and the scores, splits the scores into target and
    non-target sets, and evaluates them at the priors 0.001, 0.005, 0.01 and
    0.05. The summary is written to ``<output_path>_results`` and logged at
    INFO level.

    Args:
        key_file: Path passed to ``TrialKey.load_txt``.
        score_file: Path passed to ``TrialScores.load_txt``.
        output_path: Prefix of the results file. Its directory part, if any,
            is created when missing.
    """
    logging.info("Load key: %s", key_file)
    key = TrialKey.load_txt(key_file)
    logging.info("Load scores: %s", score_file)
    scr = TrialScores.load_txt(score_file)
    tar, non = scr.get_tar_non(key)

    priors = np.array([0.001, 0.005, 0.01, 0.05])
    min_dcf, act_dcf, eer, _ = fast_eval(tar, non, priors)

    # A bare file-name prefix has an empty dirname; os.makedirs("") would
    # raise, so only create a directory when there is one to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Values are reported from the largest prior (index 3) to the smallest
    # (index 0), each as "min / actual".
    s = "EER: {0:.2f} DCF5e-2: {1:.3f} / {2:.3f} DCF1e-2: {3:.3f} / {4:.3f} DCF5e-3: {5:.3f} / {6:.3f} DCF1e-3: {7:.3f} / {8:.3f}".format(
        eer * 100,
        min_dcf[3],
        act_dcf[3],
        min_dcf[2],
        act_dcf[2],
        min_dcf[1],
        act_dcf[1],
        min_dcf[0],
        act_dcf[0],
    )
    output_file = output_path + "_results"
    with open(output_file, "w") as f:
        f.write(s)
    logging.info(s)


if __name__ == "__main__":

    # Command-line entry point: parse the options, configure logging, then
    # forward the remaining options to score_dcf as keyword arguments.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        fromfile_prefix_chars="@",
        description="Computes EER and DCF",
    )

    parser.add_argument("--key-file", dest="key_file", required=True)
    parser.add_argument("--score-file", dest="score_file", required=True)
    parser.add_argument("--output-path", dest="output_path", required=True)
    parser.add_argument(
        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
    )

    args = parser.parse_args()
    config_logger(args.verbose)
    # score_dcf has no "verbose" parameter, so drop it before unpacking.
    del args.verbose
    logging.debug(args)

    score_dcf(**vars(args))
Loading