Skip to content

Commit

Permalink
update speaker docs (#4164)
Browse files Browse the repository at this point in the history
* update speaker docs

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* chunks -> segments

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* Khz -> kHz

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
  • Loading branch information
nithinraok authored and ericharper committed Jun 3, 2022
1 parent afca46a commit f55c02b
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 20 deletions.
34 changes: 17 additions & 17 deletions scripts/speaker_tasks/filelist_to_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@
This script converts a filelist file where each line contains
<absolute path of wav file> to a manifest json file.
Optionally post processes the manifest file to create dev and train split for speaker embedding
training, also optionally chunk an audio file in to segments of random DURATIONS and create those
training, also optionally segment an audio file into segments of random DURATIONS and create those
wav files in CWD.
While creating chunks, if audio is not sampled at 16Khz, it resamples to 16Khz and write the wav file.
While creating segments, if audio is not sampled at 16kHz, it resamples to 16kHz and writes the wav file.
Args:
--filelist: path to file containing list of audio files
--manifest(optional): if you already have manifest file, but would like to process it for creating chunks and splitting then use manifest ignoring filelist
--manifest(optional): if you already have manifest file, but would like to process it for creating
segments and splitting then use manifest ignoring filelist
--id: index of speaker label in filename present in filelist file that is separated by '/'
--out: output manifest file name
--split: if you would want to split the manifest file for training purposes
you may not need this for test set. output file names is <out>_<train/dev>.json
Defaults to False
--create_chunks:if you would want to chunk each manifest line to chunks of 4 sec or less
you may not need this for test set, Defaults to False
--min_spkrs_count: min number of samples per speaker to consider and ignore otherwise
you may not need this for test set. output file names are <out>_<train/dev>.json, defaults to False
--create_segments: if you would want to segment each manifest line to segments of [1,2,3,4] sec or less
you may not need this for test set, defaults to False
--min_spkrs_count: min number of samples per speaker to consider and ignore otherwise, defaults to 0 (all speakers)
"""

DURATIONS = sorted([1, 2, 3, 4], reverse=True)
Expand All @@ -60,7 +60,7 @@ def filter_manifest_line(manifest_line):
dur = manifest_line['duration']
label = manifest_line['label']
endname = os.path.splitext(audio_path.split(label, 1)[-1])[0]
to_path = os.path.join(CWD, 'chunks', label)
to_path = os.path.join(CWD, 'segments', label)
to_path = os.path.join(to_path, endname[1:])
os.makedirs(os.path.dirname(to_path), exist_ok=True)

Expand All @@ -87,8 +87,8 @@ def filter_manifest_line(manifest_line):

c_start = int(float(start * sr))
c_end = c_start + int(float(temp_dur * sr))
chunk = signal[c_start:c_end]
sf.write(to_file, chunk, sr)
segment = signal[c_start:c_end]
sf.write(to_file, segment, sr)

meta = manifest_line.copy()
meta['audio_filepath'] = to_file
Expand Down Expand Up @@ -172,7 +172,7 @@ def get_labels(lines):
return labels


def main(filelist, manifest, id, out, split=False, create_chunks=False, min_count=10):
def main(filelist, manifest, id, out, split=False, create_segments=False, min_count=10):
if os.path.exists(out):
os.remove(out)
if filelist:
Expand All @@ -185,8 +185,8 @@ def main(filelist, manifest, id, out, split=False, create_chunks=False, min_coun

lines = process_map(get_duration, lines, chunksize=100)

if create_chunks:
print(f"creating and writing chunks to {CWD}")
if create_segments:
print(f"creating and writing segments to {CWD}")
lines = process_map(filter_manifest_line, lines, chunksize=100)
temp = []
for line in lines:
Expand Down Expand Up @@ -232,8 +232,8 @@ def main(filelist, manifest, id, out, split=False, create_chunks=False, min_coun
action='store_true',
)
parser.add_argument(
"--create_chunks",
help="bool if you would want to chunk each manifest line to chunks of 4 sec or less",
"--create_segments",
help="bool if you would want to segment each manifest line to segments of 4 sec or less",
required=False,
action='store_true',
)
Expand All @@ -247,5 +247,5 @@ def main(filelist, manifest, id, out, split=False, create_chunks=False, min_coun
args = parser.parse_args()

main(
args.filelist, args.manifest, args.id, args.out, args.split, args.create_chunks, args.min_spkrs_count,
args.filelist, args.manifest, args.id, args.out, args.split, args.create_segments, args.min_spkrs_count,
)
2 changes: 1 addition & 1 deletion tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -663,4 +663,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -593,4 +593,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1264,4 +1264,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}

0 comments on commit f55c02b

Please sign in to comment.