update speaker docs (#4164)

* update speaker docs
  Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
* chunks -> segments
  Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
* Khz -> kHz
  Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
NVIDIA · Jun 3, 2022 · f55c02b
1 parent afca46a
commit f55c02b
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 20 deletions.
diff --git a/scripts/speaker_tasks/filelist_to_manifest.py b/scripts/speaker_tasks/filelist_to_manifest.py
@@ -30,21 +30,21 @@
 This scipt converts a filelist file where each line contains  
 <absolute path of wav file> to a manifest json file. 
 Optionally post processes the manifest file to create dev and train split for speaker embedding 
-training, also optionally chunk an audio file in to segments of random DURATIONS and create those
+training, also optionally segment an audio file in to segments of random DURATIONS and create those
 wav files in CWD. 
 
-While creating chunks, if audio is not sampled at 16Khz, it resamples to 16Khz and write the wav file.
+While creating segments, if audio is not sampled at 16kHz, it resamples to 16kHz and write the wav file.
 Args: 
 --filelist: path to file containing list of audio files
---manifest(optional): if you already have manifest file, but would like to process it for creating chunks and splitting then use manifest ignoring filelist
+--manifest(optional): if you already have manifest file, but would like to process it for creating 
+    segments and splitting then use manifest ignoring filelist
 --id: index of speaker label in filename present in filelist file that is separated by '/'
 --out: output manifest file name
 --split: if you would want to split the  manifest file for training purposes
-        you may not need this for test set. output file names is <out>_<train/dev>.json
-        Defaults to False
---create_chunks:if you would want to chunk each manifest line to chunks of 4 sec or less
-        you may not need this for test set, Defaults to False
---min_spkrs_count: min number of samples per speaker to consider and ignore otherwise
+    you may not need this for test set. output file names is <out>_<train/dev>.json, defaults to False
+--create_segments: if you would want to segment each manifest line to segments of [1,2,3,4] sec or less
+    you may not need this for test set, defaults to False
+--min_spkrs_count: min number of samples per speaker to consider and ignore otherwise, defaults to 0 (all speakers)
 """
 
 DURATIONS = sorted([1, 2, 3, 4], reverse=True)
@@ -60,7 +60,7 @@ def filter_manifest_line(manifest_line):
     dur = manifest_line['duration']
     label = manifest_line['label']
     endname = os.path.splitext(audio_path.split(label, 1)[-1])[0]
-    to_path = os.path.join(CWD, 'chunks', label)
+    to_path = os.path.join(CWD, 'segments', label)
     to_path = os.path.join(to_path, endname[1:])
     os.makedirs(os.path.dirname(to_path), exist_ok=True)
 
@@ -87,8 +87,8 @@ def filter_manifest_line(manifest_line):
 
                 c_start = int(float(start * sr))
                 c_end = c_start + int(float(temp_dur * sr))
-                chunk = signal[c_start:c_end]
-                sf.write(to_file, chunk, sr)
+                segment = signal[c_start:c_end]
+                sf.write(to_file, segment, sr)
 
                 meta = manifest_line.copy()
                 meta['audio_filepath'] = to_file
@@ -172,7 +172,7 @@ def get_labels(lines):
     return labels
 
 
-def main(filelist, manifest, id, out, split=False, create_chunks=False, min_count=10):
+def main(filelist, manifest, id, out, split=False, create_segments=False, min_count=10):
     if os.path.exists(out):
         os.remove(out)
     if filelist:
@@ -185,8 +185,8 @@ def main(filelist, manifest, id, out, split=False, create_chunks=False, min_coun
 
     lines = process_map(get_duration, lines, chunksize=100)
 
-    if create_chunks:
-        print(f"creating and writing chunks to {CWD}")
+    if create_segments:
+        print(f"creating and writing segments to {CWD}")
         lines = process_map(filter_manifest_line, lines, chunksize=100)
         temp = []
         for line in lines:
@@ -232,8 +232,8 @@ def main(filelist, manifest, id, out, split=False, create_chunks=False, min_coun
         action='store_true',
     )
     parser.add_argument(
-        "--create_chunks",
-        help="bool if you would want to chunk each manifest line to chunks of 4 sec or less",
+        "--create_segments",
+        help="bool if you would want to segment each manifest line to segments of 4 sec or less",
         required=False,
         action='store_true',
     )
@@ -247,5 +247,5 @@ def main(filelist, manifest, id, out, split=False, create_chunks=False, min_coun
     args = parser.parse_args()
 
     main(
-        args.filelist, args.manifest, args.id, args.out, args.split, args.create_chunks, args.min_spkrs_count,
+        args.filelist, args.manifest, args.id, args.out, args.split, args.create_segments, args.min_spkrs_count,
     )
diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb
@@ -663,4 +663,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 4
-}
+}
diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb
@@ -593,4 +593,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 4
-}
+}
diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb
@@ -1264,4 +1264,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 1
-}
+}
-Original file line number
+Diff line change
@@ -663,4 +663,4 @@
         },
         "nbformat": 4,
         "nbformat_minor": 4
-    }
+    }