Skip to content

Commit 20d0f03

Browse files
committed
ENH: Add error messages in vak.annotation, fix #525
- Add custom exception `AudioFilenameNotFound` - Have `audio_stem_from_path` raise this exception when it fails to find a valid audio format extension and then runs out of extensions to remove. The error clearly states that an audio filename was not found in the path, instead of the short, cryptic "unable to compute stem". - Have `map_annotated_to_annot` look for this custom exception in a try-except block when it first creates the keys for the map. If we catch an `AudioFilenameNotFound` exception, then we raise a verbose `ValueError` explaining what happened, including both `audio_path` and the path to the annotation itself, and links to the file naming convention page added in #564 and to the how-to-user-annotation-format page.
1 parent 617359d commit 20d0f03

File tree

1 file changed

+82
-37
lines changed

1 file changed

+82
-37
lines changed

src/vak/annotation.py

+82-37
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from __future__ import annotations
12
from collections import Counter
23
import copy
34
import os
@@ -134,6 +135,10 @@ def files_from_dir(annot_dir, annot_format):
134135
return annot_files
135136

136137

138+
class AudioFilenameNotFound(Exception):
139+
"""Error raised by ``audio_stem_from_path``"""
140+
141+
137142
def audio_stem_from_path(path):
138143
"""Find the name of an audio file within a filename
139144
by removing extensions until finding an audio extension,
@@ -175,42 +180,90 @@ def audio_stem_from_path(path):
175180
new_stem, ext = os.path.splitext(stem)
176181
ext = ext.replace(".", "").lower()
177182
if new_stem == stem:
178-
raise ValueError(f"unable to compute stem of {path}")
183+
raise AudioFilenameNotFound(
184+
f"Unable to find a valid audio filename in path:\n{path}.\n"
185+
f"Valid audio file extensions are:\n{constants.VALID_AUDIO_FORMATS}"
186+
)
179187
else:
180188
stem = new_stem
181189
return stem
182190

183191

184-
def map_annotated_to_annot(source_files, annot_list):
185-
"""map source files, i.e. audio or spectrogram files, to annotations
192+
def map_annotated_to_annot(annotated_files: list,
193+
annot_list: crowsetta.Annotation) -> dict:
194+
"""Map annotated files,
195+
i.e. audio or spectrogram files,
196+
to their corresponding annotations.
186197
187-
returns a ``dict`` where each key is a path to a source audio file
188-
or array file containing a spectrogram, and the corresponding value
189-
is the annotation for the source file
198+
Returns a ``dict`` where each key
199+
is a path to an annotated file,
200+
and the value for each key
201+
is a ``crowsetta.Annotation``.
190202
191203
Parameters
192204
----------
193-
source_files : list
194-
of audio or spectrogram files. The names of the files must begin with the
195-
audio_path attribute of the corresponding annotations. E.g., if an audio file is
196-
'bird0-2016-05-04-133027.wav', then there must be an annotation whose
197-
file attribute equals that filename. Spectrogram files should include
198-
the audio file name, e.g. 'bird0-2016-05-04-133027.wav.mat' or
199-
'bird0-2016-05-04-133027.spect.npz' would match an annotation with the
200-
audio_path attribute 'bird0-2016-05-04-133027.wav'.
205+
annotated_files : list
206+
Of paths to audio or spectrogram files.
201207
annot_list : list
202-
of Annotations corresponding to files in source_files
208+
of Annotations corresponding to files in annotated_files
209+
210+
Notes
211+
-----
212+
The filenames of the ``annotated_files`` must
213+
begin with the filename of the ``audio_path``
214+
attribute of the corresponding
215+
``crowsetta.Annotation`` instances.
216+
E.g., if `annotated_files` includes
217+
an audio file named
218+
'bird0-2016-05-04-133027.wav',
219+
then it will be mapped to an ``Annotation``
220+
with an `audio_path` attribute
221+
whose filename matches it.
222+
Spectrogram files should also include
223+
the audio file name,
224+
e.g. 'bird0-2016-05-04-133027.wav.mat'
225+
or 'bird0-2016-05-04-133027.spect.npz'
226+
would match an ``Annotation`` with the
227+
``audio_path`` attribute '/some/path/bird0-2016-05-04-133027.wav'.
228+
229+
For more detail, please see
230+
the page on file naming conventions in the
231+
reference section of the documentation:
232+
https://vak.readthedocs.io/en/latest/reference/filenames.html
203233
"""
204-
if type(source_files) == np.ndarray: # e.g., vak DataFrame['spect_path'].values
205-
source_files = source_files.tolist()
234+
if type(annotated_files) == np.ndarray: # e.g., vak DataFrame['spect_path'].values
235+
annotated_files = annotated_files.tolist()
206236

207237
# to pair audio files with annotations, make list of tuples
208-
source_annot_map = {}
238+
annotated_annot_map = {}
209239

210240
# ----> make a dict with audio stems as keys,
211241
# so we can look up annotations by stemming source files and using as keys.
212242
# First check that we don't have duplicate keys that would cause this to fail silently
213-
keys = [audio_stem_from_path(annot.audio_path) for annot in annot_list]
243+
keys = []
244+
for annot in annot_list:
245+
try:
246+
key = audio_stem_from_path(annot.audio_path)
247+
except AudioFilenameNotFound as e:
248+
# Do this as a loop with a super verbose error
249+
# instead of e.g. a single-line list comprehension
250+
# so we can help users troubleshoot,
251+
# see https://github.com/vocalpy/vak/issues/525
252+
raise ValueError(
253+
"The ``audio_path`` attribute of a ``crowsetta.Annotation`` was "
254+
"not recognized as a valid audio filename.\n"
255+
f"The ``audio_path`` attribute was:\n{annot.audio_path}\n"
256+
f"The annotation was loaded from this path:\n{annot.annot_path}\n"
257+
"For some annotation formats, audio filenames are inferred from annotation filenames.\n"
258+
"Please check that your annotation files are named "
259+
"according to the conventions:\n"
260+
"https://vak.readthedocs.io/en/latest/reference/filenames.html\n"
261+
"It may also be helpful to read the page on converting custom formats "
262+
"to annotations that ``vak`` can work with:\n"
263+
"https://vak.readthedocs.io/en/latest/howto/howto_user_annot.html"
264+
) from e
265+
keys.append(key)
266+
214267
keys_set = set(keys)
215268
if len(keys_set) < len(keys):
216269
duplicates = [item for item, count in Counter(keys).items() if count > 1]
@@ -225,33 +278,25 @@ def map_annotated_to_annot(source_files, annot_list):
225278
# Make a copy from which we remove source files after mapping them to annotation,
226279
# to validate that function worked,
227280
# by making sure there are no items left in this copy after the loop
228-
source_files_copy = copy.deepcopy(source_files)
229-
for source_file in list(
230-
source_files
281+
annotated_files_copy = copy.deepcopy(annotated_files)
282+
for annotated_file in list(
283+
annotated_files
231284
): # list() to copy, so we can pop off items while iterating
232285
# remove stem so we can find .spect files that match with audio files,
233286
# e.g. find 'llb3_0003_2018_04_23_14_18_54.mat' that should match
234287
# with 'llb3_0003_2018_04_23_14_18_54.wav'
235-
source_file_stem = audio_stem_from_path(source_file)
236-
237-
try:
238-
annot = audio_stem_annot_map[source_file_stem]
239-
except KeyError:
240-
raise ValueError(
241-
f"could not find annotation for source file: {source_file}.\n"
242-
f"No annotation had an audio file whose stem matched the source file stem: {source_file_stem}"
243-
)
244-
245-
source_annot_map[source_file] = annot
246-
source_files_copy.remove(source_file)
288+
annotated_file_stem = audio_stem_from_path(annotated_file)
289+
annot = audio_stem_annot_map[annotated_file_stem]
290+
annotated_annot_map[annotated_file] = annot
291+
annotated_files_copy.remove(annotated_file)
247292

248-
if len(source_files_copy) > 0:
293+
if len(annotated_files_copy) > 0:
249294
raise ValueError(
250295
"could not map the following source files to annotations: "
251-
f"{source_files_copy}"
296+
f"{annotated_files_copy}"
252297
)
253298

254-
return source_annot_map
299+
return annotated_annot_map
255300

256301

257302
def has_unlabeled(annot: crowsetta.Annotation,

0 commit comments

Comments
 (0)