Merge pull request #75 from NickleDave/phn2annot-fixes

LGTM, me!
vocalpy · Jan 12, 2021 · 53ee345 · 53ee345
2 parents 9ff0467 + 41a44b4
commit 53ee345
Show file tree

Hide file tree

Showing 31 changed files with 426 additions and 23 deletions.
diff --git a/setup.py b/setup.py
@@ -40,7 +40,12 @@
 }
 
 REQUIRED = [
-    'numpy', 'scipy', 'attrs', 'evfuncs', 'koumura',
+    'attrs',
+    'evfuncs',
+    'koumura',
+    'numpy',
+    'scipy',
+    'soundfile',
 ]
 
 DEV_DEPS = [

diff --git a/src/crowsetta/koumura.py b/src/crowsetta/koumura.py
@@ -9,9 +9,9 @@
 import os
 from pathlib import Path
 import numpy as np
-import wave
 
 import koumura
+import soundfile
 
 from .annotation import Annotation
 from .sequence import Sequence
@@ -94,10 +94,7 @@ def koumura2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True,
                 f'.wav file {wav_filename} specified in '
                 f'annotation file {annot_path} is not found'
             )
-        # found with %%timeit that Python wave module takes about 1/2 the time of
-        # scipy.io.wavfile for just reading sampling frequency from each file
-        with wave.open(wav_filename, 'rb') as wav_file:
-            samp_freq = wav_file.getframerate()
+        samp_freq = soundfile.info(wav_filename).samplerate
         onsets_s = np.round(onsets_Hz / samp_freq, decimals=3)
         offsets_s = np.round(offsets_Hz / samp_freq, decimals=3)
 

diff --git a/src/crowsetta/phn.py b/src/crowsetta/phn.py
@@ -2,9 +2,9 @@
 """
 import os
 from pathlib import Path
-import wave
 
 import numpy as np
+import soundfile
 
 from .sequence import Sequence
 from .annotation import Annotation
@@ -55,7 +55,9 @@ def phn2annot(annot_path,
     due to floating point error, e.g. when loading .phn files and then sending them to
     a csv file, the result should be the same on Windows and Linux
     """
-    annot_path = _parse_file(annot_path, extension='.phn')
+    # note multiple extensions: both all-uppercase `.PHN` and all-lowercase `.phn` exist,
+    # depending on which version of the TIMIT dataset you have
+    annot_path = _parse_file(annot_path, extension=('.phn', '.PHN'))
 
     if abspath and basename:
         raise ValueError('abspath and basename arguments cannot both be set to True, '
@@ -77,13 +79,17 @@ def phn2annot(annot_path,
         offsets_Hz = np.asarray(offsets_Hz)
         labels = np.asarray(labels)
 
-        audio_pathname = str(
-            Path(a_phn).parent.joinpath(
-                Path(a_phn).stem + '.wav'
+        # the check for audio_pathname needs to be case-insensitive,
+        # since some versions of the TIMIT dataset use .WAV instead of .wav
+        audio_pathname = Path(a_phn).parent.joinpath(Path(a_phn).stem + '.wav')
+        if not audio_pathname.exists():
+            audio_pathname = Path(a_phn).parent.joinpath(Path(a_phn).stem + '.WAV')
+            if not audio_pathname.exists():
+                raise FileNotFoundError(
+                    f'did not find a matching file with extension .wav or .WAV for the .phn file:\n{a_phn}'
                 )
-        )
-        with wave.open(audio_pathname, 'rb') as wav_file:
-            samp_freq = wav_file.getframerate()
+
+        samp_freq = soundfile.info(audio_pathname).samplerate
         onsets_s = onsets_Hz / samp_freq
         offsets_s = offsets_Hz / samp_freq
 
@@ -140,7 +146,7 @@ def phn2csv(annot_path, csv_filename, abspath=False, basename=False):
     -------
     None
     """
-    annot_path = _parse_file(annot_path, extension='.phn')
+    annot_path = _parse_file(annot_path, extension=('.phn', '.PHN'))
 
     if abspath and basename:
         raise ValueError('abspath and basename arguments cannot both be set to True, '

diff --git a/src/crowsetta/validation.py b/src/crowsetta/validation.py
@@ -65,11 +65,36 @@ def column_or_row_or_1d(y):
 
 
 def _parse_file(file, extension):
-    """helper function that parses/validates value for file argument;
-    puts a single string or Path into a list to iterate over it (cheap hack
-    that lets functions accept multiple types), and checks list to make sure
-    all types are consistent
+    """check that all files have valid extensions,
+    convert into a list that can be iterated over
+
+    Parameters
+    ----------
+    file : str, pathlib.Path, list
+        filename(s), list must be of str or pathlib.Path
+    extension : str, tuple
+        valid file extension(s). tuple must be tuple of strings.
+        Function expects that extensions will be specified with a period,
+        e.g. ('.phn', '.PHN')
+
+    Returns
+    -------
+    files_validated : list
+        of filenames, all having validated extensions
     """
+    if isinstance(extension, str):
+        extension = (extension,)
+    elif isinstance(extension, tuple):
+        if not all([isinstance(element, str) for element in extension]):
+            raise TypeError(
+                "must specify all valid extensions as strings, but value was \n"
+                f"'{extension}' with types: {[type(element) for element in extension]}"
+            )
+    else:
+        raise TypeError(
+            f'extension must be str or tuple but type was {type(extension)}'
+        )
+
     if not(isinstance(file, str) or isinstance(file, PurePath) or isinstance(file, list)):
         raise TypeError(
             f"file must be a str or a pathlib.Path, but type of file was {type(file)}.\n"
@@ -94,8 +119,8 @@ def _parse_file(file, extension):
         # (because using Path.suffixes() would require too much special casing)
         a_file = str(a_file)
         if not a_file.endswith(extension):
-            raise ValueError(f"all filenames in 'file' must end with '{extension}' "
-                             f"but {a_file} does not")
+            raise ValueError(f"file does not have a valid extension: {a_file}"
+                             f"valid extension(s) for filenames are: '{extension}'")
         file_out.append(a_file)
 
     return file_out
diff --git a/tests/test_data/audio_WAV_annot_PHN/README.md b/tests/test_data/audio_WAV_annot_PHN/README.md
@@ -0,0 +1,22 @@
+These are files from a version of TIMIT with audio files that end in .WAV, 
+and annotation files that end in .PHN.
+
+The .PHN files are the same as .phn files, it's just that the extension is all uppercase.
+
+The .WAV files are actually a different format; specifically the NIST format.
+This format can be parsed by `soundfile` but *not* by `wave` from the Python standard 
+library or `scipy.io.wavfile`.
+
+Presumably the `.WAV.wav` files are the NIST format converted to a more 
+common .wav format.
+I did verify for one file that both have the same sampling rate.
+So literally I think they just removed the weird NIST header and 
+converted to a standard .wav file.
+
+Both file types are added here so tests can verify that:
+- `phn2annot`  is case-insensitive, i.e., it parses .phn and .PHN files
+- **and** `phn2annot` is case and "format" insensitive when parsing .wav 
+  and .WAV files -- it's still able to figure out the sampling rate so 
+  it can convert onset and offset times from sample number to seconds,
+  regardless of whether it's a `.wav` or a `.WAV` file
+
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA1.PHN b/tests/test_data/audio_WAV_annot_PHN/SA1.PHN
@@ -0,0 +1,41 @@
+0 9640 h#
+9640 11240 sh
+11240 12783 iy
+12783 14078 hv
+14078 16157 ae
+16157 16880 dcl
+16880 17103 d
+17103 17587 y
+17587 18760 er
+18760 19720 dcl
+19720 19962 d
+19962 21514 aa
+21514 22680 r
+22680 23800 kcl
+23800 24104 k
+24104 26280 s
+26280 28591 uw
+28591 29179 dx
+29179 30337 ih
+30337 31880 ng
+31880 32500 gcl
+32500 33170 g
+33170 33829 r
+33829 35150 iy
+35150 37370 s
+37370 38568 iy
+38568 40546 w
+40546 42357 aa
+42357 45119 sh
+45119 45624 epi
+45624 46855 w
+46855 48680 aa
+48680 49240 dx
+49240 51033 er
+51033 52378 q
+52378 54500 ao
+54500 55461 l
+55461 57395 y
+57395 59179 iy
+59179 60600 axr
+60600 63440 h#
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA1.TXT b/tests/test_data/audio_WAV_annot_PHN/SA1.TXT
@@ -0,0 +1 @@
+0 63488 She had your dark suit in greasy wash water all year.
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA1.WAV b/tests/test_data/audio_WAV_annot_PHN/SA1.WAV
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA1.WAV.wav b/tests/test_data/audio_WAV_annot_PHN/SA1.WAV.wav
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA1.WRD b/tests/test_data/audio_WAV_annot_PHN/SA1.WRD
@@ -0,0 +1,11 @@
+9640 12783 she
+12783 17103 had
+17103 18760 your
+18760 24104 dark
+24104 29179 suit
+29179 31880 in
+31880 38568 greasy
+38568 45119 wash
+45624 51033 water
+52378 55461 all
+55461 60600 year
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA2.PHN b/tests/test_data/audio_WAV_annot_PHN/SA2.PHN
@@ -0,0 +1,34 @@
+0 13227 h#
+13227 13419 d
+13419 15093 ow
+15093 15907 n
+15907 18250 ae
+18250 19990 s
+19990 21200 epi
+21200 21600 m
+21600 23120 iy
+23120 23728 tcl
+23728 23920 t
+23920 24680 ix
+24680 25360 kcl
+25360 26270 k
+26270 28114 eh
+28114 30036 r
+30036 31248 iy
+31248 32110 ix
+32110 33149 n
+33149 36133 oy
+36133 37595 l
+37595 39080 iy
+39080 40770 r
+40770 43454 ae
+43454 44410 gcl
+44410 44850 g
+44850 45824 l
+45824 47400 ay
+47400 48290 kcl
+48290 49040 k
+49040 50010 dh
+50010 52840 ae
+52840 55000 tcl
+55000 58000 h#
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA2.TXT b/tests/test_data/audio_WAV_annot_PHN/SA2.TXT
@@ -0,0 +1 @@
+0 58061 Don't ask me to carry an oily rag like that.
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA2.WAV b/tests/test_data/audio_WAV_annot_PHN/SA2.WAV
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA2.WAV.wav b/tests/test_data/audio_WAV_annot_PHN/SA2.WAV.wav
diff --git a/tests/test_data/audio_WAV_annot_PHN/SA2.WRD b/tests/test_data/audio_WAV_annot_PHN/SA2.WRD
@@ -0,0 +1,10 @@
+13227 15907 don't
+15907 19990 ask
+21200 23120 me
+23120 24680 to
+24680 31248 carry
+31248 33149 an
+33149 39080 oily
+39080 44850 rag
+44850 49040 like
+49040 55000 that
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI1573.PHN b/tests/test_data/audio_WAV_annot_PHN/SI1573.PHN
@@ -0,0 +1,60 @@
+0 8920 h#
+8920 9530 hh
+9530 10694 ih
+10694 12281 z
+12281 12930 kcl
+12930 13710 k
+13710 15707 ae
+15707 17540 pcl
+17540 17830 t
+17830 18798 ix
+18798 20161 n
+20161 21054 w
+21054 21765 ax
+21765 24187 s
+24187 25260 th
+25260 26840 ih
+26840 27710 n
+27710 28747 ae
+28747 30036 n
+30036 31558 hv
+31558 34297 ae
+34297 35100 gcl
+35100 35410 g
+35410 37240 er
+37240 38004 dcl
+38004 38210 d
+38210 39234 ix
+39234 40480 n
+40480 41851 ih
+41851 43550 z
+43550 44470 bcl
+44470 44730 b
+44730 45736 y
+45736 46520 ux
+46520 47000 dx
+47000 47880 ux
+47880 49740 f
+49740 51080 el
+51080 52440 bcl
+52440 52700 b
+52700 54440 uw
+54440 56040 tcl
+56040 56379 t
+56379 58100 s
+58100 58720 epi
+58720 59506 w
+59506 60859 axr
+60859 63324 w
+63324 64450 ao
+64450 65680 r
+65680 66800 n
+66800 67360 ix
+67360 68560 n
+68560 69000 epi
+69000 70909 sh
+70909 74200 ae
+74200 75105 bcl
+75105 75377 b
+75377 76867 iy
+76867 79520 h#
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI1573.TXT b/tests/test_data/audio_WAV_annot_PHN/SI1573.TXT
@@ -0,0 +1 @@
+0 79565 His captain was thin and haggard and his beautiful boots were worn and shabby.
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI1573.WAV b/tests/test_data/audio_WAV_annot_PHN/SI1573.WAV
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI1573.WAV.wav b/tests/test_data/audio_WAV_annot_PHN/SI1573.WAV.wav
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI1573.WRD b/tests/test_data/audio_WAV_annot_PHN/SI1573.WRD
@@ -0,0 +1,14 @@
+8920 12281 his
+12281 20161 captain
+20161 24187 was
+24187 27710 thin
+27710 30036 and
+30036 38210 haggard
+38210 40480 and
+40480 43550 his
+43550 51080 beautiful
+51080 58100 boots
+58720 60859 were
+60859 66800 worn
+66800 68560 and
+69000 76867 shabby
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI2203.PHN b/tests/test_data/audio_WAV_annot_PHN/SI2203.PHN
@@ -0,0 +1,32 @@
+0 9580 h#
+9580 9840 dh
+9840 10652 ix
+10652 11997 r
+11997 14640 iy
+14640 16340 z
+16340 16751 ax
+16751 19140 n
+19140 21040 z
+21040 22200 f
+22200 23227 axr
+23227 24220 dh
+24220 25240 ih
+25240 27560 s
+27560 28200 dcl
+28200 28477 d
+28477 32655 ay
+32655 33480 v
+33480 36060 s
+36060 37640 iy
+37640 39608 m
+39608 40101 dcl
+40101 40370 d
+40370 42830 f
+42830 44902 uw
+44902 45779 l
+45779 47090 ix
+47090 49480 sh
+49480 49925 epi
+49925 50400 n
+50400 54080 aw
+54080 56160 h#
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI2203.TXT b/tests/test_data/audio_WAV_annot_PHN/SI2203.TXT
@@ -0,0 +1 @@
+0 56218 The reasons for this dive seemed foolish now.
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI2203.WAV b/tests/test_data/audio_WAV_annot_PHN/SI2203.WAV
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI2203.WAV.wav b/tests/test_data/audio_WAV_annot_PHN/SI2203.WAV.wav
diff --git a/tests/test_data/audio_WAV_annot_PHN/SI2203.WRD b/tests/test_data/audio_WAV_annot_PHN/SI2203.WRD
@@ -0,0 +1,8 @@
+9580 10652 the
+10652 21040 reasons
+21040 23227 for
+23227 27560 this
+27560 33480 dive
+33480 40370 seemed
+40370 49480 foolish
+49925 54080 now
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		0 63488 She had your dark suit in greasy wash water all year.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		0 58061 Don't ask me to carry an oily rag like that.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		0 79565 His captain was thin and haggard and his beautiful boots were worn and shabby.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		0 56218 The reasons for this dive seemed foolish now.