Skip to content

Commit

Permalink
Merge pull request #75 from NickleDave/phn2annot-fixes
Browse files Browse the repository at this point in the history
LGTM, me!
  • Loading branch information
NickleDave authored Jan 12, 2021
2 parents 9ff0467 + 41a44b4 commit 53ee345
Show file tree
Hide file tree
Showing 31 changed files with 426 additions and 23 deletions.
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@
}

REQUIRED = [
'numpy', 'scipy', 'attrs', 'evfuncs', 'koumura',
'attrs',
'evfuncs',
'koumura',
'numpy',
'scipy',
'soundfile',
]

DEV_DEPS = [
Expand Down
7 changes: 2 additions & 5 deletions src/crowsetta/koumura.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
import os
from pathlib import Path
import numpy as np
import wave

import koumura
import soundfile

from .annotation import Annotation
from .sequence import Sequence
Expand Down Expand Up @@ -94,10 +94,7 @@ def koumura2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True,
f'.wav file {wav_filename} specified in '
f'annotation file {annot_path} is not found'
)
# found with %%timeit that Python wave module takes about 1/2 the time of
# scipy.io.wavfile for just reading sampling frequency from each file
with wave.open(wav_filename, 'rb') as wav_file:
samp_freq = wav_file.getframerate()
samp_freq = soundfile.info(wav_filename).samplerate
onsets_s = np.round(onsets_Hz / samp_freq, decimals=3)
offsets_s = np.round(offsets_Hz / samp_freq, decimals=3)

Expand Down
24 changes: 15 additions & 9 deletions src/crowsetta/phn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
"""
import os
from pathlib import Path
import wave

import numpy as np
import soundfile

from .sequence import Sequence
from .annotation import Annotation
Expand Down Expand Up @@ -55,7 +55,9 @@ def phn2annot(annot_path,
due to floating point error, e.g. when loading .phn files and then sending them to
a csv file, the result should be the same on Windows and Linux
"""
annot_path = _parse_file(annot_path, extension='.phn')
# note there are multiple valid extensions: both all-uppercase `.PHN` and all-lowercase `.phn`
# exist, depending on which version of the TIMIT dataset you have
annot_path = _parse_file(annot_path, extension=('.phn', '.PHN'))

if abspath and basename:
raise ValueError('abspath and basename arguments cannot both be set to True, '
Expand All @@ -77,13 +79,17 @@ def phn2annot(annot_path,
offsets_Hz = np.asarray(offsets_Hz)
labels = np.asarray(labels)

audio_pathname = str(
Path(a_phn).parent.joinpath(
Path(a_phn).stem + '.wav'
# checking for audio_pathname needs to be case-insensitive
# since some versions of the TIMIT dataset use .WAV instead of .wav
audio_pathname = Path(a_phn).parent.joinpath(Path(a_phn).stem + '.wav')
if not audio_pathname.exists():
audio_pathname = Path(a_phn).parent.joinpath(Path(a_phn).stem + '.WAV')
if not audio_pathname.exists():
raise FileNotFoundError(
f'did not find a matching file with extension .wav or .WAV for the .phn file:\n{a_phn}'
)
)
with wave.open(audio_pathname, 'rb') as wav_file:
samp_freq = wav_file.getframerate()

samp_freq = soundfile.info(audio_pathname).samplerate
onsets_s = onsets_Hz / samp_freq
offsets_s = offsets_Hz / samp_freq

Expand Down Expand Up @@ -140,7 +146,7 @@ def phn2csv(annot_path, csv_filename, abspath=False, basename=False):
-------
None
"""
annot_path = _parse_file(annot_path, extension='.phn')
annot_path = _parse_file(annot_path, extension=('.phn', '.PHN'))

if abspath and basename:
raise ValueError('abspath and basename arguments cannot both be set to True, '
Expand Down
37 changes: 31 additions & 6 deletions src/crowsetta/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,36 @@ def column_or_row_or_1d(y):


def _parse_file(file, extension):
"""helper function that parses/validates value for file argument;
puts a single string or Path into a list to iterate over it (cheap hack
that lets functions accept multiple types), and checks list to make sure
all types are consistent
"""check that all files have valid extensions,
convert into a list that can be iterated over
Parameters
----------
file : str, pathlib.Path, list
filename(s), list must be of str or pathlib.Path
extension : str, tuple
valid file extension(s). If a tuple, all elements must be strings.
Function expects that extensions will be specified with a period,
e.g. ('.phn', '.PHN')
Returns
-------
files_validated : list
of filenames, all having validated extensions
"""
if isinstance(extension, str):
extension = (extension,)
elif isinstance(extension, tuple):
if not all([isinstance(element, str) for element in extension]):
raise TypeError(
"must specify all valid extensions as strings, but value was \n"
f"'{extension}' with types: {[type(element) for element in extension]}"
)
else:
raise TypeError(
f'extension must be str or tuple but type was {type(extension)}'
)

if not(isinstance(file, str) or isinstance(file, PurePath) or isinstance(file, list)):
raise TypeError(
f"file must be a str or a pathlib.Path, but type of file was {type(file)}.\n"
Expand All @@ -94,8 +119,8 @@ def _parse_file(file, extension):
# (because using Path.suffixes() would require too much special casing)
a_file = str(a_file)
if not a_file.endswith(extension):
raise ValueError(f"all filenames in 'file' must end with '{extension}' "
f"but {a_file} does not")
raise ValueError(f"file does not have a valid extension: {a_file}"
f"valid extension(s) for filenames are: '{extension}'")
file_out.append(a_file)

return file_out
22 changes: 22 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
These are files from a version of TIMIT with audio files that end in .WAV,
and annotation files that end in .PHN.

The .PHN files are the same as .phn files; the only difference is that the extension is all uppercase.

The .WAV files are actually in a different format, specifically the NIST format.
This format can be parsed by `soundfile` but *not* by `wave` from the Python standard
library or `scipy.io.wavfile`.

Presumably the `.WAV.wav` files are the NIST format converted to a more
common .wav format.
I did verify for one file that both have the same sampling rate.
So literally I think they just removed the weird NIST header and
converted to a standard .wav file.

Both file types are added here so tests can verify that:
- `phn2annot` is case-insensitive, i.e., it parses .phn and .PHN files
- **and** `phn2annot` is case and "format" insensitive when parsing .wav
and .WAV files -- it's still able to figure out the sampling rate so
it can convert onset and offset times from sample number to seconds,
regardless of whether it's a `.wav` or a `.WAV` file

41 changes: 41 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SA1.PHN
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
0 9640 h#
9640 11240 sh
11240 12783 iy
12783 14078 hv
14078 16157 ae
16157 16880 dcl
16880 17103 d
17103 17587 y
17587 18760 er
18760 19720 dcl
19720 19962 d
19962 21514 aa
21514 22680 r
22680 23800 kcl
23800 24104 k
24104 26280 s
26280 28591 uw
28591 29179 dx
29179 30337 ih
30337 31880 ng
31880 32500 gcl
32500 33170 g
33170 33829 r
33829 35150 iy
35150 37370 s
37370 38568 iy
38568 40546 w
40546 42357 aa
42357 45119 sh
45119 45624 epi
45624 46855 w
46855 48680 aa
48680 49240 dx
49240 51033 er
51033 52378 q
52378 54500 ao
54500 55461 l
55461 57395 y
57395 59179 iy
59179 60600 axr
60600 63440 h#
1 change: 1 addition & 0 deletions tests/test_data/audio_WAV_annot_PHN/SA1.TXT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 63488 She had your dark suit in greasy wash water all year.
Binary file added tests/test_data/audio_WAV_annot_PHN/SA1.WAV
Binary file not shown.
Binary file added tests/test_data/audio_WAV_annot_PHN/SA1.WAV.wav
Binary file not shown.
11 changes: 11 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SA1.WRD
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
9640 12783 she
12783 17103 had
17103 18760 your
18760 24104 dark
24104 29179 suit
29179 31880 in
31880 38568 greasy
38568 45119 wash
45624 51033 water
52378 55461 all
55461 60600 year
34 changes: 34 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SA2.PHN
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
0 13227 h#
13227 13419 d
13419 15093 ow
15093 15907 n
15907 18250 ae
18250 19990 s
19990 21200 epi
21200 21600 m
21600 23120 iy
23120 23728 tcl
23728 23920 t
23920 24680 ix
24680 25360 kcl
25360 26270 k
26270 28114 eh
28114 30036 r
30036 31248 iy
31248 32110 ix
32110 33149 n
33149 36133 oy
36133 37595 l
37595 39080 iy
39080 40770 r
40770 43454 ae
43454 44410 gcl
44410 44850 g
44850 45824 l
45824 47400 ay
47400 48290 kcl
48290 49040 k
49040 50010 dh
50010 52840 ae
52840 55000 tcl
55000 58000 h#
1 change: 1 addition & 0 deletions tests/test_data/audio_WAV_annot_PHN/SA2.TXT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 58061 Don't ask me to carry an oily rag like that.
Binary file added tests/test_data/audio_WAV_annot_PHN/SA2.WAV
Binary file not shown.
Binary file added tests/test_data/audio_WAV_annot_PHN/SA2.WAV.wav
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SA2.WRD
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
13227 15907 don't
15907 19990 ask
21200 23120 me
23120 24680 to
24680 31248 carry
31248 33149 an
33149 39080 oily
39080 44850 rag
44850 49040 like
49040 55000 that
60 changes: 60 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SI1573.PHN
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
0 8920 h#
8920 9530 hh
9530 10694 ih
10694 12281 z
12281 12930 kcl
12930 13710 k
13710 15707 ae
15707 17540 pcl
17540 17830 t
17830 18798 ix
18798 20161 n
20161 21054 w
21054 21765 ax
21765 24187 s
24187 25260 th
25260 26840 ih
26840 27710 n
27710 28747 ae
28747 30036 n
30036 31558 hv
31558 34297 ae
34297 35100 gcl
35100 35410 g
35410 37240 er
37240 38004 dcl
38004 38210 d
38210 39234 ix
39234 40480 n
40480 41851 ih
41851 43550 z
43550 44470 bcl
44470 44730 b
44730 45736 y
45736 46520 ux
46520 47000 dx
47000 47880 ux
47880 49740 f
49740 51080 el
51080 52440 bcl
52440 52700 b
52700 54440 uw
54440 56040 tcl
56040 56379 t
56379 58100 s
58100 58720 epi
58720 59506 w
59506 60859 axr
60859 63324 w
63324 64450 ao
64450 65680 r
65680 66800 n
66800 67360 ix
67360 68560 n
68560 69000 epi
69000 70909 sh
70909 74200 ae
74200 75105 bcl
75105 75377 b
75377 76867 iy
76867 79520 h#
1 change: 1 addition & 0 deletions tests/test_data/audio_WAV_annot_PHN/SI1573.TXT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 79565 His captain was thin and haggard and his beautiful boots were worn and shabby.
Binary file added tests/test_data/audio_WAV_annot_PHN/SI1573.WAV
Binary file not shown.
Binary file not shown.
14 changes: 14 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SI1573.WRD
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
8920 12281 his
12281 20161 captain
20161 24187 was
24187 27710 thin
27710 30036 and
30036 38210 haggard
38210 40480 and
40480 43550 his
43550 51080 beautiful
51080 58100 boots
58720 60859 were
60859 66800 worn
66800 68560 and
69000 76867 shabby
32 changes: 32 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SI2203.PHN
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
0 9580 h#
9580 9840 dh
9840 10652 ix
10652 11997 r
11997 14640 iy
14640 16340 z
16340 16751 ax
16751 19140 n
19140 21040 z
21040 22200 f
22200 23227 axr
23227 24220 dh
24220 25240 ih
25240 27560 s
27560 28200 dcl
28200 28477 d
28477 32655 ay
32655 33480 v
33480 36060 s
36060 37640 iy
37640 39608 m
39608 40101 dcl
40101 40370 d
40370 42830 f
42830 44902 uw
44902 45779 l
45779 47090 ix
47090 49480 sh
49480 49925 epi
49925 50400 n
50400 54080 aw
54080 56160 h#
1 change: 1 addition & 0 deletions tests/test_data/audio_WAV_annot_PHN/SI2203.TXT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 56218 The reasons for this dive seemed foolish now.
Binary file added tests/test_data/audio_WAV_annot_PHN/SI2203.WAV
Binary file not shown.
Binary file not shown.
8 changes: 8 additions & 0 deletions tests/test_data/audio_WAV_annot_PHN/SI2203.WRD
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
9580 10652 the
10652 21040 reasons
21040 23227 for
23227 27560 this
27560 33480 dive
33480 40370 seemed
40370 49480 foolish
49925 54080 now
Loading

0 comments on commit 53ee345

Please sign in to comment.