From de67b7899e38774f4190540fa9ed0b06bbac77a3 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 2 Jan 2022 11:18:22 -0500 Subject: [PATCH] CLN: rename `on/offsets_Hz` -> `on/offset_inds`, fix #87 --- README.md | 12 +- doc/CHANGELOG.md | 4 +- doc/howto-user-format.rst | 12 +- doc/index.rst | 14 +- doc/notebooks/batlab2seq.py | 4 +- doc/notebooks/howto-user-format.ipynb | 12 +- doc/notebooks/tutorial.ipynb | 6 +- doc/tutorial.rst | 6 +- notebooks/batlab2seq.py | 4 +- notebooks/howto-user-format.ipynb | 12 +- notebooks/tutorial.ipynb | 6 +- src/crowsetta/birdsongrec.py | 12 +- src/crowsetta/csv.py | 8 +- src/crowsetta/phn.py | 22 +- src/crowsetta/segment.py | 24 +-- src/crowsetta/sequence.py | 188 +++++++++--------- src/crowsetta/textgrid.py | 2 +- src/crowsetta/yarden.py | 10 +- ...tion_with_onset_inds_offset_inds_None.csv} | 2 +- tests/data_for_tests/csv/gy6or6_032312.csv | 2 +- .../csv/missing_fields_in_header.csv | 2 +- .../csv/unrecognized_fields_in_header.csv | 2 +- tests/fixtures/segment.py | 12 +- tests/fixtures/sequence.py | 24 +-- tests/helpers/keywords.py | 14 +- tests/scripts/remake_test_csv.py | 2 +- tests/test_csv.py | 18 +- tests/test_phn.py | 8 +- tests/test_segment.py | 20 +- tests/test_sequence.py | 64 +++--- 30 files changed, 264 insertions(+), 264 deletions(-) rename tests/data_for_tests/csv/{example_annotation_with_onsets_Hz_offsets_Hz_None.csv => example_annotation_with_onset_inds_offset_inds_None.csv} (98%) diff --git a/README.md b/README.md index c27be256..aef1232f 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,17 @@ work with any format: namely, `Sequence`s made up of `Segment`s. >>> from crowsetta import Segment, Sequence >>> a_segment = Segment.from_keyword( ... label='a', - ... onset_Hz=16000, - ... offset_Hz=32000, + ... onset_ind=16000, + ... offset_ind=32000, ... file='bird21.wav' ... ) >>> list_of_segments = [a_segment] * 3 >>> seq = Sequence(segments=list_of_segments) >>> print(seq) - Sequence(segments=[Segment(label='a', onset_s=None, offset_s=None, onset_Hz=16000, - offset_Hz=32000, file='bird21.wav'), Segment(label='a', onset_s=None, offset_s=None, - onset_Hz=16000, offset_Hz=32000, file='bird21.wav'), Segment(label='a', onset_s=None, - offset_s=None, onset_Hz=16000, offset_Hz=32000, file='bird21.wav')]) + Sequence(segments=[Segment(label='a', onset_s=None, offset_s=None, onset_ind=16000, + offset_ind=32000, file='bird21.wav'), Segment(label='a', onset_s=None, offset_s=None, + onset_ind=16000, offset_ind=32000, file='bird21.wav'), Segment(label='a', onset_s=None, + offset_s=None, onset_ind=16000, offset_ind=32000, file='bird21.wav')]) ``` You can load annotation from your format of choice into `Sequence`s of `Segment`s diff --git a/doc/CHANGELOG.md b/doc/CHANGELOG.md index 80f8ab00..ce459212 100644 --- a/doc/CHANGELOG.md +++ b/doc/CHANGELOG.md @@ -193,8 +193,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## 0.2.0a5 ### added -- Sequence instances have attributes: labels, onsets_s, offsets_s, onsets_Hz, - offsets_Hz, and file. +- Sequence instances have attributes: labels, onsets_s, offsets_s, onset_inds, + offset_inds, and file. - Explanation of default `to_csv` function for user formats in `howto-user-config`. ### changed diff --git a/doc/howto-user-format.rst b/doc/howto-user-format.rst index 490c2751..0d56ee5c 100644 --- a/doc/howto-user-format.rst +++ b/doc/howto-user-format.rst @@ -225,8 +225,8 @@ us back an instance of a ``Sequence``. One such factory function is a_sequence = Sequence.from_keyword(labels=annot_dict['seg_types'], onsets_s=annot_dict['seg_start_times'], offsets_s=annot_dict['seg_end_times'], - onsets_Hz=annot_dict['seg_start_times_Hz'], - offsets_Hz=annot_dict['seg_end_times_Hz'], + onset_inds=annot_dict['seg_start_times_Hz'], + offset_inds=annot_dict['seg_end_times_Hz'], file=annot_dict['audio_file']) print("a_sequence:\n", a_sequence) @@ -268,8 +268,8 @@ Then at the end of your main loop, instead of making your labels=seg_types, onsets_s=seg_start_times, offsets_s=seg_end_times, - onsets_Hz=seg_start_times_Hz, - offsets_Hz=seg_end_times_Hz) + onset_inds=seg_start_times_Hz, + offset_inds=seg_end_times_Hz) seq_list.append(seq) return seq_list @@ -391,7 +391,7 @@ of ``Sequence``\ s from your format: First item in seq_list: First segment in first sequence: - Segment(label='1', file='lbr3009_0005_2017_04_27_06_14_46.wav', onset_s=0.0029761904761904934, offset_s=0.14150432900432905, onset_Hz=143, offset_Hz=6792) + Segment(label='1', file='lbr3009_0005_2017_04_27_06_14_46.wav', onset_s=0.0029761904761904934, offset_s=0.14150432900432905, onset_ind=143, offset_ind=6792) Notice that we also get a ``to_csv`` function for free: @@ -411,7 +411,7 @@ Notice that we also get a ``to_csv`` function for free: .. parsed-literal:: - ['label', 'onset_s', 'offset_s', 'onset_Hz', 'offset_Hz', 'file'] + ['label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind', 'file'] ['1', '0.0029761904761904934', '0.14150432900432905', '143', '6792', 'lbr3009_0005_2017_04_27_06_14_46.wav'] ['1', '0.279125', '0.504625', '13398', '24222', 'lbr3009_0005_2017_04_27_06_14_46.wav'] ['5', '0.5556472915365209', '0.5962916666666667', '26671', '28622', 'lbr3009_0005_2017_04_27_06_14_46.wav'] diff --git a/doc/index.rst b/doc/index.rst index 546c81a1..daeb4365 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -28,14 +28,14 @@ The code block below shows some of the features of these data types. >>> from crowsetta import Segment, Sequence >>> a_segment = Segment.from_keyword( ... label='a', - ... onset_Hz=16000, - ... offset_Hz=32000, + ... onset_ind=16000, + ... offset_ind=32000, ... file='bird21.wav' ... ) >>> another_segment = Segment.from_keyword( ... label='b', - ... onset_Hz=36000, - ... offset_Hz=48000, + ... onset_ind=36000, + ... offset_ind=48000, ... file='bird21.wav' ... ) >>> list_of_segments = [a_segment, another_segment] @@ -43,11 +43,11 @@ The code block below shows some of the features of these data types. >>> print(seq) >>> for segment in seq.segments: print(segment) - Segment(label='a', file='bird21.wav', onset_s=None, offset_s=None, onset_Hz=16000, offset_Hz=32000) - Segment(label='b', file='bird21.wav', onset_s=None, offset_s=None, onset_Hz=36000, offset_Hz=48000) + Segment(label='a', file='bird21.wav', onset_s=None, offset_s=None, onset_ind=16000, offset_ind=32000) + Segment(label='b', file='bird21.wav', onset_s=None, offset_s=None, onset_ind=36000, offset_ind=48000) >>> seq.file bird21.wav - >>> seq.onsets_Hz + >>> seq.onset_inds array([16000, 36000]) You load annotation from your format of choice into ``Sequence``\ s of ``Segment``\ s diff --git a/doc/notebooks/batlab2seq.py b/doc/notebooks/batlab2seq.py index 4bbe0657..ec37596e 100644 --- a/doc/notebooks/batlab2seq.py +++ b/doc/notebooks/batlab2seq.py @@ -57,7 +57,7 @@ def batlab2seq(mat_file): labels=seg_types, onsets_s=seg_start_times, offsets_s=seg_end_times, - onsets_Hz=seg_start_times_Hz, - offsets_Hz=seg_end_times_Hz) + onset_inds=seg_start_times_Hz, + offset_inds=seg_end_times_Hz) seq_list.append(seq) return seq_list diff --git a/doc/notebooks/howto-user-format.ipynb b/doc/notebooks/howto-user-format.ipynb index 5a42ca43..16ccb98e 100644 --- a/doc/notebooks/howto-user-format.ipynb +++ b/doc/notebooks/howto-user-format.ipynb @@ -212,8 +212,8 @@ "a_sequence = Sequence.from_keyword(labels=annot_dict['seg_types'],\n", " onsets_s=annot_dict['seg_start_times'],\n", " offsets_s=annot_dict['seg_end_times'],\n", - " onsets_Hz=annot_dict['seg_start_times_Hz'],\n", - " offsets_Hz=annot_dict['seg_end_times_Hz'],\n", + " onset_inds=annot_dict['seg_start_times_Hz'],\n", + " offset_inds=annot_dict['seg_end_times_Hz'],\n", " file=annot_dict['audio_file'])\n", "print(\"a_sequence:\\n\", a_sequence)" ] @@ -261,8 +261,8 @@ " labels=seg_types,\n", " onsets_s=seg_start_times,\n", " offsets_s=seg_end_times,\n", - " onsets_Hz=seg_start_times_Hz,\n", - " offsets_Hz=seg_end_times_Hz)\n", + " onset_inds=seg_start_times_Hz,\n", + " offset_inds=seg_end_times_Hz)\n", "seq_list.append(seq)\n", " return seq_list" ] @@ -370,7 +370,7 @@ "text": [ "First item in seq_list: \n", "First segment in first sequence:\n", - "Segment(label='1', file='lbr3009_0005_2017_04_27_06_14_46.wav', onset_s=0.0029761904761904934, offset_s=0.14150432900432905, onset_Hz=143, offset_Hz=6792)\n" + "Segment(label='1', file='lbr3009_0005_2017_04_27_06_14_46.wav', onset_s=0.0029761904761904934, offset_s=0.14150432900432905, onset_ind=143, offset_ind=6792)\n" ] } ], @@ -395,7 +395,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['label', 'onset_s', 'offset_s', 'onset_Hz', 'offset_Hz', 'file']\n", + "['label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind', 'file']\n", "['1', '0.0029761904761904934', '0.14150432900432905', '143', '6792', 'lbr3009_0005_2017_04_27_06_14_46.wav']\n", "['1', '0.279125', '0.504625', '13398', '24222', 'lbr3009_0005_2017_04_27_06_14_46.wav']\n", "['5', '0.5556472915365209', '0.5962916666666667', '26671', '28622', 'lbr3009_0005_2017_04_27_06_14_46.wav']\n" diff --git a/doc/notebooks/tutorial.ipynb b/doc/notebooks/tutorial.ipynb index 3627ab17..a511a5f1 100644 --- a/doc/notebooks/tutorial.ipynb +++ b/doc/notebooks/tutorial.ipynb @@ -278,8 +278,8 @@ "first element of seq: \n", "\n", "First two Segments of first Sequence:\n", - "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.435, offset_s=0.511, onset_Hz=13924, offset_Hz=16350)\n", - "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.583, offset_s=0.662, onset_Hz=18670, offset_Hz=21184)\n" + "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.435, offset_s=0.511, onset_ind=13924, offset_ind=16350)\n", + "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.583, offset_s=0.662, onset_ind=18670, offset_ind=21184)\n" ] } ], @@ -417,7 +417,7 @@ " smoothed = evfuncs.smooth_data(raw_audio, samp_freq,\n", " freq_cutoffs=(500, 10000))\n", " for segment in sequence.segments:\n", - " smoothed_seg = smoothed[segment.onset_Hz:segment.offset_Hz]\n", + " smoothed_seg = smoothed[segment.onset_ind:segment.offset_ind]\n", " mean_seg_amp = np.mean(smoothed_seg)\n", " syl_amp_dict[segment.label].append(mean_seg_amp)\n", "\n", diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 0cdb9594..cf34b0bc 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -205,8 +205,8 @@ part of the sequence defined by an ``onset`` and ``offset`` that has a first element of seq: First two Segments of first Sequence: - Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.435, offset_s=0.511, onset_Hz=13924, offset_Hz=16350) - Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.583, offset_s=0.662, onset_Hz=18670, offset_Hz=21184) + Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.435, offset_s=0.511, onset_ind=13924, offset_ind=16350) + Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.583, offset_s=0.662, onset_ind=18670, offset_ind=21184) **Using** ``crowsetta`` **data types to write clean code** @@ -329,7 +329,7 @@ the signal, and then smooths it with a sliding window. smoothed = evfuncs.smooth_data(raw_audio, samp_freq, freq_cutoffs=(500, 10000)) for segment in sequence.segments: - smoothed_seg = smoothed[segment.onset_Hz:segment.offset_Hz] + smoothed_seg = smoothed[segment.onset_ind:segment.offset_ind] mean_seg_amp = np.mean(smoothed_seg) syl_amp_dict[segment.label].append(mean_seg_amp) diff --git a/notebooks/batlab2seq.py b/notebooks/batlab2seq.py index 4bbe0657..ec37596e 100644 --- a/notebooks/batlab2seq.py +++ b/notebooks/batlab2seq.py @@ -57,7 +57,7 @@ def batlab2seq(mat_file): labels=seg_types, onsets_s=seg_start_times, offsets_s=seg_end_times, - onsets_Hz=seg_start_times_Hz, - offsets_Hz=seg_end_times_Hz) + onset_inds=seg_start_times_Hz, + offset_inds=seg_end_times_Hz) seq_list.append(seq) return seq_list diff --git a/notebooks/howto-user-format.ipynb b/notebooks/howto-user-format.ipynb index 5a42ca43..16ccb98e 100644 --- a/notebooks/howto-user-format.ipynb +++ b/notebooks/howto-user-format.ipynb @@ -212,8 +212,8 @@ "a_sequence = Sequence.from_keyword(labels=annot_dict['seg_types'],\n", " onsets_s=annot_dict['seg_start_times'],\n", " offsets_s=annot_dict['seg_end_times'],\n", - " onsets_Hz=annot_dict['seg_start_times_Hz'],\n", - " offsets_Hz=annot_dict['seg_end_times_Hz'],\n", + " onset_inds=annot_dict['seg_start_times_Hz'],\n", + " offset_inds=annot_dict['seg_end_times_Hz'],\n", " file=annot_dict['audio_file'])\n", "print(\"a_sequence:\\n\", a_sequence)" ] @@ -261,8 +261,8 @@ " labels=seg_types,\n", " onsets_s=seg_start_times,\n", " offsets_s=seg_end_times,\n", - " onsets_Hz=seg_start_times_Hz,\n", - " offsets_Hz=seg_end_times_Hz)\n", + " onset_inds=seg_start_times_Hz,\n", + " offset_inds=seg_end_times_Hz)\n", "seq_list.append(seq)\n", " return seq_list" ] @@ -370,7 +370,7 @@ "text": [ "First item in seq_list: \n", "First segment in first sequence:\n", - "Segment(label='1', file='lbr3009_0005_2017_04_27_06_14_46.wav', onset_s=0.0029761904761904934, offset_s=0.14150432900432905, onset_Hz=143, offset_Hz=6792)\n" + "Segment(label='1', file='lbr3009_0005_2017_04_27_06_14_46.wav', onset_s=0.0029761904761904934, offset_s=0.14150432900432905, onset_ind=143, offset_ind=6792)\n" ] } ], @@ -395,7 +395,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['label', 'onset_s', 'offset_s', 'onset_Hz', 'offset_Hz', 'file']\n", + "['label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind', 'file']\n", "['1', '0.0029761904761904934', '0.14150432900432905', '143', '6792', 'lbr3009_0005_2017_04_27_06_14_46.wav']\n", "['1', '0.279125', '0.504625', '13398', '24222', 'lbr3009_0005_2017_04_27_06_14_46.wav']\n", "['5', '0.5556472915365209', '0.5962916666666667', '26671', '28622', 'lbr3009_0005_2017_04_27_06_14_46.wav']\n" diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index 3627ab17..a511a5f1 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -278,8 +278,8 @@ "first element of seq: \n", "\n", "First two Segments of first Sequence:\n", - "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.435, offset_s=0.511, onset_Hz=13924, offset_Hz=16350)\n", - "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.583, offset_s=0.662, onset_Hz=18670, offset_Hz=21184)\n" + "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.435, offset_s=0.511, onset_ind=13924, offset_ind=16350)\n", + "Segment(label='i', file='./data/cbin-notmat/032312/gy6or6_baseline_230312_0819.190.cbin', onset_s=0.583, offset_s=0.662, onset_ind=18670, offset_ind=21184)\n" ] } ], @@ -417,7 +417,7 @@ " smoothed = evfuncs.smooth_data(raw_audio, samp_freq,\n", " freq_cutoffs=(500, 10000))\n", " for segment in sequence.segments:\n", - " smoothed_seg = smoothed[segment.onset_Hz:segment.offset_Hz]\n", + " smoothed_seg = smoothed[segment.onset_ind:segment.offset_ind]\n", " mean_seg_amp = np.mean(smoothed_seg)\n", " syl_amp_dict[segment.label].append(mean_seg_amp)\n", "\n", diff --git a/src/crowsetta/birdsongrec.py b/src/crowsetta/birdsongrec.py index 54217585..6f36d5a4 100644 --- a/src/crowsetta/birdsongrec.py +++ b/src/crowsetta/birdsongrec.py @@ -82,8 +82,8 @@ def birdsongrec2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True, annot_list = [] for seq_xml in seq_list_xml: - onsets_Hz = np.asarray([syl.position for syl in seq_xml.syls]) - offsets_Hz = np.asarray([syl.position + syl.length for syl in seq_xml.syls]) + onset_inds = np.asarray([syl.position for syl in seq_xml.syls]) + offset_inds = np.asarray([syl.position + syl.length for syl in seq_xml.syls]) labels = [syl.label for syl in seq_xml.syls] wav_filename = os.path.join(wavpath, seq_xml.wav_file) @@ -94,11 +94,11 @@ def birdsongrec2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True, f'annotation file {annot_path} is not found' ) samp_freq = soundfile.info(wav_filename).samplerate - onsets_s = np.round(onsets_Hz / samp_freq, decimals=3) - offsets_s = np.round(offsets_Hz / samp_freq, decimals=3) + onsets_s = np.round(onset_inds / samp_freq, decimals=3) + offsets_s = np.round(offset_inds / samp_freq, decimals=3) - seq = Sequence.from_keyword(onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + seq = Sequence.from_keyword(onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s, labels=labels diff --git a/src/crowsetta/csv.py b/src/crowsetta/csv.py index b8e1e56b..03da11b3 100644 --- a/src/crowsetta/csv.py +++ b/src/crowsetta/csv.py @@ -13,8 +13,8 @@ 'label', 'onset_s', 'offset_s', - 'onset_Hz', - 'offset_Hz', + 'onset_ind', + 'offset_ind', 'audio_path', 'annot_path', 'sequence', @@ -26,8 +26,8 @@ 'label': str, 'onset_s': float, 'offset_s': float, - 'onset_Hz': int, - 'offset_Hz': int, + 'onset_ind': int, + 'offset_ind': int, 'audio_path': str, 'annot_path': str, 'sequence': int, diff --git a/src/crowsetta/phn.py b/src/crowsetta/phn.py index 8beb78cf..f09de3d2 100644 --- a/src/crowsetta/phn.py +++ b/src/crowsetta/phn.py @@ -65,17 +65,17 @@ def phn2annot(annot_path, annot = [] for a_phn in annot_path: - labels, onsets_Hz, offsets_Hz = [], [], [] + labels, onset_inds, offset_inds = [], [], [] with open(a_phn) as fp: lines = fp.read().splitlines() for line in lines: onset, offset, label = line.split() - onsets_Hz.append(int(onset)) - offsets_Hz.append(int(offset)) + onset_inds.append(int(onset)) + offset_inds.append(int(offset)) labels.append(label) - onsets_Hz = np.asarray(onsets_Hz) - offsets_Hz = np.asarray(offsets_Hz) + onset_inds = np.asarray(onset_inds) + offset_inds = np.asarray(offset_inds) labels = np.asarray(labels) # checking for audio_pathname need to be case insensitive @@ -89,8 +89,8 @@ def phn2annot(annot_path, ) samp_freq = soundfile.info(audio_pathname).samplerate - onsets_s = onsets_Hz / samp_freq - offsets_s = offsets_Hz / samp_freq + onsets_s = onset_inds / samp_freq + offsets_s = offset_inds / samp_freq if round_times: onsets_s = np.around(onsets_s, decimals=decimals) @@ -104,8 +104,8 @@ def phn2annot(annot_path, a_phn = os.path.basename(a_phn) phn_seq = Sequence.from_keyword(labels=labels, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s) annot.append( @@ -174,8 +174,8 @@ def annot2phn(annot, annot_path = Path(annot_path) lines = [] - onsets_Hz, offsets_Hz, labels = annot.seq.onsets_Hz, annot.seq.offsets_Hz, annot.seq.labels - for onset, offset, label in zip(onsets_Hz, offsets_Hz, labels): + onset_inds, offset_inds, labels = annot.seq.onset_inds, annot.seq.offset_inds, annot.seq.labels + for onset, offset, label in zip(onset_inds, offset_inds, labels): lines.append( f'{onset} {offset} {label}\n' ) diff --git a/src/crowsetta/segment.py b/src/crowsetta/segment.py index fe2f82d8..de6a2e86 100644 --- a/src/crowsetta/segment.py +++ b/src/crowsetta/segment.py @@ -24,13 +24,13 @@ def int_or_None(val): class Segment(object): """object that represents a segment of a time series, usually a syllable in a bout of birdsong""" - _FIELDS = ('label', 'onset_s', 'offset_s', 'onset_Hz', 'offset_Hz') + _FIELDS = ('label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind') label = attr.ib(converter=str) onset_s = attr.ib(converter=attr.converters.optional(float_or_None)) offset_s = attr.ib(converter=attr.converters.optional(float_or_None)) - onset_Hz = attr.ib(converter=attr.converters.optional(int_or_None)) - offset_Hz = attr.ib(converter=attr.converters.optional(int_or_None)) + onset_ind = attr.ib(converter=attr.converters.optional(int_or_None)) + offset_ind = attr.ib(converter=attr.converters.optional(int_or_None)) asdict = attr.asdict @classmethod @@ -64,20 +64,20 @@ def from_row(cls, row, header=None): @classmethod def from_keyword(cls, label, onset_s=None, offset_s=None, - onset_Hz=None, offset_Hz=None): - if ((onset_Hz is None and offset_Hz is None) and + onset_ind=None, offset_ind=None): + if ((onset_ind is None and offset_ind is None) and (onset_s is None and offset_s is None)): - raise ValueError('must provide either onset_Hz and offset_Hz, or ' + raise ValueError('must provide either onset_ind and offset_ind, or ' 'onsets_s and offsets_s') - if onset_Hz and offset_Hz is None: - raise ValueError(f'onset_Hz specified as {onset_Hz} but offset_Hz is None') - if onset_Hz is None and offset_Hz: - raise ValueError(f'offset_Hz specified as {offset_Hz} but onset_Hz is None') + if onset_ind and offset_ind is None: + raise ValueError(f'onset_ind specified as {onset_ind} but offset_ind is None') + if onset_ind is None and offset_ind: + raise ValueError(f'offset_ind specified as {offset_ind} but onset_ind is None') if onset_s and offset_s is None: raise ValueError(f'onset_s specified as {onset_s} but offset_s is None') if onset_s is None and offset_s: - raise ValueError(f'offset_s specified as {offset_Hz} but onset_s is None') + raise ValueError(f'offset_s specified as {offset_ind} but onset_s is None') return cls(label=label, onset_s=onset_s, offset_s=offset_s, - onset_Hz=onset_Hz, offset_Hz=offset_Hz) + onset_ind=onset_ind, offset_ind=offset_ind) diff --git a/src/crowsetta/sequence.py b/src/crowsetta/sequence.py index 79cdfb1c..6d910eb4 100644 --- a/src/crowsetta/sequence.py +++ b/src/crowsetta/sequence.py @@ -14,9 +14,9 @@ class Sequence: ---------- segments : tuple of Segment objects. - onsets_Hz : numpy.ndarray or None + onset_inds : numpy.ndarray or None of type int, onset of each annotated segment in samples/second - offsets_Hz : numpy.ndarray or None + offset_inds : numpy.ndarray or None of type int, offset of each annotated segment in samples/second onsets_s : numpy.ndarray or None of type float, onset of each annotated segment in seconds @@ -38,17 +38,17 @@ def __init__(self, labels, onsets_s=None, offsets_s=None, - onsets_Hz=None, - offsets_Hz=None): + onset_inds=None, + offset_inds=None): """Sequence __init__ Parameters ---------- segments : list or tuple of Segment objects. - onsets_Hz : numpy.ndarray or None + onset_inds : numpy.ndarray or None of type int, onset of each annotated segment in samples/second - offsets_Hz : numpy.ndarray or None + offset_inds : numpy.ndarray or None of type int, offset of each annotated segment in samples/second onsets_s : numpy.ndarray or None of type float, onset of each annotated segment in seconds @@ -72,12 +72,12 @@ def __init__(self, (onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels) = self._validate_onsets_offsets_labels(onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels) self._validate_segments_type(segments) @@ -85,8 +85,8 @@ def __init__(self, super().__setattr__('_segments', segments) super().__setattr__('_onsets_s', onsets_s) super().__setattr__('_offsets_s', offsets_s) - super().__setattr__('_onsets_Hz', onsets_Hz) - super().__setattr__('_offsets_Hz', offsets_Hz) + super().__setattr__('_onset_inds', onset_inds) + super().__setattr__('_offset_inds', offset_inds) super().__setattr__('_labels', labels) @property @@ -102,12 +102,12 @@ def offsets_s(self): return self._offsets_s @property - def onsets_Hz(self): - return self._onsets_Hz + def onset_inds(self): + return self._onset_inds @property - def offsets_Hz(self): - return self._offsets_Hz + def offset_inds(self): + return self._offset_inds @property def labels(self): @@ -117,8 +117,8 @@ def __hash__(self): list_for_hash = [self._segments, self._onsets_s, self._offsets_s, - self._onsets_Hz, - self._offsets_Hz, + self._onset_inds, + self._offset_inds, self._labels] list_for_hash = [tuple(item.tolist()) if type(item) == np.ndarray @@ -137,7 +137,7 @@ def __eq__(self, other): eq = [] for attr in ['_segments', '_labels', '_onsets_s', '_offsets_s', - '_onsets_Hz', '_offsets_Hz']: + '_onset_inds', '_offset_inds']: self_attr = getattr(self, attr) other_attr = getattr(other, attr) if type(self_attr) == np.ndarray: @@ -195,8 +195,8 @@ def _validate_segments_type(segments): @staticmethod def _validate_onsets_offsets_labels(onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels): """validate onsets, offsets, and labels passed to __init__ or class methods @@ -204,63 +204,63 @@ def _validate_onsets_offsets_labels(onsets_s, ---------- onsets_s : numpy.ndarray or None offsets_s : numpy.ndarray or None - onsets_Hz : numpy.ndarray or None - offsets_Hz : numpy.ndarray or None + onset_inds : numpy.ndarray or None + offset_inds : numpy.ndarray or None labels : str, list, or tuple Returns ------- onsets_s : numpy.ndarray offsets_s : numpy.ndarray - onsets_Hz : numpy.ndarray - offsets_Hz : numpy.ndarray + onset_inds : numpy.ndarray + offset_inds : numpy.ndarray labels : numpy.ndarray """ - # make sure user passed either onsets_Hz and offsets_Hz, or + # make sure user passed either onset_inds and offset_inds, or # onsets_s and offsets_s, or both. # first make sure at least one pair of onsets and offsets is specified - if ((onsets_Hz is None and offsets_Hz is None) and + if ((onset_inds is None and offset_inds is None) and (onsets_s is None and offsets_s is None)): - raise ValueError('must provide either onsets_Hz and offsets_Hz, or ' + raise ValueError('must provide either onset_inds and offset_inds, or ' 'onsets_s and offsets_s') # then make sure both elements of each pair are specified - if onsets_Hz is not None and offsets_Hz is None: - raise ValueError(f'onset_Hz specified as {onsets_Hz} but offset_Hz is None') - if onsets_Hz is None and offsets_Hz is not None: - raise ValueError(f'offset_Hz specified as {offsets_Hz} but onset_Hz is None') + if onset_inds is not None and offset_inds is None: + raise ValueError(f'onset_ind specified as {onset_inds} but offset_ind is None') + if onset_inds is None and offset_inds is not None: + raise ValueError(f'offset_ind specified as {offset_inds} but onset_ind is None') if onsets_s is not None and offsets_s is None: raise ValueError(f'onset_s specified as {onsets_s} but offset_s is None') if onsets_s is None and offsets_s is not None: - raise ValueError(f'offset_s specified as {offsets_Hz} but onset_s is None') + raise ValueError(f'offset_s specified as {offset_inds} but onset_s is None') # then do type/shape checking on onsets and offsets; # also make sure everybody is the same length - if (not (onsets_Hz is None and offsets_Hz is None) and - not (np.all(onsets_Hz == None) and np.all(offsets_Hz == None))): - onsets_Hz = column_or_row_or_1d(onsets_Hz) - offsets_Hz = column_or_row_or_1d(offsets_Hz) + if (not (onset_inds is None and offset_inds is None) and + not (np.all(onset_inds == None) and np.all(offset_inds == None))): + onset_inds = column_or_row_or_1d(onset_inds) + offset_inds = column_or_row_or_1d(offset_inds) - if onsets_Hz.dtype != int or offsets_Hz.dtype != int: - raise TypeError('dtype of onsets_Hz and offsets_Hz ' + if onset_inds.dtype != int or offset_inds.dtype != int: + raise TypeError('dtype of onset_inds and offset_inds ' 'must be some kind of int') try: - check_consistent_length([labels, onsets_Hz, offsets_Hz]) + check_consistent_length([labels, onset_inds, offset_inds]) except ValueError: # try to give human-interpretable-error message - if not (onsets_Hz.shape[0] == offsets_Hz.shape[0]): - raise ValueError('onsets_Hz and offsets_Hz have different lengths: ' - f'labels: {onsets_Hz.shape[0]}, ' - f'onsets_Hz: {offsets_Hz.shape[0]}') - if not (labels.shape[0] == onsets_Hz.shape[0]): - raise ValueError('labels and onsets_Hz have different lengths: ' + if not (onset_inds.shape[0] == offset_inds.shape[0]): + raise ValueError('onset_inds and offset_inds have different lengths: ' + f'labels: {onset_inds.shape[0]}, ' + f'onset_inds: {offset_inds.shape[0]}') + if not (labels.shape[0] == onset_inds.shape[0]): + raise ValueError('labels and onset_inds have different lengths: ' f'labels: {labels.shape[0]}, ' - f'onsets_Hz: {onsets_Hz.shape[0]}') - if not (labels.shape[0] == offsets_Hz.shape[0]): - raise ValueError('labels and offsets_Hz have different lengths: ' + f'onset_inds: {onset_inds.shape[0]}') + if not (labels.shape[0] == offset_inds.shape[0]): + raise ValueError('labels and offset_inds have different lengths: ' f'labels: {labels.shape[0]}, ' - f'onsets_Hz: {offsets_Hz.shape[0]}') + f'onset_inds: {offset_inds.shape[0]}') if (not (onsets_s is None and offsets_s is None) and not (np.all(onsets_s == None) and np.all(offsets_s == None))): @@ -272,33 +272,33 @@ def _validate_onsets_offsets_labels(onsets_s, 'must be some kind of float') try: - check_consistent_length([labels, onsets_Hz, offsets_Hz]) + check_consistent_length([labels, onset_inds, offset_inds]) except ValueError: # try to give human-interpretable-error message if not (onsets_s.shape[0] == offsets_s.shape[0]): - raise ValueError('onsets_Hz and offsets_Hz have different lengths: ' + raise ValueError('onset_inds and offset_inds have different lengths: ' f'labels: {onsets_s.shape[0]}, ' - f'onsets_Hz: {offsets_s.shape[0]}') + f'onset_inds: {offsets_s.shape[0]}') if not (labels.shape[0] == onsets_s.shape[0]): raise ValueError('labels and onsets_s have different lengths: ' f'labels: {labels.shape[0]}, ' - f'onsets_Hz: {onsets_Hz.shape[0]}') - if not (labels.shape[0] == offsets_Hz.shape[0]): - raise ValueError('labels and offsets_Hz have different lengths: ' + f'onset_inds: {onset_inds.shape[0]}') + if not (labels.shape[0] == offset_inds.shape[0]): + raise ValueError('labels and offset_inds have different lengths: ' f'labels: {labels.shape[0]}, ' - f'onsets_Hz: {offsets_Hz.shape[0]}') + f'onset_inds: {offset_inds.shape[0]}') num_samples = _num_samples(labels) # need to make arrays to iterate over for onsets and offsets that are None - if onsets_Hz is None and offsets_Hz is None: - onsets_Hz = np.asarray([None] * num_samples) - offsets_Hz = np.asarray([None] * num_samples) + if onset_inds is None and offset_inds is None: + onset_inds = np.asarray([None] * num_samples) + offset_inds = np.asarray([None] * num_samples) elif onsets_s is None and offsets_s is None: onsets_s = np.asarray([None] * num_samples) offsets_s = np.asarray([None] * num_samples) - return onsets_s, offsets_s, onsets_Hz, offsets_Hz, labels + return onsets_s, offsets_s, onset_inds, offset_inds, labels @classmethod def from_segments(cls, segments): @@ -317,52 +317,52 @@ def from_segments(cls, segments): onsets_s = [] offsets_s = [] - onsets_Hz = [] - offsets_Hz = [] + onset_inds = [] + offset_inds = [] labels = [] for seg in segments: onsets_s.append(seg.onset_s) offsets_s.append(seg.offset_s) - onsets_Hz.append(seg.onset_Hz) - offsets_Hz.append(seg.offset_Hz) + onset_inds.append(seg.onset_ind) + offset_inds.append(seg.offset_ind) labels.append(seg.label) onsets_s = np.asarray(onsets_s) offsets_s = np.asarray(offsets_s) - onsets_Hz = np.asarray(onsets_Hz) - offsets_Hz = np.asarray(offsets_Hz) + onset_inds = np.asarray(onset_inds) + offset_inds = np.asarray(offset_inds) labels = np.asarray(labels) labels = cls._convert_labels(labels) (onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels) = cls._validate_onsets_offsets_labels(onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels) return cls(segments, labels, onsets_s, offsets_s, - onsets_Hz, - offsets_Hz) + onset_inds, + offset_inds) @classmethod - def from_keyword(cls, labels, onsets_Hz=None, offsets_Hz=None, + def from_keyword(cls, labels, onset_inds=None, offset_inds=None, onsets_s=None, offsets_s=None): """construct a Sequence from keyword arguments Parameters ---------- - onsets_Hz : numpy.ndarray or None + onset_inds : numpy.ndarray or None of type int, onset of each annotated segment in samples/second - offsets_Hz : numpy.ndarray or None + offset_inds : numpy.ndarray or None of type int, offset of each annotated segment in samples/second onsets_s : numpy.ndarray or None of type float, onset of each annotated segment in seconds @@ -377,20 +377,20 @@ def from_keyword(cls, labels, onsets_Hz=None, offsets_Hz=None, (onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels) = cls._validate_onsets_offsets_labels(onsets_s, offsets_s, - onsets_Hz, - offsets_Hz, + onset_inds, + offset_inds, labels) segments = [] - zipped = zip(labels, onsets_Hz, offsets_Hz, onsets_s, offsets_s) - for label, onset_Hz, offset_Hz, onset_s, offset_s in zipped: + zipped = zip(labels, onset_inds, offset_inds, onsets_s, offsets_s) + for label, onset_ind, offset_ind, onset_s, offset_s in zipped: segments.append(Segment.from_keyword(label=label, - onset_Hz=onset_Hz, - offset_Hz=offset_Hz, + onset_ind=onset_ind, + offset_ind=offset_ind, onset_s=onset_s, offset_s=offset_s)) @@ -398,8 +398,8 @@ def from_keyword(cls, labels, onsets_Hz=None, offsets_Hz=None, labels, onsets_s, offsets_s, - onsets_Hz, - offsets_Hz + onset_inds, + offset_inds ) @classmethod @@ -411,9 +411,9 @@ def from_dict(cls, seq_dict): ---------- seq_dict : dict with following key, value pairs - onsets_Hz : numpy.ndarray or None + onset_inds : numpy.ndarray or None of type int, onset of each annotated segment in samples/second - offsets_Hz : numpy.ndarray or None + offset_inds : numpy.ndarray or None of type int, offset of each annotated segment in samples/second onsets_s : numpy.ndarray or None of type float, onset of each annotated segment in seconds @@ -429,8 +429,8 @@ def from_dict(cls, seq_dict): -------- >>> seq_dict = { ... 'labels': 'abc', - ... 'onsets_Hz': np.asarray([16005, 17925, 19837]), - ... 'offsets_Hz': np.asarray([17602, 19520, 21435]), + ... 'onset_inds': np.asarray([16005, 17925, 19837]), + ... 'offset_inds': np.asarray([17602, 19520, 21435]), ... 'file': 'bird0.wav', ... } >>> seq = Sequence.from_dict(seq_dict) @@ -450,9 +450,9 @@ def as_dict(self): ------- seq_dict : dict with the following key, value pairs: - onsets_Hz : numpy.ndarray or None + onset_inds : numpy.ndarray or None of type int, onset of each annotated segment in samples/second - offsets_Hz : numpy.ndarray or None + offset_inds : numpy.ndarray or None of type int, offset of each annotated segment in samples/second onsets_s : numpy.ndarray or None of type float, onset of each annotated segment in seconds @@ -461,12 +461,12 @@ def as_dict(self): labels : numpy.ndarray of type str; label for each annotated segment """ - seq_keys = ['onsets_Hz', 'offsets_Hz', 'onsets_s', 'offsets_s', 'labels'] + seq_keys = ['onset_inds', 'offset_inds', 'onsets_s', 'offsets_s', 'labels'] seq_dict = dict(zip( seq_keys, [getattr(self, seq_key) for seq_key in seq_keys] )) - for a_key in ['onsets_Hz', 'offsets_Hz', 'onsets_s', 'offsets_s']: + for a_key in ['onset_inds', 'offset_inds', 'onsets_s', 'offsets_s']: # if value is an array full of Nones, just convert to one None. # Use == to do elementwise comparison (so ignore warnings about # 'comparison with None performed with equality operators') diff --git a/src/crowsetta/textgrid.py b/src/crowsetta/textgrid.py index c6e12547..44601e3c 100644 --- a/src/crowsetta/textgrid.py +++ b/src/crowsetta/textgrid.py @@ -91,7 +91,7 @@ def textgrid2annot(annot_path, onsets_s = np.asarray(onsets_s) offsets_s = np.asarray(offsets_s) - # do this *after* converting onsets_s and offsets_s to onsets_Hz and offsets_Hz + # do this *after* converting onsets_s and offsets_s to onset_inds and offset_inds # probably doesn't matter but why introduce more noise? if round_times: onsets_s = np.around(onsets_s, decimals=decimals) diff --git a/src/crowsetta/yarden.py b/src/crowsetta/yarden.py index 5e1c7f66..43c40f94 100644 --- a/src/crowsetta/yarden.py +++ b/src/crowsetta/yarden.py @@ -159,14 +159,14 @@ def yarden2annot(annot_path, ) # we want to wait to add file to seq dict until *after* casting all values in dict to numpy arrays samp_freq = annotation[samp_freq_key].tolist() - seq_dict['onsets_Hz'] = np.round(seq_dict['onsets_s'] * samp_freq).astype(int) - seq_dict['offsets_Hz'] = np.round(seq_dict['offsets_s'] * samp_freq).astype(int) + seq_dict['onset_inds'] = np.round(seq_dict['onsets_s'] * samp_freq).astype(int) + seq_dict['offset_inds'] = np.round(seq_dict['offsets_s'] * samp_freq).astype(int) - # do this *after* converting onsets_s and offsets_s to onsets_Hz and offsets_Hz + # do this *after* converting onsets_s and offsets_s to onset_inds and offset_inds # probably doesn't matter but why introduce more noise? if round_times: - seq_dict['onsets_Hz'] = np.around(seq_dict['onsets_Hz'], decimals=decimals) - seq_dict['offsets_Hz'] = np.around(seq_dict['offsets_Hz'], decimals=decimals) + seq_dict['onset_inds'] = np.around(seq_dict['onset_inds'], decimals=decimals) + seq_dict['offset_inds'] = np.around(seq_dict['offset_inds'], decimals=decimals) seq = Sequence.from_dict(seq_dict) annot = Annotation(seq=seq, diff --git a/tests/data_for_tests/csv/example_annotation_with_onsets_Hz_offsets_Hz_None.csv b/tests/data_for_tests/csv/example_annotation_with_onset_inds_offset_inds_None.csv similarity index 98% rename from tests/data_for_tests/csv/example_annotation_with_onsets_Hz_offsets_Hz_None.csv rename to tests/data_for_tests/csv/example_annotation_with_onset_inds_offset_inds_None.csv index 4851987c..7b21f706 100644 --- a/tests/data_for_tests/csv/example_annotation_with_onsets_Hz_offsets_Hz_None.csv +++ b/tests/data_for_tests/csv/example_annotation_with_onset_inds_offset_inds_None.csv @@ -1,4 +1,4 @@ -label,onset_s,offset_s,onset_Hz,offset_Hz,audio_path,annot_path,sequence,annotation +label,onset_s,offset_s,onset_ind,offset_ind,audio_path,annot_path,sequence,annotation 1,0.0029761904761904934,0.14150432900432905,None,None,lbr3009_0005_2017_04_27_06_14_46.wav,../test_data/example_user_format/bird1_annotation.mat,0,0 1,0.279125,0.504625,None,None,lbr3009_0005_2017_04_27_06_14_46.wav,../test_data/example_user_format/bird1_annotation.mat,0,0 5,0.5556472915365209,0.5962916666666667,None,None,lbr3009_0005_2017_04_27_06_14_46.wav,../test_data/example_user_format/bird1_annotation.mat,0,0 diff --git a/tests/data_for_tests/csv/gy6or6_032312.csv b/tests/data_for_tests/csv/gy6or6_032312.csv index d9d96cf5..b8c27af8 100644 --- a/tests/data_for_tests/csv/gy6or6_032312.csv +++ b/tests/data_for_tests/csv/gy6or6_032312.csv @@ -1,4 +1,4 @@ -label,onset_s,offset_s,onset_Hz,offset_Hz,audio_path,annot_path,sequence,annotation +label,onset_s,offset_s,onset_ind,offset_ind,audio_path,annot_path,sequence,annotation i,1.278,1.351,None,None,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0 i,1.452,1.536,None,None,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0 i,1.605,1.712,None,None,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0 diff --git a/tests/data_for_tests/csv/missing_fields_in_header.csv b/tests/data_for_tests/csv/missing_fields_in_header.csv index 44dddd6f..0c209f12 100644 --- a/tests/data_for_tests/csv/missing_fields_in_header.csv +++ b/tests/data_for_tests/csv/missing_fields_in_header.csv @@ -1,4 +1,4 @@ -filename,onset_Hz,offset_Hz,onset_s,offset_s +filename,onset_ind,offset_ind,onset_s,offset_s gy6or6_baseline_230312_0808.138.cbin,40888,43239,1.278,1.351,i gy6or6_baseline_230312_0808.138.cbin,46478,49146,1.452,1.536,i gy6or6_baseline_230312_0808.138.cbin,51370,54774,1.605,1.712,i diff --git a/tests/data_for_tests/csv/unrecognized_fields_in_header.csv b/tests/data_for_tests/csv/unrecognized_fields_in_header.csv index 035ce890..7ba3e1d8 100644 --- a/tests/data_for_tests/csv/unrecognized_fields_in_header.csv +++ b/tests/data_for_tests/csv/unrecognized_fields_in_header.csv @@ -1,4 +1,4 @@ -filename,onset_Hz,offset_Hz,onset_s,offset_s,label,unrecognized +filename,onset_ind,offset_ind,onset_s,offset_s,label,unrecognized gy6or6_baseline_230312_0808.138.cbin,40888,43239,1.278,1.351,i gy6or6_baseline_230312_0808.138.cbin,46478,49146,1.452,1.536,i gy6or6_baseline_230312_0808.138.cbin,51370,54774,1.605,1.712,i diff --git a/tests/fixtures/segment.py b/tests/fixtures/segment.py index 56b58656..b8ce6382 100644 --- a/tests/fixtures/segment.py +++ b/tests/fixtures/segment.py @@ -7,7 +7,7 @@ def list_of_segments(): list_of_segments = [] - for label, onset_Hz, offset_Hz in zip( + for label, onset_ind, offset_ind in zip( ('a', 'b', 'c'), (16000, 32000, 64000), (17000, 33000, 65000) @@ -15,8 +15,8 @@ def list_of_segments(): list_of_segments.append( Segment.from_keyword( label=label, - onset_Hz=onset_Hz, - offset_Hz=offset_Hz, + onset_ind=onset_ind, + offset_ind=offset_ind, ) ) @@ -27,7 +27,7 @@ def list_of_segments(): def different_list_of_segments(): list_of_segments = [] - for label, onset_Hz, offset_Hz in zip( + for label, onset_ind, offset_ind in zip( ('a', 'b', 'c', 'd'), (16000, 32000, 64000, 128000), (17100, 33100, 65100, 129100) @@ -35,8 +35,8 @@ def different_list_of_segments(): list_of_segments.append( Segment.from_keyword( label=label, - onset_Hz=onset_Hz, - offset_Hz=offset_Hz, + onset_ind=onset_ind, + offset_ind=offset_ind, ) ) diff --git a/tests/fixtures/sequence.py b/tests/fixtures/sequence.py index bf5085b9..50c57461 100644 --- a/tests/fixtures/sequence.py +++ b/tests/fixtures/sequence.py @@ -7,15 +7,15 @@ @pytest.fixture def a_seq(list_of_segments): - (onsets_Hz, - offsets_Hz, + (onset_inds, + offset_inds, onsets_s, offsets_s, labels) = keywords.from_segments(list_of_segments) a_seq = Sequence(segments=list_of_segments, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s, labels=labels, @@ -25,15 +25,15 @@ def a_seq(list_of_segments): @pytest.fixture def same_seq(list_of_segments): - (onsets_Hz, - offsets_Hz, + (onset_inds, + offset_inds, onsets_s, offsets_s, labels) = keywords.from_segments(list_of_segments) same_seq = Sequence(segments=list_of_segments, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s, labels=labels, @@ -43,15 +43,15 @@ def same_seq(list_of_segments): @pytest.fixture def different_seq(different_list_of_segments): - (onsets_Hz, - offsets_Hz, + (onset_inds, + offset_inds, onsets_s, offsets_s, labels) = keywords.from_segments(different_list_of_segments) different_seq = Sequence(segments=different_list_of_segments, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s, labels=labels, diff --git a/tests/helpers/keywords.py b/tests/helpers/keywords.py index 1aadc7c6..a16fd27a 100644 --- a/tests/helpers/keywords.py +++ b/tests/helpers/keywords.py @@ -3,18 +3,18 @@ def from_segments(segments): labels = [] - onsets_Hz = [] - offsets_Hz = [] + onset_inds = [] + offset_inds = [] onsets_s = [] offsets_s = [] for seg in segments: labels.append(seg.label) - onsets_Hz.append(seg.onset_Hz) - offsets_Hz.append(seg.offset_Hz) + onset_inds.append(seg.onset_ind) + offset_inds.append(seg.offset_ind) onsets_s.append(None) offsets_s.append(None) - onsets_Hz = np.asarray(onsets_Hz) - offsets_Hz = np.asarray(offsets_Hz) + onset_inds = np.asarray(onset_inds) + offset_inds = np.asarray(offset_inds) onsets_s = np.asarray(onsets_s) offsets_s = np.asarray(offsets_s) - return onsets_Hz, offsets_Hz, onsets_s, offsets_s, labels + return onset_inds, offset_inds, onsets_s, offsets_s, labels diff --git a/tests/scripts/remake_test_csv.py b/tests/scripts/remake_test_csv.py index 3dea1a68..acb7c0fa 100644 --- a/tests/scripts/remake_test_csv.py +++ b/tests/scripts/remake_test_csv.py @@ -23,7 +23,7 @@ def main(): TEST_DATA.joinpath('example_user_format', 'bird1_annotation.mat') ) csv_filename = str( - TEST_DATA.joinpath('csv', 'example_annotation_with_onsets_Hz_offsets_Hz_None.csv') + TEST_DATA.joinpath('csv', 'example_annotation_with_onset_inds_offset_inds_None.csv') ) scribe.to_csv(mat_file, csv_filename=csv_filename) diff --git a/tests/test_csv.py b/tests/test_csv.py index a0a951aa..0196e4f2 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -48,9 +48,9 @@ def test_annot2csv_when_one_pair_of_onsets_and_offsets_is_None(tmp_path, example_user_format_root, example_user_format_annotation_file): # example2seq only gets onset and offset times in seconds - # so onset_Hz and offset_Hz will be None + # so onset_ind and offset_ind will be None # Test that we can make a csv that has 'None' in columns for - # onset_Hz and offset_Hz when they are None for each segment + # onset_ind and offset_ind when they are None for each segment sys.path.append(str(example_user_format_root)) example_module = import_module(name='example') sys.path.remove(str(example_user_format_root)) @@ -58,7 +58,7 @@ def test_annot2csv_when_one_pair_of_onsets_and_offsets_is_None(tmp_path, annot_list = example_module.example2annot(annot_path=example_user_format_annotation_file) assert( all( - [seg.onset_Hz is None and seg.offset_Hz is None + [seg.onset_ind is None and seg.offset_ind is None for annot in annot_list for seg in annot.seq.segments] ) ) @@ -69,10 +69,10 @@ def test_annot2csv_when_one_pair_of_onsets_and_offsets_is_None(tmp_path, with open(csv_filename, 'r', newline='') as csvfile: reader = csv.reader(csvfile) header = next(reader) - onset_Hz_ind = header.index('onset_Hz') - offset_Hz_ind = header.index('offset_Hz') + onset_ind_ind = header.index('onset_ind') + offset_ind_ind = header.index('offset_ind') for row in reader: - assert row[onset_Hz_ind] == 'None' and row[offset_Hz_ind] == 'None' + assert row[onset_ind_ind] == 'None' and row[offset_ind_ind] == 'None' def test_toannot_func_to_csv_with_builtin_format(test_data_root, tmp_path): @@ -141,12 +141,12 @@ def test_csv2annot_missing_fields_raises(test_data_root): def test_csv2annot_when_one_pair_of_onsets_and_offsets_is_None(test_data_root): # Test that we can load a csv that has 'None' in columns for - # onset_Hz and offset_Hz, so that they are None for each segment + # onset_ind and offset_ind, so that they are None for each segment csv_with_None_columns = test_data_root.joinpath( - 'csv/example_annotation_with_onsets_Hz_offsets_Hz_None.csv' + 'csv/example_annotation_with_onset_inds_offset_inds_None.csv' ) seq = crowsetta.csv.csv2annot(csv_filename=csv_with_None_columns) assert ( - seg.onset_Hz is None and seg.offset_Hz is None + seg.onset_ind is None and seg.offset_ind is None for a_seq in seq for seg in a_seq.segments ) diff --git a/tests/test_phn.py b/tests/test_phn.py index 56e6bd90..0e25f7c8 100644 --- a/tests/test_phn.py +++ b/tests/test_phn.py @@ -60,8 +60,8 @@ def test_annot2phn(tmp_path, phns): shutil.copyfile(src=annot.audio_path, dst=Path(tmp_path).joinpath(Path(annot.audio_path.name))) crowsetta.phn.annot2phn(annot, annot_path) annot_made = crowsetta.phn.phn2annot(annot_path) - assert np.all(np.equal(annot.seq.onsets_Hz, annot_made.seq.onsets_Hz)) - assert np.all(np.equal(annot.seq.offsets_Hz, annot_made.seq.offsets_Hz)) + assert np.all(np.equal(annot.seq.onset_inds, annot_made.seq.onset_inds)) + assert np.all(np.equal(annot.seq.offset_inds, annot_made.seq.offset_inds)) assert np.all(np.char.equal(annot.seq.labels, annot_made.seq.labels)) @@ -132,6 +132,6 @@ def test_annot2PHN(tmp_path, PHNs): shutil.copyfile(src=annot.audio_path, dst=Path(tmp_path).joinpath(Path(annot.audio_path.name))) crowsetta.phn.annot2phn(annot, annot_path) annot_made = crowsetta.phn.phn2annot(annot_path) - assert np.all(np.equal(annot.seq.onsets_Hz, annot_made.seq.onsets_Hz)) - assert np.all(np.equal(annot.seq.offsets_Hz, annot_made.seq.offsets_Hz)) + assert np.all(np.equal(annot.seq.onset_inds, annot_made.seq.onset_inds)) + assert np.all(np.equal(annot.seq.offset_inds, annot_made.seq.offset_inds)) assert np.all(np.char.equal(annot.seq.labels, annot_made.seq.labels)) diff --git a/tests/test_segment.py b/tests/test_segment.py index 03eb2187..6ef95e47 100644 --- a/tests/test_segment.py +++ b/tests/test_segment.py @@ -9,35 +9,35 @@ def test_Segment_init_onset_offset_in_seconds_from_keyword(): offset_s=0.170) for attr in ['label', 'onset_s', 'offset_s']: assert hasattr(a_segment, attr) - for attr in ['onset_Hz', 'offset_Hz']: + for attr in ['onset_ind', 'offset_ind']: assert getattr(a_segment, attr) is None def test_Segment_init_onset_offset_in_Hertz_from_keyword(): a_segment = Segment.from_keyword(label='a', - onset_Hz=15655, - offset_Hz=20001) - for attr in ['label', 'onset_Hz', 'offset_Hz']: + onset_ind=15655, + offset_ind=20001) + for attr in ['label', 'onset_ind', 'offset_ind']: assert hasattr(a_segment, attr) for attr in ['onset_s', 'offset_s']: assert getattr(a_segment, attr) is None def test_Segment_init_onset_offset_in_seconds_from_row(): - header = ['label', 'onset_s', 'offset_s', 'onset_Hz', 'offset_Hz'] + header = ['label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind'] row = ['a', '0.123', '0.170', 'None', 'None'] a_segment = Segment.from_row(row=row, header=header) for attr in ['label', 'onset_s', 'offset_s']: assert hasattr(a_segment, attr) - for attr in ['onset_Hz', 'offset_Hz']: + for attr in ['onset_ind', 'offset_ind']: assert getattr(a_segment, attr) is None def test_Segment_init_onset_offset_in_Hertz_from_row(): - header = ['label', 'onset_s', 'offset_s', 'onset_Hz', 'offset_Hz'] + header = ['label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind'] row = ['a', 'None', 'None', '15655', '20001'] a_segment = Segment.from_row(row=row, header=header) - for attr in ['label', 'onset_Hz', 'offset_Hz']: + for attr in ['label', 'onset_ind', 'offset_ind']: assert hasattr(a_segment, attr) for attr in ['onset_s', 'offset_s']: assert getattr(a_segment, attr) is None @@ -63,10 +63,10 @@ def test_Segment_init_missing_onset_seconds_raises(): def test_Segment_init_missing_offset_Hertz_raises(): with pytest.raises(ValueError): a_segment = Segment.from_keyword(label='a', - onset_Hz=0.123) + onset_ind=0.123) def test_Segment_init_missing_onset_Hertz_raises(): with pytest.raises(ValueError): a_segment = Segment.from_keyword(label='a', - offset_Hz=0.177) + offset_ind=0.177) diff --git a/tests/test_sequence.py b/tests/test_sequence.py index 1b5ab858..96c80f39 100644 --- a/tests/test_sequence.py +++ b/tests/test_sequence.py @@ -7,15 +7,15 @@ def test_init(list_of_segments): - (onsets_Hz, - offsets_Hz, + (onset_inds, + offset_inds, onsets_s, offsets_s, labels) = keywords.from_segments(list_of_segments) seq = Sequence(segments=list_of_segments, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s, labels=labels, @@ -35,8 +35,8 @@ def test_init_with_wrong_type_for_segments_raises(list_of_segments): def test_init_with_bad_type_in_segments_raises(list_of_segments): segment_dict = { 'label': 'a', - 'onset_Hz': 16000, - 'offset_Hz': 32000, + 'onset_ind': 16000, + 'offset_ind': 32000, } list_of_segments.append(segment_dict) with pytest.raises(TypeError): @@ -51,11 +51,11 @@ def test_from_segments(list_of_segments): def test_from_keyword_bad_labels_type_raises(): labels = 12345 - onsets_Hz = np.asarray([0, 2, 4, 6, 8]) - offsets_Hz = np.asarray([1, 3, 5, 7, 9]) + onset_inds = np.asarray([0, 2, 4, 6, 8]) + offset_inds = np.asarray([1, 3, 5, 7, 9]) with pytest.raises(TypeError): - Sequence.from_keyword(labels=labels, onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz) + Sequence.from_keyword(labels=labels, onset_inds=onset_inds, + offset_inds=offset_inds) def test_from_keyword__onset_offset_in_seconds(): @@ -71,11 +71,11 @@ def test_from_keyword__onset_offset_in_seconds(): def test_from_keyword_onset_offset_in_Hertz(): labels = 'abcde' - onsets_Hz = np.asarray([0, 2, 4, 6, 8]) - offsets_Hz = np.asarray([1, 3, 5, 7, 9]) + onset_inds = np.asarray([0, 2, 4, 6, 8]) + offset_inds = np.asarray([1, 3, 5, 7, 9]) seq = Sequence.from_keyword(labels=labels, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz) + onset_inds=onset_inds, + offset_inds=offset_inds) assert hasattr(seq, 'segments') assert type(seq.segments) == tuple @@ -94,8 +94,8 @@ def test_from_dict_onset_offset_in_seconds(): def test_from_dict_onset_offset_in_Hertz(): seq_dict = { 'labels': 'abcde', - 'onsets_Hz': np.asarray([0, 2, 4, 6, 8]), - 'offsets_Hz': np.asarray([1, 3, 5, 7, 9]), + 'onset_inds': np.asarray([0, 2, 4, 6, 8]), + 'offset_inds': np.asarray([1, 3, 5, 7, 9]), } seq = Sequence.from_dict(seq_dict=seq_dict) assert hasattr(seq, 'segments') @@ -122,27 +122,27 @@ def test_missing_onset_seconds_raises(): def test_missing_offset_Hertz_raises(): with pytest.raises(ValueError): Sequence.from_keyword(labels='abcde', - onsets_Hz=np.asarray([0, 2, 4, 6, 8])) + onset_inds=np.asarray([0, 2, 4, 6, 8])) def test_missing_onset_Hertz_raises(): with pytest.raises(ValueError): Sequence.from_keyword(labels='abcde', - offsets_Hz=np.asarray([0, 2, 4, 6, 8])) + offset_inds=np.asarray([0, 2, 4, 6, 8])) def test_as_dict_onset_offset_in_Hertz(): labels = 'abcde' - onsets_Hz = np.asarray([0, 2, 4, 6, 8]) - offsets_Hz = np.asarray([1, 3, 5, 7, 9]) + onset_inds = np.asarray([0, 2, 4, 6, 8]) + offset_inds = np.asarray([1, 3, 5, 7, 9]) seq = Sequence.from_keyword(labels=labels, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz) + onset_inds=onset_inds, + offset_inds=offset_inds) seq_dict = seq.as_dict() assert np.all(seq_dict['labels'] == np.asarray(list(labels))) - assert np.all(seq_dict['onsets_Hz'] == onsets_Hz) - assert np.all(seq_dict['offsets_Hz'] == offsets_Hz) + assert np.all(seq_dict['onset_inds'] == onset_inds) + assert np.all(seq_dict['offset_inds'] == offset_inds) assert seq_dict['onsets_s'] is None assert seq_dict['offsets_s'] is None @@ -159,19 +159,19 @@ def test_as_dict_onset_offset_in_seconds(): assert np.all(seq_dict['labels'] == np.asarray(list(labels))) assert np.all(seq_dict['onsets_s'] == onsets_s) assert np.all(seq_dict['offsets_s'] == offsets_s) - assert seq_dict['onsets_Hz'] is None - assert seq_dict['offsets_Hz'] is None + assert seq_dict['onset_inds'] is None + assert seq_dict['offset_inds'] is None def test_to_dict_onset_offset_both_units(): labels = 'abcde' - onsets_Hz = np.asarray([0, 2, 4, 6, 8]) - offsets_Hz = np.asarray([1, 3, 5, 7, 9]) + onset_inds = np.asarray([0, 2, 4, 6, 8]) + offset_inds = np.asarray([1, 3, 5, 7, 9]) onsets_s = np.asarray([0., 0.2, 0.4, 0.6, 0.8]), offsets_s = np.asarray([0.1, 0.3, 0.5, 0.7, 0.9]), seq = Sequence.from_keyword(labels=labels, - onsets_Hz=onsets_Hz, - offsets_Hz=offsets_Hz, + onset_inds=onset_inds, + offset_inds=offset_inds, onsets_s=onsets_s, offsets_s=offsets_s) seq_dict = seq.as_dict() @@ -179,8 +179,8 @@ def test_to_dict_onset_offset_both_units(): assert np.all(seq_dict['labels'] == np.asarray(list(labels))) assert np.all(seq_dict['onsets_s'] == onsets_s) assert np.all(seq_dict['offsets_s'] == offsets_s) - assert np.all(seq_dict['onsets_Hz'] == onsets_Hz) - assert np.all(seq_dict['offsets_Hz'] == offsets_Hz) + assert np.all(seq_dict['onset_inds'] == onset_inds) + assert np.all(seq_dict['offset_inds'] == offset_inds) def test_eq(a_seq, same_seq):