DEV: switch dependency koumura -> birdsongrec, fixes #124

necessary after renaming the package to `birdsong-recognition-dataset` and the module to `birdsongrec` - switch dependency and rename entry point in pyproject.toml - update poetry.lock - rename crowsetta/koumura.py -> crowsetta/birdsongrec.py - fix tests after renaming
vocalpy · Jan 1, 2022 · de1a7de · de1a7de
1 parent 5bacc97
commit de1a7de
Show file tree

Hide file tree

Showing 7 changed files with 231 additions and 192 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,7 +26,7 @@ packages = [
 python = ">=3.7, <3.10"
 attrs = ">=19.3.0"
 evfuncs = ">=0.3.2"
-koumura = ">=0.2.1"
+birdsong-recognition-dataset = ">=0.3.0"
 numpy = ">=1.18.1"
 scipy = ">=1.4.1"
 SoundFile = ">=0.10.3"
@@ -40,7 +40,7 @@ pytest-cov = "^2.12.0"
 [tool.poetry.plugins]
 [tool.poetry.plugins."crowsetta.format"]
 csv = 'crowsetta.csv'
-koumura = 'crowsetta.koumura'
+birdsong-recognition-dataset = 'crowsetta.birdsongrec'
 notmat = 'crowsetta.notmat'
 phn = 'crowsetta.phn'
 textgrid = 'crowsetta.textgrid'

diff --git a/src/crowsetta/__init__.py b/src/crowsetta/__init__.py
@@ -18,7 +18,7 @@
 from . import (
     csv,
     formats,
-    koumura,
+    birdsongrec,
     notmat,
     phn,
     textgrid,

diff --git a/src/crowsetta/koumura.py → src/crowsetta/birdsongrec.py b/src/crowsetta/koumura.py → src/crowsetta/birdsongrec.py
@@ -10,7 +10,7 @@
 from pathlib import Path
 import numpy as np
 
-import koumura
+import birdsongrec
 import soundfile
 
 from .annotation import Annotation
@@ -19,8 +19,8 @@
 from .meta import Meta
 
 
-def koumura2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True,
-                  wavpath=None):
+def birdsongrec2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True,
+                      wavpath=None):
     """converts Annotation.xml from [1]_ into an annotation list
 
     Parameters
@@ -75,11 +75,10 @@ def koumura2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True,
             f"\nValue for 'wavpath' was: {wavpath}"
         )
 
-    # confusingly, koumura also has an object named 'Sequence'
-    # (which is where I borrowed the idea from)
-    # but it has a totally different structure
-    seq_list_xml = koumura.parse_xml(annot_path,
-                                     concat_seqs_into_songs=concat_seqs_into_songs)
+    # `birdsong-recognition-dataset` also has a 'Sequence' class
+    # but it is slightly different from the `generic.Sequence` used by `crowsetta`
+    seq_list_xml = birdsongrec.parse_xml(annot_path,
+                                         concat_seqs_into_songs=concat_seqs_into_songs)
 
     annot_list = []
     for seq_xml in seq_list_xml:
@@ -109,8 +108,8 @@ def koumura2annot(annot_path='Annotation.xml', concat_seqs_into_songs=True,
     return annot_list
 
 
-def koumura2csv(annot_path, concat_seqs_into_songs=True, wavpath='./Wave',
-                csv_filename=None, abspath=False, basename=False):
+def birdsongrec2csv(annot_path, concat_seqs_into_songs=True, wavpath='./Wave',
+                    csv_filename=None, abspath=False, basename=False):
     """takes Annotation.xml file from BirdsongRecognition dataset
     and saves the annotation from all files in one comma-separated
     values (csv) file, where each row represents one syllable from
@@ -151,17 +150,17 @@ def koumura2csv(annot_path, concat_seqs_into_songs=True, wavpath='./Wave',
     see annot2csv function for explanation of when you would want to use
     the abspath and basename parameters
     """
-    annot = koumura2annot(annot_path, concat_seqs_into_songs=concat_seqs_into_songs,
-                          wavpath=wavpath)
+    annot = birdsongrec2annot(annot_path, concat_seqs_into_songs=concat_seqs_into_songs,
+                              wavpath=wavpath)
     if csv_filename is None:
         csv_filename = os.path.abspath(annot_path)
         csv_filename = csv_filename.replace('xml', 'csv')
     csv.annot2csv(annot, csv_filename, abspath=abspath, basename=basename)
 
 
 meta = Meta(
-    name='koumura',
+    name='birdsong-recognition-dataset',
     ext='xml',
-    from_file=koumura2annot,
-    to_csv=koumura2csv,
+    from_file=birdsongrec2annot,
+    to_csv=birdsongrec2csv,
 )
diff --git a/tests/test_birdsongrec.py b/tests/test_birdsongrec.py
@@ -0,0 +1,42 @@
+"""test functions in birdsongrec module"""
+import csv
+
+import crowsetta
+
+
+def test_birdsongrec2annot(birdsong_rec_xml_file, birdsong_rec_wavpath):
+    """birdsongrec2annot should return a list made up only of Annotations"""
+    convert = crowsetta.birdsongrec.birdsongrec2annot
+    result = convert(annot_path=birdsong_rec_xml_file,
+                     concat_seqs_into_songs=True, wavpath=birdsong_rec_wavpath)
+    assert isinstance(result, list)
+    assert all(type(item) == crowsetta.Annotation for item in result)
+
+
+def test_birdsongrec2csv(tmp_path,
+                         birdsong_rec_xml_file,
+                         birdsong_rec_wavpath):
+    # since birdsongrec2csv is basically a wrapper around
+    # birdsongrec2annot and annot2csv,
+    # and those are tested above and in other test modules,
+    # here just need to make sure this function doesn't fail
+    csv_filename = tmp_path / 'test.csv'
+    crowsetta.birdsongrec.birdsongrec2csv(annot_path=birdsong_rec_xml_file,
+                                          wavpath=birdsong_rec_wavpath,
+                                          csv_filename=csv_filename,
+                                          basename=True)
+    # make sure file was created
+    assert csv_filename.exists()
+
+    # to be extra sure, make sure all .wav filenames from wavpath are in csv
+    filenames_from_csv = []
+    with open(csv_filename, 'r', newline='') as csvfile:
+        # DictReader consumes the header itself; next() would drop a data row
+        reader = csv.DictReader(csvfile)
+        for row in reader:
+            filenames_from_csv.append(row['audio_path'])
+
+    wav_list = sorted(birdsong_rec_wavpath.glob('*.wav'))
+    wav_list = [wav_file.name for wav_file in wav_list]
+    for wav_file in wav_list:
+        assert(wav_file in filenames_from_csv)
diff --git a/tests/test_koumura.py b/tests/test_koumura.py
diff --git a/tests/test_transcriber.py b/tests/test_transcriber.py
@@ -4,20 +4,20 @@
 import pytest
 
 
-def test_koumura_from_file(birdsong_rec_xml_file,
-                           birdsong_rec_wavpath):
-    scribe = crowsetta.Transcriber(format='koumura')
+def test_birdsongrec_from_file(birdsong_rec_xml_file,
+                               birdsong_rec_wavpath):
+    scribe = crowsetta.Transcriber(format='birdsong-recognition-dataset')
     annots = scribe.from_file(annot_path=birdsong_rec_xml_file,
                               wavpath=birdsong_rec_wavpath)
     assert type(annots) == list
     assert all([type(annot) == crowsetta.Annotation
                 for annot in annots])
 
 
-def test_koumura_to_csv(tmp_path,
-                        birdsong_rec_xml_file,
-                        birdsong_rec_wavpath):
-    scribe = crowsetta.Transcriber(format='koumura')
+def test_birdsongrec_to_csv(tmp_path,
+                            birdsong_rec_xml_file,
+                            birdsong_rec_wavpath):
+    scribe = crowsetta.Transcriber(format='birdsong-recognition-dataset')
     csv_filename = tmp_path / 'Annotation.csv'
     scribe.to_csv(annot_path=birdsong_rec_xml_file,
                   wavpath=birdsong_rec_wavpath,