From d44fead956b80677202363c50130512869ffc6e1 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 23 Mar 2023 20:03:45 -0400 Subject: [PATCH] ENH: Better handle Praat TextGrids, fix #241 * Be able to parse both "normal" and short format TextGrid files, in both UTF-8 and UTF-16 encoding * Remove empty intervals by default, provide option to not remove them * Be able to load a TextGrid with multiple IntervalTiers and then convert to an Annotation with multiple sequences * Use reprlib with the vendored TextGrid so it is readable Squash commits: - Remove _vendor/textgrid - Move textgrid module into sub-package - Move test_textgrid into its own subpackage - Change example TextGrid to a file with more than one tier, for demo purposes - Add textgrid sub-package in formats/seq/ - Add tests/data_for_tests/textgrid/ - Add test fixtures: - a_parse_textgrid_path to fixtures/textgrid.py - add textgrid with empty interval fixtures - Add test_textgrid sub-package in test_formats/test_seq - Rewrite doc/formats/seq/textgrid.md --- doc/formats/seq/textgrid.md | 7 +- src/crowsetta/_vendor/textgrid/AUTHORS | 5 - src/crowsetta/_vendor/textgrid/LICENSE | 7 - src/crowsetta/_vendor/textgrid/README.md | 27 - src/crowsetta/_vendor/textgrid/__init__.py | 1 - src/crowsetta/_vendor/textgrid/exceptions.py | 2 - src/crowsetta/_vendor/textgrid/textgrid.py | 856 ------------------ src/crowsetta/data/data.py | 2 +- src/crowsetta/data/textgrid/1179.TextGrid | 578 ------------ .../data/textgrid/AVO-maea-basic.TextGrid | 97 ++ src/crowsetta/data/textgrid/citation.txt | 10 +- src/crowsetta/formats/seq/textgrid.py | 194 ---- .../formats/seq/textgrid/__init__.py | 3 + src/crowsetta/formats/seq/textgrid/classes.py | 179 ++++ src/crowsetta/formats/seq/textgrid/parse.py | 252 ++++++ .../formats/seq/textgrid/textgrid.py | 411 +++++++++ tests/data_for_tests/textgrid/README.md | 30 + .../AVO-maea-basic.TextGrid | 97 ++ .../BroadFocusAlofa.TextGrid | 159 ++++ .../BroadFocusLeilani.TextGrid | 119 +++ 
.../BroadFocusRosita.TextGrid | 113 +++ .../ObjectFocusLeilani.TextGrid | 116 +++ .../ObjectFocusLupe.TextGrid | 116 +++ .../ObjectFocusRosita.TextGrid | 135 +++ .../SubjectFocusLeilani.TextGrid | Bin 0 -> 6980 bytes .../SubjectFocusLupe.TextGrid | 121 +++ .../SubjectFocusRosita.TextGrid | 113 +++ .../VAO-maea-basic.TextGrid | 97 ++ ...orth_wind_and_the_sun.short.utf16.TextGrid | Bin 0 -> 2000 bytes ...north_wind_and_the_sun.short.utf8.TextGrid | 77 ++ .../the_north_wind_and_the_sun.utf16.TextGrid | Bin 0 -> 5916 bytes .../the_north_wind_and_the_sun.utf8.TextGrid | 102 +++ .../all_tiers_have_the_same_name.TextGrid | 62 ++ .../praatIO/bobby_phones_elan.TextGrid | 74 ++ .../textgrid/praatIO/mary.TextGrid | 96 ++ .../textgrid/praatIO/mary_longfile.TextGrid | Bin 0 -> 7124 bytes .../mary_longfile_with_negative_zero.TextGrid | Bin 0 -> 7136 bytes .../mary_with_constrained_tier_times.TextGrid | 84 ++ .../praatIO/mary_with_negative_zero.TextGrid | 96 ++ tests/data_for_tests/textgrid/textgrids.json | 27 + tests/fixtures/textgrid.py | 27 + tests/test_formats/test_seq/test_textgrid.py | 90 -- .../test_seq/test_textgrid}/__init__.py | 0 .../test_seq/test_textgrid/test_classes.py | 132 +++ .../test_seq/test_textgrid/test_parse.py | 163 ++++ .../test_seq/test_textgrid/test_textgrid.py | 129 +++ 46 files changed, 3235 insertions(+), 1771 deletions(-) delete mode 100644 src/crowsetta/_vendor/textgrid/AUTHORS delete mode 100644 src/crowsetta/_vendor/textgrid/LICENSE delete mode 100644 src/crowsetta/_vendor/textgrid/README.md delete mode 100644 src/crowsetta/_vendor/textgrid/__init__.py delete mode 100644 src/crowsetta/_vendor/textgrid/exceptions.py delete mode 100644 src/crowsetta/_vendor/textgrid/textgrid.py delete mode 100644 src/crowsetta/data/textgrid/1179.TextGrid create mode 100644 src/crowsetta/data/textgrid/AVO-maea-basic.TextGrid delete mode 100644 src/crowsetta/formats/seq/textgrid.py create mode 100644 src/crowsetta/formats/seq/textgrid/__init__.py create mode 100644 
src/crowsetta/formats/seq/textgrid/classes.py create mode 100644 src/crowsetta/formats/seq/textgrid/parse.py create mode 100644 src/crowsetta/formats/seq/textgrid/textgrid.py create mode 100644 tests/data_for_tests/textgrid/README.md create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/AVO-maea-basic.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusAlofa.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusLeilani.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusRosita.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLeilani.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLupe.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusRosita.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLeilani.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLupe.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusRosita.TextGrid create mode 100644 tests/data_for_tests/textgrid/calhoun-et-al-2022/VAO-maea-basic.TextGrid create mode 100644 tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf16.TextGrid create mode 100644 tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf8.TextGrid create mode 100644 tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf16.TextGrid create mode 100644 tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf8.TextGrid create mode 100644 tests/data_for_tests/textgrid/praatIO/all_tiers_have_the_same_name.TextGrid create mode 100644 tests/data_for_tests/textgrid/praatIO/bobby_phones_elan.TextGrid create mode 100644 tests/data_for_tests/textgrid/praatIO/mary.TextGrid create mode 100644 
tests/data_for_tests/textgrid/praatIO/mary_longfile.TextGrid create mode 100644 tests/data_for_tests/textgrid/praatIO/mary_longfile_with_negative_zero.TextGrid create mode 100644 tests/data_for_tests/textgrid/praatIO/mary_with_constrained_tier_times.TextGrid create mode 100644 tests/data_for_tests/textgrid/praatIO/mary_with_negative_zero.TextGrid create mode 100644 tests/data_for_tests/textgrid/textgrids.json delete mode 100644 tests/test_formats/test_seq/test_textgrid.py rename {src/crowsetta/_vendor => tests/test_formats/test_seq/test_textgrid}/__init__.py (100%) create mode 100644 tests/test_formats/test_seq/test_textgrid/test_classes.py create mode 100644 tests/test_formats/test_seq/test_textgrid/test_parse.py create mode 100644 tests/test_formats/test_seq/test_textgrid/test_textgrid.py diff --git a/doc/formats/seq/textgrid.md b/doc/formats/seq/textgrid.md index 8b395dc0..10608ea2 100644 --- a/doc/formats/seq/textgrid.md +++ b/doc/formats/seq/textgrid.md @@ -6,12 +6,7 @@ Annotation format saved by the [Praat](https://www.fon.hum.uva.nl/praat/) applic More details about annotating with Praat can be found here: The specification for TextGrid objects is here: - - -Internally, crowsetta uses the Python tool `textgrid` -() to load .TextGrid files. -A version is distributed with crowsetta -under [MIT license](https://github.com/kylebgorman/textgrid/blob/master/LICENSE). + The annotations can be loaded with the following class: {py:class}`crowsetta.formats.seq.textgrid.TextGrid`. diff --git a/src/crowsetta/_vendor/textgrid/AUTHORS b/src/crowsetta/_vendor/textgrid/AUTHORS deleted file mode 100644 index f10cd545..00000000 --- a/src/crowsetta/_vendor/textgrid/AUTHORS +++ /dev/null @@ -1,5 +0,0 @@ -Max Bane , University of Chicago -Kyle Gorman , University of Pennsylvania -Morgan Sonderegger - -KG designed this for personal use in 2007, circulated it in 2008, and turned the original methods into Python classes in 2009. 
MS and MB began fixing bugs and contributing functions for particular use cases in 2011. diff --git a/src/crowsetta/_vendor/textgrid/LICENSE b/src/crowsetta/_vendor/textgrid/LICENSE deleted file mode 100644 index f1f2f199..00000000 --- a/src/crowsetta/_vendor/textgrid/LICENSE +++ /dev/null @@ -1,7 +0,0 @@ -Copyright (c) 2011-2014 Kyle Gorman, Max Bane, Morgan Sonderegger - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/crowsetta/_vendor/textgrid/README.md b/src/crowsetta/_vendor/textgrid/README.md deleted file mode 100644 index e5d53f71..00000000 --- a/src/crowsetta/_vendor/textgrid/README.md +++ /dev/null @@ -1,27 +0,0 @@ -textgrid.py -=========== - -Python classes for Praat TextGrid and TextTier files (and HTK .mlf files) - -Kyle Gorman and contributors (see commit history). 
- -How to cite: ------------- - -While you don't have to, if you want to cite textgrid.py in a publication, include a footnote link to the source: - - http://github.com/kylebgorman/textgrid.py/ - -How to install: ---------------- - -The code can be placed in your working directory or in your `$PYTHONPATH`, and then imported in your Python script. You also can install it via `pip`, like so: - - pip install textgrid - -(if you're not working in a virtualenv, you may need to do this with `sudo`.) - -Synopsis: ---------- - -See the docstrings in `textgrid.py` diff --git a/src/crowsetta/_vendor/textgrid/__init__.py b/src/crowsetta/_vendor/textgrid/__init__.py deleted file mode 100644 index 8c7943f8..00000000 --- a/src/crowsetta/_vendor/textgrid/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .textgrid import MLF, Interval, IntervalTier, Point, PointTier, TextGrid diff --git a/src/crowsetta/_vendor/textgrid/exceptions.py b/src/crowsetta/_vendor/textgrid/exceptions.py deleted file mode 100644 index 5e3aced9..00000000 --- a/src/crowsetta/_vendor/textgrid/exceptions.py +++ /dev/null @@ -1,2 +0,0 @@ -class TextGridError(Exception): - pass diff --git a/src/crowsetta/_vendor/textgrid/textgrid.py b/src/crowsetta/_vendor/textgrid/textgrid.py deleted file mode 100644 index 86bfa1df..00000000 --- a/src/crowsetta/_vendor/textgrid/textgrid.py +++ /dev/null @@ -1,856 +0,0 @@ -#!/usr/bin/env python -O -# -# Copyright (c) 2011-2016 Kyle Gorman, Max Bane, Morgan Sonderegger -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall 
be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# textgrid.py: classes for Praat TextGrid and HTK mlf files -# -# Max Bane -# Kyle Gorman -# Morgan Sonderegger - -from __future__ import print_function - -import codecs -import os.path -import re -from bisect import bisect_left - -from .exceptions import TextGridError - -DEFAULT_TEXTGRID_PRECISION = 5 -DEFAULT_MLF_PRECISION = 5 - - -def _getMark(text, short): - """ - Return the mark or text entry on a line. Praat escapes double-quotes - by doubling them, so doubled double-quotes are read as single - double-quotes. Newlines within an entry are allowed. - """ - - line = text.readline() - - # check that the line begins with a valid entry type - if not short and not re.match(r'^\s*(text|mark) = "', line): - raise ValueError("Bad entry: " + line) - - # read until the number of double-quotes is even - while line.count('"') % 2: - next_line = text.readline() - - if not next_line: - raise EOFError("Bad entry: " + line[:20] + "...") - - line += next_line - if short: - pattern = r'^"(.*?)"\s*$' - else: - pattern = r'^\s*(text|mark) = "(.*?)"\s*$' - entry = re.match(pattern, line, re.DOTALL) - - return entry.groups()[-1].replace('""', '"') - - -def _formatMark(text): - return text.replace('"', '""') - - -def detectEncoding(f): - """ - This helper method returns the file encoding corresponding to path f. - This handles UTF-8, which is itself an ASCII extension, so also ASCII. 
- """ - encoding = "ascii" - try: - with codecs.open(f, "r", encoding="utf-16") as source: - source.readline() # Read one line to ensure correct encoding - except UnicodeError: - try: - with codecs.open( - f, "r", encoding="utf-8-sig" - ) as source: # revised utf-8 to utf-8-sig for utf-8 with byte order mark (BOM) - source.readline() # Read one line to ensure correct encoding - except UnicodeError: - with codecs.open(f, "r", encoding="ascii") as source: - source.readline() # Read one line to ensure correct encoding - else: - encoding = "utf-8-sig" # revised utf-8 to utf-8-sig for utf-8 with byte order mark (BOM) - else: - encoding = "utf-16" - - return encoding - - -class Point(object): - """ - Represents a point in time with an associated textual mark, as stored - in a PointTier. - - """ - - def __init__(self, time, mark): - self.time = time - self.mark = mark - - def __repr__(self): - return "Point({0}, {1})".format(self.time, self.mark if self.mark else None) - - def __lt__(self, other): - if hasattr(other, "time"): - return self.time < other.time - elif hasattr(other, "minTime"): - return self.time < other.minTime - else: - return self.time < other - - def __gt__(self, other): - if hasattr(other, "time"): - return self.time > other.time - elif hasattr(other, "maxTime"): - return self.time > other.maxTime - else: - return self.time > other - - def __eq__(self, other): - if isinstance(other, Point): - return self.time == other.time - elif isinstance(other, Interval): - return other.minTime < self.time < other.maxTime - else: - return self.time == other - - def __gte__(self, other): - return self > other or self == other - - def __lte__(self, other): - return self < other or self == other - - def __cmp__(self, other): - """ - In addition to the obvious semantics, Point/Interval comparison is - 0 iff the point is inside the interval (non-inclusively), if you - need inclusive membership, use Interval.__contains__ - """ - if hasattr(other, "time"): - return 
cmp(self.time, other.time) - elif hasattr(other, "minTime") and hasattr(other, "maxTime"): - return cmp(self.time, other.minTime) + cmp(self.time, other.maxTime) - else: # hopefully numerical - return cmp(self.time, other) - - def __iadd__(self, other): - self.time += other - - def __isub__(self, other): - self.time -= other - - -def decode(string): - """ - Decode HTK's mangling of UTF-8 strings into something useful - """ - # print(string) - return string - return string.decode("string_escape").decode("UTF-8") - - -class Interval(object): - """ - Represents an interval of time, with an associated textual mark, as - stored in an IntervalTier. - - """ - - def __init__(self, minTime, maxTime, mark): - if minTime >= maxTime: - # Praat does not support intervals with duration <= 0 - raise ValueError(minTime, maxTime) - self.minTime = minTime - self.maxTime = maxTime - self.mark = mark - self.strict = True - - def __repr__(self): - return "Interval({0}, {1}, {2})".format(self.minTime, self.maxTime, self.mark if self.mark else None) - - def duration(self): - """ - Returns the duration of the interval in seconds. 
- """ - return self.maxTime - self.minTime - - def __lt__(self, other): - if hasattr(other, "minTime"): - if self.strict and self.overlaps(other): - raise (ValueError(self, other)) - return self.minTime < other.minTime - elif hasattr(other, "time"): - return self.maxTime < other.time - else: - return self.maxTime < other - - def __gt__(self, other): - if hasattr(other, "maxTime"): - if self.strict and self.overlaps(other): - raise (ValueError(self, other)) - return self.maxTime > other.maxTime - elif hasattr(other, "time"): - return self.minTime > other.time - else: - return self.minTime > other - - def __gte__(self, other): - return self > other or self == other - - def __lte__(self, other): - return self < other or self == other - - def __cmp__(self, other): - if hasattr(other, "minTime") and hasattr(other, "maxTime"): - if self.overlaps(other): - raise ValueError(self, other) - # this returns the two intervals, so user can patch things - # up if s/he so chooses - return cmp(self.minTime, other.minTime) - elif hasattr(other, "time"): # comparing Intervals and Points - return cmp(self.minTime, other.time) + cmp(self.maxTime, other.time) - else: - return cmp(self.minTime, other) + cmp(self.maxTime, other) - - def __eq__(self, other): - """ - This might seem superfluous but not that a ValueError will be - raised if you compare two intervals to each other...not anymore - """ - if hasattr(other, "minTime") and hasattr(other, "maxTime"): - if self.minTime == other.minTime: - if self.maxTime == other.maxTime: - return True - elif hasattr(other, "time"): - return self.minTime < other.time < self.maxTime - else: - return False - - def __iadd__(self, other): - self.minTime += other - self.maxTime += other - - def __isub__(self, other): - self.minTime -= other - self.maxTime -= other - - def overlaps(self, other): - """ - Tests whether self overlaps with the given interval. Symmetric. 
- See: http://www.rgrjr.com/emacs/overlap.html - """ - return other.minTime < self.maxTime and self.minTime < other.maxTime - - def __contains__(self, other): - """ - Tests whether the given time point is contained in this interval, - either a numeric type or a Point object. - """ - if hasattr(other, "minTime") and hasattr(other, "maxTime"): - return self.minTime <= other.minTime and other.maxTime <= self.maxTime - elif hasattr(other, "time"): - return self.minTime <= other.time <= self.maxTime - else: - return self.minTime <= other <= self.maxTime - - def bounds(self): - return (self.minTime, self.maxTime) - - -class PointTier(object): - """ - Represents Praat PointTiers (also called TextTiers) as list of Points - (e.g., for point in pointtier). A PointTier is used much like a Python - set in that it has add/remove methods, not append/extend methods. - - """ - - def __init__(self, name=None, minTime=0.0, maxTime=None): - self.name = name - self.minTime = minTime - self.maxTime = maxTime - self.points = [] - - def __str__(self): - return "".format(self.name, len(self)) - - def __repr__(self): - return "PointTier({0}, {1})".format(self.name, self.points) - - def __iter__(self): - return iter(self.points) - - def __len__(self): - return len(self.points) - - def __getitem__(self, i): - return self.points[i] - - def add(self, time, mark): - """ - constructs a Point and adds it to the PointTier, maintaining order - """ - self.addPoint(Point(time, mark)) - - def addPoint(self, point): - if point < self.minTime: - raise ValueError(self.minTime) # too early - if self.maxTime and point > self.maxTime: - raise ValueError(self.maxTime) # too late - i = bisect_left(self.points, point) - if i < len(self.points) and self.points[i].time == point.time: - raise ValueError(point) # we already got one right there - self.points.insert(i, point) - - def remove(self, time, mark): - """ - removes a constructed Point i from the PointTier - """ - self.removePoint(Point(time, mark)) - - def 
removePoint(self, point): - self.points.remove(point) - - def read(self, f, round_digits=DEFAULT_TEXTGRID_PRECISION): - """ - Read the Points contained in the Praat-formated PointTier/TextTier - file indicated by string f - """ - encoding = detectEncoding(f) - with codecs.open(f, "r", encoding=encoding) as source: - file_type, short = parse_header(source) - if file_type != "TextTier": - raise TextGridError("The file could not be parsed as a PointTier as it is lacking a proper header.") - - self.minTime = parse_line(source.readline(), short, round_digits) - self.maxTime = parse_line(source.readline(), short, round_digits) - n = int(parse_line(source.readline(), short, round_digits)) - for i in range(n): - source.readline().rstrip() # header - itim = parse_line(source.readline(), short, round_digits) - imrk = _getMark(source, short) - self.points.append(Point(itim, imrk)) - - def write(self, f): - """ - Write the current state into a Praat-format PointTier/TextTier - file. f may be a file object to write to, or a string naming a - path for writing - """ - sink = f if hasattr(f, "write") else codecs.open(f, "w", "UTF-8") - print('File type = "ooTextFile"', file=sink) - print('Object class = "TextTier"\n', file=sink) - - print("xmin = {0}".format(self.minTime), file=sink) - print("xmax = {0}".format(self.maxTime if self.maxTime else self.points[-1].time), file=sink) - print("points: size = {0}".format(len(self)), file=sink) - for i, point in enumerate(self.points, 1): - print("points [{0}]:".format(i), file=sink) - print("\ttime = {0}".format(point.time), file=sink) - mark = _formatMark(point.mark) - print('\tmark = "{0}"'.format(mark), file=sink) - sink.close() - - def bounds(self): - return (self.minTime, self.maxTime or self.points[-1].time) - - # alternative constructor - - @classmethod - def fromFile(cls, f, name=None): - pt = cls(name=name) - pt.read(f) - return pt - - -class IntervalTier(object): - """ - Represents Praat IntervalTiers as list of sequence types 
of Intervals - (e.g., for interval in intervaltier). An IntervalTier is used much like a - Python set in that it has add/remove methods, not append/extend methods. - - """ - - def __init__(self, name=None, minTime=0.0, maxTime=None): - self.name = name - self.minTime = minTime - self.maxTime = maxTime - self.intervals = [] - self.strict = True - - def __str__(self): - return "".format(self.name, len(self)) - - def __repr__(self): - return "IntervalTier({0}, {1})".format(self.name, self.intervals) - - def __iter__(self): - return iter(self.intervals) - - def __len__(self): - return len(self.intervals) - - def __getitem__(self, i): - return self.intervals[i] - - def add(self, minTime, maxTime, mark): - interval = Interval(minTime, maxTime, mark) - interval.strict = self.strict - self.addInterval(interval) - - def addInterval(self, interval): - if interval.minTime < self.minTime: # too early - raise ValueError(self.minTime) - if self.maxTime and interval.maxTime > self.maxTime: # too late - # raise ValueError, self.maxTime - raise ValueError(self.maxTime) - i = bisect_left(self.intervals, interval) - if i != len(self.intervals) and self.intervals[i] == interval: - raise ValueError(self.intervals[i]) - interval.strict = self.strict - self.intervals.insert(i, interval) - - def remove(self, minTime, maxTime, mark): - self.removeInterval(Interval(minTime, maxTime, mark)) - - def removeInterval(self, interval): - self.intervals.remove(interval) - - def indexContaining(self, time): - """ - Returns the index of the interval containing the given time point, - or None if the time point is outside the bounds of this tier. The - argument can be a numeric type, or a Point object. 
- """ - i = bisect_left(self.intervals, time) - if i != len(self.intervals): - if self.intervals[i].minTime <= time <= self.intervals[i].maxTime: - return i - - def intervalContaining(self, time): - """ - Returns the interval containing the given time point, or None if - the time point is outside the bounds of this tier. The argument - can be a numeric type, or a Point object. - """ - i = self.indexContaining(time) - if i: - return self.intervals[i] - - def read(self, f, round_digits=DEFAULT_TEXTGRID_PRECISION): - """ - Read the Intervals contained in the Praat-formated IntervalTier - file indicated by string f - """ - encoding = detectEncoding(f) - with codecs.open(f, "r", encoding=encoding) as source: - file_type, short = parse_header(source) - if file_type != "IntervalTier": - raise TextGridError("The file could not be parsed as a IntervalTier as it is lacking a proper header.") - - self.minTime = parse_line(source.readline(), short, round_digits) - self.maxTime = parse_line(source.readline(), short, round_digits) - n = int(parse_line(source.readline(), short, round_digits)) - for i in range(n): - source.readline().rstrip() # header - imin = parse_line(source.readline(), short, round_digits) - imax = parse_line(source.readline(), short, round_digits) - imrk = _getMark(source, short) - self.intervals.append(Interval(imin, imax, imrk)) - - def _fillInTheGaps(self, null): - """ - Returns a pseudo-IntervalTier with the temporal gaps filled in - """ - prev_t = self.minTime - output = [] - for interval in self.intervals: - if prev_t < interval.minTime: - output.append(Interval(prev_t, interval.minTime, null)) - output.append(interval) - prev_t = interval.maxTime - # last interval - if self.maxTime is not None and prev_t < self.maxTime: # also false if maxTime isn't defined - output.append(Interval(prev_t, self.maxTime, null)) - return output - - def write(self, f, null=""): - """ - Write the current state into a Praat-format IntervalTier file. 
f - may be a file object to write to, or a string naming a path for - writing - """ - sink = f if hasattr(f, "write") else open(f, "w") - print('File type = "ooTextFile"', file=sink) - print('Object class = "IntervalTier"\n', file=sink) - print("xmin = {0}".format(self.minTime), file=sink) - print("xmax = {0}".format(self.maxTime if self.maxTime else self.intervals[-1].maxTime), file=sink) - # compute the number of intervals and make the empty ones - output = self._fillInTheGaps(null) - # write it all out - print("intervals: size = {0}".format(len(output)), file=sink) - for i, interval in enumerate(output, 1): - print("intervals [{0}]".format(i), file=sink) - print("\txmin = {0}".format(interval.minTime), file=sink) - print("\txmax = {0}".format(interval.maxTime), file=sink) - mark = _formatMark(interval.mark) - print('\ttext = "{0}"'.format(mark), file=sink) - sink.close() - - def bounds(self): - return self.minTime, self.maxTime or self.intervals[-1].maxTime - - # alternative constructor - - @classmethod - def fromFile(cls, f, name=None): - it = cls(name=name) - it.intervals = [] - it.read(f) - return it - - -def parse_line(line, short, to_round): - line = line.strip() - if short: - if '"' in line: - return line[1:-1] - return round(float(line), to_round) - if '"' in line: - m = re.match(r'.+? = "(.*)"', line) - return m.groups()[0] - m = re.match(r".+? 
= (.*)", line) - return round(float(m.groups()[0]), to_round) - - -def parse_header(source): - header = source.readline() # header junk - m = re.match(r'File type = "([\w ]+)"', header) - if m is None or not m.groups()[0].startswith("ooTextFile"): - raise TextGridError("The file could not be parsed as a Praat text file as it is lacking a proper header.") - - short = "short" in m.groups()[0] - file_type = parse_line(source.readline(), short, "") # header junk - t = source.readline() # header junk - return file_type, short - - -class TextGrid(object): - """ - Represents Praat TextGrids as list of sequence types of tiers (e.g., - for tier in textgrid), and as map from names to tiers (e.g., - textgrid['tierName']). Whereas the *Tier classes that make up a - TextGrid impose a strict ordering on Points/Intervals, a TextGrid - instance is given order by the user. Like a true Python list, there - are append/extend methods for a TextGrid. - - """ - - def __init__(self, name=None, minTime=0.0, maxTime=None, strict=True): - """ - Construct a TextGrid instance with the given (optional) name - (which is only relevant for MLF stuff). If file is given, it is a - string naming the location of a Praat-format TextGrid file from - which to populate this instance. - """ - self.name = name - self.minTime = minTime - self.maxTime = maxTime - self.tiers = [] - self.strict = strict - - def __str__(self): - return "".format(self.name, len(self)) - - def __repr__(self): - return "TextGrid({0}, {1})".format(self.name, self.tiers) - - def __iter__(self): - return iter(self.tiers) - - def __len__(self): - return len(self.tiers) - - def __getitem__(self, i): - """ - Return the ith tier - """ - return self.tiers[i] - - def getFirst(self, tierName): - """ - Return the first tier with the given name. - """ - for t in self.tiers: - if t.name == tierName: - return t - - def getList(self, tierName): - """ - Return a list of all tiers with the given name. 
- """ - tiers = [] - for t in self.tiers: - if t.name == tierName: - tiers.append(t) - return tiers - - def getNames(self): - """ - return a list of the names of the intervals contained in this - TextGrid - """ - return [tier.name for tier in self.tiers] - - def append(self, tier): - if self.maxTime is not None and tier.maxTime is not None and tier.maxTime > self.maxTime: - raise ValueError(self.maxTime) # too late - tier.strict = self.strict - for i in tier: - i.strict = self.strict - self.tiers.append(tier) - - def extend(self, tiers): - if min([t.minTime for t in tiers]) < self.minTime: - raise ValueError(self.minTime) # too early - if self.maxTime and max([t.minTime for t in tiers]) > self.maxTime: - raise ValueError(self.maxTime) # too late - self.tiers.extend(tiers) - - def pop(self, i=None): - """ - Remove and return tier at index i (default last). Will raise - IndexError if TextGrid is empty or index is out of range. - """ - return self.tiers.pop(i) if i else self.tiers.pop() - - def read(self, f, round_digits=DEFAULT_TEXTGRID_PRECISION, encoding=None): - """ - Read the tiers contained in the Praat-formatted TextGrid file - indicated by string f. Times are rounded to the specified precision. 
- """ - if encoding is None: - encoding = detectEncoding(f) - with codecs.open(f, "r", encoding=encoding) as source: - file_type, short = parse_header(source) - if file_type != "TextGrid": - raise TextGridError("The file could not be parsed as a TextGrid as it is lacking a proper header.") - self.minTime = parse_line(source.readline(), short, round_digits) - self.maxTime = parse_line(source.readline(), short, round_digits) - source.readline() # more header junk - if short: - m = int(source.readline().strip()) # will be self.n - else: - m = int(source.readline().strip().split()[2]) # will be self.n - if not short: - source.readline() - for i in range(m): # loop over grids - if not short: - source.readline() - if parse_line(source.readline(), short, round_digits) == "IntervalTier": - inam = parse_line(source.readline(), short, round_digits) - imin = parse_line(source.readline(), short, round_digits) - imax = parse_line(source.readline(), short, round_digits) - itie = IntervalTier(inam, imin, imax) - itie.strict = self.strict - n = int(parse_line(source.readline(), short, round_digits)) - for j in range(n): - if not short: - source.readline().rstrip().split() # header junk - jmin = parse_line(source.readline(), short, round_digits) - jmax = parse_line(source.readline(), short, round_digits) - jmrk = _getMark(source, short) - if jmin < jmax: # non-null - itie.addInterval(Interval(jmin, jmax, jmrk)) - self.append(itie) - else: # pointTier - inam = parse_line(source.readline(), short, round_digits) - imin = parse_line(source.readline(), short, round_digits) - imax = parse_line(source.readline(), short, round_digits) - itie = PointTier(inam) - n = int(parse_line(source.readline(), short, round_digits)) - for j in range(n): - source.readline().rstrip() # header junk - jtim = parse_line(source.readline(), short, round_digits) - jmrk = _getMark(source, short) - itie.addPoint(Point(jtim, jmrk)) - self.append(itie) - - def write(self, f, null=""): - """ - Write the current 
state into a Praat-format TextGrid file. f may - be a file object to write to, or a string naming a path to open - for writing. - """ - sink = f if hasattr(f, "write") else codecs.open(f, "w", "UTF-8") - print('File type = "ooTextFile"', file=sink) - print('Object class = "TextGrid"\n', file=sink) - print("xmin = {0}".format(self.minTime), file=sink) - # compute max time - maxT = self.maxTime - if not maxT: - maxT = max([t.maxTime if t.maxTime else t[-1].maxTime for t in self.tiers]) - print("xmax = {0}".format(maxT), file=sink) - print("tiers? ", file=sink) - print("size = {0}".format(len(self)), file=sink) - print("item []:", file=sink) - for i, tier in enumerate(self.tiers, 1): - print("\titem [{0}]:".format(i), file=sink) - if tier.__class__ == IntervalTier: - print('\t\tclass = "IntervalTier"', file=sink) - print('\t\tname = "{0}"'.format(tier.name), file=sink) - print("\t\txmin = {0}".format(tier.minTime), file=sink) - print("\t\txmax = {0}".format(maxT), file=sink) - # compute the number of intervals and make the empty ones - output = tier._fillInTheGaps(null) - print("\t\tintervals: size = {0}".format(len(output)), file=sink) - for j, interval in enumerate(output, 1): - print("\t\t\tintervals [{0}]:".format(j), file=sink) - print("\t\t\t\txmin = {0}".format(interval.minTime), file=sink) - print("\t\t\t\txmax = {0}".format(interval.maxTime), file=sink) - mark = _formatMark(interval.mark) - print('\t\t\t\ttext = "{0}"'.format(mark), file=sink) - elif tier.__class__ == PointTier: # PointTier - print('\t\tclass = "TextTier"', file=sink) - print('\t\tname = "{0}"'.format(tier.name), file=sink) - print("\t\txmin = {0}".format(tier.minTime), file=sink) - print("\t\txmax = {0}".format(maxT), file=sink) - print("\t\tpoints: size = {0}".format(len(tier)), file=sink) - for k, point in enumerate(tier, 1): - print("\t\t\tpoints [{0}]:".format(k), file=sink) - print("\t\t\t\ttime = {0}".format(point.time), file=sink) - mark = _formatMark(point.mark) - print('\t\t\t\tmark 
= "{0}"'.format(mark), file=sink) - sink.close() - - # alternative constructor - - @classmethod - def fromFile(cls, f, name=None): - tg = cls(name=name) - tg.read(f) - return tg - - -class MLF(object): - """ - Read in a HTK .mlf file generated with HVite -o SM and turn it into a - list of TextGrids. The resulting class can be iterated over to give - one TextGrid at a time, or the write(prefix='') class method can be - used to write all the resulting TextGrids into separate files. - - Unlike other classes, this is always initialized from a text file. - """ - - def __init__(self, f, samplerate=10e6): - self.grids = [] - self.read(f, samplerate) - - def __iter__(self): - return iter(self.grids) - - def __str__(self): - return "".format(len(self)) - - def __repr__(self): - return "MLF({0})".format(self.grids) - - def __len__(self): - return len(self.grids) - - def __getitem__(self, i): - """ - Return the ith TextGrid - """ - return self.grids[i] - - def read(self, f, samplerate, round_digits=DEFAULT_MLF_PRECISION): - source = open(f, "r") # HTK returns ostensible ASCII - - source.readline() # header - while True: # loop over text - name = re.match(r"\"(.*)\"", source.readline().rstrip()) - if name: - name = name.groups()[0] - grid = TextGrid(name) - phon = IntervalTier(name="phones") - word = IntervalTier(name="words") - wmrk = "" - wsrt = 0.0 - wend = 0.0 - while 1: # loop over the lines in each grid - line = source.readline().rstrip().split() - if len(line) == 4: # word on this baby - pmin = round(float(line[0]) / samplerate, round_digits) - pmax = round(float(line[1]) / samplerate, round_digits) - if pmin == pmax: - raise ValueError("null duration interval") - phon.add(pmin, pmax, line[2]) - if wmrk: - word.add(wsrt, wend, wmrk) - wmrk = decode(line[3]) - wsrt = pmin - wend = pmax - elif len(line) == 3: # just phone - pmin = round(float(line[0]) / samplerate, round_digits) - pmax = round(float(line[1]) / samplerate, round_digits) - if line[2] == "sp" and pmin != 
pmax: - if wmrk: - word.add(wsrt, wend, wmrk) - wmrk = decode(line[2]) - wsrt = pmin - wend = pmax - elif pmin != pmax: - phon.add(pmin, pmax, line[2]) - wend = pmax - else: # it's a period - word.add(wsrt, wend, wmrk) - self.grids.append(grid) - break - grid.append(phon) - grid.append(word) - else: - source.close() - break - - def write(self, prefix=""): - """ - Write the current state into Praat-formatted TextGrids. The - filenames that the output is stored in are taken from the HTK - label files. If a string argument is given, then the any prefix in - the name of the label file (e.g., "mfc/myLabFile.lab"), it is - truncated and files are written to the directory given by the - prefix. An IOError will result if the folder does not exist. - - The number of TextGrids is returned. - """ - for grid in self.grids: - (junk, tail) = os.path.split(grid.name) - (root, junk) = os.path.splitext(tail) - my_path = os.path.join(prefix, root + ".TextGrid") - grid.write(codecs.open(my_path, "w", "UTF-8")) - return len(self.grids) diff --git a/src/crowsetta/data/data.py b/src/crowsetta/data/data.py index 72c459b9..2260033d 100644 --- a/src/crowsetta/data/data.py +++ b/src/crowsetta/data/data.py @@ -71,7 +71,7 @@ class ExampleAnnotFile: "simple-seq": FormatPathArgs( package="crowsetta.data.simple", resource="bl26lb16_190412_0721.20144_annotations.csv" ), - "textgrid": FormatPathArgs(package="crowsetta.data.textgrid", resource="1179.TextGrid"), + "textgrid": FormatPathArgs(package="crowsetta.data.textgrid", resource="AVO-maea-basic.TextGrid"), "timit": FormatPathArgs(package="crowsetta.data.timit", resource="sa1.phn"), } diff --git a/src/crowsetta/data/textgrid/1179.TextGrid b/src/crowsetta/data/textgrid/1179.TextGrid deleted file mode 100644 index b7a92db0..00000000 --- a/src/crowsetta/data/textgrid/1179.TextGrid +++ /dev/null @@ -1,578 +0,0 @@ -File type = "ooTextFile" -Object class = "TextGrid" - -xmin = 0 -xmax = 320.40333333333336 -tiers? 
-size = 1 -item []: - item [1]: - class = "IntervalTier" - name = "Cavi1179" - xmin = 0 - xmax = 320.40333333333336 - intervals: size = 141 - intervals [1]: - xmin = 0 - xmax = 2.0744434001240704 - text = "" - intervals [2]: - xmin = 2.0744434001240704 - xmax = 2.451854126334473 - text = "ca" - intervals [3]: - xmin = 2.451854126334473 - xmax = 5.575679484347052 - text = "" - intervals [4]: - xmin = 5.575679484347052 - xmax = 5.901152954656941 - text = "ck" - intervals [5]: - xmin = 5.901152954656941 - xmax = 16.273286825711253 - text = "" - intervals [6]: - xmin = 16.273286825711253 - xmax = 16.622997682108046 - text = "ca" - intervals [7]: - xmin = 16.622997682108046 - xmax = 19.05778877850714 - text = "" - intervals [8]: - xmin = 19.05778877850714 - xmax = 19.43519950471754 - text = "cl" - intervals [9]: - xmin = 19.43519950471754 - xmax = 21.208656209316754 - text = "" - intervals [10]: - xmin = 21.208656209316754 - xmax = 21.53759216335334 - text = "ck" - intervals [11]: - xmin = 21.53759216335334 - xmax = 23.92390848757862 - text = "" - intervals [12]: - xmin = 23.92390848757862 - xmax = 24.290931762608917 - text = "cl" - intervals [13]: - xmin = 24.290931762608917 - xmax = 27.331657511180676 - text = "" - intervals [14]: - xmin = 27.331657511180676 - xmax = 27.67444340012407 - text = "ck" - intervals [15]: - xmin = 27.67444340012407 - xmax = 30.310058552671816 - text = "" - intervals [16]: - xmin = 30.310058552671816 - xmax = 30.69439424633562 - text = "cl" - intervals [17]: - xmin = 30.69439424633562 - xmax = 33.18112259863523 - text = "" - intervals [18]: - xmin = 33.18112259863523 - xmax = 33.513521036398515 - text = "ck" - intervals [19]: - xmin = 33.513521036398515 - xmax = 36.46768469180275 - text = "" - intervals [20]: - xmin = 36.46768469180275 - xmax = 36.85202038546656 - text = "ca" - intervals [21]: - xmin = 36.85202038546656 - xmax = 41.2846645921721 - text = "" - intervals [22]: - xmin = 41.2846645921721 - xmax = 41.599750611301886 - text = "ck" 
- intervals [23]: - xmin = 41.599750611301886 - xmax = 44.24575321502974 - text = "" - intervals [24]: - xmin = 44.24575321502974 - xmax = 44.609314006333335 - text = "ca" - intervals [25]: - xmin = 44.609314006333335 - xmax = 46.798268758136665 - text = "" - intervals [26]: - xmin = 46.798268758136665 - xmax = 47.12720471217325 - text = "ck" - intervals [27]: - xmin = 47.12720471217325 - xmax = 49.35770926869699 - text = "" - intervals [28]: - xmin = 49.35770926869699 - xmax = 49.71434509254719 - text = "cl" - intervals [29]: - xmin = 49.71434509254719 - xmax = 52.917807576273894 - text = "" - intervals [30]: - xmin = 52.917807576273894 - xmax = 53.25020601403718 - text = "ck" - intervals [31]: - xmin = 53.25020601403718 - xmax = 55.44608573329391 - text = "" - intervals [32]: - xmin = 55.44608573329391 - xmax = 55.80964652459751 - text = "cl" - intervals [33]: - xmin = 55.80964652459751 - xmax = 57.866361858257875 - text = "" - intervals [34]: - xmin = 57.866361858257875 - xmax = 58.236847617014874 - text = "ca" - intervals [35]: - xmin = 58.236847617014874 - xmax = 60.484664592172116 - text = "" - intervals [36]: - xmin = 60.484664592172116 - xmax = 60.820525513662105 - text = "ck" - intervals [37]: - xmin = 60.820525513662105 - xmax = 63.61541491763809 - text = "" - intervals [38]: - xmin = 63.61541491763809 - xmax = 63.99975061130189 - text = "cl" - intervals [39]: - xmin = 63.99975061130189 - xmax = 67.34171244409664 - text = "" - intervals [40]: - xmin = 67.34171244409664 - xmax = 67.66026094695313 - text = "ck" - intervals [41]: - xmin = 67.66026094695313 - xmax = 69.29521830248432 - text = "" - intervals [42]: - xmin = 69.29521830248432 - xmax = 69.66916654496801 - text = "cl" - intervals [43]: - xmin = 69.66916654496801 - xmax = 72.14896992981421 - text = "" - intervals [44]: - xmin = 72.14896992981421 - xmax = 72.4675184326707 - text = "ck" - intervals [45]: - xmin = 72.4675184326707 - xmax = 75.03388391068444 - text = "" - intervals [46]: - xmin = 
75.03388391068444 - xmax = 75.40783215316814 - text = "ca" - intervals [47]: - xmin = 75.40783215316814 - xmax = 77.84608573329393 - text = "" - intervals [48]: - xmin = 77.84608573329393 - xmax = 78.17848417105722 - text = "ck" - intervals [49]: - xmin = 78.17848417105722 - xmax = 81.74550744608752 - text = "" - intervals [50]: - xmin = 81.74550744608752 - xmax = 82.06751843267071 - text = "ck" - intervals [51]: - xmin = 82.06751843267071 - xmax = 87.03058768608983 - text = "" - intervals [52]: - xmin = 87.03058768608983 - xmax = 87.40107344484683 - text = "ca" - intervals [53]: - xmin = 87.40107344484683 - xmax = 90.98194665478394 - text = "" - intervals [54]: - xmin = 90.98194665478394 - xmax = 91.36628234844774 - text = "cl" - intervals [55]: - xmin = 91.36628234844774 - xmax = 93.52061226298406 - text = "" - intervals [56]: - xmin = 93.52061226298406 - xmax = 93.88071057056096 - text = "ca" - intervals [57]: - xmin = 93.88071057056096 - xmax = 95.17567948434711 - text = "" - intervals [58]: - xmin = 95.17567948434711 - xmax = 95.501152954657 - text = "ck" - intervals [59]: - xmin = 95.501152954657 - xmax = 98.15061804211156 - text = "" - intervals [60]: - xmin = 98.15061804211156 - xmax = 98.52456628459525 - text = "cl" - intervals [61]: - xmin = 98.52456628459525 - xmax = 101.38524287937855 - text = "" - intervals [62]: - xmin = 101.38524287937855 - xmax = 101.76265360558895 - text = "ca" - intervals [63]: - xmin = 101.76265360558895 - xmax = 160.13478747664317 - text = "" - intervals [64]: - xmin = 160.13478747664317 - xmax = 160.49834826794677 - text = "ci" - intervals [65]: - xmin = 160.49834826794677 - xmax = 162.43107922397422 - text = "" - intervals [66]: - xmin = 162.43107922397422 - xmax = 162.7877150478244 - text = "cp" - intervals [67]: - xmin = 162.7877150478244 - xmax = 166.06388968981182 - text = "" - intervals [68]: - xmin = 166.06388968981182 - xmax = 166.43437544856883 - text = "ci" - intervals [69]: - xmin = 166.43437544856883 - xmax = 
167.95160835739213 - text = "" - intervals [70]: - xmin = 167.95160835739213 - xmax = 168.29785673006222 - text = "ch" - intervals [71]: - xmin = 168.29785673006222 - xmax = 170.32753723043731 - text = "" - intervals [72]: - xmin = 170.32753723043731 - xmax = 170.69456050546762 - text = "da" - intervals [73]: - xmin = 170.69456050546762 - xmax = 172.4465772711783 - text = "" - intervals [74]: - xmin = 172.4465772711783 - xmax = 172.820525513662 - text = "ci" - intervals [75]: - xmin = 172.820525513662 - xmax = 174.47279528782664 - text = "" - intervals [76]: - xmin = 174.47279528782664 - xmax = 174.82250614422344 - text = "ch" - intervals [77]: - xmin = 174.82250614422344 - xmax = 175.7746891690662 - text = "" - intervals [78]: - xmin = 175.7746891690662 - xmax = 176.1451749278232 - text = "da" - intervals [79]: - xmin = 176.1451749278232 - xmax = 177.73858247863396 - text = "" - intervals [80]: - xmin = 177.73858247863396 - xmax = 178.09868078621085 - text = "ci" - intervals [81]: - xmin = 178.09868078621085 - xmax = 179.0058515226065 - text = "" - intervals [82]: - xmin = 179.0058515226065 - xmax = 179.3694123139101 - text = "cp" - intervals [83]: - xmin = 179.3694123139101 - xmax = 180.78623319465913 - text = "" - intervals [84]: - xmin = 180.78623319465913 - xmax = 182.41360054620858 - text = "Chatter" - intervals [85]: - xmin = 182.41360054620858 - xmax = 193.05993566820052 - text = "" - intervals [86]: - xmin = 193.05993566820052 - xmax = 193.42003397577741 - text = "cp" - intervals [87]: - xmin = 193.42003397577741 - xmax = 196.74814587366535 - text = "" - intervals [88]: - xmin = 196.74814587366535 - xmax = 197.09439424633544 - text = "ch" - intervals [89]: - xmin = 197.09439424633544 - xmax = 201.225802368818 - text = "" - intervals [90]: - xmin = 201.225802368818 - xmax = 201.57205074148808 - text = "ch" - intervals [91]: - xmin = 201.57205074148808 - xmax = 203.95836706571336 - text = "" - intervals [92]: - xmin = 203.95836706571336 - xmax = 
204.32192785701696 - text = "ch" - intervals [93]: - xmin = 204.32192785701696 - xmax = 206.62514457180137 - text = "" - intervals [94]: - xmin = 206.62514457180137 - xmax = 206.99216784683168 - text = "ch" - intervals [95]: - xmin = 206.99216784683168 - xmax = 210.2475675864589 - text = "" - intervals [96]: - xmin = 210.2475675864589 - xmax = 210.6111283777625 - text = "ch" - intervals [97]: - xmin = 210.6111283777625 - xmax = 213.2398185628568 - text = "" - intervals [98]: - xmin = 213.2398185628568 - xmax = 213.596454386707 - text = "ch" - intervals [99]: - xmin = 213.596454386707 - xmax = 219.927204712173 - text = "" - intervals [100]: - xmin = 219.927204712173 - xmax = 220.29769047093 - text = "ci" - intervals [101]: - xmin = 220.29769047093 - xmax = 226.22333020037192 - text = "" - intervals [102]: - xmin = 226.22333020037192 - xmax = 226.569578573042 - text = "ch" - intervals [103]: - xmin = 226.569578573042 - xmax = 229.23289359540337 - text = "" - intervals [104]: - xmin = 229.23289359540337 - xmax = 229.60684183788706 - text = "da" - intervals [105]: - xmin = 229.60684183788706 - xmax = 231.5361103101878 - text = "" - intervals [106]: - xmin = 231.5361103101878 - xmax = 231.9100585526715 - text = "ci" - intervals [107]: - xmin = 231.9100585526715 - xmax = 234.47988651441193 - text = "" - intervals [108]: - xmin = 234.47988651441193 - xmax = 234.8226724033553 - text = "ch" - intervals [109]: - xmin = 234.8226724033553 - xmax = 236.5573767504325 - text = "" - intervals [110]: - xmin = 236.5573767504325 - xmax = 236.82745048111516 - text = "db" - intervals [111]: - xmin = 236.82745048111516 - xmax = 239.0648800050923 - text = "" - intervals [112]: - xmin = 239.0648800050923 - xmax = 239.4111283777624 - text = "ch" - intervals [113]: - xmin = 239.4111283777624 - xmax = 242.09868078621065 - text = "" - intervals [114]: - xmin = 242.09868078621065 - xmax = 242.47262902869434 - text = "ci" - intervals [115]: - xmin = 242.47262902869434 - xmax = 
246.20585152260628 - text = "" - intervals [116]: - xmin = 246.20585152260628 - xmax = 246.55556237900308 - text = "ch" - intervals [117]: - xmin = 246.55556237900308 - xmax = 249.3469892992523 - text = "" - intervals [118]: - xmin = 249.3469892992523 - xmax = 249.7105500905559 - text = "ci" - intervals [119]: - xmin = 249.7105500905559 - xmax = 252.14534118695502 - text = "" - intervals [120]: - xmin = 252.14534118695502 - xmax = 252.50890197825862 - text = "cp" - intervals [121]: - xmin = 252.50890197825862 - xmax = 261.8076658940356 - text = "" - intervals [122]: - xmin = 261.8076658940356 - xmax = 262.1227519131654 - text = "ae" - intervals [123]: - xmin = 262.1227519131654 - xmax = 264.6060177817383 - text = "" - intervals [124]: - xmin = 264.6060177817383 - xmax = 264.8899414473277 - text = "ce" - intervals [125]: - xmin = 264.8899414473277 - xmax = 266.1509505603752 - text = "" - intervals [126]: - xmin = 266.1509505603752 - xmax = 266.466036579505 - text = "ae" - intervals [127]: - xmin = 266.466036579505 - xmax = 279.79925907341686 - text = "" - intervals [128]: - xmin = 279.79925907341686 - xmax = 280.1212700600001 - text = "en" - intervals [129]: - xmin = 280.1212700600001 - xmax = 280.9488036706816 - text = "" - intervals [130]: - xmin = 280.9488036706816 - xmax = 281.26042720608467 - text = "ds" - intervals [131]: - xmin = 281.26042720608467 - xmax = 282.16133801155524 - text = "" - intervals [132]: - xmin = 282.16133801155524 - xmax = 282.41756180733114 - text = "dn" - intervals [133]: - xmin = 282.41756180733114 - xmax = 284.12802876832137 - text = "" - intervals [134]: - xmin = 284.12802876832137 - xmax = 284.699338583227 - text = "ej" - intervals [135]: - xmin = 284.699338583227 - xmax = 287.1306671958995 - text = "" - intervals [136]: - xmin = 287.1306671958995 - xmax = 287.4976904709298 - text = "di" - intervals [137]: - xmin = 287.4976904709298 - xmax = 288.47477591838776 - text = "" - intervals [138]: - xmin = 288.47477591838776 - xmax = 
288.8487241608715 - text = "cn" - intervals [139]: - xmin = 288.8487241608715 - xmax = 291.43866198844376 - text = "" - intervals [140]: - xmin = 291.43866198844376 - xmax = 291.71566068657984 - text = "co" - intervals [141]: - xmin = 291.71566068657984 - xmax = 320.40333333333336 - text = "" diff --git a/src/crowsetta/data/textgrid/AVO-maea-basic.TextGrid b/src/crowsetta/data/textgrid/AVO-maea-basic.TextGrid new file mode 100644 index 00000000..2ce9a2ff --- /dev/null +++ b/src/crowsetta/data/textgrid/AVO-maea-basic.TextGrid @@ -0,0 +1,97 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.4360509767904546 +tiers? +size = 3 +item []: + item [1]: + class = "TextTier" + name = "Tones" + xmin = 0 + xmax = 2.4360509767904546 + points: size = 5 + points [1]: + number = 0.4351780385722748 + mark = "L+H*" + points [2]: + number = 0.6407379583230295 + mark = "H-" + points [3]: + number = 1.1328819591093477 + mark = "L+H*" + points [4]: + number = 1.244041566788134 + mark = "H-" + points [5]: + number = 2.350907447504575 + mark = "L+!H-" + item [2]: + class = "IntervalTier" + name = "Samoan" + xmin = 0 + xmax = 2.4360509767904546 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.051451575248407266 + text = "'o" + intervals [2]: + xmin = 0.051451575248407266 + xmax = 0.6407379583230295 + text = "Sione" + intervals [3]: + xmin = 0.6407379583230295 + xmax = 0.7544662733943284 + text = "na" + intervals [4]: + xmin = 0.7544662733943284 + xmax = 1.244041566788134 + text = "tosoa" + intervals [5]: + xmin = 1.244041566788134 + xmax = 1.3481058803597676 + text = "le" + intervals [6]: + xmin = 1.3481058803597676 + xmax = 1.70760078178904 + text = "maea" + intervals [7]: + xmin = 1.70760078178904 + xmax = 2.4360509767904546 + text = "analeila\-^" + item [3]: + class = "IntervalTier" + name = "Gloss" + xmin = 0 + xmax = 2.4360509767904546 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.051451575248407266 + text = "PRES" + intervals [2]: 
+ xmin = 0.051451575248407266 + xmax = 0.6407379583230295 + text = "Sione" + intervals [3]: + xmin = 0.6407379583230295 + xmax = 0.7544662733943284 + text = "PAST" + intervals [4]: + xmin = 0.7544662733943284 + xmax = 1.244041566788134 + text = "pull-ES" + intervals [5]: + xmin = 1.244041566788134 + xmax = 1.3481058803597676 + text = "DET" + intervals [6]: + xmin = 1.3481058803597676 + xmax = 1.70760078178904 + text = "rope" + intervals [7]: + xmin = 1.70760078178904 + xmax = 2.4360509767904546 + text = "earlier" diff --git a/src/crowsetta/data/textgrid/citation.txt b/src/crowsetta/data/textgrid/citation.txt index ef3cb863..de884475 100644 --- a/src/crowsetta/data/textgrid/citation.txt +++ b/src/crowsetta/data/textgrid/citation.txt @@ -1,4 +1,6 @@ -Hedley, Richard (2016): Data used in PLoS One article -"Complexity, Predictability and Time Homogeneity of Syntax -in the Songs of Cassin’s Vireo (Vireo cassini)". -figshare. Dataset. https://doi.org/10.6084/m9.figshare.3081814.v1 +Sound files and Praat TextGrids accompanying +Sasha Calhoun, Corinne Seals, Toaga Alefosio and Niusila Faamanatu-Eteuati, + 'Phrasal Prosody of Heritage Speakers of Samoan in Aotearoa New Zealand', + to appear in Rajiv Rao (ed), + The Phonetics and Phonology of Heritage Languages, Cambridge University Press. + https://osf.io/bxasd/ diff --git a/src/crowsetta/formats/seq/textgrid.py b/src/crowsetta/formats/seq/textgrid.py deleted file mode 100644 index be5168ce..00000000 --- a/src/crowsetta/formats/seq/textgrid.py +++ /dev/null @@ -1,194 +0,0 @@ -"""Module with functions for working with Praat TextGrid annotation files - -Uses the Python library ``textgrid``: -https://github.com/kylebgorman/textgrid -A version is distributed with this code (../textgrid) under MIT license. 
-https://github.com/kylebgorman/textgrid/blob/master/LICENSE -""" -import pathlib -from typing import ClassVar, Optional - -import attr -import numpy as np - -import crowsetta -from crowsetta._vendor import textgrid -from crowsetta.typing import PathLike - - -@crowsetta.interface.SeqLike.register -@attr.define -class TextGrid: - """Class that represents annotations - from TextGrid annotation files - produced by the application Praat. - - See ``Notes`` below for more detail - for more details on the types of - TextGrid annotations that this class - can work with. - - Attributes - ---------- - name: str - Shorthand name for annotation format: ``'textgrid'``. - ext: str - Extension of files in annotation format: ``'.TextGrid'``. - textgrid : textgrid.TextGrid - object that contains annotations from the a '.TextGrid' file. - annot_path : str, pathlib.Path - Path to TextGrid file from which annotations were loaded. - audio_path : str, pathlib.Path - Path to audio file that ``annot_path`` annotates. - - Notes - ----- - Uses the Python library textgrid - https://github.com/kylebgorman/textgrid - - A version is distributed with this code (../textgrid) under MIT license - https://github.com/kylebgorman/textgrid/blob/master/LICENSE - - This class will load any file that the :mod:`~crowsetta._vendor.textgrid` libray can parse, - but it can only convert Praat IntervalTiers to :class:`crowsetta.Sequence` and - :class:`crowsetta.Annotation` instances. - Additionally, it will only convert a single IntervalTier - (that can be specified when calling :meth:`crowsetta.formats.seq.TextGrid.to_seq` - or :meth:`crowsetta.formats.seq.TextGrid.to_annot`). 
- """ - - name: ClassVar[str] = "textgrid" - ext: ClassVar[str] = ".TextGrid" - - textgrid: textgrid.TextGrid - annot_path: pathlib.Path - audio_path: Optional[pathlib.Path] = attr.field(default=None, converter=attr.converters.optional(pathlib.Path)) - - @classmethod - def from_file( - cls, - annot_path: PathLike, - audio_path: Optional[PathLike] = None, - ) -> "Self": # noqa: F821 - """Load annotations from a TextGrid file - in the format used by Praat. - - Parameters - ---------- - annot_path: str, pathlib.Path - Path to a TextGrid file in the format used by Praat. - audio_path : str. pathlib.Path - Path to audio file that the ``annot_path`` annotates. - Optional, default is None. - - Examples - -------- - >>> example = crowsetta.data.get('textgrid') - >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path) - """ - annot_path = pathlib.Path(annot_path) - crowsetta.validation.validate_ext(annot_path, extension=cls.ext) - - tg = textgrid.TextGrid.fromFile(annot_path) - - return cls(textgrid=tg, annot_path=annot_path, audio_path=audio_path) - - def to_seq(self, interval_tier: int = 0, round_times: bool = True, decimals: int = 3) -> crowsetta.Sequence: - """Convert an IntervalTier from this TextGrid annotation - into a :class:`crowsetta.Sequence`. - - Currently, there is only support for converting a single IntervalTier - to a single :class:`~crowsetta.Sequence`. - - Parameters - ---------- - interval_tier : int - Index of IntervalTier in TextGrid file from which annotations - should be taken. Default is 0, i.e., the first IntervalTier. - Necessary in cases where files have multiple IntervalTiers. - round_times : bool - If True, round times of onsets and offsets. - Default is True. - decimals : int - Number of decimals places to round floating point numbers to. - Only meaningful if round_times is True. - Default is 3, so that times are rounded to milliseconds. 
- - Returns - ------- - seq : crowsetta.Sequence - - Examples - -------- - >>> example = crowsetta.data.get('textgrid') - >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path) - >>> seq = textgrid.to_seq() - - Notes - ----- - The ``round_times`` and ``decimals`` arguments are provided - to reduce differences across platforms - due to floating point error, e.g. when loading annotation files - and then sending them to a csv file, - the result should be the same on Windows and Linux. - """ - intv_tier = self.textgrid[interval_tier] - if not isinstance(intv_tier, textgrid.IntervalTier): - raise ValueError( - f"Index specified for IntervalTier was {interval_tier}, " - f"but type at that index was {type(intv_tier)}, not an IntervalTier" - ) - - onsets_s = np.asarray([interval.minTime for interval in intv_tier]) - offsets_s = np.asarray([interval.maxTime for interval in intv_tier]) - labels = np.asarray([interval.mark for interval in intv_tier]) - - if round_times: - onsets_s = np.around(onsets_s, decimals=decimals) - offsets_s = np.around(offsets_s, decimals=decimals) - - seq = crowsetta.Sequence.from_keyword(labels=labels, onsets_s=onsets_s, offsets_s=offsets_s) - return seq - - def to_annot(self, interval_tier: int = 0, round_times: bool = True, decimals: int = 3) -> crowsetta.Annotation: - """Convert an IntervalTier from this TextGrid annotation - to a :class:`crowsetta.Annotation`. - - Currently, there is only support for converting a single IntervalTier - to an :class:`~crowsetta.Annotation` with a single :class:`~crowsetta.Sequence`. - - Parameters - ---------- - interval_tier : int - Index of IntervalTier in TextGrid file from which annotations - should be taken. Default is 0, i.e., the first IntervalTier. - Necessary in cases where files have multiple IntervalTiers. - round_times : bool - If True, round times of onsets and offsets. - Default is True. - decimals : int - Number of decimals places to round floating point numbers to. 
- Only meaningful if round_times is True. - Default is 3, so that times are rounded to milliseconds. - - Returns - ------- - annot : crowsetta.Annotation - - Examples - -------- - >>> example = crowsetta.data.get('textgrid') - >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path) - >>> annot = textgrid.to_annot() - - Notes - ----- - The ``round_times`` and ``decimals`` arguments are provided - to reduce differences across platforms - due to floating point error, e.g. when loading annotation files - and then sending them to a csv file, - the result should be the same on Windows and Linux. - """ - seq = self.to_seq(interval_tier=interval_tier, round_times=round_times, decimals=decimals) - - return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, seq=seq) diff --git a/src/crowsetta/formats/seq/textgrid/__init__.py b/src/crowsetta/formats/seq/textgrid/__init__.py new file mode 100644 index 00000000..dd562794 --- /dev/null +++ b/src/crowsetta/formats/seq/textgrid/__init__.py @@ -0,0 +1,3 @@ +from .textgrid import TextGrid + +__all__ = ["TextGrid"] diff --git a/src/crowsetta/formats/seq/textgrid/classes.py b/src/crowsetta/formats/seq/textgrid/classes.py new file mode 100644 index 00000000..e4968730 --- /dev/null +++ b/src/crowsetta/formats/seq/textgrid/classes.py @@ -0,0 +1,179 @@ +"""Data classes used to represent components of TextGrids. +""" +from __future__ import annotations + +import numpy as np +from attrs import define, field + + +def valid_time(instance, attribute, value): + if not value >= 0.0: + raise ValueError(f"{attribute} is a time and must be a non-negative number but was: {value}") + + +@define +class Interval: + """Class representing an interval in an interval tier + from a Praat TextGrid. + + Attributes + ---------- + xmin: float + Start time of interval, in seconds. + xmax: float + End time of interval, in seconds. + text: str + Label for interval. 
+ + See Also + -------- + :class:`~crowsetta.formats.seq.textgrid.classes.IntervalTier`. + """ + + xmin: float = field(validator=valid_time) + xmax: float = field(validator=valid_time) + text: str + + def __attrs_post_init__(self): + if self.xmax < self.xmin: + raise ValueError(f"xmax must be greater than xmin but xmax was {self.xmax} and xmin was {self.xmin}") + + +@define +class IntervalTier: + """Class representing an *interval tier* in a Praat TextGrid. + + As described in the Praat documentation[1]_: + + An interval tier is a connected sequence of labelled intervals, + with boundaries in between. + + Attributes + ---------- + name: str + A name given to the interval tier, e.g. "phonemes". + xmin: float + Start time of interval tier, in seconds. + xmax: float + End time of interval tier, in seconds. + intervals: list + A list of + :class:`~crowsetta.formats.seq.textgrid.classes.Interval` + instances. + + See Also + -------- + :class:`~crowsetta.formats.seq.textgrid.classes.Interval` + + References + ---------- + .. 
[^1]: https://www.fon.hum.uva.nl/praat/manual/TextGrid.html + """ + + name: str + xmin: float = field(validator=valid_time) + xmax: float = field(validator=valid_time) + intervals: list[Interval] + + def __attrs_post_init__(self): + if self.xmax < self.xmin: + raise ValueError(f"xmax must be greater than xmin but xmax was {self.xmax} and xmin was {self.xmin}") + + # sort because (1) we want them in ascending order of xmin and + # (2) we use this to check for overlap + self.intervals = sorted(self.intervals, key=lambda interval: interval.xmin) + + xmax_lt_all_xmin = [] + for ind in range(len(self.intervals) - 1): + xmax_lt_all_xmin.append( + all([self.intervals[ind].xmax <= interval.xmin for interval in self.intervals[ind + 1 :]]) # noqa: E203 + ) + + if not all(xmax_lt_all_xmin): + have_overlap = [(ind, self.intervals[ind]) for ind in np.nonzero(xmax_lt_all_xmin)[0]] + err_str = "" + for has_overlap_ind, interval in have_overlap: + overlaps_with = [ + (overlaps_with_ind, self.intervals[overlaps_with_ind]) + for overlaps_with_ind in range(has_overlap_ind + 1, len(self.intervals) - 1) + if not (self.intervals[has_overlap_ind].xmax <= self.intervals[overlaps_with_ind].xmin) + ] + err_str += ( + f"Interval {has_overlap_ind} with xmin {interval.xmin} and xmax {interval.xmax} overlaps with " + ) + for overlaps_with_ind, interval in overlaps_with: + err_str += f"interval {overlaps_with_ind} with xmin {interval.xmin} and xmax {interval.xmax}, " + err_str = err_str[:-2] + ".\n" + + raise ValueError( + "TextGrids with overlapping intervals are not valid.\n" + "Found the following overlapping intervals:\n" + f"{err_str}" + ) + + def __iter__(self): + return iter(self.intervals) + + +@define +class Point: + """Class representing a point in a point tier + from a Praat TextGrid. + + Attributes + ---------- + number: float + Time of point, in seconds. + mark: str + Label for point. + + See Also + -------- + :class:`~crowsetta.formats.seq.textgrid.classes.PointTier`. 
@define
class PointTier:
    """Class representing a *point tier* in a Praat TextGrid.

    As described in the Praat documentation [1]_:

       A point tier is a sequence of labelled points.

    Iterating over a ``PointTier`` iterates over its ``points``.

    Attributes
    ----------
    name: str
        A name given to the point tier, e.g. "stimulus onset".
    xmin: float
        Start time of the point tier, in seconds.
    xmax: float
        End time of the point tier, in seconds.
    points: list
        A list of
        :class:`~crowsetta.formats.seq.textgrid.classes.Point`
        instances.

    Raises
    ------
    ValueError
        If ``xmax`` is less than ``xmin``.

    See Also
    --------
    :class:`~crowsetta.formats.seq.textgrid.classes.Point`

    References
    ----------
    .. [1] https://www.fon.hum.uva.nl/praat/manual/TextGrid.html
    """

    name: str
    xmin: float = field(validator=valid_time)
    xmax: float = field(validator=valid_time)
    points: list[Point]

    def __attrs_post_init__(self):
        # the tier must not end before it starts
        if self.xmin > self.xmax:
            raise ValueError(f"xmax must be greater than xmin but xmax was {self.xmax} and xmin was {self.xmin}")

    def __iter__(self):
        return iter(self.points)
# A number at the end of a line: optional sign, decimal digits,
# optional exponent. The lookbehind requires the number to start the line
# or to follow whitespace / "=", so that e.g. the "05" in "1.5e-05"
# can never be picked up on its own.
FLOAT_PAT: Final = re.compile(
    r"(?<![^\s=])([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*$",
    flags=re.UNICODE,
)
# A run of digits at the end of a line, e.g. the 7 in "intervals: size = 7"
INT_PAT: Final = re.compile(r"([\d]+)\s*$", flags=re.UNICODE)
# A double-quoted string at the end of a line; group 1 is the text between
# the first and the last double quote on the line
STR_PAT: Final = re.compile(r'"(.*)"\s*$', flags=re.UNICODE)


def search_next_line(fp: TextIO, pat: re.Pattern) -> str:
    """Get next line from a text stream
    and search it for a regex pattern.

    This is a helper function used by
    :func:`~crowsetta.formats.seq.textgrid.parse.get_float_from_next_line`,
    :func:`~crowsetta.formats.seq.textgrid.parse.get_int_from_next_line`,
    and :func:`~crowsetta.formats.seq.textgrid.parse.get_str_from_next_line`.

    Parameters
    ----------
    fp : TextIO
        Python text stream from an open TextGrid file.
    pat : re.Pattern
        A compiled regex pattern with at least one group.

    Returns
    -------
    match : str
        The string captured by the first group of ``pat``.

    Raises
    ------
    ValueError
        If the stream is exhausted, or the next line
        does not match ``pat``.
    """
    line = fp.readline()
    match = pat.search(line)
    if match is None:
        if line == "":
            raise ValueError(f"Reached end of TextGrid file while looking for pattern {pat.pattern!r}")
        raise ValueError(f"Unable to parse line {line!r} of TextGrid file: it does not match pattern {pat.pattern!r}")
    return match.group(1)


def get_float_from_next_line(fp: TextIO) -> float:
    """Get next line from a text stream,
    search for a string that matches a float value,
    and return as a float.

    Signed values and exponent notation
    (e.g. ``-0``, ``1.5e-05``) are supported.

    Helper function used by
    :func:`~crowsetta.formats.seq.textgrid.parse.parse_fp`,
    e.g., to parse ``xmin`` and ``xmax`` times of
    ``IntervalTier``s.

    Parameters
    ----------
    fp : TextIO
        Python text stream from an open TextGrid file.

    Returns
    -------
    val : float

    Raises
    ------
    ValueError
        If the next line does not end with a number.
    """
    return float(search_next_line(fp, pat=FLOAT_PAT))


def get_int_from_next_line(fp: TextIO) -> int:
    """Get next line from a text stream,
    search for a string that matches an int value,
    and return as an int.

    Helper function used by
    :func:`~crowsetta.formats.seq.textgrid.parse.parse_fp`,
    e.g., to parse the number of intervals in
    an interval tier.

    Parameters
    ----------
    fp : TextIO
        Python text stream from an open TextGrid file.

    Returns
    -------
    val : int

    Raises
    ------
    ValueError
        If the next line does not end with an integer.
    """
    return int(search_next_line(fp, pat=INT_PAT))
def get_str_from_next_line(fp: TextIO) -> str:
    """Get next line from a text stream,
    search for a string as Praat writes them
    (with double quoting),
    and then return just that string.

    Praat writes a double quote inside a text as two double quotes;
    these are converted back to a single double quote.

    Helper function used by
    :func:`~crowsetta.formats.seq.textgrid.parse.parse_fp`,
    e.g., to parse the ``text`` of ``Interval``s
    in ``IntervalTier``s or the ``mark`` of ``Point``s
    in ``PointTier``s.

    Parameters
    ----------
    fp : TextIO
        Python text stream from an open TextGrid file.

    Returns
    -------
    val : str
    """
    # the match is already a str, so no cast is needed;
    # only undo Praat's escaping of embedded double quotes
    return search_next_line(fp, pat=STR_PAT).replace('""', '"')


# values of the "class" field that Praat writes for each tier
INTERVAL_TIER: Final = "IntervalTier"
POINT_TIER: Final = "TextTier"


def parse_fp(fp: TextIO, keep_empty: bool = False) -> dict:
    """Parse a TextGrid file passed in as an open
    text stream, converting it to a :class:`dict`.

    Helper function called by
    :func:`~crowsetta.formats.seq.textgrid.parse.parse`.

    Parameters
    ----------
    fp : TextIO
        Python text stream from an open TextGrid file.
    keep_empty : bool
        If True, keep intervals in
        interval tiers that have empty labels
        (i.e., the empty string "").
        Default is False.

    Returns
    -------
    tg : dict
        A parsed TextGrid as a :class:`dict`
        with keys ``'xmin'``, ``'xmax'``, and ``'tiers'``.

    Raises
    ------
    ValueError
        If a tier has a class other than
        ``"IntervalTier"`` or ``"TextTier"``.
    """
    # Skip the two header lines and the empty line that follows them
    for _ in range(3):
        fp.readline()

    xmin_tg = get_float_from_next_line(fp)
    xmax_tg = get_float_from_next_line(fp)

    # We don't use the next line except to determine the format:
    # in the full format it starts with "tiers?",
    # in the short format that label is absent.
    exists_line = fp.readline()
    is_short = not exists_line.lstrip().startswith("tiers?")

    n_tier = get_int_from_next_line(fp)
    if not is_short:
        fp.readline()  # skip item []:

    tiers = []
    for _ in range(n_tier):
        if not is_short:
            fp.readline()  # skip item [\d]: (where \d is some number)
        tier_type = get_str_from_next_line(fp)
        if tier_type not in (INTERVAL_TIER, POINT_TIER):
            raise ValueError(
                f"Unrecognized tier class '{tier_type}' in TextGrid file, "
                f"expected '{INTERVAL_TIER}' or '{POINT_TIER}'."
            )
        tier_name = get_str_from_next_line(fp)
        xmin_tier = get_float_from_next_line(fp)
        xmax_tier = get_float_from_next_line(fp)

        n_entries = get_int_from_next_line(fp)
        entries = []  # intervals or points depending on tier type
        for _ in range(n_entries):
            if not is_short:
                fp.readline()  # skip intervals [\d]: or points [\d]:
            if tier_type == INTERVAL_TIER:
                xmin = get_float_from_next_line(fp)
                xmax = get_float_from_next_line(fp)
                text = get_str_from_next_line(fp)
                if text == "" and not keep_empty:
                    continue
                entries.append(Interval(xmin=xmin, xmax=xmax, text=text))
            else:
                number = get_float_from_next_line(fp)
                mark = get_str_from_next_line(fp)
                entries.append(Point(number=number, mark=mark))

        if tier_type == INTERVAL_TIER:
            tier = IntervalTier(name=tier_name, xmin=xmin_tier, xmax=xmax_tier, intervals=entries)
        else:
            tier = PointTier(name=tier_name, xmin=xmin_tier, xmax=xmax_tier, points=entries)
        tiers.append(tier)

    return {
        "xmin": xmin_tg,
        "xmax": xmax_tg,
        "tiers": tiers,
    }


def parse(textgrid_path: str | pathlib.Path, keep_empty: bool = False) -> dict:
    """Parse a TextGrid file, loading it into a :class:`dict`.

    This function is used by
    :meth:`crowsetta.formats.seq.TextGrid.from_file`
    to load and parse the TextGrid file passed in
    as the ``annot_path`` argument.

    The file is first read as UTF-16; if that raises
    a :class:`UnicodeError`, it is read again as UTF-8.

    Parameters
    ----------
    textgrid_path : str, pathlib.Path
        The path to a TextGrid file.
    keep_empty : bool
        If True, keep intervals in
        interval tiers that have empty labels
        (i.e., the empty string "").
        Default is False.

    Returns
    -------
    textgrid_raw : dict
        A dict with keys 'xmin', 'xmax', and 'tiers'.
    """
    textgrid_path = pathlib.Path(textgrid_path)
    try:
        with textgrid_path.open("r", encoding="utf-16") as fp:
            return parse_fp(fp, keep_empty)
    except UnicodeError:  # UnicodeDecodeError is a subclass of UnicodeError
        with textgrid_path.open("r", encoding="utf-8") as fp:
            return parse_fp(fp, keep_empty)
@crowsetta.interface.SeqLike.register
@attr.define
class TextGrid:
    """Class that represents annotations
    from TextGrid [1]_ files
    produced by the application Praat [2]_.

    Attributes
    ----------
    name: str
        Shorthand name for annotation format: ``'textgrid'``.
    ext: str
        Extension of files in annotation format: ``'.TextGrid'``.
    xmin: float
        Start time in seconds of this TextGrid.
    xmax: float
        End time in seconds of this TextGrid.
    tiers: list
        The tiers in this TextGrid,
        a list of IntervalTier and/or PointTier instances.
    annot_path : str, pathlib.Path
        The path to the TextGrid file from which annotations were loaded.
    audio_path : str, pathlib.Path
        The path to the audio file that ``annot_path`` annotates.
        Optional, default is None.

    Examples
    --------
    Loading the example textgrid

    >>> example = crowsetta.data.get('textgrid')
    >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path)
    >>> print(textgrid)
    TextGrid(tiers=[PointTier(nam...ark='L+!H-')]), IntervalTier(...aleila\\-^')]), IntervalTier(...t='earlier')])], xmin=0.0, xmax=2.4360509767904546, annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc2/textgrid/AVO-maea-basic.TextGrid'), audio_path=None)  # noqa: E501

    Determining the number of tiers in the textgrid

    >>> len(textgrid)
    3

    Getting the names of the tiers in the textgrid

    >>> textgrid.tier_names
    ['Tones', 'Samoan', 'Gloss']

    Getting a tier from the TextGrid by name

    >>> textgrid['Gloss']
    IntervalTier(name='Gloss', xmin=0.0, xmax=2.4360509767904546, intervals=[Interval(xmin=0.0, xmax=0.051451575248407266, text='PRES'), Interval(xmin=0.051451575248407266, xmax=0.6407379583230295, text='Sione'), Interval(xmin=0.6407379583230295, xmax=0.7544662733943284, text='PAST'), Interval(xmin=0.7544662733943284, xmax=1.244041566788134, text='pull-ES'), Interval(xmin=1.244041566788134, xmax=1.3481058803597676, text='DET'), Interval(xmin=1.3481058803597676, xmax=1.70760078178904, text='rope'), Interval(xmin=1.70760078178904, xmax=2.4360509767904546, text='earlier')])  # noqa: E501

    Getting a tier from the TextGrid by index

    >>> textgrid[2] is textgrid['Gloss']  # same tier we just got by name
    True

    Calling the :meth:`~crowsetta.formats.seq.TextGrid.to_seq` method
    with no arguments will convert all the
    :class:`~crowsetta.formats.seq.textgrid.classes.IntervalTier` instances to
    :class:`~crowsetta.Sequence` instances,
    in the order they appear in the TextGrid.

    >>> seqs = textgrid.to_seq()
    >>> len(seqs)  # 'Samoan' and 'Gloss'; the point tier 'Tones' is skipped
    2

    Call the :meth:`~crowsetta.formats.seq.TextGrid.to_seq` method
    with a ``tier`` argument to convert a specific
    :class:`~crowsetta.formats.seq.textgrid.classes.IntervalTier` to a
    single :class:`~crowsetta.Sequence`.

    >>> gloss_seq = textgrid.to_seq(tier=2)

    The :meth:`~crowsetta.formats.seq.TextGrid.to_seq`
    argument ``tier`` has the same semantics as the
    item access for this class; that is, you can pass in an int
    or the name of the tier as a string.

    >>> tier1 = textgrid.to_seq(tier=2)
    >>> tier2 = textgrid.to_seq(tier="Gloss")
    >>> tier1 == tier2
    True

    Notes
    -----

    Formats
    =======
    This class can load TextGrid files
    saved by Praat in the text format,
    either the default format
    or the "short" format,
    as described in the specification [1]_.
    The class can load either UTF-8 or UTF-16 text files.
    It should detect both the encoding (UTF-8 or UTF-16)
    and the format (default or "short") automatically.

    The class does not currently parse binary TextGrid files
    (although there is an issue to add this,
    see https://github.com/vocalpy/crowsetta/issues/242).
    Please "thumbs up" that issue and comment
    if you would find this helpful.

    Converting to crowsetta classes
    ===============================
    This class can parse both IntervalTiers
    and PointTiers in TextGrid files,
    but when converting to a
    :class:`crowsetta.Annotation` it can only
    convert :class:`~crowsetta.formats.seq.textgrid.classes.IntervalTier`
    instances to :class:`crowsetta.Sequence` instances.
    See the :meth:`~crowsetta.formats.seq.textgrid.TextGrid.to_seq`
    method for details.

    Implementation
    ==============
    Code for parsing TextGrids is adapted from several sources,
    all under MIT license.
    The main logic in
    :func:`~crowsetta.formats.seq.textgrid.parse.parse_fp`
    is adapted from an external source
    (TODO: the link to that source is missing from this docstring, restore it)
    which is perhaps the most concise
    Python code I have found for parsing TextGrids.
    However, there are also good ideas in
    https://github.com/kylebgorman/textgrid/blob/master/textgrid/textgrid.py
    (__getitem__ method for tier access) and
    https://github.com/timmahrt/praatIO
    (data classes, handling encoding).

    For some documentation of the binary format see
    https://github.com/Legisign/Praat-textgrids
    and for a citable library with docs see
    https://github.com/hbuschme/TextGridTools
    but note that both of these have a GPL license.

    References
    ----------
    .. [1] https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html
    .. [2] Boersma, Paul & Weenink, David (2023).
       Praat: doing phonetics by computer [Computer program].
       Version 6.3.09, retrieved 2 March 2023 from http://www.praat.org/
    """

    name: ClassVar[str] = "textgrid"
    ext: ClassVar[str] = ".TextGrid"

    tiers: list[Union[IntervalTier, PointTier]] = attr.field(repr=reprlib.repr)
    xmin: float
    xmax: float
    annot_path: pathlib.Path
    audio_path: Optional[pathlib.Path] = attr.field(default=None, converter=attr.converters.optional(pathlib.Path))

    @classmethod
    def from_file(
        cls,
        annot_path: PathLike,
        audio_path: Optional[PathLike] = None,
        keep_empty: bool = False,
    ) -> "Self":  # noqa: F821
        """Load annotations from a TextGrid file
        in the format used by Praat.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            The path to a TextGrid file from which annotations were loaded.
        audio_path : str, pathlib.Path
            The path to the audio file that ``annot_path`` annotates.
            Optional, default is None.
        keep_empty : bool
            If True, keep intervals in
            interval tiers that have empty labels
            (i.e., the empty string "").
            Default is False.

        Examples
        --------
        >>> example = crowsetta.data.get('textgrid')
        >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path)
        >>> print(textgrid)
        TextGrid(tiers=[PointTier(nam...ark='L+!H-')]), IntervalTier(...aleila\\-^')]), IntervalTier(...t='earlier')])], xmin=0.0, xmax=2.4360509767904546, annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc2/textgrid/AVO-maea-basic.TextGrid'), audio_path=None)  # noqa: E501

        For usage, see the
        "Examples" section in :class:`crowsetta.formats.seq.TextGrid`.

        See Also
        --------
        :class:`crowsetta.formats.seq.TextGrid`
        """
        annot_path = pathlib.Path(annot_path)
        crowsetta.validation.validate_ext(annot_path, extension=cls.ext)

        tg_dict = parse(annot_path, keep_empty)

        return cls(
            tiers=tg_dict["tiers"],
            xmin=tg_dict["xmin"],
            xmax=tg_dict["xmax"],
            annot_path=annot_path,
            audio_path=audio_path,
        )

    def __len__(self):
        """Return the number of tiers in this TextGrid."""
        return len(self.tiers)

    @property
    def tier_names(self):
        """Names of all tiers, in the order the tiers appear in ``tiers``."""
        return [tier.name for tier in self.tiers]

    def __getitem__(
        self, key: Union[str, int, slice]
    ) -> Union[IntervalTier, PointTier, list[Union[IntervalTier, PointTier]]]:
        """Get a tier by name or by index.

        Parameters
        ----------
        key : str, int, or slice
            Name of a tier, or an integer index or slice into ``tiers``.

        Returns
        -------
        tier : IntervalTier, PointTier, or list
            The matching tier; a list of tiers if ``key`` is a slice.

        Raises
        ------
        IndexError
            If ``key`` is a string and no tier has that name,
            or if ``key`` is an integer that is out of range.
        ValueError
            If ``key`` is a string and more than one tier has that name.
        TypeError
            If ``key`` is not a string, integer, or slice.
        """
        if isinstance(key, str):
            matching_name_inds = [tier_ind for tier_ind, tier in enumerate(self.tiers) if tier.name == key]
            if not matching_name_inds:
                # IndexError is what indexing the empty list of matches
                # raised before, so the exception type is kept for callers
                raise IndexError(f"No tier has the name '{key}'. Names of tiers are: {self.tier_names}.")
            if len(matching_name_inds) > 1:
                raise ValueError(
                    f"Multiple tiers have the name '{key}', tiers are: {matching_name_inds}. "
                    "Please access tiers with one of those integer indices, "
                    "or give the tiers unique names to be able to access with a string."
                )
            return self.tiers[matching_name_inds[0]]

        if isinstance(key, (int, slice)):
            return self.tiers[key]

        raise TypeError(f"Tiers must be accessed with a string key or an integer index, but got a {type(key)}.")

    @staticmethod
    def _interval_tier_to_seq(
        interval_tier: IntervalTier, round_times: bool = True, decimals: int = 3
    ) -> crowsetta.Sequence:
        """Helper method used by ``to_seq``
        that converts a single IntervalTier to a ``crowsetta.Sequence``.

        The ``xmin``, ``xmax`` and ``text`` of each interval become
        the onsets, offsets and labels of the sequence, respectively.
        """
        intervals = interval_tier.intervals
        onsets_s = np.array([interval.xmin for interval in intervals])
        offsets_s = np.array([interval.xmax for interval in intervals])
        labels = np.array([interval.text for interval in intervals])

        if round_times:
            onsets_s = np.around(onsets_s, decimals=decimals)
            offsets_s = np.around(offsets_s, decimals=decimals)

        return crowsetta.Sequence.from_keyword(labels=labels, onsets_s=onsets_s, offsets_s=offsets_s)

    def to_seq(
        self, tier: Optional[Union[int, str]] = None, round_times: bool = True, decimals: int = 3
    ) -> Union[crowsetta.Sequence, list[crowsetta.Sequence]]:
        """Convert one or all IntervalTiers from this TextGrid annotation
        into :class:`crowsetta.Sequence` instances.

        Each IntervalTier is converted to a single
        :class:`~crowsetta.Sequence`.
        PointTiers cannot be converted.

        Parameters
        ----------
        tier : int or str, optional
            Index or string name of interval tier in TextGrid file
            from which annotations should be taken.
            Default is None, in which case all interval tiers
            are converted to :class:`crowsetta.Sequence` instances.
        round_times : bool
            If True, round times of onsets and offsets.
            Default is True.
        decimals : int
            Number of decimals places to round floating point numbers to.
            Only meaningful if round_times is True.
            Default is 3, so that times are rounded to milliseconds.

        Returns
        -------
        seq : crowsetta.Sequence or list of crowsetta.Sequence
            A single sequence if ``tier`` is specified,
            or if ``tier`` is None and the TextGrid has exactly one
            interval tier. Otherwise a list with one sequence per
            interval tier, in the order the tiers appear in the TextGrid.

        Raises
        ------
        ValueError
            If ``tier`` specifies a tier that is not an IntervalTier.

        Examples
        --------
        Calling the :meth:`~crowsetta.formats.seq.TextGrid.to_seq` method
        with no arguments will convert all the
        :class:`~crowsetta.formats.seq.textgrid.classes.IntervalTier`
        instances to :class:`~crowsetta.Sequence` instances,
        in the order they appear in the TextGrid.

        >>> example = crowsetta.data.get('textgrid')
        >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path)
        >>> seqs = textgrid.to_seq()
        >>> len(seqs)
        2

        Call the :meth:`~crowsetta.formats.seq.TextGrid.to_seq` method
        with a ``tier`` argument to convert a specific
        :class:`~crowsetta.formats.seq.textgrid.classes.IntervalTier` to a
        single :class:`~crowsetta.Sequence`.
        The ``tier`` argument has the same semantics as the
        item access for this class; that is, you can pass in an int
        or the name of the tier as a string.

        >>> tier1 = textgrid.to_seq(tier=2)
        >>> tier2 = textgrid.to_seq(tier="Gloss")
        >>> tier1 == tier2
        True

        Notes
        -----
        The ``round_times`` and ``decimals`` arguments are provided
        to reduce differences across platforms
        due to floating point error, e.g. when loading annotation files
        and then sending them to a csv file,
        the result should be the same on Windows and Linux.
        """
        if tier is not None:
            tier_ = self[tier]
            if not isinstance(tier_, IntervalTier):
                raise ValueError(
                    f"The specified tier {tier} is not an IntervalTier but a {type(tier_)}. "
                    f"Cannot convert to a sequence"
                )
            return self._interval_tier_to_seq(tier_, round_times, decimals)

        seq = [
            self._interval_tier_to_seq(interval_tier, round_times, decimals)
            for interval_tier in self.tiers
            if isinstance(interval_tier, IntervalTier)
        ]
        if len(seq) == 1:
            # a lone interval tier is returned as a bare Sequence, not a list
            seq = seq[0]

        return seq

    def to_annot(
        self, tier: Optional[Union[int, str]] = None, round_times: bool = True, decimals: int = 3
    ) -> crowsetta.Annotation:
        """Convert one or all IntervalTiers from this TextGrid annotation
        to a :class:`crowsetta.Annotation`.

        The sequence(s) returned by
        :meth:`~crowsetta.formats.seq.TextGrid.to_seq`
        are passed as the ``seq`` argument of the annotation.

        Parameters
        ----------
        tier : int or str, optional
            Index or string name of interval tier in TextGrid file
            from which annotations should be taken.
            Default is None, in which case all interval tiers
            are converted to :class:`crowsetta.Sequence` instances.
        round_times : bool
            If True, round times of onsets and offsets.
            Default is True.
        decimals : int
            Number of decimals places to round floating point numbers to.
            Only meaningful if round_times is True.
            Default is 3, so that times are rounded to milliseconds.

        Returns
        -------
        annot : crowsetta.Annotation

        Examples
        --------
        >>> example = crowsetta.data.get('textgrid')
        >>> textgrid = crowsetta.formats.seq.TextGrid.from_file(example.annot_path)
        >>> annot = textgrid.to_annot()

        Notes
        -----
        The ``round_times`` and ``decimals`` arguments are provided
        to reduce differences across platforms
        due to floating point error, e.g. when loading annotation files
        and then sending them to a csv file,
        the result should be the same on Windows and Linux.
        """
        seq = self.to_seq(tier=tier, round_times=round_times, decimals=decimals)

        return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, seq=seq)
2022 + +Sound files and Praat TextGrids accompanying Sasha Calhoun, Corinne Seals, Toaga Alefosio and Niusila Faamanatu-Eteuati, +'Phrasal Prosody of Heritage Speakers of Samoan in Aotearoa New Zealand', +to appear in Rajiv Rao (ed), The Phonetics and Phonology of Heritage Languages, Cambridge University Press. +https://osf.io/bxasd/ + +These files were used because they have multiple IntervalTiers. + +## parselmouth + +Files in ./parselmouth were kindly provided by Yannick Jadoul +during the pyOpenSci review. + +They are adaptations of a test TextGrid file used by Parselmouth: +https://github.com/YannickJadoul/Parselmouth +Jadoul, Y., Thompson, B., & de Boer, B. (2018). +Introducing Parselmouth: A Python interface to Praat. Journal of Phonetics, 71, 1-15. +https://doi.org/10.1016/j.wocn.2018.07.001 + +## praatIO +To test short format TextGrids we use some text files from PraatIO. + +Tim Mahrt. PraatIO. https://github.com/timmahrt/praatIO, 2016. + +The code for parsing TextGrid files is itself adapted from this library, +under MIT license: +https://github.com/timmahrt/praatIO/blob/main/LICENSE \ No newline at end of file diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/AVO-maea-basic.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/AVO-maea-basic.TextGrid new file mode 100644 index 00000000..2ce9a2ff --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/AVO-maea-basic.TextGrid @@ -0,0 +1,97 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.4360509767904546 +tiers? 
+size = 3 +item []: + item [1]: + class = "TextTier" + name = "Tones" + xmin = 0 + xmax = 2.4360509767904546 + points: size = 5 + points [1]: + number = 0.4351780385722748 + mark = "L+H*" + points [2]: + number = 0.6407379583230295 + mark = "H-" + points [3]: + number = 1.1328819591093477 + mark = "L+H*" + points [4]: + number = 1.244041566788134 + mark = "H-" + points [5]: + number = 2.350907447504575 + mark = "L+!H-" + item [2]: + class = "IntervalTier" + name = "Samoan" + xmin = 0 + xmax = 2.4360509767904546 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.051451575248407266 + text = "'o" + intervals [2]: + xmin = 0.051451575248407266 + xmax = 0.6407379583230295 + text = "Sione" + intervals [3]: + xmin = 0.6407379583230295 + xmax = 0.7544662733943284 + text = "na" + intervals [4]: + xmin = 0.7544662733943284 + xmax = 1.244041566788134 + text = "tosoa" + intervals [5]: + xmin = 1.244041566788134 + xmax = 1.3481058803597676 + text = "le" + intervals [6]: + xmin = 1.3481058803597676 + xmax = 1.70760078178904 + text = "maea" + intervals [7]: + xmin = 1.70760078178904 + xmax = 2.4360509767904546 + text = "analeila\-^" + item [3]: + class = "IntervalTier" + name = "Gloss" + xmin = 0 + xmax = 2.4360509767904546 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.051451575248407266 + text = "PRES" + intervals [2]: + xmin = 0.051451575248407266 + xmax = 0.6407379583230295 + text = "Sione" + intervals [3]: + xmin = 0.6407379583230295 + xmax = 0.7544662733943284 + text = "PAST" + intervals [4]: + xmin = 0.7544662733943284 + xmax = 1.244041566788134 + text = "pull-ES" + intervals [5]: + xmin = 1.244041566788134 + xmax = 1.3481058803597676 + text = "DET" + intervals [6]: + xmin = 1.3481058803597676 + xmax = 1.70760078178904 + text = "rope" + intervals [7]: + xmin = 1.70760078178904 + xmax = 2.4360509767904546 + text = "earlier" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusAlofa.TextGrid 
b/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusAlofa.TextGrid new file mode 100644 index 00000000..c0f24978 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusAlofa.TextGrid @@ -0,0 +1,159 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.885532879818594 +tiers? +size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 2.885532879818594 + points: size = 7 + points [1]: + number = 0.29923241806936424 + mark = "L+H*" + points [2]: + number = 0.41 + mark = "H-" + points [3]: + number = 0.77 + mark = "H-" + points [4]: + number = 1.1401065371229195 + mark = "H*" + points [5]: + number = 1.6933852734382882 + mark = "?H+!H*" + points [6]: + number = 2.4959133414600854 + mark = "!H*" + points [7]: + number = 2.8656788335520402 + mark = "L-L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 2.885532879818594 + intervals: size = 14 + intervals [1]: + xmin = 0 + xmax = 0.03081006084705667 + text = "" + intervals [2]: + xmin = 0.03081006084705667 + xmax = 0.17 + text = "na" + intervals [3]: + xmin = 0.17 + xmax = 0.41 + text = "alu" + intervals [4]: + xmin = 0.41 + xmax = 0.77 + text = "iona" + intervals [5]: + xmin = 0.77 + xmax = 0.9155082382226214 + text = "e" + intervals [6]: + xmin = 0.9155082382226214 + xmax = 1.26 + text = "pu'e" + intervals [7]: + xmin = 1.26 + xmax = 1.43 + text = "mai" + intervals [8]: + xmin = 1.43 + xmax = 1.548218080246632 + text = "le" + intervals [9]: + xmin = 1.548218080246632 + xmax = 1.91 + text = "maile" + intervals [10]: + xmin = 1.91 + xmax = 2.21 + text = "io" + intervals [11]: + xmin = 2.21 + xmax = 2.25 + text = "i" + intervals [12]: + xmin = 2.25 + xmax = 2.35 + text = "le" + intervals [13]: + xmin = 2.35 + xmax = 2.87 + text = "paka" + intervals [14]: + xmin = 2.87 + xmax = 2.885532879818594 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 2.885532879818594 + intervals: 
size = 14 + intervals [1]: + xmin = 0 + xmax = 0.03081006084705667 + text = "" + intervals [2]: + xmin = 0.03081006084705667 + xmax = 0.17 + text = "PAST" + intervals [3]: + xmin = 0.17 + xmax = 0.41 + text = "go" + intervals [4]: + xmin = 0.41 + xmax = 0.77 + text = "Iona" + intervals [5]: + xmin = 0.77 + xmax = 0.9155082382226214 + text = "TAM" + intervals [6]: + xmin = 0.9155082382226214 + xmax = 1.26 + text = "catch" + intervals [7]: + xmin = 1.26 + xmax = 1.43 + text = "DIR" + intervals [8]: + xmin = 1.43 + xmax = 1.548218080246632 + text = "DET" + intervals [9]: + xmin = 1.548218080246632 + xmax = 1.91 + text = "dog" + intervals [10]: + xmin = 1.91 + xmax = 2.21 + text = "DEM" + intervals [11]: + xmin = 2.21 + xmax = 2.25 + text = "LD" + intervals [12]: + xmin = 2.25 + xmax = 2.35 + text = "DET" + intervals [13]: + xmin = 2.35 + xmax = 2.87 + text = "park" + intervals [14]: + xmin = 2.87 + xmax = 2.885532879818594 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusLeilani.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusLeilani.TextGrid new file mode 100644 index 00000000..00266dd5 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusLeilani.TextGrid @@ -0,0 +1,119 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.958565356019844 +tiers? 
+size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 2.958565356019844 + points: size = 7 + points [1]: + number = 0.37059549624578725 + mark = "L+H*" + points [2]: + number = 1.096153884398847 + mark = "H-/\% r" + points [3]: + number = 1.4063260598947809 + mark = "L+H*" + points [4]: + number = 2.1261538843988466 + mark = "H-" + points [5]: + number = 2.2861538843988467 + mark = "\% r" + points [6]: + number = 2.5888843279691103 + mark = "!H*" + points [7]: + number = 2.8661538843988468 + mark = "L-L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 2.958565356019844 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.08615388439884694 + text = "" + intervals [2]: + xmin = 0.08615388439884694 + xmax = 0.18615388439884692 + text = "na" + intervals [3]: + xmin = 0.18615388439884692 + xmax = 0.6661538843988469 + text = "si'i" + intervals [4]: + xmin = 0.6661538843988469 + xmax = 1.096153884398847 + text = "e" + intervals [5]: + xmin = 1.096153884398847 + xmax = 1.646153884398847 + text = "Leone" + intervals [6]: + xmin = 1.646153884398847 + xmax = 2.1261538843988466 + text = "le" + intervals [7]: + xmin = 2.1261538843988466 + xmax = 2.2861538843988467 + text = "" + intervals [8]: + xmin = 2.2861538843988467 + xmax = 2.8661538843988468 + text = "nofoa" + intervals [9]: + xmin = 2.8661538843988468 + xmax = 2.958565356019844 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 2.958565356019844 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.08615388439884694 + text = "" + intervals [2]: + xmin = 0.08615388439884694 + xmax = 0.18615388439884692 + text = "PAST" + intervals [3]: + xmin = 0.18615388439884692 + xmax = 0.6661538843988469 + text = "carry" + intervals [4]: + xmin = 0.6661538843988469 + xmax = 1.096153884398847 + text = "ERG" + intervals [5]: + xmin = 1.096153884398847 + xmax = 1.646153884398847 + text = "Leone" + intervals [6]: + xmin = 
1.646153884398847 + xmax = 2.1261538843988466 + text = "DET" + intervals [7]: + xmin = 2.1261538843988466 + xmax = 2.2861538843988467 + text = "" + intervals [8]: + xmin = 2.2861538843988467 + xmax = 2.8661538843988468 + text = "chair" + intervals [9]: + xmin = 2.8661538843988468 + xmax = 2.958565356019844 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusRosita.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusRosita.TextGrid new file mode 100644 index 00000000..712754e8 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/BroadFocusRosita.TextGrid @@ -0,0 +1,113 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 3.242471655328798 +tiers? +size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 3.242471655328798 + points: size = 5 + points [1]: + number = 0.8040754572624709 + mark = "L+H*" + points [2]: + number = 1.6812532559990037 + mark = "L+H*" + points [3]: + number = 1.86 + mark = "H-" + points [4]: + number = 2.3122057077217733 + mark = "!H*" + points [5]: + number = 3.22 + mark = "L-L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 3.242471655328798 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.04 + text = "" + intervals [2]: + xmin = 0.04 + xmax = 0.51 + text = "sa" + intervals [3]: + xmin = 0.51 + xmax = 1.15 + text = "salu" + intervals [4]: + xmin = 1.15 + xmax = 1.34 + text = "e" + intervals [5]: + xmin = 1.34 + xmax = 1.86 + text = "Moana" + intervals [6]: + xmin = 1.86 + xmax = 1.99 + text = "le" + intervals [7]: + xmin = 1.99 + xmax = 2.48 + text = "malae" + intervals [8]: + xmin = 2.48 + xmax = 3.22 + text = "ananafi" + intervals [9]: + xmin = 3.22 + xmax = 3.242471655328798 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 3.242471655328798 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.04 + text = "" + intervals [2]: + xmin = 
0.04 + xmax = 0.51 + text = "PAST" + intervals [3]: + xmin = 0.51 + xmax = 1.15 + text = "sweep" + intervals [4]: + xmin = 1.15 + xmax = 1.34 + text = "ERG" + intervals [5]: + xmin = 1.34 + xmax = 1.86 + text = "Moana" + intervals [6]: + xmin = 1.86 + xmax = 1.99 + text = "DET" + intervals [7]: + xmin = 1.99 + xmax = 2.48 + text = "courtyard" + intervals [8]: + xmin = 2.48 + xmax = 3.22 + text = "yesterday" + intervals [9]: + xmin = 3.22 + xmax = 3.242471655328798 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLeilani.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLeilani.TextGrid new file mode 100644 index 00000000..e509b208 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLeilani.TextGrid @@ -0,0 +1,116 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.169954648526077 +tiers? +size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 2.169954648526077 + points: size = 6 + points [1]: + number = 0.34006953489740466 + mark = "L+H*" + points [2]: + number = 0.8684574601540942 + mark = "L+H*" + points [3]: + number = 1.48 + mark = "H-" + points [4]: + number = 1.53 + mark = "\% r" + points [5]: + number = 1.9649617261003072 + mark = "L+!H*" + points [6]: + number = 2.15 + mark = "L-L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 2.169954648526077 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.08 + text = "" + intervals [2]: + xmin = 0.08 + xmax = 0.23 + text = "na" + intervals [3]: + xmin = 0.23 + xmax = 0.59 + text = "amo" + intervals [4]: + xmin = 0.59 + xmax = 0.63 + text = "e" + intervals [5]: + xmin = 0.63 + xmax = 1.06 + text = "Iona" + intervals [6]: + xmin = 1.06 + xmax = 1.48 + text = "le" + intervals [7]: + xmin = 1.48 + xmax = 1.53 + text = "" + intervals [8]: + xmin = 1.53 + xmax = 2.15 + text = "suo\-^" + intervals [9]: + xmin = 2.15 + xmax = 2.169954648526077 
+ text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 2.169954648526077 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.08 + text = "" + intervals [2]: + xmin = 0.08 + xmax = 0.23 + text = "PAST" + intervals [3]: + xmin = 0.23 + xmax = 0.59 + text = "carry" + intervals [4]: + xmin = 0.59 + xmax = 0.63 + text = "ERG" + intervals [5]: + xmin = 0.63 + xmax = 1.06 + text = "Iona" + intervals [6]: + xmin = 1.06 + xmax = 1.48 + text = "DET" + intervals [7]: + xmin = 1.48 + xmax = 1.53 + text = "" + intervals [8]: + xmin = 1.53 + xmax = 2.15 + text = "spade" + intervals [9]: + xmin = 2.15 + xmax = 2.169954648526077 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLupe.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLupe.TextGrid new file mode 100644 index 00000000..66280487 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusLupe.TextGrid @@ -0,0 +1,116 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.5101360544217686 +tiers? 
+size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 2.5101360544217686 + points: size = 6 + points [1]: + number = 0.47236102601076596 + mark = "L+H*" + points [2]: + number = 0.9274509414726307 + mark = "L+H*" + points [3]: + number = 1.09 + mark = "H-" + points [4]: + number = 1.5731544340912977 + mark = "H*" + points [5]: + number = 2.3165473850028255 + mark = "L+H-" + points [6]: + number = 2.49 + mark = "L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 2.5101360544217686 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.08 + text = "" + intervals [2]: + xmin = 0.08 + xmax = 0.32 + text = "sa" + intervals [3]: + xmin = 0.32 + xmax = 0.53 + text = "'ai" + intervals [4]: + xmin = 0.53 + xmax = 0.62 + text = "e" + intervals [5]: + xmin = 0.62 + xmax = 1.09 + text = "Sione" + intervals [6]: + xmin = 1.09 + xmax = 1.22 + text = "le" + intervals [7]: + xmin = 1.22 + xmax = 1.8 + text = "siamu" + intervals [8]: + xmin = 1.8 + xmax = 2.49 + text = "anapo\-^" + intervals [9]: + xmin = 2.49 + xmax = 2.5101360544217686 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 2.5101360544217686 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.08 + text = "" + intervals [2]: + xmin = 0.08 + xmax = 0.32 + text = "PAST" + intervals [3]: + xmin = 0.32 + xmax = 0.53 + text = "eat" + intervals [4]: + xmin = 0.53 + xmax = 0.62 + text = "ERG" + intervals [5]: + xmin = 0.62 + xmax = 1.09 + text = "Sione" + intervals [6]: + xmin = 1.09 + xmax = 1.22 + text = "DET" + intervals [7]: + xmin = 1.22 + xmax = 1.8 + text = "jam" + intervals [8]: + xmin = 1.8 + xmax = 2.49 + text = "last-night" + intervals [9]: + xmin = 2.49 + xmax = 2.5101360544217686 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusRosita.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusRosita.TextGrid new file mode 100644 index 
00000000..626b63b3 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/ObjectFocusRosita.TextGrid @@ -0,0 +1,135 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 3.623401360544218 +tiers? +size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 3.623401360544218 + points: size = 7 + points [1]: + number = 0.8366300190962462 + mark = "L+H*" + points [2]: + number = 1.4213284755332714 + mark = "L+H*" + points [3]: + number = 1.56 + mark = "H-" + points [4]: + number = 2.043860361504457 + mark = "L+!H*" + points [5]: + number = 2.78 + mark = "\% r" + points [6]: + number = 3.144469220680034 + mark = "!H*" + points [7]: + number = 3.6 + mark = "L-L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 3.623401360544218 + intervals: size = 11 + intervals [1]: + xmin = 0 + xmax = 0.06 + text = "" + intervals [2]: + xmin = 0.06 + xmax = 0.46 + text = "leai" + intervals [3]: + xmin = 0.46 + xmax = 0.6 + text = "na" + intervals [4]: + xmin = 0.6 + xmax = 1.02 + text = "salu" + intervals [5]: + xmin = 1.02 + xmax = 1.11 + text = "e" + intervals [6]: + xmin = 1.11 + xmax = 1.56 + text = "Moana" + intervals [7]: + xmin = 1.56 + xmax = 1.67 + text = "le" + intervals [8]: + xmin = 1.67 + xmax = 2.21 + text = "malae" + intervals [9]: + xmin = 2.21 + xmax = 2.78 + text = "" + intervals [10]: + xmin = 2.78 + xmax = 3.6 + text = "ananafi" + intervals [11]: + xmin = 3.6 + xmax = 3.623401360544218 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 3.623401360544218 + intervals: size = 11 + intervals [1]: + xmin = 0 + xmax = 0.06 + text = "" + intervals [2]: + xmin = 0.06 + xmax = 0.46 + text = "no" + intervals [3]: + xmin = 0.46 + xmax = 0.6 + text = "PAST" + intervals [4]: + xmin = 0.6 + xmax = 1.02 + text = "sweep" + intervals [5]: + xmin = 1.02 + xmax = 1.11 + text = "ERG" + intervals [6]: + xmin = 1.11 + xmax = 1.56 + text = "Moana" + intervals 
[7]: + xmin = 1.56 + xmax = 1.67 + text = "DET" + intervals [8]: + xmin = 1.67 + xmax = 2.21 + text = "courtyard" + intervals [9]: + xmin = 2.21 + xmax = 2.78 + text = "" + intervals [10]: + xmin = 2.78 + xmax = 3.6 + text = "yesterday" + intervals [11]: + xmin = 3.6 + xmax = 3.623401360544218 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLeilani.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLeilani.TextGrid new file mode 100644 index 0000000000000000000000000000000000000000..2e89e4dee86baa879a5c3b324c211d790eade148 GIT binary patch literal 6980 zcmeHMU2oGs5L{l7_zxixFCb8yIF5t(5E7+D1c9J$6r{97C6FeOla|1b2WD+=8{heK zUZ+R_MOJ#T&v%}gz1!KizkbsT8q9ND-f<{IG{3wnwxk5a5-oGtNrlHaqyJwMTt&E3hbj&aWjSQ_+!KGMVd zY6C4F>%BqR&nTr=S~u^*99I)#%0^D1*V!eeCa}z)$q1-jBg%CXB+~L-b##>R($0a0 zqr^J0Hbo8l4p!@X2#nOI!mLI}a<#Zo?7hgB1;-DX-R8jrg`I^Ym+C<{YV0 za)8V;plqj`p0nO_ZP{uCKvv##&z=%9R8D%mB0QdvZ{vAX63I}Ojbj9$<ZUMm@~KlAVVvR~n% zpv}pT&*oI^jfFhZY)!uAYFw|$+-`OKHQ8yp&-vYsiAWd})pJ!t*QJ+TkJ#@g{)d|E zEj^?6sGwb{+d6P{xxIB6zx7)w*|^uaYwhi-GM!iK(L35#MrBu-@#-sPJ+3}?m6_~H zX^T0ZHS$+LrVl=+J#`k`-rDR0btb>}=Qr!R%)Y&qS@yr5C92ISa{MWMQFHspYqUQh CINzH9 literal 0 HcmV?d00001 diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLupe.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLupe.TextGrid new file mode 100644 index 00000000..606b54c9 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusLupe.TextGrid @@ -0,0 +1,121 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.5710953024720617 +tiers? 
+size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 2.5710953024720617 + points: size = 5 + points [1]: + number = 0.568428380444058 + mark = "L+H*" + points [2]: + number = 0.8200748943087963 + mark = "H-" + points [3]: + number = 1.050754627700043 + mark = "!H*" + points [4]: + number = 1.4575358000846088 + mark = "?H+!H*" + points [5]: + number = 2.3350209005141718 + mark = "L+H-" + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 2.5710953024720617 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.010074894308796212 + text = "" + intervals [2]: + xmin = 0.010074894308796212 + xmax = 0.09007489430879617 + text = "'o" + intervals [3]: + xmin = 0.09007489430879617 + xmax = 0.6500748943087962 + text = "Kalolo" + intervals [4]: + xmin = 0.6500748943087962 + xmax = 0.8200748943087963 + text = "lea" + intervals [5]: + xmin = 0.8200748943087963 + xmax = 0.9400748943087962 + text = "na" + intervals [6]: + xmin = 0.9400748943087962 + xmax = 1.1500748943087962 + text = "vali" + intervals [7]: + xmin = 1.1500748943087962 + xmax = 1.3500748943087963 + text = "le" + intervals [8]: + xmin = 1.3500748943087963 + xmax = 1.6400748943087962 + text = "uila" + intervals [9]: + xmin = 1.6400748943087962 + xmax = 2.5500748943087963 + text = "analeila\-^" + intervals [10]: + xmin = 2.5500748943087963 + xmax = 2.5710953024720617 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 2.5710953024720617 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.010074894308796212 + text = "" + intervals [2]: + xmin = 0.010074894308796212 + xmax = 0.09007489430879617 + text = "PRES" + intervals [3]: + xmin = 0.09007489430879617 + xmax = 0.6500748943087962 + text = "Kalolo" + intervals [4]: + xmin = 0.6500748943087962 + xmax = 0.8200748943087963 + text = "DEM" + intervals [5]: + xmin = 0.8200748943087963 + xmax = 0.9400748943087962 + text = "PAST" + intervals [6]: + xmin = 
0.9400748943087962 + xmax = 1.1500748943087962 + text = "paint" + intervals [7]: + xmin = 1.1500748943087962 + xmax = 1.3500748943087963 + text = "DET" + intervals [8]: + xmin = 1.3500748943087963 + xmax = 1.6400748943087962 + text = "bike" + intervals [9]: + xmin = 1.6400748943087962 + xmax = 2.5500748943087963 + text = "earlier" + intervals [10]: + xmin = 2.5500748943087963 + xmax = 2.5710953024720617 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusRosita.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusRosita.TextGrid new file mode 100644 index 00000000..a454a6c9 --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/SubjectFocusRosita.TextGrid @@ -0,0 +1,113 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.7965753537583624 +tiers? +size = 3 +item []: + item [1]: + class = "TextTier" + name = "tones" + xmin = 0 + xmax = 2.7965753537583624 + points: size = 5 + points [1]: + number = 0.6820356802492782 + mark = "L+H*" + points [2]: + number = 1.3120408755082833 + mark = "L+H*" + points [3]: + number = 1.5048973492232154 + mark = "H-" + points [4]: + number = 1.8614292922270368 + mark = "!H*" + points [5]: + number = 2.7748973492232154 + mark = "L-L\% " + item [2]: + class = "IntervalTier" + name = "words" + xmin = 0 + xmax = 2.7965753537583624 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.00489734922321533 + text = "" + intervals [2]: + xmin = 0.00489734922321533 + xmax = 0.2248973492232153 + text = "sa" + intervals [3]: + xmin = 0.2248973492232153 + xmax = 0.8948973492232155 + text = "fusia" + intervals [4]: + xmin = 0.8948973492232155 + xmax = 0.9548973492232153 + text = "e" + intervals [5]: + xmin = 0.9548973492232153 + xmax = 1.5048973492232154 + text = "Kalolo" + intervals [6]: + xmin = 1.5048973492232154 + xmax = 1.6048973492232153 + text = "le" + intervals [7]: + xmin = 1.6048973492232153 + xmax = 1.9648973492232156 + text = "teine" + 
intervals [8]: + xmin = 1.9648973492232156 + xmax = 2.7748973492232154 + text = "ananafi" + intervals [9]: + xmin = 2.7748973492232154 + xmax = 2.7965753537583624 + text = "" + item [3]: + class = "IntervalTier" + name = "gloss" + xmin = 0 + xmax = 2.7965753537583624 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.00489734922321533 + text = "" + intervals [2]: + xmin = 0.00489734922321533 + xmax = 0.2248973492232153 + text = "PAST" + intervals [3]: + xmin = 0.2248973492232153 + xmax = 0.8948973492232155 + text = "hug-ES" + intervals [4]: + xmin = 0.8948973492232155 + xmax = 0.9548973492232153 + text = "ERG" + intervals [5]: + xmin = 0.9548973492232153 + xmax = 1.5048973492232154 + text = "Kalolo" + intervals [6]: + xmin = 1.5048973492232154 + xmax = 1.6048973492232153 + text = "DET" + intervals [7]: + xmin = 1.6048973492232153 + xmax = 1.9648973492232156 + text = "girl" + intervals [8]: + xmin = 1.9648973492232156 + xmax = 2.7748973492232154 + text = "yesterday" + intervals [9]: + xmin = 2.7748973492232154 + xmax = 2.7965753537583624 + text = "" diff --git a/tests/data_for_tests/textgrid/calhoun-et-al-2022/VAO-maea-basic.TextGrid b/tests/data_for_tests/textgrid/calhoun-et-al-2022/VAO-maea-basic.TextGrid new file mode 100644 index 00000000..90df34ca --- /dev/null +++ b/tests/data_for_tests/textgrid/calhoun-et-al-2022/VAO-maea-basic.TextGrid @@ -0,0 +1,97 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.567865094163011 +tiers? 
+size = 3 +item []: + item [1]: + class = "TextTier" + name = "Tones" + xmin = 0 + xmax = 2.567865094163011 + points: size = 5 + points [1]: + number = 0.5371682898422421 + mark = "L+H*" + points [2]: + number = 1.1713410695870436 + mark = "L+H*" + points [3]: + number = 1.352035722374353 + mark = "H-" + points [4]: + number = 1.7019161256276902 + mark = "!H*" + points [5]: + number = 2.4715695293575948 + mark = "L+H-" + item [2]: + class = "IntervalTier" + name = "Samoan" + xmin = 0 + xmax = 2.567865094163011 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.36835598696856664 + text = "sa" + intervals [2]: + xmin = 0.36835598696856664 + xmax = 0.8238467987574438 + text = "toso" + intervals [3]: + xmin = 0.8238467987574438 + xmax = 0.8808515647843649 + text = "e" + intervals [4]: + xmin = 0.8808515647843649 + xmax = 1.352035722374353 + text = "Sione" + intervals [5]: + xmin = 1.352035722374353 + xmax = 1.4603013378723801 + text = "le" + intervals [6]: + xmin = 1.4603013378723801 + xmax = 1.818645276492589 + text = "maea" + intervals [7]: + xmin = 1.818645276492589 + xmax = 2.567865094163011 + text = "analeila\-^" + item [3]: + class = "IntervalTier" + name = "Gloss" + xmin = 0 + xmax = 2.567865094163011 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.36835598696856664 + text = "PAST" + intervals [2]: + xmin = 0.36835598696856664 + xmax = 0.8238467987574438 + text = "pull" + intervals [3]: + xmin = 0.8238467987574438 + xmax = 0.8808515647843649 + text = "ERG" + intervals [4]: + xmin = 0.8808515647843649 + xmax = 1.352035722374353 + text = "Sione" + intervals [5]: + xmin = 1.352035722374353 + xmax = 1.4603013378723801 + text = "DET" + intervals [6]: + xmin = 1.4603013378723801 + xmax = 1.818645276492589 + text = "rope" + intervals [7]: + xmin = 1.818645276492589 + xmax = 2.567865094163011 + text = "earlier" diff --git a/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf16.TextGrid 
b/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf16.TextGrid new file mode 100644 index 0000000000000000000000000000000000000000..c69c3476c58583fdcd5c038d3f7bfd8a8d9a0e8c GIT binary patch literal 2000 zcma)-O^(w*5QR%&H>Zev0AxEqb|hd4Vgm~zA(qS#X)+1{O@_Zaa3GGw34r(2vpq9H zV5?=v-Kl={s_J$1uix=B_Hl?^%;P4$#mD@5tXUBsG`5<5)jz#e5ohr-&f`k!7h0d| zeTcVl(wK+$^?RPhwXoj_=}e>4Q^mumqm6~$>saZx)NiF(9ZQXkY#yb@z3^`IJkd(6 zr-2O}85rV+ucR{q+T1% z8vP^O2R{bfDfY%Ce@!AR<-r`O6>6Y%)a=V0+;MwwK`1+9zNr?7tMtJcC}vj!>bNzS zsEWurUE~ziqO&S}C8kbCui9~IFdN~JKXa^8U*w?G7m@p9k6YRT-BgN`s*+WdwGcil zuKcY^eeV8e4W=t>tuHc*8)2E@-x?jaHl}MxheD-wGVQsyS({s)WWurh%bJdd!tsvg zU7L+qqJ7@(Zthsqtg!-#$EtMgIoq)5Pty0S_cUv)#yWG+DX183h5J4Dr2FHLx52sI z+$hKRl=s6(It#ik?~4-&eQz|XdBg3 literal 0 HcmV?d00001 diff --git a/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf8.TextGrid b/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf8.TextGrid new file mode 100644 index 00000000..6bdb8df6 --- /dev/null +++ b/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.short.utf8.TextGrid @@ -0,0 +1,77 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +0 +1.283265306122449 + +2 +"IntervalTier" +"phonemes" +0 +1.283265306122449 +16 +0 +0.06834975785384344 +"" +0.06834975785384344 +0.08867687921858255 +"ð" +0.08867687921858255 +0.11975392053582794 +"ə" +0.11975392053582794 +0.2061349091724575 +"n" +0.2061349091724575 +0.30880021874037517 +"ɔ" +0.30880021874037517 +0.46230666457849673 +"θ" +0.46230666457849673 +0.49477342517553885 +"w" +0.49477342517553885 +0.5504775558385259 +"ɪ" +0.5504775558385259 +0.6864284078871509 +"n" +0.6864284078871509 +0.70675552925189 +"d" +0.70675552925189 +0.7571330473436337 +"ə" +0.7571330473436337 +0.8495823589351875 +"n" +0.8495823589351875 +0.8947320227357138 +"ə" +0.8947320227357138 +1.0384214079574572 +"s" +1.0384214079574572 +1.1414251864114717 +"ʌ" +1.1414251864114717 +1.283265306122449 +"n" +"TextTier" 
+"syllable nuclei" +0 +1.283265306122449 +6 +0.10218212453545583 +"The" +0.22258122800352595 +"North" +0.5032518653089622 +"Wind" +0.7307592621220037 +"and" +0.8699218622344483 +"the" +1.0771021376827508 +"Sun" diff --git a/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf16.TextGrid b/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf16.TextGrid new file mode 100644 index 0000000000000000000000000000000000000000..b210e26edcaa4e1d301b52d62da826a3e7bfc969 GIT binary patch literal 5916 zcmcIoU2fAr5FVAco+8QtknJS3gZOy^@d6K2La5@Qg-A(BLzNULoPh&zEKUG?-%ggM zyW=JkgRI!z_3q~TW@cw*=hts}Dl?hOM0#>1U*$5qJwmH5A8{R{eT^@1R9_C|rJTt* z`ro0yhx=SkWreFJIxo(7CQGdS9xEN>7d<1k`{5#P|St5%_JhVK&~=*fw^l?S1(hdZy$ zZ)Ny;xc(AHSS+7|3)fuAXYijxQi`jOx%vDB#$5cx)Q6m>knSR2ngF>y%aYj*CVk@= zuME>QsFzJHt01Ai&;LGGH#P&{O4VmZH~QRSinkX7qk=FL=8)+(W1RH=wa{B zL>kv4Q`&U2S6lWrud2m9;@T;%u2^qtSGhOfc?8asK@AC1kT&@@cpKrev=s5B+<&%M zp3Q!+2m3B)bM_^;s?G<2c@gxk+$jr}<`3Su*bm+8yP?h5S8I=`mS2Ex2@9ZiKdP`h#N^_ais=ZfJA%L+temTV#l^nagMAmYIeYRf*8KTS zUq{}fAPLQhqIXZ9iMKyEcX1!Pxp%>uGiQ9GturU&UT2hN#4CeDT+ET*c%poXe`A)I z=W$WOM9UbdIRxW!7qmI|I_!0|a%!4Uo_?;uo5$LjeOu%0_|ACc<-IRbi!-BG3G1e( z#vTar~+PqqO z&Bj;7SfOGx5A~lj+*xs1$fr;dnjkvbk%zL_jGR_<`WN!LM14gq->PQVwT#U o>CzH*wcl-CIY+BeiD$#O>zFAr7iShhE4e#=NYTE+j$V7zKeVNPg8%>k literal 0 HcmV?d00001 diff --git a/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf8.TextGrid b/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf8.TextGrid new file mode 100644 index 00000000..3f5c3caf --- /dev/null +++ b/tests/data_for_tests/textgrid/parselmouth/the_north_wind_and_the_sun.utf8.TextGrid @@ -0,0 +1,102 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 1.283265306122449 +tiers? 
+size = 2 +item []: + item [1]: + class = "IntervalTier" + name = "phonemes" + xmin = 0 + xmax = 1.283265306122449 + intervals: size = 16 + intervals [1]: + xmin = 0 + xmax = 0.06834975785384344 + text = "" + intervals [2]: + xmin = 0.06834975785384344 + xmax = 0.08867687921858255 + text = "ð" + intervals [3]: + xmin = 0.08867687921858255 + xmax = 0.11975392053582794 + text = "ə" + intervals [4]: + xmin = 0.11975392053582794 + xmax = 0.2061349091724575 + text = "n" + intervals [5]: + xmin = 0.2061349091724575 + xmax = 0.30880021874037517 + text = "ɔ" + intervals [6]: + xmin = 0.30880021874037517 + xmax = 0.46230666457849673 + text = "θ" + intervals [7]: + xmin = 0.46230666457849673 + xmax = 0.49477342517553885 + text = "w" + intervals [8]: + xmin = 0.49477342517553885 + xmax = 0.5504775558385259 + text = "ɪ" + intervals [9]: + xmin = 0.5504775558385259 + xmax = 0.6864284078871509 + text = "n" + intervals [10]: + xmin = 0.6864284078871509 + xmax = 0.70675552925189 + text = "d" + intervals [11]: + xmin = 0.70675552925189 + xmax = 0.7571330473436337 + text = "ə" + intervals [12]: + xmin = 0.7571330473436337 + xmax = 0.8495823589351875 + text = "n" + intervals [13]: + xmin = 0.8495823589351875 + xmax = 0.8947320227357138 + text = "ə" + intervals [14]: + xmin = 0.8947320227357138 + xmax = 1.0384214079574572 + text = "s" + intervals [15]: + xmin = 1.0384214079574572 + xmax = 1.1414251864114717 + text = "ʌ" + intervals [16]: + xmin = 1.1414251864114717 + xmax = 1.283265306122449 + text = "n" + item [2]: + class = "TextTier" + name = "syllable nuclei" + xmin = 0 + xmax = 1.283265306122449 + points: size = 6 + points [1]: + number = 0.10218212453545583 + mark = "The" + points [2]: + number = 0.22258122800352595 + mark = "North" + points [3]: + number = 0.5032518653089622 + mark = "Wind" + points [4]: + number = 0.7307592621220037 + mark = "and" + points [5]: + number = 0.8699218622344483 + mark = "the" + points [6]: + number = 1.0771021376827508 + mark = "Sun" diff --git 
a/tests/data_for_tests/textgrid/praatIO/all_tiers_have_the_same_name.TextGrid b/tests/data_for_tests/textgrid/praatIO/all_tiers_have_the_same_name.TextGrid new file mode 100644 index 00000000..62b05948 --- /dev/null +++ b/tests/data_for_tests/textgrid/praatIO/all_tiers_have_the_same_name.TextGrid @@ -0,0 +1,62 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 1 +tiers? +size = 3 +item []: + item [1]: + class = "IntervalTier" + name = "Mary" + xmin = 0 + xmax = 1 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.06588581304037375 + text = "" + intervals [2]: + xmin = 0.06588581304037375 + xmax = 0.2342878321396945 + text = "Mary" + intervals [3]: + xmin = 0.2342878321396945 + xmax = 1 + text = "" + item [2]: + class = "IntervalTier" + name = "Mary" + xmin = 0 + xmax = 1 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.19395201918177338 + text = "" + intervals [2]: + xmin = 0.19395201918177338 + xmax = 0.3341189692105493 + text = "Maary" + intervals [3]: + xmin = 0.3341189692105493 + xmax = 1 + text = "" + item [3]: + class = "IntervalTier" + name = "Mary" + xmin = 0 + xmax = 1 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.4117654091545475 + text = "" + intervals [2]: + xmin = 0.4117654091545475 + xmax = 0.5186553134930385 + text = "Bob" + intervals [3]: + xmin = 0.5186553134930385 + xmax = 1 + text = "" diff --git a/tests/data_for_tests/textgrid/praatIO/bobby_phones_elan.TextGrid b/tests/data_for_tests/textgrid/praatIO/bobby_phones_elan.TextGrid new file mode 100644 index 00000000..63843ab3 --- /dev/null +++ b/tests/data_for_tests/textgrid/praatIO/bobby_phones_elan.TextGrid @@ -0,0 +1,74 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0.0 +xmax = 1.194625 +tiers? 
+size = 1 +item []: + item[1]: + class = "IntervalTier" + name = "phone" + xmin = 0.0 + xmax = 1.18979591837 + intervals: size = 15 + intervals [1] + xmin = 0.0124716553288 + xmax = 0.06469123242311078 + text = "" + intervals [2] + xmin = 0.06469123242311078 + xmax = 0.08438971390281873 + text = "B" + intervals [3] + xmin = 0.08438971390281873 + xmax = 0.23285789838876556 + text = "AA1" + intervals [4] + xmin = 0.23285789838876556 + xmax = 0.2788210218414174 + text = "B" + intervals [5] + xmin = 0.2788210218414174 + xmax = 0.41156462585 + text = "IY0" + intervals [6] + xmin = 0.41156462585 + xmax = 0.47094510353588265 + text = "R" + intervals [7] + xmin = 0.47094510353588265 + xmax = 0.521315192744 + text = "IH1" + intervals [8] + xmin = 0.521315192744 + xmax = 0.658052967538796 + text = "PT" + intervals [9] + xmin = 0.658052967538796 + xmax = 0.680952380952 + text = "DH" + intervals [10] + xmin = 0.680952380952 + xmax = 0.740816326531 + text = "AH0" + intervals [11] + xmin = 0.740816326531 + xmax = 0.807647261005538 + text = "L" + intervals [12] + xmin = 0.807647261005538 + xmax = 0.910430839002 + text = "EH1" + intervals [13] + xmin = 0.910430839002 + xmax = 0.980272108844 + text = "JH" + intervals [14] + xmin = 0.980272108844 + xmax = 1.1171482864527198 + text = "ER0" + intervals [15] + xmin = 1.1171482864527198 + xmax = 1.18979591837 + text = "" diff --git a/tests/data_for_tests/textgrid/praatIO/mary.TextGrid b/tests/data_for_tests/textgrid/praatIO/mary.TextGrid new file mode 100644 index 00000000..3e5cdc2d --- /dev/null +++ b/tests/data_for_tests/textgrid/praatIO/mary.TextGrid @@ -0,0 +1,96 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +0 +1.869687 + +3 +"IntervalTier" +"phone" +0 +1.869687 +16 +0 +0.3154201182247563 +"" +0.3154201182247563 +0.38526757369599995 +"m" +0.38526757369599995 +0.4906833231456586 +"ə" +0.4906833231456586 +0.5687114623227726 +"r" +0.5687114623227726 +0.6755499913498981 +"i" +0.6755499913498981 +0.8142925170069999 +"r" 
+0.8142925170069999 +0.854201814059 +"o" +0.854201814059 +0.9240430839 +"l" +0.9240430839 +0.9839070294779999 +"d" +0.9839070294779999 +1.0164729379083655 +"θ" +1.0164729379083655 +1.063725623583 +"ə" +1.063725623583 +1.1152822781165286 +"b" +1.1152822781165286 +1.2325508617834506 +"œ" +1.2325508617834506 +1.3345876591689074 +"r" +1.3345876591689074 +1.5182538944627297 +"l" +1.5182538944627297 +1.869687 +"" +"IntervalTier" +"word" +0 +1.869687 +6 +0 +0.3154201182247563 +"" +0.3154201182247563 +0.6755499913498981 +"mary" +0.6755499913498981 +0.9839070294779999 +"rolled" +0.9839070294779999 +1.063725623583 +"the" +1.063725623583 +1.5182538944627297 +"barrel" +1.5182538944627297 +1.869687 +"" +"TextTier" +"pitch" +0 +1.869687 +4 +0.5978689404359245 +"120" +0.8264598697308528 +"85" +1.0195797927558785 +"97" +1.2008760470242699 +"104" diff --git a/tests/data_for_tests/textgrid/praatIO/mary_longfile.TextGrid b/tests/data_for_tests/textgrid/praatIO/mary_longfile.TextGrid new file mode 100644 index 0000000000000000000000000000000000000000..1f9a663bbca581bcb63d83c73f14c314693d125c GIT binary patch literal 7124 zcmeHMO>fgc5FOywUlHX8AUm-W2k~(O^~8Z*5aQ63N(9ohN&*yq8NUvAZ#PTq-Eo`g zsuzST$M$-6J#S`q=I!LyZ+R&TS<1N#WGmn0%H2J~(@-vPoZ$HyZ$hu39LZ~$%O|v- zp?!egrJTwJ$3XnJFy@u4G4}_|bcEwtF7UM4^(t`nZKrtSN0#zf8kxupXCqVBV~a88 zm}es|@ch(Cv%u39=X2L%gC0N7?&lc0wgujw<9h>nC-3EnYa8H~NBdpr-X87VgdQ5E zV@S?9*YXu;m(YusYlyMw_!atG{FP}4t$u_qS7_1uYr5G>fk` zzDnp?Ly(T8F8lNqzI+IdcHcdo0=6Gx{#q&-NrAf(J}|;j!4qmo=`9;Z$5-N{p9UY) z^{k^B^>{(}MvN*?(ykpn?gj`)z(}N1AmtjUar&cHEfl1Skze%qn2vpX2kbf0rsyl6 zEP%KH?lH7LFQ}ZwYZnbP@9e?;@U01ZWyYQ(Yl@s+%{WJ2uW(Q7^!OKu;*!PV zYcuj3SySZR)4A5YhnGOk`^KH63Hic=d}2nPBWsGhakd@-=LkMt!qSEF{;<~X&*oDz z@*G(;X%EIq~59~PUEA8P&ed{yFmWOOO#3ORw&a;#?p%&&|qca^q_oeY)SWa97=_t>HK zR-VA0O}YBfPa}34|H_qC!%iN??8JP)e8n7XVrIr1UE(Vidwa}wDO0S>%4B7pg-&s0 z3vJ9cTXvos?QCOZ#GE5-irq62dNVzHg4v0mIhrjd><``;vD;V~vFAveVkZkFPBJrE zd*-Z8X2NJ!sj(`=o|ucgzeurr60y=^}Iaoejm0*+cOz?g_T=&EzY==lM`apbLa7EAh+eR 
ze=n2ut>ZM4o$TtcwhVUVs`6*>GCjK7PY%1niuT^CRmOFu*B*07-df2Qw1f&?PDY%itEyn5pu^%`8+RRjhZ_H zzZcRNMVK*`mSYZ+wa@3m%&Dwg|JUa^ aeFijxX literal 0 HcmV?d00001 diff --git a/tests/data_for_tests/textgrid/praatIO/mary_longfile_with_negative_zero.TextGrid b/tests/data_for_tests/textgrid/praatIO/mary_longfile_with_negative_zero.TextGrid new file mode 100644 index 0000000000000000000000000000000000000000..8ab0b66e789dd783331afaedd531055b87170956 GIT binary patch literal 7136 zcmeHMO>Yx15FOywUlHXF$|jra2JvwN^~8Z*P{pAsl?bG1wFyx8W&AqeJv$~@d$wfa zazV&Svg@_I@6C*7#*-gE<%P^;As5nr}9=FyS^U2*>;~5?&;CyNf@EA zItF&G*~n+`T|hD(S08iJ`Adws{Eev(xxR-qm*~;Q>(bfc@mCymyR_mNl=v>{m6JtV zVg;nFaY*M`Sm5fN>4p zBS?W(P&$o|okuWe{r(#h_tMNg2W!flR!v_=TQ6}p zw{-4xZ{Y=)^SXAQ!o+-TVm>i5&%v59ubr-k;5mei7tr*;S%28;e^2L?nRyOYjrrEM z`;`aI$5|L(nRHzrz}Sv4WlG(a&}s>*9yq@?HCgn+#Byq8X~m~8q_?Kurv1{I8Ko=y z`exB7vqWvY9(azc))nY7;VbTj4`^vTP*taob4!6u8hje%6t}D;g>zsG5c)Md1lnJEh{7E z9JDESpNY_#Y1tF3PX3J1>@jh__u9zamX(ov4%(DEvrzJ6W=3sKoz|I|(A$-&uL`~= zK84t_GTNKf)^pIN+?kQk-p7yu8JB3>=iKpAd~1HXi@Pm~ac5b%=b%lw$JK|xEB=jj z+>Tzm6^1U)mDWv;qqDlL!te$4g~nA|H2sh%W2rwGhw4kr$}W#IKC{-+t}p1GBAxla zx)Ahp_tk}*^Gj^&tU)!X6}D|Y*pjYSTP!309K>xq=4v_FU(CP3PFuK|sV!@@#k5@1 zZ5bSz5yjlssAG23Fm=t_qZxAxYkrxs&4~*!>zT9o2Fz{o?B9!KUHdqVW+$6ktSyRN zyXyQotW5VV)HChuuH)D>_O!QOtwOHnd%JV?iCs(i1oSh=rP<);9%pXJyqZ-&=32gD zz+To5Za+zSPyBJSS|52EtLD1oVzga(N$}%3TIPj3_Y$Aal~}3f`M{ruwA>@C7^CH! 
z{jhdfU06BA>it~-l&qyw!jvZ~>prLFyDeC`!hTr)%7R Oy>b-GmDQ2bqx}V`CjUhM literal 0 HcmV?d00001 diff --git a/tests/data_for_tests/textgrid/praatIO/mary_with_constrained_tier_times.TextGrid b/tests/data_for_tests/textgrid/praatIO/mary_with_constrained_tier_times.TextGrid new file mode 100644 index 00000000..82bd051c --- /dev/null +++ b/tests/data_for_tests/textgrid/praatIO/mary_with_constrained_tier_times.TextGrid @@ -0,0 +1,84 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +0 +1.869687 + +3 +"IntervalTier" +"phone" +0.3154201182247563 +1.5182538944627297 +14 +0.3154201182247563 +0.38526757369599995 +"m" +0.38526757369599995 +0.4906833231456586 +"ə" +0.4906833231456586 +0.5687114623227726 +"r" +0.5687114623227726 +0.6755499913498981 +"i" +0.6755499913498981 +0.8142925170069999 +"r" +0.8142925170069999 +0.854201814059 +"o" +0.854201814059 +0.9240430839 +"l" +0.9240430839 +0.9839070294779999 +"d" +0.9839070294779999 +1.0164729379083655 +"θ" +1.0164729379083655 +1.063725623583 +"ə" +1.063725623583 +1.1152822781165286 +"b" +1.1152822781165286 +1.2325508617834506 +"œ" +1.2325508617834506 +1.3345876591689074 +"r" +1.3345876591689074 +1.5182538944627297 +"l" +"IntervalTier" +"word" +0.3154201182247563 +1.5182538944627297 +4 +0.3154201182247563 +0.6755499913498981 +"mary" +0.6755499913498981 +0.9839070294779999 +"rolled" +0.9839070294779999 +1.063725623583 +"the" +1.063725623583 +1.5182538944627297 +"barrel" +"TextTier" +"pitch" +0.3154201182247563 +1.5182538944627297 +4 +0.5978689404359245 +"120" +0.8264598697308528 +"85" +1.0195797927558785 +"97" +1.2008760470242699 +"104" diff --git a/tests/data_for_tests/textgrid/praatIO/mary_with_negative_zero.TextGrid b/tests/data_for_tests/textgrid/praatIO/mary_with_negative_zero.TextGrid new file mode 100644 index 00000000..625c028a --- /dev/null +++ b/tests/data_for_tests/textgrid/praatIO/mary_with_negative_zero.TextGrid @@ -0,0 +1,96 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +-0 +1.869687 + +3 +"IntervalTier" 
+"phone" +-0 +1.869687 +16 +-0 +0.3154201182247563 +"" +0.3154201182247563 +0.38526757369599995 +"m" +0.38526757369599995 +0.4906833231456586 +"ə" +0.4906833231456586 +0.5687114623227726 +"r" +0.5687114623227726 +0.6755499913498981 +"i" +0.6755499913498981 +0.8142925170069999 +"r" +0.8142925170069999 +0.854201814059 +"o" +0.854201814059 +0.9240430839 +"l" +0.9240430839 +0.9839070294779999 +"d" +0.9839070294779999 +1.0164729379083655 +"θ" +1.0164729379083655 +1.063725623583 +"ə" +1.063725623583 +1.1152822781165286 +"b" +1.1152822781165286 +1.2325508617834506 +"œ" +1.2325508617834506 +1.3345876591689074 +"r" +1.3345876591689074 +1.5182538944627297 +"l" +1.5182538944627297 +1.869687 +"" +"IntervalTier" +"word" +-0 +1.869687 +6 +-0 +0.3154201182247563 +"" +0.3154201182247563 +0.6755499913498981 +"mary" +0.6755499913498981 +0.9839070294779999 +"rolled" +0.9839070294779999 +1.063725623583 +"the" +1.063725623583 +1.5182538944627297 +"barrel" +1.5182538944627297 +1.869687 +"" +"TextTier" +"pitch" +-0 +1.869687 +4 +0.5978689404359245 +"120" +0.8264598697308528 +"85" +1.0195797927558785 +"97" +1.2008760470242699 +"104" diff --git a/tests/data_for_tests/textgrid/textgrids.json b/tests/data_for_tests/textgrid/textgrids.json new file mode 100644 index 00000000..3e63defe --- /dev/null +++ b/tests/data_for_tests/textgrid/textgrids.json @@ -0,0 +1,27 @@ +[ + { + "path": "parselmouth/the_north_wind_and_the_sun.bin.TextGrid", + "format": "full", + "encoding": "bin" + }, + { + "path": "parselmouth/the_north_wind_and_the_sun.short.utf8.TextGrid", + "format": "short", + "encoding": "utf-8" + }, + { + "path": "parselmouth/the_north_wind_and_the_sun.short.utf16.TextGrid", + "format": "short", + "encoding": "utf-16" + }, + { + "path": "parselmouth/the_north_wind_and_the_sun.utf8.TextGrid", + "format": "full", + "encoding": "utf-8" + }, + { + "path": "parselmouth/the_north_wind_and_the_sun.utf16.TextGrid", + "format": "full", + "encoding": "utf-16" + } +] diff --git 
a/tests/fixtures/textgrid.py b/tests/fixtures/textgrid.py index ccaecb29..1fec1e50 100644 --- a/tests/fixtures/textgrid.py +++ b/tests/fixtures/textgrid.py @@ -13,3 +13,30 @@ def textgrid_paths(): @pytest.fixture(params=TEXTGRID_PATHS) def a_textgrid_path(request): return request.param + + +TEXTGRID_ROOT = TEST_DATA_ROOT / 'textgrid' +PARSE_TEXTGRID_PATHS = TEXTGRID_ROOT.glob('**/*TextGrid') + + +@pytest.fixture(params=PARSE_TEXTGRID_PATHS) +def a_parse_textgrid_path(request): + return request.param + + +TEXTGRIDS_WITH_EMPTY_INTERVALS_PATHS = [ + TEXTGRID_ROOT / path + for path in ( + 'calhoun-et-al-2022/BroadFocusAlofa.TextGrid', + 'parselmouth/the_north_wind_and_the_sun.short.utf8.TextGrid', + 'praatIO/mary.TextGrid' + ) +] +@pytest.fixture(params=TEXTGRIDS_WITH_EMPTY_INTERVALS_PATHS) +def a_textgrid_with_empty_intervals_path(request): + return request.param + + +@pytest.fixture(params=(True, False)) +def keep_empty(request): + return request.param diff --git a/tests/test_formats/test_seq/test_textgrid.py b/tests/test_formats/test_seq/test_textgrid.py deleted file mode 100644 index 15dee751..00000000 --- a/tests/test_formats/test_seq/test_textgrid.py +++ /dev/null @@ -1,90 +0,0 @@ -import numpy as np -import pytest - -import crowsetta.formats - -from .asserts import assert_rounded_correct_num_decimals - - -def test_from_file(a_textgrid_path): - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - assert isinstance(textgrid, crowsetta.formats.seq.TextGrid) - - -def test_from_file_str(a_textgrid_path): - a_textgrid_path_str = str(a_textgrid_path) - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path_str) - assert isinstance(textgrid, crowsetta.formats.seq.TextGrid) - - -def test_to_seq(a_textgrid_path): - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - seq = textgrid.to_seq() - assert isinstance(seq, crowsetta.Sequence) - - -@pytest.mark.parametrize( - "decimals", - [ - 1, - 
2, - 3, - 4, - 5, - ], -) -def test_to_seq_round_times_true(test_data_root, a_textgrid_path, decimals): - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - seq = textgrid.to_seq(round_times=True, decimals=decimals) - assert_rounded_correct_num_decimals(seq.onsets_s, decimals) - assert_rounded_correct_num_decimals(seq.offsets_s, decimals) - - -def test_to_seq_round_times_false(test_data_root, a_textgrid_path): - interval_tier = 0 - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - seq = textgrid.to_seq(interval_tier=interval_tier, round_times=False) - - intv_tier = textgrid.textgrid[interval_tier] - onsets_s = np.asarray([interval.minTime for interval in intv_tier]) - offsets_s = np.asarray([interval.maxTime for interval in intv_tier]) - - assert np.all(np.allclose(seq.onsets_s, onsets_s)) - assert np.all(np.allclose(seq.offsets_s, offsets_s)) - - -def test_to_annot(a_textgrid_path): - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - annot = textgrid.to_annot() - assert isinstance(annot, crowsetta.Annotation) - assert hasattr(annot, "seq") - - -@pytest.mark.parametrize( - "decimals", - [ - 1, - 2, - 3, - 4, - 5, - ], -) -def test_to_annot_round_times_true(test_data_root, a_textgrid_path, decimals): - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - annot = textgrid.to_annot(round_times=True, decimals=decimals) - assert_rounded_correct_num_decimals(annot.seq.onsets_s, decimals) - assert_rounded_correct_num_decimals(annot.seq.offsets_s, decimals) - - -def test_to_annot_round_times_false(test_data_root, a_textgrid_path): - interval_tier = 0 - textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) - annot = textgrid.to_annot(interval_tier=interval_tier, round_times=False) - - intv_tier = textgrid.textgrid[interval_tier] - onsets_s = np.asarray([interval.minTime for interval in intv_tier]) - offsets_s = 
np.asarray([interval.maxTime for interval in intv_tier]) - - assert np.all(np.allclose(annot.seq.onsets_s, onsets_s)) - assert np.all(np.allclose(annot.seq.offsets_s, offsets_s)) diff --git a/src/crowsetta/_vendor/__init__.py b/tests/test_formats/test_seq/test_textgrid/__init__.py similarity index 100% rename from src/crowsetta/_vendor/__init__.py rename to tests/test_formats/test_seq/test_textgrid/__init__.py diff --git a/tests/test_formats/test_seq/test_textgrid/test_classes.py b/tests/test_formats/test_seq/test_textgrid/test_classes.py new file mode 100644 index 00000000..836af897 --- /dev/null +++ b/tests/test_formats/test_seq/test_textgrid/test_classes.py @@ -0,0 +1,132 @@ +import pytest + + +import crowsetta.formats.seq.textgrid.classes + + +@pytest.mark.parametrize( + 'xmin, xmax, text', + [ + (0., 0.3349812397, 'm'), + ] +) +def test_Interval(xmin, xmax, text): + interval = crowsetta.formats.seq.textgrid.classes.Interval(xmin, xmax, text) + assert isinstance(interval, crowsetta.formats.seq.textgrid.classes.Interval) + for attr_name, expected_attr_val in zip( + ('xmin', 'xmax', 'text'), + (xmin, xmax, text) + ): + assert hasattr(interval, attr_name) + assert getattr(interval, attr_name) == expected_attr_val + + +@pytest.mark.parametrize( + 'xmin, xmax, text, expected_error', + [ + # xmin > xmax + (0.3349812397, 0, 'm', ValueError), + # xmin is negative + (-0.3, 0, 'm', ValueError), + # xmax is negative + (0., -0.3, 'm', ValueError), + ] +) +def test_Interval_raises(xmin, xmax, text, expected_error): + with pytest.raises(expected_error): + crowsetta.formats.seq.textgrid.classes.Interval(xmin, xmax, text) + + +@pytest.mark.parametrize( + 'xmin, xmax, name, intervals', + [ + ( + 0., + 2.3, + 'phones', + [ + crowsetta.formats.seq.textgrid.classes.Interval(0., 0.35, 'fo'), + crowsetta.formats.seq.textgrid.classes.Interval(0.35, 1.25, 'n'), + crowsetta.formats.seq.textgrid.classes.Interval(1.25, 2.02, 's') + ] + ) + ] +) +def test_IntervalTier(xmin, xmax, name, 
intervals): + interval_tier = crowsetta.formats.seq.textgrid.classes.IntervalTier( + xmin=xmin, xmax=xmax, name=name, intervals=intervals) + for attr_name, expected_attr_val in zip( + ('xmin', 'xmax', 'name', 'intervals'), + (xmin, xmax, name, intervals) + ): + assert hasattr(interval_tier, attr_name) + assert getattr(interval_tier, attr_name) == expected_attr_val + + +@pytest.mark.parametrize( + 'xmin, xmax, name, intervals, expected_error', + [ + ( + 0., + 2.3, + 'phones', + # intervals overlap, should raise error + [ + crowsetta.formats.seq.textgrid.classes.Interval(0., 0.35, 'fo'), + crowsetta.formats.seq.textgrid.classes.Interval(0.1, 1.25, 'n'), + crowsetta.formats.seq.textgrid.classes.Interval(1.23, 2.02, 's') + ], + ValueError + ) + ] +) +def test_IntervalTier_raises(xmin, xmax, name, intervals, expected_error): + with pytest.raises(expected_error): + crowsetta.formats.seq.textgrid.classes.IntervalTier( + xmin=xmin, xmax=xmax, name=name, intervals=intervals) + + +@pytest.mark.parametrize( + 'number, mark', + [ + (0., 'fo'), + (0.35, 'n'), + (1.25, 's') + ] +) +def test_Point(number, mark): + point = crowsetta.formats.seq.textgrid.classes.Point(number, mark) + for attr_name, expected_attr_val in zip( + ('number', 'mark'), + (number, mark) + ): + assert hasattr(point, attr_name) + assert getattr(point, attr_name) == expected_attr_val + + + + +@pytest.mark.parametrize( + 'xmin, xmax, name, points', + [ + ( + 0., + 2.3, + 'phones', + [ + crowsetta.formats.seq.textgrid.classes.Point(0., 'fo'), + crowsetta.formats.seq.textgrid.classes.Point(0.35, 'n'), + crowsetta.formats.seq.textgrid.classes.Point(1.25, 's') + ] + ) + ] +) +def test_PointTier(xmin, xmax, name, points): + point_tier = crowsetta.formats.seq.textgrid.classes.PointTier( + xmin=xmin, xmax=xmax, name=name, points=points) + for attr_name, expected_attr_val in zip( + ('xmin', 'xmax', 'name', 'points'), + (xmin, xmax, name, points) + ): + assert hasattr(point_tier, attr_name) + assert getattr(point_tier, 
attr_name) == expected_attr_val diff --git a/tests/test_formats/test_seq/test_textgrid/test_parse.py b/tests/test_formats/test_seq/test_textgrid/test_parse.py new file mode 100644 index 00000000..a9287f2e --- /dev/null +++ b/tests/test_formats/test_seq/test_textgrid/test_parse.py @@ -0,0 +1,163 @@ +import math +import re +from unittest.mock import mock_open, patch + +import pytest + +from crowsetta.formats.seq.textgrid.classes import IntervalTier, PointTier +import crowsetta.formats.seq.textgrid.parse + + +@pytest.fixture +def fp_factory(): + """Factory that returns a mocked open file ``fp`` + that will return a specified string when ``readline`` when + ``fp.readline`` is called""" + + def _fp(readline): + with patch('builtins.open', mock_open(read_data=readline)) as mock_file: + with open('fake.TextGrid') as fp: + return fp + + return _fp + + +@pytest.mark.parametrize( + 'readline, pat, expected_result', + [ + # float + ('xmin = 0 \n', re.compile('([\\d.]+)\\s*$'), '0'), + ('xmax = 2.4360509767904546 \n', re.compile('([\\d.]+)\\s*$'), '2.4360509767904546'), + # int + ('size = 3 \n', re.compile('([\\d]+)\\s*$'), '3'), + (' points: size = 5 \n', re.compile('([\\d]+)\\s*$'), '5'), + # string + (' class = "TextTier" \n', re.compile('"(.*)"\\s*$'), 'TextTier'), + (' name = "Tones" \n', re.compile('"(.*)"\\s*$'), 'Tones') + ] +) +def test_search_next_line(readline, pat, expected_result, fp_factory): + fp = fp_factory(readline) + out = crowsetta.formats.seq.textgrid.parse.search_next_line(fp, pat) + assert out == expected_result + + +@pytest.mark.parametrize( + 'readline, expected_result', + [ + # float + ('xmin = 0 \n', 0.0), + ('xmax = 2.4360509767904546 \n', 2.4360509767904546), + ] +) +def test_get_float_from_next_line(readline, expected_result, fp_factory): + fp = fp_factory(readline) + out = crowsetta.formats.seq.textgrid.parse.get_float_from_next_line(fp) + assert isinstance(out, float) + assert math.isclose(out, expected_result) + + +@pytest.mark.parametrize( + 
'readline, expected_result', + [ + # int + ('size = 3 \n', 3), + (' points: size = 5 \n', 5), + ] +) +def test_get_int_from_next_line(readline, expected_result, fp_factory): + fp = fp_factory(readline) + out = crowsetta.formats.seq.textgrid.parse.get_int_from_next_line(fp) + assert isinstance(out, int) + assert out == expected_result + + +@pytest.mark.parametrize( + 'readline, expected_result', + [ + (' class = "TextTier" \n', 'TextTier'), + (' name = "Tones" \n', 'Tones') + ] +) +def test_get_str_from_next_line(readline, expected_result, fp_factory): + fp = fp_factory(readline) + out = crowsetta.formats.seq.textgrid.parse.get_str_from_next_line(fp) + assert isinstance(out, str) + assert out == expected_result + + +def test_parse_fp(a_parse_textgrid_path): + try: + with a_parse_textgrid_path.open("r", encoding="utf-16") as fp: + out = crowsetta.formats.seq.textgrid.parse.parse_fp(fp) + except (UnicodeError, UnicodeDecodeError): + with a_parse_textgrid_path.open("r", encoding="utf-8") as fp: + out = crowsetta.formats.seq.textgrid.parse.parse_fp(fp) + + assert isinstance(out, dict) + for expected_key, expected_type in zip( + ('xmin', 'xmax', 'tiers'), (float, float, list) + ): + assert expected_key in out + assert isinstance(out[expected_key], expected_type) + + assert all( + [isinstance(tier, (IntervalTier, PointTier)) for tier in out['tiers']] + ) + + +def test_parse_fp_keep_empty(a_textgrid_with_empty_intervals_path, keep_empty): + try: + with a_textgrid_with_empty_intervals_path.open("r", encoding="utf-16") as fp: + out = crowsetta.formats.seq.textgrid.parse.parse_fp(fp, keep_empty) + except (UnicodeError, UnicodeDecodeError): + with a_textgrid_with_empty_intervals_path.open("r", encoding="utf-8") as fp: + out = crowsetta.formats.seq.textgrid.parse.parse_fp(fp, keep_empty) + + tiers = [tier for tier in out['tiers'] if isinstance(tier, IntervalTier)] + + if keep_empty: + assert any( + [interval.text == "" + for tier in tiers + for interval in tier] + ) + else: + 
assert not any( + [interval.text == "" + for tier in tiers + for interval in tier] + ) + + +def test_parse(a_parse_textgrid_path): + out = crowsetta.formats.seq.textgrid.parse.parse(a_parse_textgrid_path) + + assert isinstance(out, dict) + for expected_key, expected_type in zip( + ('xmin', 'xmax', 'tiers'), (float, float, list) + ): + assert expected_key in out + assert isinstance(out[expected_key], expected_type) + + assert all( + [isinstance(tier, (IntervalTier, PointTier)) for tier in out['tiers']] + ) + + +def test_parse_keep_empty(a_textgrid_with_empty_intervals_path, keep_empty): + out = crowsetta.formats.seq.textgrid.parse.parse(a_textgrid_with_empty_intervals_path, keep_empty) + tiers = [tier for tier in out['tiers'] if isinstance(tier, IntervalTier)] + + if keep_empty: + assert any( + [interval.text == "" + for tier in tiers + for interval in tier] + ) + else: + assert not any( + [interval.text == "" + for tier in tiers + for interval in tier] + ) diff --git a/tests/test_formats/test_seq/test_textgrid/test_textgrid.py b/tests/test_formats/test_seq/test_textgrid/test_textgrid.py new file mode 100644 index 00000000..77904709 --- /dev/null +++ b/tests/test_formats/test_seq/test_textgrid/test_textgrid.py @@ -0,0 +1,129 @@ +import numpy as np +import pytest + +import crowsetta.formats +from crowsetta.formats.seq.textgrid.classes import IntervalTier + +from ..asserts import assert_rounded_correct_num_decimals + + +def test_from_file(a_textgrid_path): + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + assert isinstance(textgrid, crowsetta.formats.seq.TextGrid) + + +def test_from_file_str(a_textgrid_path): + a_textgrid_path_str = str(a_textgrid_path) + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path_str) + assert isinstance(textgrid, crowsetta.formats.seq.TextGrid) + + +def test_from_file_keep_empty(a_textgrid_with_empty_intervals_path, keep_empty): + textgrid = 
crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_with_empty_intervals_path, + keep_empty=keep_empty) + tiers = [tier for tier in textgrid.tiers if isinstance(tier, IntervalTier)] + + if keep_empty: + assert any( + [interval.text == "" + for tier in tiers + for interval in tier] + ) + else: + assert not any( + [interval.text == "" + for tier in tiers + for interval in tier] + ) + + +def test_to_seq(a_textgrid_path): + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + seq = textgrid.to_seq() + assert isinstance(seq, (crowsetta.Sequence, list)) + if isinstance(seq, list): + assert all([isinstance(seq_, crowsetta.Sequence) for seq_ in seq]) + + +@pytest.mark.parametrize( + "decimals", + [ + 1, + 2, + 3, + 4, + 5, + ], +) +def test_to_seq_round_times_true(a_textgrid_path, decimals): + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + seq = textgrid.to_seq(round_times=True, decimals=decimals) + + if isinstance(seq, crowsetta.Sequence): + assert_rounded_correct_num_decimals(seq.onsets_s, decimals) + assert_rounded_correct_num_decimals(seq.offsets_s, decimals) + elif isinstance(seq, list): + for seq_ in seq: + assert_rounded_correct_num_decimals(seq_.onsets_s, decimals) + assert_rounded_correct_num_decimals(seq_.offsets_s, decimals) + + +def test_to_seq_round_times_false(a_textgrid_path): + tier = 0 + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + seq = textgrid.to_seq(tier=tier, round_times=False) + + intv_tier = textgrid[tier] + onsets_s = np.asarray([interval.xmin for interval in intv_tier]) + offsets_s = np.asarray([interval.xmax for interval in intv_tier]) + + if isinstance(seq, crowsetta.Sequence): + assert np.all(np.allclose(seq.onsets_s, onsets_s)) + assert np.all(np.allclose(seq.offsets_s, offsets_s)) + elif isinstance(seq, list): + breakpoint() + for seq_ in seq: + assert np.all(np.allclose(seq_.onsets_s, onsets_s)) + assert 
np.all(np.allclose(seq_.offsets_s, offsets_s)) + + +def test_to_annot(a_textgrid_path): + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + annot = textgrid.to_annot() + assert isinstance(annot, crowsetta.Annotation) + assert hasattr(annot, "seq") + + +@pytest.mark.parametrize( + "decimals", + [ + 1, + 2, + 3, + 4, + 5, + ], +) +def test_to_annot_round_times_true(a_textgrid_path, decimals): + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + annot = textgrid.to_annot(round_times=True, decimals=decimals) + if isinstance(annot.seq, crowsetta.Sequence): + assert_rounded_correct_num_decimals(annot.seq.onsets_s, decimals) + assert_rounded_correct_num_decimals(annot.seq.offsets_s, decimals) + elif isinstance(annot.seq, list): + for seq_ in annot.seq: + assert_rounded_correct_num_decimals(seq_.onsets_s, decimals) + assert_rounded_correct_num_decimals(seq_.offsets_s, decimals) + + +def test_to_annot_round_times_false(a_textgrid_path): + tier = 0 + textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path=a_textgrid_path) + annot = textgrid.to_annot(tier=tier, round_times=False) + + intv_tier = textgrid[tier] + onsets_s = np.asarray([interval.xmin for interval in intv_tier]) + offsets_s = np.asarray([interval.xmax for interval in intv_tier]) + + assert np.all(np.allclose(annot.seq.onsets_s, onsets_s)) + assert np.all(np.allclose(annot.seq.offsets_s, offsets_s))