Skip to content

Commit

Permalink
Bump up praatio, scipy version and fix conda installation issue
Browse files Browse the repository at this point in the history
  • Loading branch information
lxy2304 committed Jan 7, 2025
1 parent 6c86226 commit 663610f
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 56 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
_version.py

# Mac files
.DS_Store
Expand Down
2 changes: 1 addition & 1 deletion docs/source/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ If you don't have conda installed on your device:

To install via pip:

#. Create a conda environment via :code:`conda create -n polyglotdb -c conda-forge openjdk=21 pip`
#. Create a conda environment via :code:`conda create -n polyglotdb -c conda-forge openjdk=21 python=3.12 librosa`
#. Activate conda environment :code:`conda activate polyglotdb`
#. Install PolyglotDB via :code:`pip install polyglotdb`, which will install the ``pgdb`` utility that can be run inside your conda environment
and manages a local database.
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ dependencies:
- openjdk=21
- pip
- librosa
- scipy<=1.12.0
- praatio<=5.0
- scipy
- praatio
- textgrid
- influxdb
- tqdm
Expand Down
12 changes: 6 additions & 6 deletions polyglotdb/corpus/importable.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,21 @@ def _corpus_create(tx, corpus_name):

with self.graph_driver.session() as session:
try:
session.write_transaction(_corpus_index)
session.execute_write(_corpus_index)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
try:
session.write_transaction(_discourse_index)
session.execute_write(_discourse_index)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
try:
session.write_transaction(_speaker_index)
session.execute_write(_speaker_index)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
session.write_transaction(_corpus_create, self.corpus_name)
session.execute_write(_corpus_create, self.corpus_name)

def finalize_import(self, speakers, token_headers, hierarchy, call_back=None, stop_check=None):
"""
Expand Down Expand Up @@ -125,9 +125,9 @@ def _create_speaker_discourse(tx, speaker_name, discourse_name, channel):
with self.graph_driver.session() as session:
for s in data.speakers:
if s in data.speaker_channel_mapping:
session.write_transaction(_create_speaker_discourse, s, data.name, data.speaker_channel_mapping[s])
session.execute_write(_create_speaker_discourse, s, data.name, data.speaker_channel_mapping[s])
else:
session.write_transaction(_create_speaker_discourse, s, data.name, 0)
session.execute_write(_create_speaker_discourse, s, data.name, 0)
data.corpus_name = self.corpus_name
data_to_graph_csvs(self, data)
self.hierarchy.update(data.hierarchy)
Expand Down
10 changes: 5 additions & 5 deletions polyglotdb/io/importer/from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _end_index(tx, at):
else:
rel_path = 'file:///{}'.format(make_path_safe(path))
try:
session.write_transaction(_unique_function, at)
session.execute_write(_unique_function, at)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
Expand All @@ -161,14 +161,14 @@ def _end_index(tx, at):
continue
properties.append(prop_temp.format(name=x))
try:
session.write_transaction(_prop_index, at, x)
session.execute_write(_prop_index, at, x)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
if 'label' in token_headers[at]:
properties.append('label_insensitive: toLower(csvLine.label)')
try:
session.write_transaction(_label_index, at)
session.execute_write(_label_index, at)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
Expand Down Expand Up @@ -233,12 +233,12 @@ def _end_index(tx, at):
speaker_statements.append((node_statement, rel_statement, path, at, s))
begin = time.time()
try:
session.write_transaction(_begin_index, at)
session.execute_write(_begin_index, at)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
try:
session.write_transaction(_end_index, at)
session.execute_write(_end_index, at)
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
Expand Down
30 changes: 15 additions & 15 deletions polyglotdb/io/inspect/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ def uniqueLabels(tier):
label from the tier
"""
if isinstance(tier, textgrid.IntervalTier):
return set(x for _, _, x in tier.entryList)
return set(x for _, _, x in tier.entries)
else:
return set(x for _, x in tier.entryList)
return set(x for _, x in tier.entries)


def average_duration(tier):
Expand All @@ -103,9 +103,9 @@ def average_duration(tier):
"""

if isinstance(tier, textgrid.IntervalTier):
return sum(float(end) - float(begin) for (begin, end, _) in tier.entryList) / len(tier.entryList)
return sum(float(end) - float(begin) for (begin, end, _) in tier.entries) / len(tier.entries)
else:
return float(tier.maxTime) / len(tier.entryList)
return float(tier.maxTime) / len(tier.entries)


def averageLabelLen(tier):
Expand Down Expand Up @@ -168,9 +168,9 @@ def guess_tiers(tg):
"""
tier_properties = {}
tier_guesses = {}
for i, tier_name in enumerate(tg.tierNameList):
ti = tg.tierDict[tier_name]
if len(ti.entryList) == 0:
for i, tier_name in enumerate(tg.tierNames):
ti = tg.getTier(tier_name)
if len(ti.entries) == 0:
continue
ti.maxTime = tg.maxTimestamp
tier_properties[ti.name] = (i, average_duration(ti))
Expand Down Expand Up @@ -228,8 +228,8 @@ def inspect_textgrid(path):
tg = textgrid.openTextgrid(t, includeEmptyIntervals=True)
if len(anno_types) == 0:
tier_guesses, hierarchy = guess_tiers(tg)
for i, tier_name in enumerate(tg.tierNameList):
ti = tg.tierDict[tier_name]
for i, tier_name in enumerate(tg.tierNames):
ti = tg.getTier(tier_name)
if tier_name not in tier_guesses:
a = OrthographyTier('word', 'word')
a.ignored = True
Expand Down Expand Up @@ -261,19 +261,19 @@ def inspect_textgrid(path):
raise (NotImplementedError)
if not a.ignored:
if isinstance(ti, textgrid.IntervalTier):
a.add(( (text.strip(), begin, end) for (begin, end, text) in ti.entryList), save=False)
a.add(( (text.strip(), begin, end) for (begin, end, text) in ti.entries), save=False)
else:
a.add(((text.strip(), time) for time, text in ti.entryList), save=False)
a.add(((text.strip(), time) for time, text in ti.entries), save=False)
anno_types.append(a)
else:
for i, tier_name in enumerate(tg.tierNameList):
ti = tg.tierDict[tier_name]
for i, tier_name in enumerate(tg.tierNames):
ti = tg.getTier(tier_name)
if anno_types[i].ignored:
continue
if isinstance(ti, textgrid.IntervalTier):
anno_types[i].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entryList), save=False)
anno_types[i].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entries), save=False)
else:
anno_types[i].add(((text.strip(), time) for time, text in ti.entryList), save=False)
anno_types[i].add(((text.strip(), time) for time, text in ti.entries), save=False)

parser = TextgridParser(anno_types, hierarchy)
return parser
30 changes: 15 additions & 15 deletions polyglotdb/io/parsers/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,20 +58,20 @@ def _is_valid(self, tg):
found_phone = False
invalid = True
multiple_speakers = False
for i, tier_name in enumerate(tg.tierNameList):
for i, tier_name in enumerate(tg.tierNames):
if ' - ' in tier_name:
multiple_speakers = True
break
if multiple_speakers:
if self.speaker_first:
speakers = {tier_name.split(' - ')[0].strip().replace('/', '_').replace('\\', '_') for tier_name in tg.tierNameList if
speakers = {tier_name.split(' - ')[0].strip().replace('/', '_').replace('\\', '_') for tier_name in tg.tierNames if
' - ' in tier_name}
else:
speakers = {tier_name.split(' - ')[1].strip().replace('/', '_').replace('\\', '_') for tier_name in tg.tierNameList if
speakers = {tier_name.split(' - ')[1].strip().replace('/', '_').replace('\\', '_') for tier_name in tg.tierNames if
' - ' in tier_name}
found_words = {x: False for x in speakers}
found_phones = {x: False for x in speakers}
for i, tier_name in enumerate(tg.tierNameList):
for i, tier_name in enumerate(tg.tierNames):
if ' - ' not in tier_name:
continue
if self.speaker_first:
Expand All @@ -87,7 +87,7 @@ def _is_valid(self, tg):
found_word = all(found_words.values())
found_phone = all(found_words.values())
else:
for i, tier_name in enumerate(tg.tierNameList):
for i, tier_name in enumerate(tg.tierNames):
if tier_name.lower().startswith(self.word_label):
found_word = True
elif tier_name.lower().startswith(self.phone_label):
Expand Down Expand Up @@ -130,12 +130,12 @@ def parse_discourse(self, path, types_only=False):
a.speaker = speaker

# Parse the tiers
for i, tier_name in enumerate(tg.tierNameList):
ti = tg.tierDict[tier_name]
for i, tier_name in enumerate(tg.tierNames):
ti = tg.getTier(tier_name)
if tier_name.lower().startswith(self.word_label):
self.annotation_tiers[0].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entryList))
self.annotation_tiers[0].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entries))
elif tier_name.lower().startswith(self.phone_label):
self.annotation_tiers[1].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entryList))
self.annotation_tiers[1].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entries))
pg_annotations = self._parse_annotations(types_only)

data = DiscourseData(name, pg_annotations, self.hierarchy)
Expand All @@ -153,15 +153,15 @@ def parse_discourse(self, path, types_only=False):
if n_channels > 1:
# Figure speaker-channel mapping
n_tiers = 0
for i, tier_name in enumerate(tg.tierNameList):
for i, tier_name in enumerate(tg.tierNames):
try:
speaker, type = tier_name.split(' - ')
except ValueError:
continue
n_tiers += 1
ind = 0
cutoffs = [x / n_channels for x in range(1, n_channels)]
for i, tier_name in enumerate(tg.tierNameList):
for i, tier_name in enumerate(tg.tierNames):
try:
if self.speaker_first:
speaker, type = tier_name.split(' - ')
Expand All @@ -181,8 +181,8 @@ def parse_discourse(self, path, types_only=False):
ind += 1

# Parse the tiers
for i, tier_name in enumerate(tg.tierNameList):
ti = tg.tierDict[tier_name]
for i, tier_name in enumerate(tg.tierNames):
ti = tg.getTier(tier_name)
try:
if self.speaker_first:
speaker, type = tier_name.split(' - ')
Expand All @@ -195,11 +195,11 @@ def parse_discourse(self, path, types_only=False):
type = 'word'
elif type.lower().startswith(self.phone_label):
type = 'phone'
if len(ti.entryList) == 1 and ti.entryList[0][2].strip() == '':
if len(ti.entries) == 1 and ti.entries[0][2].strip() == '':
continue
at = OrthographyTier(type, type)
at.speaker = speaker
at.add(( (text.strip(), begin, end) for (begin, end, text) in ti.entryList))
at.add(( (text.strip(), begin, end) for (begin, end, text) in ti.entries))
self.annotation_tiers.append(at)
pg_annotations = self._parse_annotations(types_only)
data = DiscourseData(name, pg_annotations, self.hierarchy)
Expand Down
6 changes: 3 additions & 3 deletions polyglotdb/io/parsers/labbcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ def load_textgrid(self, path):
try:
tg = textgrid.openTextgrid(path, includeEmptyIntervals=True)
new_tiers = []
dup_tiers_maxes = {k:0 for k,v in Counter([t for t in tg.tierNameList]).items() if v > 1}
dup_tiers_maxes = {k:0 for k,v in Counter([t for t in tg.tierNames]).items() if v > 1}
dup_tiers_inds = {k:0 for k in dup_tiers_maxes.keys()}

for i, t in enumerate(tg.tierNameList):
for i, t in enumerate(tg.tierNames):
if t in dup_tiers_maxes:
if len(t) > dup_tiers_maxes[t]:
dup_tiers_maxes[t] = len(t)
dup_tiers_inds[t] = i
for i, t in enumerate(tg.tierNameList):
for i, t in enumerate(tg.tierNames):
if t in dup_tiers_maxes:
if i != dup_tiers_inds[t]:
continue
Expand Down
10 changes: 5 additions & 5 deletions polyglotdb/io/parsers/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def parse_discourse(self, path, types_only=False):
"""
tg = self.load_textgrid(path)

if len(tg.tierNameList) != len(self.annotation_tiers):
if len(tg.tierNames) != len(self.annotation_tiers):
raise (TextGridError(
"The TextGrid ({}) does not have the same number of interval tiers as the number of annotation types specified.".format(
path)))
Expand All @@ -92,12 +92,12 @@ def parse_discourse(self, path, types_only=False):
a.speaker = speaker

# Parse the tiers
for i, tier_name in enumerate(tg.tierNameList):
ti = tg.tierDict[tier_name]
for i, tier_name in enumerate(tg.tierNames):
ti = tg.getTier(tier_name)
if isinstance(ti, textgrid.IntervalTier):
self.annotation_tiers[i].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entryList))
self.annotation_tiers[i].add(( (text.strip(), begin, end) for (begin, end, text) in ti.entries))
else:
self.annotation_tiers[i].add(((text.strip(), time) for time, text in ti.entryList))
self.annotation_tiers[i].add(((text.strip(), time) for time, text in ti.entries))

is_empty_textgrid = True

Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
neo4j
librosa
scipy ~= 1.12.0
praatio ~= 5.0
scipy
praatio
textgrid
influxdb
tqdm
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ license_file = LICENSE
packages = find:
install_requires =
neo4j
praatio<=5.0
praatio
textgrid
conch_sounds
librosa
influxdb
tqdm
requests
scipy<=1.12.0
scipy
pywin32; os_name == 'nt'
include_package_data = True

Expand Down

0 comments on commit 663610f

Please sign in to comment.