Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix OrderedDict tests on Python 3.12 (stringTools.parenthesesMatch) #1625

Merged
merged 2 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 124 additions & 1 deletion music21/common/stringTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@
'stripAccents',
'normalizeFilename',
'removePunctuation',
'parenthesesMatch',
'ParenthesesMatch',
]

import dataclasses
import hashlib
import random
import re
Expand Down Expand Up @@ -349,9 +352,129 @@ def removePunctuation(s: str) -> str:
out = s.translate(maketrans)
return out

@dataclasses.dataclass
class ParenthesesMatch:
    '''
    One matched pair of delimiters found by `parenthesesMatch`.
    '''
    # index in the searched string of the first character after the opening delimiter
    start: int
    # index in the searched string of the first character of the closing delimiter
    end: int
    # the substring between the two delimiters, i.e. s[start:end]
    text: str
    # matches found inside this one, in order of appearance.  The forward
    # reference is quoted because the class name is not bound until the
    # class body has finished executing.
    nested: list['ParenthesesMatch']


def parenthesesMatch(
    s: str,
    open: str = '(',  # pylint: disable=redefined-builtin
    close: str = ')',
) -> list[ParenthesesMatch]:
    r'''
    Utility tool to return a list of parentheses matches for a string using a dataclass
    called `ParenthesesMatch` which has indices of the `start` and `end`
    of the match, and the `text` of the match, and a list of `nested`
    ParenthesesMatch objects (which may have their own nested objects).

    An `open` or `close` that is immediately preceded by an unescaped backslash
    is ignored.

    >>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
    >>> common.stringTools.parenthesesMatch(st)
    [ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                      nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                              ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
     ParenthesesMatch(start=47, end=49, text='on', nested=[])]

    Other brackets can be used:

    >>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
    >>> common.stringTools.parenthesesMatch(st, open='[', close=']')
    [ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
     ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
     ParenthesesMatch(start=30, end=44, text='not [mix] very',
                      nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
     ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

    The `open` and `close` parameters can be multiple characters:

    >>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
    >>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
    [ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
     ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                      nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

    They cannot, however, be empty:

    >>> common.stringTools.parenthesesMatch(st, open='', close='')
    Traceback (most recent call last):
    ValueError: Neither open nor close can be empty.

    Unmatched opening or closing parentheses will raise a ValueError:

    >>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
    Traceback (most recent call last):
    ValueError: Opening '(' at index 3 was never closed

    >>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
    Traceback (most recent call last):
    ValueError: Closing '>' without '<' at index 23.

    The index reported for an unclosed opening is where `open` begins, even
    when `open` is more than one character long:

    >>> common.stringTools.parenthesesMatch('Did <<you eat', open='<<', close='>>')
    Traceback (most recent call last):
    ValueError: Opening '<<' at index 4 was never closed

    Note that using multiple characters like a prefix can have unintended consequences:

    >>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
    >>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
    Traceback (most recent call last):
    ValueError: Closing '")' without 'Pitch("' at index 59.

    So to do something like this, you might need to get creative:

    >>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
    >>> out
    [ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
     ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
     ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
     ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                      nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
    >>> extractedPitches = []
    >>> for match in out:
    ...     if st[match.start - 7:match.start] == 'Pitch("':
    ...         extractedPitches.append(match.text)
    >>> extractedPitches
    ['C4', 'D5', 'E6', 'Pity("Z9")']

    * New in v9.3.
    '''
    if not open or not close:
        raise ValueError('Neither open nor close can be empty.')

    openLength = len(open)
    closeLength = len(close)

    # Sentinel at the bottom of the stack: it is never returned itself, it only
    # collects the top-level matches in its `nested` list.
    mainMatch = ParenthesesMatch(-1, -1, '', [])
    stack: list[ParenthesesMatch] = [mainMatch]

    lastCharWasBackslash = False

    i = 0
    while i < len(s):
        if not lastCharWasBackslash:
            # `open` is tested before `close`, as it takes priority if both match here.
            if s.startswith(open, i):
                stack.append(ParenthesesMatch(i + openLength, -1, '', []))
                i += openLength
                continue
            if s.startswith(close, i):
                if len(stack) <= 1:
                    raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')
                curPM = stack.pop()
                curPM.end = i
                curPM.text = s[curPM.start:i]
                stack[-1].nested.append(curPM)
                i += closeLength
                continue

        # A backslash escapes the next character unless it was itself escaped,
        # so two backslashes in a row cancel each other out.
        if s[i] == '\\':
            lastCharWasBackslash = not lastCharWasBackslash
        else:
            lastCharWasBackslash = False
        i += 1

    if len(stack) > 1:
        # stack[1] is the outermost opening still waiting for its close; `start`
        # points just past the delimiter, so step back over its full length.
        unclosedIndex = stack[1].start - openLength
        raise ValueError(f'Opening {open!r} at index {unclosedIndex} was never closed')

    return mainMatch.nested


# -----------------------------------------------------------------------------
if __name__ == '__main__':
    # When this module is executed directly, hand control to music21.mainTest().
    import music21
    music21.mainTest()
46 changes: 42 additions & 4 deletions music21/test/testRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import sys
import unittest

from music21.common.stringTools import parenthesesMatch, ParenthesesMatch

defaultImports = ['music21']


Expand Down Expand Up @@ -85,19 +87,24 @@ def addDocAttrTestsToSuite(suite,

def fixDoctests(doctestSuite: doctest._DocTestSuite) -> None:
    r'''
    Fix doctests so that addresses are sanitized, WindowsPath is okay on windows
    and OrderedDicts pass on Python 3.12.

    The expected output (`want`) of every example in every DocTestCase of
    `doctestSuite` is rewritten in place; nothing is returned.  Items in the
    suite that have no `_dt_test` attribute are skipped.

    In the past this fixed other differences among Python versions.
    In the future, it will again!
    '''
    windows: bool = platform.system() == 'Windows'
    # Compare the whole version tuple rather than only the minor number, so
    # that the major version is taken into account as well.
    isPython312: bool = sys.version_info >= (3, 12)

    for dtc in doctestSuite:  # Suite to DocTestCase -- undocumented.
        if not hasattr(dtc, '_dt_test'):
            continue

        dt = dtc._dt_test  # DocTest
        for example in dt.examples:
            example.want = stripAddresses(example.want, '0x...')
            if isPython312:
                example.want = fix312OrderedDict(example.want, '...')
            if windows:
                example.want = example.want.replace('PosixPath', 'WindowsPath')

Expand All @@ -121,7 +128,6 @@ def stripAddresses(textString, replacement='ADDRESS') -> str:
>>> stripA('{0.0} <music21.humdrum.spineParser.MiscTandem *>I>')
'{0.0} <music21.humdrum.spineParser.MiscTandem *>I>'


For doctests, can strip to '...' to make it work fine with doctest.ELLIPSIS

>>> stripA('{0.0} <music21.base.Music21Object object at 0x102a0ff10>', '0x...')
Expand All @@ -130,8 +136,40 @@ def stripAddresses(textString, replacement='ADDRESS') -> str:
return ADDRESS.sub(replacement, textString)


# ------------------------------------------------------------------------------
def fix312OrderedDict(textString, replacement='...') -> str:
    '''
    Rewrite every old-style OrderedDict repr found in `textString` into the
    form printed by Python 3.12 and above, with the contents swapped out
    for `replacement`.  (Once 3.12 is the norm, the doctests for 3.10/3.11
    should be the ones that get neutered instead.)

    >>> fix312 = test.testRunner.fix312OrderedDict
    >>> fix312('OrderedDict([(0, 1), (1, 2), (2, 3)])')
    'OrderedDict({...})'

    A string without an old-style OrderedDict repr comes back untouched:

    >>> fix312('{0: 1, 1: 2, 2: 3}', 'nope!')
    '{0: 1, 1: 2, 2: 3}'

    If the delimiters in `textString` cannot be paired up, `replacement`
    alone is returned.
    '''
    opener = 'OrderedDict([('
    closer = ')])'

    if opener not in textString:
        return textString

    try:
        found = parenthesesMatch(textString, open=opener, close=closer)
    except ValueError:
        # ignore -- too complex to test, hopefully okay on other Python
        return replacement

    pieces = []
    cursor = 0
    for hit in found:
        # untouched text up to where this OrderedDict repr begins
        pieces.append(textString[cursor:hit.start - len(opener)])
        pieces.append('OrderedDict({' + replacement + '})')
        # resume just past the closing delimiter
        cursor = hit.end + len(closer)
    pieces.append(textString[cursor:])
    return ''.join(pieces)


# ------------------------------------------------------------------------------
def mainTest(*testClasses, **keywords):
'''
Takes as its arguments modules (or a string 'noDocTest' or 'verbose')
Expand Down