diff --git a/big/text.py b/big/text.py index 964b5a4..b8279d9 100644 --- a/big/text.py +++ b/big/text.py @@ -245,13 +245,34 @@ def encode_strings(o, encoding='ascii'): # False # >>> is_newline_byte(b'\v') # False +# +# however! with defensive programming, in case this changes in the future +# (as it should!), big will automatically still agree with Python. +# +# p.s. you have to put characters around the linebreak character, +# because str.splitlines (and bytes.splitlines) rstrips the linebreak +# characters before it splits, sigh. _export_name('bytes_linebreaks') bytes_linebreaks = ( b'\n' , # 10 0x000a - newline + ) + +if len(b'x\vx'.splitlines()) == 2: # pragma: nocover + bytes_linebreaks += ( + b'\v' , # 11 - 0x000b - vertical tab + ) + +if len(b'x\fx'.splitlines()) == 2: # pragma: nocover + bytes_linebreaks += ( + b'\f' , # 12 - 0x000c - form feed + ) + +bytes_linebreaks += ( b'\r' , # 13 0x000d - carriage return b'\r\n' , # bonus! the classic DOS newline sequence! ) + _export_name('bytes_linebreaks_without_crlf') bytes_linebreaks_without_crlf = tuple(s for s in bytes_linebreaks if s != b'\r\n') @@ -2010,6 +2031,13 @@ def old_split_quoted_strings(s, quotes=None, *, triple_quotes=True, backslash=No ## One added benefit of this approach: it works on both str and bytes objects, you don't need to ## handle them separately. ## +## Update: OOOOPS! s.splitlines() implicitly does an s.rstrip(newline-characters) before splitting! +## Hooray for special cases breaking the rules! +## +## So now I have to do this more complicated version: +## contains_newlines = (len( s.splitlines() ) > 1) or (len( ( s[-1:] + 'x' ).splitlines() ) > 1) +## (Why the colon in [-1:] ? So it works on bytes strings. yes, we also have to use b'x' then.) 
+## _sqs_quotes_str = ( '"', "'") _sqs_quotes_bytes = (b'"', b"'") @@ -2018,7 +2046,7 @@ def old_split_quoted_strings(s, quotes=None, *, triple_quotes=True, backslash=No _sqs_escape_bytes = b'\\' -def split_quoted_strings(s, separators, all_quotes_set, quotes, multiline_quotes, empty, state): +def split_quoted_strings(s, separators, all_quotes_set, quotes, multiline_quotes, empty, laden, state): """ This is the generator function implementing the split_quoted_strings iterator. The public split_quoted_strings analyzes its arguments, @@ -2055,7 +2083,7 @@ def split_quoted_strings(s, separators, all_quotes_set, quotes, multiline_quotes if quote or text: if quote and text and (quote not in multiline_quotes): # see treatise above - if len(text.splitlines()) > 1: + if (len(text.splitlines()) > 1) or (len( (text[-1:] + laden).splitlines()) > 1): raise SyntaxError("unterminated quoted string, {s!r}") if state: state = None @@ -2072,7 +2100,7 @@ def split_quoted_strings(s, separators, all_quotes_set, quotes, multiline_quotes if text or quote: if quote and text and (quote not in multiline_quotes): # see treatise above - if len(text.splitlines()) > 1: + if (len(text.splitlines()) > 1) or (len( (text[-1:] + laden).splitlines()) > 1): raise SyntaxError("unterminated quoted string, {s!r}") if state: state = None @@ -2102,9 +2130,12 @@ def split_quoted_strings(s, quotes=_sqs_quotes_str, *, escape=_sqs_escape_str, m quotes is an iterable of unique quote delimiters. Quote delimiters may be any string of 1 or more characters. They must be the same type as s, either str or bytes. - By default, quotes is ('"', "'"). (If s is bytes, - quotes defaults to (b'"', b"'").) Text delimited - inside quotes must not contain a newline. + When one of these quote delimiters is encountered in s, + it begins a quoted section, which only ends at the + next occurrence of that quote delimiter. By default, + quotes is ('"', "'"). (If s is bytes, quotes defaults + to (b'"', b"'").) 
Text delimited inside quotes must + not contain a newline. escape is a string of any length. If escape is not an empty string, the string will "escape" (quote) @@ -2149,9 +2180,13 @@ def split_quoted_strings(s, quotes=_sqs_quotes_str, *, escape=_sqs_escape_str, m if s ends with an unterminated string. In that case, the last tuple yielded will have a non-empty leading_quote and an empty trailing_quote. + * split_quoted_strings only supports the opening and + closing marker for a string being the same string. + If you need the opening and closing markers to be + different strings, use split_delimiters. """ - # print(f"split_quoted_strings({s=}, {quotes=}, *, {escape=}, {state=})") + # print(f"split_quoted_strings({s=}, {quotes=}, *, {escape=}, {multiline_quotes=}, {state=})") if multiline_quotes is None: multiline_quotes = () @@ -2160,6 +2195,7 @@ def split_quoted_strings(s, quotes=_sqs_quotes_str, *, escape=_sqs_escape_str, m if is_bytes: s_type = bytes empty = b'' + laden = b'x' if quotes in (_sqs_quotes_str, None): quotes = _sqs_quotes_bytes else: @@ -2181,6 +2217,7 @@ def split_quoted_strings(s, quotes=_sqs_quotes_str, *, escape=_sqs_escape_str, m else: s_type = str empty = "" + laden = 'x' if quotes in (_sqs_quotes_bytes, None): quotes = _sqs_quotes_str else: @@ -2191,6 +2228,12 @@ def split_quoted_strings(s, quotes=_sqs_quotes_str, *, escape=_sqs_escape_str, m raise ValueError("quotes cannot contain an empty string") if escape in (_sqs_escape_bytes, None): escape = _sqs_escape_str + if multiline_quotes: + for q in multiline_quotes: + if not isinstance(q, s_type): + raise TypeError(f"values in multiline_quotes must match s (str or bytes), not {q!r}") + if not q: + raise ValueError("multiline_quotes cannot contain an empty string") elif not isinstance(escape, s_type): raise TypeError(f"escape must match s (str or bytes), not {escape!r}") @@ -2252,7 +2295,7 @@ def split_quoted_strings(s, quotes=_sqs_quotes_str, *, escape=_sqs_escape_str, m # help multisplit work 
better--it memoizes the conversion to a regular expression separators.sort() - return _split_quoted_strings(s, separators, all_quotes_set, quotes_set, multiline_quotes_set, empty, state) + return _split_quoted_strings(s, separators, all_quotes_set, quotes_set, multiline_quotes_set, empty, laden, state) @_export @@ -2283,14 +2326,22 @@ def __init__(self, close, *, escape='', multiline=True, quoting=False): if is_bytes: t = bytes empty = b'' + if close == b'\\': + raise ValueError("close delimiter must not be b'\\'") else: t = str empty = '' + if close == '\\': + raise ValueError("close delimiter must not be '\\'") # they can't both be false, and they can't both be true if bool(escape) != bool(quoting): raise ValueError("quoting and escape mismatch; they must either both be true, or both be false") + # if quoting=False, you can only have multiline=True + if not (quoting or multiline): + raise ValueError("multiline=False unsupported when quoting=False") + self._close = close self._escape = escape or empty self._quoting = quoting @@ -2380,7 +2431,7 @@ def __init__(self, open={}, close=(), escape='', illegal={}, single_line_only=Fa self.illegal = illegal self.single_line_only = single_line_only - def __repr__(self): + def __repr__(self): # pragma: nocover return f"DelimiterState(open={self.open!r}, close={self.close!r}, escape={self.escape!r}, illegal={self.illegal!r}, single_line_only={self.single_line_only!r})" @@ -2401,12 +2452,10 @@ def _delimiters_to_state_machine(delimiters, is_bytes): s_type = bytes s_type_description = "bytes" not_s_type_description = "str" - disallowed_delimiter = b'\\' else: s_type = str s_type_description = "str" not_s_type_description = "bytes" - disallowed_delimiter = '\\' all_closers = set() all_openers = set(delimiters) @@ -2415,14 +2464,10 @@ def _delimiters_to_state_machine(delimiters, is_bytes): for k, v in delimiters.items(): if not isinstance(k, s_type): raise TypeError(f"open delimiter {k!r} must be {s_type_description}, not 
{not_s_type_description}") - if k == disallowed_delimiter: - raise ValueError(f"illegal open delimiter {k!r}") if not isinstance(v, Delimiter): raise TypeError(f"delimiter values must be Delimiter, not {v!r}") if not isinstance(v.close, s_type): raise TypeError(f"close delimiter {v.close!r} must be {s_type_description}, not {not_s_type_description}") - if v.close == disallowed_delimiter: - raise ValueError(f"Delimiter: illegal close delimiter {v.close!r}") all_closers.add(v.close) if not isinstance(v.escape, s_type): raise TypeError(f"Delimiter: escape {v.escape!r} must be {s_type_description}, not {not_s_type_description}") @@ -2465,7 +2510,7 @@ def _delimiters_to_state_machine(delimiters, is_bytes): return initial_state, all_tokens -def split_delimiters(s, all_tokens, current, stack, empty): +def split_delimiters(s, all_tokens, current, stack, empty, laden): "Internal generator function returned by the real split_delimiters." push = stack.append pop = stack.pop @@ -2492,9 +2537,9 @@ def split_delimiters(s, all_tokens, current, stack, empty): # flush open delimiter s = join(text) clear() - # see treatise above - if current.single_line_only and (len(s.splitlines()) > 1): - raise SyntaxError("unterminated quoted string, {s!r}") + # we don't need to test to see if s contains a newline here. + # if we have an open delimiter, that means quoting is False. + # if quoting is False, multiline must be True. this is a Delimiter invariant. 
yield s, delimiter, empty push(current) @@ -2505,9 +2550,10 @@ def split_delimiters(s, all_tokens, current, stack, empty): # flush close delimiter s = join(text) clear() - # see treatise above - if current.single_line_only and (len(s.splitlines()) > 1): - raise SyntaxError("unterminated quoted string, {s!r}") + if s: + # see treatise above + if current.single_line_only and ((len(s.splitlines()) > 1) or (len( (s[-1:] + laden).splitlines()) > 1)): + raise SyntaxError(f"unterminated quoted string, {s!r}") yield s, empty, delimiter current = pop() @@ -2527,8 +2573,8 @@ def split_delimiters(s, all_tokens, current, stack, empty): s = join(text) if s: # see treatise above - if current.single_line_only and (len(s.splitlines()) > 1): - raise SyntaxError("unterminated quoted string, {s!r}") + if current.single_line_only and ((len(s.splitlines()) > 1) or (len( (s[-1:] + laden).splitlines()) > 1)): + raise SyntaxError(f"unterminated quoted string, {s!r}") yield s, empty, empty @@ -2572,15 +2618,19 @@ def split_delimiters(s, delimiters=split_delimiters_default_delimiters, *, state If s doesn't end with a closing delimiter, in the final tuple yielded, both open and close will be empty strings. - (Tip: Use a list as a stack to track the state of split_delimiters. - Every time split_delimiters yields a tuple, first process text. - Then, if open is true, push that string with stack.append. - Else, if close is true, pop the stack with stack.pop.) + split_delimiters doesn't publish its internal state, but it's + easy to track. Use a list as a stack to track the state, + like so: + * Create an empty list to store the state. + * Every time split_delimiters yields a tuple, first, + process the text. + * Then, if open is true, push that string with + stack.append. + * Else, if close is true, pop the stack with stack.pop. - You may reuse a particular character as a closing - delimiter multiple times. + You may use multiple Delimiter objects with the same close string. 
- You may not specify backslash ('\\') as a delimiter. + You may not specify backslash ('\\') as an open or close delimiter. parse_delimiter doesn't complain if a string ends with unclosed delimiters. @@ -2592,18 +2642,25 @@ def split_delimiters(s, delimiters=split_delimiters_default_delimiters, *, state is_bytes = isinstance(s, bytes) if is_bytes: + empty = b'' + laden = b'x' if delimiters in (None, split_delimiters_default_delimiters): initial_state, all_tokens = _split_delimiters_default_delimiters_bytes_cache - empty = b'' + elif not delimiters: + raise ValueError("invalid delimiters") + elif b'\\' in delimiters: + raise ValueError("open delimiter must not be b'\\'") else: + empty = '' + laden = 'x' if delimiters in (None, split_delimiters_default_delimiters_bytes): initial_state, all_tokens = _split_delimiters_default_delimiters_cache - empty = '' - - if not initial_state: - if not delimiters: + elif not delimiters: raise ValueError("invalid delimiters") + elif '\\' in delimiters: + raise ValueError("open delimiter must not be '\\'") + if not initial_state: initial_state, all_tokens = _delimiters_to_state_machine(delimiters, is_bytes) stack = [] @@ -2624,7 +2681,7 @@ def split_delimiters(s, delimiters=split_delimiters_default_delimiters, *, state current = next last_open = open - return _split_delimiters(s, all_tokens, current, stack, empty) + return _split_delimiters(s, all_tokens, current, stack, empty, laden) diff --git a/tests/test_text.py b/tests/test_text.py index f700833..c25780a 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -67,6 +67,8 @@ def unchanged(o): def to_bytes(o): # pragma: no cover if o is None: return None + if isinstance(o, bytes): + return o if isinstance(o, str): return o.encode('ascii') if isinstance(o, list): @@ -75,11 +77,20 @@ def to_bytes(o): # pragma: no cover return tuple(to_bytes(x) for x in o) if isinstance(o, set): return set(to_bytes(x) for x in o) + if isinstance(o, dict): + return {to_bytes(k): to_bytes(v) for 
k, v in o.items()} if isinstance(o, re_Pattern): flags = o.flags if flags & re.UNICODE: flags = flags - re.UNICODE - o = re.compile(to_bytes(o.pattern), flags=flags) + return re.compile(to_bytes(o.pattern), flags=flags) + if isinstance(o, big.Delimiter): + return big.Delimiter( + close=to_bytes(o.close), + escape=to_bytes(o.escape), + multiline=o.multiline, + quoting=o.quoting, + ) return o @@ -2812,19 +2823,11 @@ def test(s, expected, **kwargs): return # convert everybody to ascii - try: - if 'quotes' in kwargs: - kwargs['quotes'] = [x.encode('ascii') for x in kwargs['quotes']] - if 'state' in kwargs: - kwargs['state'] = kwargs['state'].encode('ascii') - if 'escape' in kwargs: - kwargs['escape'] = kwargs['escape'].encode('ascii') - - got = list(big.split_quoted_strings(s.encode('ascii'), **kwargs)) - expected = [(b.encode('ascii'), x.encode('ascii'), a.encode('ascii')) for b, x, a in expected] - self.assertEqual(expected, got) - except UnicodeEncodeError: - pass + kwargs = {k: to_bytes(v) for k, v in kwargs.items()} + + got = list(big.split_quoted_strings(to_bytes(s), **kwargs)) + expected = to_bytes(expected) + self.assertEqual(expected, got) test("""hey there "this is quoted" an empty quote: '' this is not quoted 'this is more quoted' "here's quoting a quote mark: \\" wow!" 
this is working!""", [ @@ -2875,7 +2878,7 @@ def test(s, expected, **kwargs): ("\n", 'def', "\n"), ("", 'ghi', ""), ], - quotes=("\n") + quotes=("\n",) ) test("abc'qxqqxx'qqq'def", @@ -2887,6 +2890,16 @@ def test(s, expected, **kwargs): escape="xx" ) + test("abc^Sqxqq^X^Sqqq^Sdef^Qghi^Q", + [ + ("", 'abc', ""), + ("^S", "qxqq^X^Sqqq", "^S"), + ("", 'def', ""), + ("^Q", 'ghi', "^Q"), + ], + quotes=('^S', '^Q',), escape="^X" + ) + test("abc'qxqqxx\\'qqq'def", [ ("", 'abc', ""), @@ -2927,7 +2940,8 @@ def test(s, expected, **kwargs): [ (b"", b'abcd', b""), ], - escape=big.text._sqs_escape_str + escape=big.text._sqs_escape_str, + multiline_quotes=None, ) # test auto-converting _sqs_escape_bytes @@ -2938,6 +2952,14 @@ def test(s, expected, **kwargs): escape=big.text._sqs_escape_bytes ) + # quotes and multiline_quotes are both empty + with self.assertRaises(ValueError): + test("a b c' x y z 'd e f'", + [], + quotes=(), + multiline_quotes=(), + ) + # type mismatch, s is str and quotes are bytes with self.assertRaises(TypeError): test("a b c' x y z 'd e f'", @@ -2952,6 +2974,20 @@ def test(s, expected, **kwargs): quotes=('"', "'", "'''"), ) + # type mismatch, s is str and multiline_quotes are bytes + with self.assertRaises(TypeError): + test("a b c' x y z 'd e f'", + [], + multiline_quotes=(b'<<', b">>", b"^^^"), + ) + + # type mismatch, s is bytes and multiline_quotes are str + with self.assertRaises(TypeError): + test(b"a b c' x y z 'd e f'", + [], + multiline_quotes=('<<', ">>", "^^^"), + ) + # type mismatch, s is str and escape is bytes with self.assertRaises(TypeError): test("a b c' x y z 'd e f'", @@ -2980,6 +3016,20 @@ def test(s, expected, **kwargs): quotes=(b'"', b"'", b""), ) + # empty string in multiline_quotes str + with self.assertRaises(ValueError): + test("a b c' x y z 'd e f'", + [], + multiline_quotes=('<<', ">>", ""), + ) + + # empty string in multiline_quotes bytes + with self.assertRaises(ValueError): + test(b"a b c' x y z 'd e f'", + [], + 
multiline_quotes=(b'<<', b">>", b""), + ) + with self.assertRaises(ValueError): test("a b c' x y z 'd e f'", [], @@ -2998,13 +3048,34 @@ def test(s, expected, **kwargs): state=b"'" ) - # repeated markers + # repeated markers in quotes with self.assertRaises(ValueError): test("a b c' x y z 'd e f'", [], quotes=('"', "'", '"'), ) + # repeated markers in multiline_quotes + with self.assertRaises(ValueError): + test("a b c' x y z 'd e f'", + [], + multiline_quotes=('<<', ">>", '<<'), + ) + + # marker appears in both quotes and multiline_quotes + with self.assertRaises(ValueError): + test("a b c' x y z 'd e f'", + [], + multiline_quotes=('<<', ">>", '"'), + ) + + # marker appears in both quotes and multiline_quotes + with self.assertRaises(ValueError): + test("a b c' x y z 'd e f'", + [], + multiline_quotes=('<<', "'", '"'), + ) + # initial state is not a quote marker with self.assertRaises(ValueError): test("a b c' x y z 'd e f'", @@ -3017,6 +3088,10 @@ def test(s, expected, **kwargs): test('abc "def\nghi" jkl', [], ) + with self.assertRaises(SyntaxError): + test('abc "defghi" "jk\nl', + [], + ) test('abc """def\nghi""" jkl', [('', 'abc ', ''), ('"""', 'def\nghi', '"""'), ('', ' jkl', '')], @@ -3083,6 +3158,8 @@ def test(s, expected, *, delimiters=None, state=()): empty = b'' if state: state = to_bytes(state) + if delimiters: + delimiters = to_bytes(delimiters) test('a[x] = foo("howdy (folks)\\n", {1:2, 3:4})', ( @@ -3183,6 +3260,26 @@ def test(s, expected, *, delimiters=None, state=()): ( '', '', ')'), )) + # test multi-character delimiters and escape + test(r'abc^Sdef<>klm^Xno**^Xp*^Xqrs^Qtuv<>z', + ( + ('abc', '^S', ''), + ('def', '<<', ''), + ('gh>>'), + ('klm', '^X', ''), + ('no**^Xp*', '', '^X'), + ('qrs', '', '^Q'), + ('tuv', '<<', ''), + ('wxy', '', '>>'), + ('z', '', ''), + ), + delimiters = { + '^S': big.Delimiter('^Q'), + '<<': big.Delimiter('>>'), + '^X': big.Delimiter('^X', escape='**', quoting=True), + }, + ) + with self.assertRaises(ValueError): 
test('a[3)', None) with self.assertRaises(ValueError): @@ -3202,8 +3299,14 @@ def test(s, expected, *, delimiters=None, state=()): test('bytes/str mismatch', None, delimiters={b'a': big.Delimiter(close='b')}) with self.assertRaises(TypeError): test('bytes/str mismatch', None, delimiters={b'a': big.Delimiter(close=b'x', escape='b', quoting=True)}) + with self.assertRaises(ValueError): + test('no delimiters?!', None, delimiters={}) + with self.assertRaises(ValueError): + test(b'no delimiters?!', None, delimiters={}) with self.assertRaises(ValueError): test('open delimiters is a ', None, delimiters={'\\': big.Delimiter(close='z')}) + with self.assertRaises(ValueError): + test(b'open delimiters is a bytes ', None, delimiters={b'\\': big.Delimiter(close=b'z')}) with self.assertRaises(ValueError): test('close delimiter is a ', None, delimiters={'z': big.Delimiter(close='\\')}) with self.assertRaises(ValueError): @@ -3214,6 +3317,43 @@ def test(s, expected, *, delimiters=None, state=()): test('quoting and escape must either both be true or both be false 1', None, delimiters={'<': big.Delimiter(close='x', quoting=True, escape='')}) with self.assertRaises(ValueError): test('quoting and escape must either both be true or both be false 1', None, delimiters={'<': big.Delimiter(close='x', quoting=False, escape='z')}) + with self.assertRaises(ValueError): + test('quoting and escape must either both be true or both be false 1', None, delimiters={'<': big.Delimiter(close='x', quoting=False, escape='z')}) + + with self.assertRaises(SyntaxError): + test('by default quote marks are now single-line only "ab\n", test 1, complete quoted string', None, ) + with self.assertRaises(SyntaxError): + test('by default quote marks are now single-line only "ab\n, test 2, unterminated quoted string', None, ) + + # testing on the Delimiter class itself + d = big.Delimiter(close='x') + self.assertEqual(d, big.Delimiter(d) ) + d = big.Delimiter(close='q', quoting=True, escape='>') + 
self.assertEqual(d, d.copy() ) + + self.assertEqual( + repr(big.Delimiter(close='x', escape='y', multiline=False, quoting=True)), + "Delimiter(close='x', escape='y', multiline=False, quoting=True)" + ) + + with self.assertRaises(ValueError): + big.Delimiter(close='x', escape='', quoting=True, multiline=True) + with self.assertRaises(ValueError): + big.Delimiter(close='x', escape='', quoting=True, multiline=False) + with self.assertRaises(ValueError): + big.Delimiter(close='x', escape='z', quoting=False, multiline=True) + with self.assertRaises(ValueError): + big.Delimiter(close='\\') + with self.assertRaises(ValueError): + big.Delimiter(close=b'\\') + # invariant: one of multiline or quoting must be true. + with self.assertRaises(ValueError): + big.Delimiter(close=')', multiline=False, quoting=False) + with self.assertRaises(ValueError): + test('abcde', [], + delimiters={'\\': big.Delimiter(close='x')}, + ) + # Delimiter objects are now read-only d = big.Delimiter(close='x') with self.assertRaises(AttributeError): @@ -3225,17 +3365,6 @@ def test(s, expected, *, delimiters=None, state=()): with self.assertRaises(AttributeError): d.quoting = True - # testing on the Delimiter class itself - d = big.Delimiter(close='x') - self.assertEqual(d, big.Delimiter(d) ) - d = big.Delimiter(close='q', quoting=True, escape='>') - self.assertEqual(d, d.copy() ) - - with self.assertRaises(ValueError): - big.Delimiter(close='x', escape='', quoting=True) - with self.assertRaises(ValueError): - big.Delimiter(close='x', escape='z', quoting=False) - def test_parse_delimiters(self): @@ -3398,7 +3527,13 @@ def L(line, line_number, column_number=1, end='\n', final=None, **kwargs): L('', 6, 1, end=''), ]) - list_of_lines = ['first line', '\tsecond line', 'third line'] + # you can give lines an iterable of strings, + # in which case we don't populate "end". 
+ list_of_lines = [ + 'first line', + '\tsecond line', + 'third line' + ] lines = big.lines(list_of_lines) test(big.lines_strip(lines), [ @@ -3407,6 +3542,22 @@ def L(line, line_number, column_number=1, end='\n', final=None, **kwargs): L('third line', 3, 1, end=''), ]) + # or! you can give lines an iterable of 2-tuples of strings, + # in which case the first string is the line and the second is the end. + list_of_lines = [ + ('line 1', '\n'), + ('hey! line 2.', '\n'), + ('the only line with the word eggplant! line 3!', '\n'), + ('the final line, line 4.', '') + ] + lines = big.lines(list_of_lines) + test(big.lines_grep(lines, 'eggplant', invert=True), + [ + L('line 1', 1, 1), + L('hey! line 2.', 2, 1), + L('the final line, line 4.', 4, 1, end=''), + ]) + # test lines_filter_line_comment_lines # note, slight white box testing here: # lines_filter_line_comment_lines has different approaches @@ -3639,6 +3790,21 @@ def test_and_remove_lineinfo_match(i, substring, *, invert=False, match='match') L('', 3, 1, end=''), ]) + # test funny separators for lines_strip, + # *and* multiple calls to clip_leading and clip_trailing + li = lines = big.text.lines('xxxA B C Dyyy\nyyyE F G Hzzz\nxyzI J K Lyzx') + li = big.text.lines_strip(li, ('x', '?')) + li = big.text.lines_strip(li, ('y', '!')) + li = big.text.lines_strip(li, ('z', '.')) + test(li, + [ + L('xxxA B C Dyyy', 1, 4, leading='xxx', final='A B C D', trailing='yyy'), + L('yyyE F G Hzzz', 2, 4, leading='yyy', final='E F G H', trailing='zzz'), + L('xyzI J K Lyzx', 3, 4, leading='xyz', final='I J K Ly', trailing='zx', end=''), + ] + ) + + lines = big.lines(""" a = b @@ -3756,12 +3922,15 @@ def test_and_remove_lineinfo_match(i, substring, *, invert=False, match='match') test(big.lines_strip_line_comments(big.lines("foo 'bar\n' bat 'zzz'"), ("#", '//',)), []) # check that the exception has the right column number + sentinel = object() + result = sentinel try: - list(big.lines_strip_line_comments(big.lines("\nfoo\nbar 'bat' baz 
'cinco\n' doodle 'zzz'"), ("#", '//',))) - self.assertTrue(False, "shouldn't reach here") + # this should throw an exception, result should not be written to here. + result = list(big.lines_strip_line_comments(big.lines("\nfoo\nbar 'bat' baz 'cinco\n' doodle 'zzz'"), ("#", '//',))) except SyntaxError as e: self.assertTrue(str(e).startswith("Line 3 column 15:")) self.assertTrue(str(e).endswith("'")) + self.assertEqual(result, sentinel) # unterminated single-quotes at the end with self.assertRaises(SyntaxError): @@ -3877,7 +4046,7 @@ def test(lines, expected, *, tab_width=8): pprint.pprint(got) print("\n\n") - self.assertEqual(got, expected) + self.assertEqual(expected, got) _sentinel = object() @@ -3940,7 +4109,6 @@ def LineInfo(lines, line, line_number, column_number, end=_sentinel, **kwargs): (LineInfo(li, line='', line_number=15, column_number=1, indent=0, leading='', end=''), ''), ] - test(lines, expected)