From 49d7d52143dfd8530ff22126c0f9a0e3d55d2607 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 16 Jul 2024 19:57:22 +0100 Subject: [PATCH] [3.12] gh-121130: Fix f-string format specifiers with debug expressions (GH-121150) (cherry picked from commit c46d64e0ef8e92a6b4ab4805d813d7e4d6663380) Co-authored-by: Pablo Galindo Salgado --- Doc/library/ast.rst | 4 +- Lib/test/test_ast.py | 4637 ++++++++++++--- Lib/test/test_fstring.py | 1611 ++--- ...-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst | 2 + Parser/action_helpers.c | 2965 +++++---- Parser/tokenizer.c | 5278 ++++++++--------- Parser/tokenizer.h | 211 +- 7 files changed, 8741 insertions(+), 5967 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index e98fe73e849fd7..401b4f3902ec14 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -302,9 +302,7 @@ Literals Name(id='a', ctx=Load())], keywords=[]), conversion=-1, - format_spec=JoinedStr( - values=[ - Constant(value='.3')]))])) + format_spec=Constant(value='.3'))])) .. class:: List(elts, ctx) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index fc823a4ed7b2bc..37b3aeed918439 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -18,15 +18,16 @@ from test.support import os_helper, script_helper from test.support.ast_helper import ASTTestMixin + def to_tuple(t): if t is None or isinstance(t, (str, int, complex)) or t is Ellipsis: return t elif isinstance(t, list): return [to_tuple(e) for e in t] result = [t.__class__.__name__] - if hasattr(t, 'lineno') and hasattr(t, 'col_offset'): + if hasattr(t, "lineno") and hasattr(t, "col_offset"): result.append((t.lineno, t.col_offset)) - if hasattr(t, 'end_lineno') and hasattr(t, 'end_col_offset'): + if hasattr(t, "end_lineno") and hasattr(t, "end_col_offset"): result[-1] += (t.end_lineno, t.end_col_offset) if t._fields is None: return tuple(result) @@ -204,86 +205,85 @@ def to_tuple(t): # These are compiled through "single" # because of overlap with "eval", it just tests what # can't be tested with "eval" -single_tests = [ - "1+2" -] +single_tests = ["1+2"] # These are compiled through "eval" # It should test all expressions eval_tests = [ - # None - "None", - # BoolOp - "a and b", - # BinOp - "a + b", - # UnaryOp - "not v", - # Lambda - "lambda:None", - # Dict - "{ 1:2 }", - # Empty dict - "{}", - # Set - "{None,}", - # Multiline dict (test for .lineno & .col_offset) - """{ + # None + "None", + # BoolOp + "a and b", + # BinOp + "a + b", + # UnaryOp + "not v", + # Lambda + "lambda:None", + # Dict + "{ 1:2 }", + # Empty dict + "{}", + # Set + "{None,}", + # Multiline dict (test for .lineno & .col_offset) + """{ 1 : 2 }""", - # ListComp - "[a for b in c if d]", - # GeneratorExp - "(a for b in c if d)", - # Comprehensions with multiple for targets - "[(a,b) for a,b in c]", - "[(a,b) for (a,b) in c]", - "[(a,b) for [a,b] in c]", - "{(a,b) for a,b in c}", - "{(a,b) for (a,b) in c}", - "{(a,b) for [a,b] in c}", - "((a,b) for a,b in c)", - "((a,b) for (a,b) in c)", - "((a,b) for [a,b] in c)", - # Yield - yield expressions can't work outside a function - # - # Compare - "1 < 2 < 3", - # Call - "f(1,2,c=3,*d,**e)", - # Call with multi-character starred - "f(*[0, 1])", - # Call with a generator argument - "f(a for a in b)", - # Num - "10", - # Str - "'string'", - # Attribute - "a.b", - # Subscript - "a[b:c]", - # Name - "v", - # List - "[1,2,3]", - # Empty list - "[]", - # Tuple - "1,2,3", - # Tuple - 
"(1,2,3)", - # Empty tuple - "()", - # Combination - "a.b.c.d(a.b[1:2])", + # ListComp + "[a for b in c if d]", + # GeneratorExp + "(a for b in c if d)", + # Comprehensions with multiple for targets + "[(a,b) for a,b in c]", + "[(a,b) for (a,b) in c]", + "[(a,b) for [a,b] in c]", + "{(a,b) for a,b in c}", + "{(a,b) for (a,b) in c}", + "{(a,b) for [a,b] in c}", + "((a,b) for a,b in c)", + "((a,b) for (a,b) in c)", + "((a,b) for [a,b] in c)", + # Yield - yield expressions can't work outside a function + # + # Compare + "1 < 2 < 3", + # Call + "f(1,2,c=3,*d,**e)", + # Call with multi-character starred + "f(*[0, 1])", + # Call with a generator argument + "f(a for a in b)", + # Num + "10", + # Str + "'string'", + # Attribute + "a.b", + # Subscript + "a[b:c]", + # Name + "v", + # List + "[1,2,3]", + # Empty list + "[]", + # Tuple + "1,2,3", + # Tuple + "(1,2,3)", + # Empty tuple + "()", + # Combination + "a.b.c.d(a.b[1:2])", ] # TODO: expr_context, slice, boolop, operator, unaryop, cmpop, comprehension # excepthandler, arguments, keywords, alias + class AST_Tests(unittest.TestCase): maxDiff = None @@ -292,7 +292,7 @@ def _is_ast_node(self, name, node): return False if "ast" not in node.__module__: return False - return name != 'AST' and name[0].isupper() + return name != "AST" and name[0].isupper() def _assertTrueorder(self, ast_node, parent_pos): if not isinstance(ast_node, ast.AST) or ast_node._fields is None: @@ -305,7 +305,7 @@ def _assertTrueorder(self, ast_node, parent_pos): value = getattr(ast_node, name) if isinstance(value, list): first_pos = parent_pos - if value and name == 'decorator_list': + if value and name == "decorator_list": first_pos = (value[0].lineno, value[0].col_offset) for child in value: self._assertTrueorder(child, first_pos) @@ -330,6 +330,7 @@ def test_AST_objects(self): def test_AST_garbage_collection(self): class X: pass + a = ast.AST() a.x = X() a.x.a = a @@ -339,9 +340,11 @@ class X: self.assertIsNone(ref()) def test_snippets(self): - for input, output, kind in ((exec_tests, exec_results, "exec"), - (single_tests, single_results, "single"), - (eval_tests, eval_results, "eval")): + for input, output, kind in ( + (exec_tests, exec_results, "exec"), + (single_tests, single_results, "single"), + (eval_tests, eval_results, "eval"), + ): for i, o in zip(input, output): with self.subTest(action="parsing", input=i): ast_tree = compile(i, "?", kind, ast.PyCF_ONLY_AST) @@ -355,12 +358,10 @@ def test_ast_validation(self): snippets_to_validate = exec_tests + single_tests + eval_tests for snippet in snippets_to_validate: tree = ast.parse(snippet) - compile(tree, '', 'exec') + compile(tree, "", "exec") def test_invalid_position_information(self): - invalid_linenos = [ - (10, 1), (-10, -11), (10, -11), (-5, -2), (-5, 1) - ] + invalid_linenos = [(10, 1), (-10, -11), (10, -11), (-5, -2), (-5, 1)] for lineno, end_lineno in invalid_linenos: with self.subTest(f"Check invalid linenos {lineno}:{end_lineno}"): @@ -369,25 +370,36 @@ def test_invalid_position_information(self): tree.body[0].lineno = lineno tree.body[0].end_lineno = end_lineno with self.assertRaises(ValueError): - compile(tree, '', 'exec') + compile(tree, "", "exec") - invalid_col_offsets = [ - (10, 1), (-10, -11), (10, -11), (-5, -2), (-5, 1) - ] + invalid_col_offsets = [(10, 1), (-10, -11), (10, -11), (-5, -2), (-5, 1)] for col_offset, end_col_offset in invalid_col_offsets: - with self.subTest(f"Check invalid col_offset {col_offset}:{end_col_offset}"): + with self.subTest( + f"Check invalid col_offset 
{col_offset}:{end_col_offset}" + ): snippet = "a = 1" tree = ast.parse(snippet) tree.body[0].col_offset = col_offset tree.body[0].end_col_offset = end_col_offset with self.assertRaises(ValueError): - compile(tree, '', 'exec') + compile(tree, "", "exec") def test_compilation_of_ast_nodes_with_default_end_position_values(self): - tree = ast.Module(body=[ - ast.Import(names=[ast.alias(name='builtins', lineno=1, col_offset=0)], lineno=1, col_offset=0), - ast.Import(names=[ast.alias(name='traceback', lineno=0, col_offset=0)], lineno=0, col_offset=1) - ], type_ignores=[]) + tree = ast.Module( + body=[ + ast.Import( + names=[ast.alias(name="builtins", lineno=1, col_offset=0)], + lineno=1, + col_offset=0, + ), + ast.Import( + names=[ast.alias(name="traceback", lineno=0, col_offset=0)], + lineno=0, + col_offset=1, + ), + ], + type_ignores=[], + ) # Check that compilation doesn't crash. Note: this may crash explicitly only on debug mode. compile(tree, "", "exec") @@ -412,7 +424,7 @@ def test_alias(self): im = ast.parse("from bar import y").body[0] self.assertEqual(len(im.names), 1) alias = im.names[0] - self.assertEqual(alias.name, 'y') + self.assertEqual(alias.name, "y") self.assertIsNone(alias.asname) self.assertEqual(alias.lineno, 1) self.assertEqual(alias.end_lineno, 1) @@ -421,7 +433,7 @@ def test_alias(self): im = ast.parse("from bar import *").body[0] alias = im.names[0] - self.assertEqual(alias.name, '*') + self.assertEqual(alias.name, "*") self.assertIsNone(alias.asname) self.assertEqual(alias.lineno, 1) self.assertEqual(alias.end_lineno, 1) @@ -455,21 +467,21 @@ def test_base_classes(self): self.assertTrue(issubclass(ast.Gt, ast.AST)) def test_import_deprecated(self): - ast = import_fresh_module('ast') + ast = import_fresh_module("ast") depr_regex = ( - r'ast\.{} is deprecated and will be removed in Python 3.14; ' - r'use ast\.Constant instead' + r"ast\.{} is deprecated and will be removed in Python 3.14; " + r"use ast\.Constant instead" ) - for name in 'Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis': + for name in "Num", "Str", "Bytes", "NameConstant", "Ellipsis": with self.assertWarnsRegex(DeprecationWarning, depr_regex.format(name)): getattr(ast, name) def test_field_attr_existence_deprecated(self): with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import Num, Str, Bytes, NameConstant, Ellipsis - for name in ('Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis'): + for name in ("Num", "Str", "Bytes", "NameConstant", "Ellipsis"): item = getattr(ast, name) if self._is_ast_node(name, item): with self.subTest(item): @@ -481,10 +493,10 @@ def test_field_attr_existence_deprecated(self): def test_field_attr_existence(self): for name, item in ast.__dict__.items(): # These emit DeprecationWarnings - if name in {'Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis'}: + if name in {"Num", "Str", "Bytes", "NameConstant", "Ellipsis"}: continue # constructor has a different signature - if name == 'Index': + if name == "Index": continue if self._is_ast_node(name, item): x = item() @@ -493,8 +505,18 @@ def test_field_attr_existence(self): def test_arguments(self): x = ast.arguments() - self.assertEqual(x._fields, ('posonlyargs', 'args', 'vararg', 'kwonlyargs', - 'kw_defaults', 'kwarg', 'defaults')) + self.assertEqual( + x._fields, + ( + "posonlyargs", + "args", + "vararg", + "kwonlyargs", + "kw_defaults", + "kwarg", + "defaults", + ), + ) with self.assertRaises(AttributeError): x.args @@ -506,7 
+528,7 @@ def test_arguments(self): def test_field_attr_writable_deprecated(self): with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) x = ast.Num() # We can assign to _fields x._fields = 666 @@ -520,13 +542,13 @@ def test_field_attr_writable(self): def test_classattrs_deprecated(self): with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import Num, Str, Bytes, NameConstant, Ellipsis with warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('always', '', DeprecationWarning) + warnings.filterwarnings("always", "", DeprecationWarning) x = ast.Num() - self.assertEqual(x._fields, ('value', 'kind')) + self.assertEqual(x._fields, ("value", "kind")) with self.assertRaises(AttributeError): x.value @@ -549,7 +571,7 @@ def test_classattrs_deprecated(self): x = ast.Num(42, lineno=0) self.assertEqual(x.lineno, 0) - self.assertEqual(x._fields, ('value', 'kind')) + self.assertEqual(x._fields, ("value", "kind")) self.assertEqual(x.value, 42) self.assertEqual(x.n, 42) @@ -557,49 +579,54 @@ def test_classattrs_deprecated(self): self.assertRaises(TypeError, ast.Num, 1, None, 2, lineno=0) # Arbitrary keyword arguments are supported - self.assertEqual(ast.Num(1, foo='bar').foo, 'bar') + self.assertEqual(ast.Num(1, foo="bar").foo, "bar") - with self.assertRaisesRegex(TypeError, "Num got multiple values for argument 'n'"): + with self.assertRaisesRegex( + TypeError, "Num got multiple values for argument 'n'" + ): ast.Num(1, n=2) self.assertEqual(ast.Num(42).n, 42) self.assertEqual(ast.Num(4.25).n, 4.25) self.assertEqual(ast.Num(4.25j).n, 4.25j) - self.assertEqual(ast.Str('42').s, '42') - self.assertEqual(ast.Bytes(b'42').s, b'42') + self.assertEqual(ast.Str("42").s, "42") + self.assertEqual(ast.Bytes(b"42").s, b"42") self.assertIs(ast.NameConstant(True).value, True) self.assertIs(ast.NameConstant(False).value, False) self.assertIs(ast.NameConstant(None).value, None) - self.assertEqual([str(w.message) for w in wlog], [ - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 
'ast.Str is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute s is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Bytes is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute s is deprecated and will be removed in Python 3.14; use value instead', - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - ]) + self.assertEqual( + [str(w.message) for w in wlog], + [ + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Str is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute s is deprecated and will be removed in Python 3.14; use value instead", + "ast.Bytes is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute s is deprecated and will be removed in Python 3.14; use value instead", + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + ], + ) def test_classattrs(self): x = ast.Constant() - self.assertEqual(x._fields, ('value', 'kind')) + self.assertEqual(x._fields, ("value", "kind")) with self.assertRaises(AttributeError): x.value @@ -618,23 +645,25 @@ def test_classattrs(self): x = ast.Constant(42, lineno=0) self.assertEqual(x.lineno, 0) - self.assertEqual(x._fields, ('value', 'kind')) + self.assertEqual(x._fields, ("value", "kind")) self.assertEqual(x.value, 42) self.assertRaises(TypeError, ast.Constant, 1, None, 2) self.assertRaises(TypeError, ast.Constant, 1, None, 2, lineno=0) # Arbitrary keyword arguments are supported - self.assertEqual(ast.Constant(1, foo='bar').foo, 'bar') + self.assertEqual(ast.Constant(1, foo="bar").foo, "bar") - with self.assertRaisesRegex(TypeError, "Constant got multiple values for argument 'value'"): + with 
self.assertRaisesRegex( + TypeError, "Constant got multiple values for argument 'value'" + ): ast.Constant(1, value=2) self.assertEqual(ast.Constant(42).value, 42) self.assertEqual(ast.Constant(4.25).value, 4.25) self.assertEqual(ast.Constant(4.25j).value, 4.25j) - self.assertEqual(ast.Constant('42').value, '42') - self.assertEqual(ast.Constant(b'42').value, b'42') + self.assertEqual(ast.Constant("42").value, "42") + self.assertEqual(ast.Constant(b"42").value, b"42") self.assertIs(ast.Constant(True).value, True) self.assertIs(ast.Constant(False).value, False) self.assertIs(ast.Constant(None).value, None) @@ -642,43 +671,46 @@ def test_classattrs(self): def test_realtype(self): with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import Num, Str, Bytes, NameConstant, Ellipsis with warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('always', '', DeprecationWarning) + warnings.filterwarnings("always", "", DeprecationWarning) self.assertIs(type(ast.Num(42)), ast.Constant) self.assertIs(type(ast.Num(4.25)), ast.Constant) self.assertIs(type(ast.Num(4.25j)), ast.Constant) - self.assertIs(type(ast.Str('42')), ast.Constant) - self.assertIs(type(ast.Bytes(b'42')), ast.Constant) + self.assertIs(type(ast.Str("42")), ast.Constant) + self.assertIs(type(ast.Bytes(b"42")), ast.Constant) self.assertIs(type(ast.NameConstant(True)), ast.Constant) self.assertIs(type(ast.NameConstant(False)), ast.Constant) self.assertIs(type(ast.NameConstant(None)), ast.Constant) self.assertIs(type(ast.Ellipsis()), ast.Constant) - self.assertEqual([str(w.message) for w in wlog], [ - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Str is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Bytes is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Ellipsis is deprecated and will be removed in Python 3.14; use ast.Constant instead', - ]) + self.assertEqual( + [str(w.message) for w in wlog], + [ + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Str is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Bytes is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Ellipsis is deprecated and will be removed in Python 3.14; use ast.Constant instead", + ], + ) def test_isinstance(self): from ast import Constant with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', 
DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import Num, Str, Bytes, NameConstant, Ellipsis cls_depr_msg = ( - 'ast.{} is deprecated and will be removed in Python 3.14; ' - 'use ast.Constant instead' + "ast.{} is deprecated and will be removed in Python 3.14; " + "use ast.Constant instead" ) assertNumDeprecated = partial( @@ -693,7 +725,7 @@ def test_isinstance(self): assertNameConstantDeprecated = partial( self.assertWarnsRegex, DeprecationWarning, - cls_depr_msg.format("NameConstant") + cls_depr_msg.format("NameConstant"), ) assertEllipsisDeprecated = partial( self.assertWarnsRegex, DeprecationWarning, cls_depr_msg.format("Ellipsis") @@ -707,12 +739,12 @@ def test_isinstance(self): self.assertIsInstance(n, Num) with assertStrDeprecated(): - s = Str('42') + s = Str("42") with assertStrDeprecated(): self.assertIsInstance(s, Str) with assertBytesDeprecated(): - b = Bytes(b'42') + b = Bytes(b"42") with assertBytesDeprecated(): self.assertIsInstance(b, Bytes) @@ -734,10 +766,10 @@ def test_isinstance(self): self.assertIsInstance(Constant(arg), Num) with assertStrDeprecated(): - self.assertIsInstance(Constant('42'), Str) + self.assertIsInstance(Constant("42"), Str) with assertBytesDeprecated(): - self.assertIsInstance(Constant(b'42'), Bytes) + self.assertIsInstance(Constant(b"42"), Bytes) for arg in True, False, None: with self.subTest(arg=arg): @@ -748,7 +780,7 @@ def test_isinstance(self): self.assertIsInstance(Constant(...), Ellipsis) with assertStrDeprecated(): - s = Str('42') + s = Str("42") assertNumDeprecated(self.assertNotIsInstance, s, Num) assertBytesDeprecated(self.assertNotIsInstance, s, Bytes) @@ -768,14 +800,16 @@ def test_isinstance(self): with assertNumDeprecated(): self.assertNotIsInstance(n, Num) - for arg in '42', True, False: + for arg in "42", True, False: with self.subTest(arg=arg): with assertNumDeprecated(): self.assertNotIsInstance(Constant(arg), Num) assertStrDeprecated(self.assertNotIsInstance, Constant(42), Str) - assertBytesDeprecated(self.assertNotIsInstance, Constant('42'), Bytes) - assertNameConstantDeprecated(self.assertNotIsInstance, Constant(42), NameConstant) + assertBytesDeprecated(self.assertNotIsInstance, Constant("42"), Bytes) + assertNameConstantDeprecated( + self.assertNotIsInstance, Constant(42), NameConstant + ) assertEllipsisDeprecated(self.assertNotIsInstance, Constant(42), Ellipsis) assertNumDeprecated(self.assertNotIsInstance, Constant(), Num) assertStrDeprecated(self.assertNotIsInstance, Constant(), Str) @@ -783,29 +817,33 @@ def test_isinstance(self): assertNameConstantDeprecated(self.assertNotIsInstance, Constant(), NameConstant) assertEllipsisDeprecated(self.assertNotIsInstance, Constant(), Ellipsis) - class S(str): pass + class S(str): + pass + with assertStrDeprecated(): - self.assertIsInstance(Constant(S('42')), Str) + self.assertIsInstance(Constant(S("42")), Str) with assertNumDeprecated(): - self.assertNotIsInstance(Constant(S('42')), Num) + self.assertNotIsInstance(Constant(S("42")), Num) def test_constant_subclasses_deprecated(self): with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import Num with warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('always', '', DeprecationWarning) + warnings.filterwarnings("always", "", DeprecationWarning) + class N(ast.Num): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.z = 'spam' + self.z = 
"spam" + class N2(ast.Num): pass n = N(42) self.assertEqual(n.n, 42) - self.assertEqual(n.z, 'spam') + self.assertEqual(n.z, "spam") self.assertIs(type(n), N) self.assertIsInstance(n, N) self.assertIsInstance(n, ast.Num) @@ -815,26 +853,30 @@ class N2(ast.Num): self.assertEqual(n.n, 42) self.assertIs(type(n), N) - self.assertEqual([str(w.message) for w in wlog], [ - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - ]) + self.assertEqual( + [str(w.message) for w in wlog], + [ + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + ], + ) def test_constant_subclasses(self): class N(ast.Constant): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.z = 'spam' + self.z = "spam" + class N2(ast.Constant): pass n = N(42) self.assertEqual(n.value, 42) - self.assertEqual(n.z, 'spam') + self.assertEqual(n.z, "spam") self.assertEqual(type(n), N) self.assertTrue(isinstance(n, N)) self.assertTrue(isinstance(n, ast.Constant)) @@ -852,7 +894,7 @@ def test_module(self): def test_nodeclasses(self): # Zero arguments constructor explicitly allowed x = ast.BinOp() - self.assertEqual(x._fields, ('left', 'op', 'right')) + self.assertEqual(x._fields, ("left", "op", "right")) # Random attribute allowed too x.foobarbaz = 5 @@ -924,9 +966,7 @@ def test_invalid_constant(self): for invalid_constant in int, (1, 2, int), frozenset((1, 2, int)): e = ast.Expression(body=ast.Constant(invalid_constant)) ast.fix_missing_locations(e) - with self.assertRaisesRegex( - TypeError, "invalid type in Constant: type" - ): + with self.assertRaisesRegex(TypeError, "invalid type in Constant: type"): compile(e, "", "eval") def test_empty_yield_from(self): @@ -942,13 +982,15 @@ def test_issue31592(self): # There shouldn't be an assertion failure in case of a bad # unicodedata.normalize(). 
import unicodedata + def bad_normalize(*args): return None - with support.swap_attr(unicodedata, 'normalize', bad_normalize): - self.assertRaises(TypeError, ast.parse, '\u03D5') + + with support.swap_attr(unicodedata, "normalize", bad_normalize): + self.assertRaises(TypeError, ast.parse, "\u03d5") def test_issue18374_binop_col_offset(self): - tree = ast.parse('4+5+6+7') + tree = ast.parse("4+5+6+7") parent_binop = tree.body[0].value child_binop = parent_binop.left grandchild_binop = child_binop.left @@ -959,7 +1001,7 @@ def test_issue18374_binop_col_offset(self): self.assertEqual(grandchild_binop.col_offset, 0) self.assertEqual(grandchild_binop.end_col_offset, 3) - tree = ast.parse('4+5-\\\n 6-7') + tree = ast.parse("4+5-\\\n 6-7") parent_binop = tree.body[0].value child_binop = parent_binop.left grandchild_binop = child_binop.left @@ -979,48 +1021,53 @@ def test_issue18374_binop_col_offset(self): self.assertEqual(grandchild_binop.end_lineno, 1) def test_issue39579_dotted_name_end_col_offset(self): - tree = ast.parse('@a.b.c\ndef f(): pass') + tree = ast.parse("@a.b.c\ndef f(): pass") attr_b = tree.body[0].decorator_list[0].value self.assertEqual(attr_b.end_col_offset, 4) def test_ast_asdl_signature(self): - self.assertEqual(ast.withitem.__doc__, "withitem(expr context_expr, expr? optional_vars)") + self.assertEqual( + ast.withitem.__doc__, "withitem(expr context_expr, expr? optional_vars)" + ) self.assertEqual(ast.GtE.__doc__, "GtE") self.assertEqual(ast.Name.__doc__, "Name(identifier id, expr_context ctx)") - self.assertEqual(ast.cmpop.__doc__, "cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn") + self.assertEqual( + ast.cmpop.__doc__, + "cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn", + ) expressions = [f" | {node.__doc__}" for node in ast.expr.__subclasses__()] expressions[0] = f"expr = {ast.expr.__subclasses__()[0].__doc__}" self.assertCountEqual(ast.expr.__doc__.split("\n"), expressions) def test_positional_only_feature_version(self): - ast.parse('def foo(x, /): ...', feature_version=(3, 8)) - ast.parse('def bar(x=1, /): ...', feature_version=(3, 8)) + ast.parse("def foo(x, /): ...", feature_version=(3, 8)) + ast.parse("def bar(x=1, /): ...", feature_version=(3, 8)) with self.assertRaises(SyntaxError): - ast.parse('def foo(x, /): ...', feature_version=(3, 7)) + ast.parse("def foo(x, /): ...", feature_version=(3, 7)) with self.assertRaises(SyntaxError): - ast.parse('def bar(x=1, /): ...', feature_version=(3, 7)) + ast.parse("def bar(x=1, /): ...", feature_version=(3, 7)) - ast.parse('lambda x, /: ...', feature_version=(3, 8)) - ast.parse('lambda x=1, /: ...', feature_version=(3, 8)) + ast.parse("lambda x, /: ...", feature_version=(3, 8)) + ast.parse("lambda x=1, /: ...", feature_version=(3, 8)) with self.assertRaises(SyntaxError): - ast.parse('lambda x, /: ...', feature_version=(3, 7)) + ast.parse("lambda x, /: ...", feature_version=(3, 7)) with self.assertRaises(SyntaxError): - ast.parse('lambda x=1, /: ...', feature_version=(3, 7)) + ast.parse("lambda x=1, /: ...", feature_version=(3, 7)) def test_assignment_expression_feature_version(self): - ast.parse('(x := 0)', feature_version=(3, 8)) + ast.parse("(x := 0)", feature_version=(3, 8)) with self.assertRaises(SyntaxError): - ast.parse('(x := 0)', feature_version=(3, 7)) + ast.parse("(x := 0)", feature_version=(3, 7)) def test_conditional_context_managers_parse_with_low_feature_version(self): # regression test for gh-115881 - ast.parse('with (x() if y else z()): ...', feature_version=(3, 8)) + 
ast.parse("with (x() if y else z()): ...", feature_version=(3, 8)) def test_exception_groups_feature_version(self): - code = dedent(''' + code = dedent(""" try: ... except* Exception: ... - ''') + """) ast.parse(code) with self.assertRaises(SyntaxError): ast.parse(code, feature_version=(3, 10)) @@ -1039,45 +1086,50 @@ def test_type_params_feature_version(self): def test_invalid_major_feature_version(self): with self.assertRaises(ValueError): - ast.parse('pass', feature_version=(2, 7)) + ast.parse("pass", feature_version=(2, 7)) with self.assertRaises(ValueError): - ast.parse('pass', feature_version=(4, 0)) + ast.parse("pass", feature_version=(4, 0)) def test_constant_as_name(self): for constant in "True", "False", "None": expr = ast.Expression(ast.Name(constant, ast.Load())) ast.fix_missing_locations(expr) - with self.assertRaisesRegex(ValueError, f"identifier field can't represent '{constant}' constant"): + with self.assertRaisesRegex( + ValueError, f"identifier field can't represent '{constant}' constant" + ): compile(expr, "", "eval") def test_precedence_enum(self): class _Precedence(enum.IntEnum): """Precedence table that originated from python grammar.""" - NAMED_EXPR = enum.auto() # := - TUPLE = enum.auto() # , - YIELD = enum.auto() # 'yield', 'yield from' - TEST = enum.auto() # 'if'-'else', 'lambda' - OR = enum.auto() # 'or' - AND = enum.auto() # 'and' - NOT = enum.auto() # 'not' - CMP = enum.auto() # '<', '>', '==', '>=', '<=', '!=', - # 'in', 'not in', 'is', 'is not' + + NAMED_EXPR = enum.auto() # := + TUPLE = enum.auto() # , + YIELD = enum.auto() # 'yield', 'yield from' + TEST = enum.auto() # 'if'-'else', 'lambda' + OR = enum.auto() # 'or' + AND = enum.auto() # 'and' + NOT = enum.auto() # 'not' + CMP = enum.auto() # '<', '>', '==', '>=', '<=', '!=', + # 'in', 'not in', 'is', 'is not' EXPR = enum.auto() - BOR = EXPR # '|' - BXOR = enum.auto() # '^' - BAND = enum.auto() # '&' - SHIFT = enum.auto() # '<<', '>>' - ARITH = enum.auto() # '+', '-' - TERM = enum.auto() # '*', '@', '/', '%', '//' - FACTOR = enum.auto() # unary '+', '-', '~' - POWER = enum.auto() # '**' - AWAIT = enum.auto() # 'await' + BOR = EXPR # '|' + BXOR = enum.auto() # '^' + BAND = enum.auto() # '&' + SHIFT = enum.auto() # '<<', '>>' + ARITH = enum.auto() # '+', '-' + TERM = enum.auto() # '*', '@', '/', '%', '//' + FACTOR = enum.auto() # unary '+', '-', '~' + POWER = enum.auto() # '**' + AWAIT = enum.auto() # 'await' ATOM = enum.auto() + def next(self): try: return self.__class__(self + 1) except ValueError: return self + enum._test_simple_enum(_Precedence, ast._Precedence) @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") @@ -1092,8 +1144,7 @@ def check_limit(prefix, repeated): ast.parse(expect_ok) for depth in (fail_depth, crash_depth): broken = prefix + repeated * depth - details = "Compiling ({!r} + {!r} * {})".format( - prefix, repeated, depth) + details = "Compiling ({!r} + {!r} * {})".format(prefix, repeated, depth) with self.assertRaises(RecursionError, msg=details): with support.infinite_recursion(): ast.parse(broken) @@ -1104,8 +1155,9 @@ def check_limit(prefix, repeated): check_limit("a", "*a") def test_null_bytes(self): - with self.assertRaises(SyntaxError, - msg="source code string cannot contain null bytes"): + with self.assertRaises( + SyntaxError, msg="source code string cannot contain null bytes" + ): ast.parse("a\0b") def assert_none_check(self, node: type[ast.AST], attr: str, source: str) -> None: @@ -1134,17 +1186,18 @@ def test_none_checks(self) -> None: for node, attr, source 
in tests: self.assert_none_check(node, attr, source) + class ASTHelpers_Test(unittest.TestCase): maxDiff = None def test_parse(self): - a = ast.parse('foo(1 + 1)') - b = compile('foo(1 + 1)', '', 'exec', ast.PyCF_ONLY_AST) + a = ast.parse("foo(1 + 1)") + b = compile("foo(1 + 1)", "", "exec", ast.PyCF_ONLY_AST) self.assertEqual(ast.dump(a), ast.dump(b)) def test_parse_in_error(self): try: - 1/0 + 1 / 0 except Exception: with self.assertRaises(SyntaxError) as e: ast.literal_eval(r"'\U'") @@ -1152,28 +1205,33 @@ def test_parse_in_error(self): def test_dump(self): node = ast.parse('spam(eggs, "and cheese")') - self.assertEqual(ast.dump(node), + self.assertEqual( + ast.dump(node), "Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load()), " "args=[Name(id='eggs', ctx=Load()), Constant(value='and cheese')], " - "keywords=[]))], type_ignores=[])" + "keywords=[]))], type_ignores=[])", ) - self.assertEqual(ast.dump(node, annotate_fields=False), + self.assertEqual( + ast.dump(node, annotate_fields=False), "Module([Expr(Call(Name('spam', Load()), [Name('eggs', Load()), " - "Constant('and cheese')], []))], [])" + "Constant('and cheese')], []))], [])", ) - self.assertEqual(ast.dump(node, include_attributes=True), + self.assertEqual( + ast.dump(node, include_attributes=True), "Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load(), " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=4), " "args=[Name(id='eggs', ctx=Load(), lineno=1, col_offset=5, " "end_lineno=1, end_col_offset=9), Constant(value='and cheese', " "lineno=1, col_offset=11, end_lineno=1, end_col_offset=23)], keywords=[], " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=24), " - "lineno=1, col_offset=0, end_lineno=1, end_col_offset=24)], type_ignores=[])" + "lineno=1, col_offset=0, end_lineno=1, end_col_offset=24)], type_ignores=[])", ) def test_dump_indent(self): node = ast.parse('spam(eggs, "and cheese")') - self.assertEqual(ast.dump(node, indent=3), """\ + self.assertEqual( + ast.dump(node, indent=3), + """\ Module( body=[ Expr( @@ -1183,8 +1241,11 @@ def test_dump_indent(self): Name(id='eggs', ctx=Load()), Constant(value='and cheese')], keywords=[]))], - type_ignores=[])""") - self.assertEqual(ast.dump(node, annotate_fields=False, indent='\t'), """\ + type_ignores=[])""", + ) + self.assertEqual( + ast.dump(node, annotate_fields=False, indent="\t"), + """\ Module( \t[ \t\tExpr( @@ -1194,8 +1255,11 @@ def test_dump_indent(self): \t\t\t\t\tName('eggs', Load()), \t\t\t\t\tConstant('and cheese')], \t\t\t\t[]))], -\t[])""") - self.assertEqual(ast.dump(node, include_attributes=True, indent=3), """\ +\t[])""", + ) + self.assertEqual( + ast.dump(node, include_attributes=True, indent=3), + """\ Module( body=[ Expr( @@ -1230,45 +1294,43 @@ def test_dump_indent(self): col_offset=0, end_lineno=1, end_col_offset=24)], - type_ignores=[])""") + type_ignores=[])""", + ) def test_dump_incomplete(self): node = ast.Raise(lineno=3, col_offset=4) - self.assertEqual(ast.dump(node), - "Raise()" + self.assertEqual(ast.dump(node), "Raise()") + self.assertEqual( + ast.dump(node, include_attributes=True), "Raise(lineno=3, col_offset=4)" ) - self.assertEqual(ast.dump(node, include_attributes=True), - "Raise(lineno=3, col_offset=4)" + node = ast.Raise(exc=ast.Name(id="e", ctx=ast.Load()), lineno=3, col_offset=4) + self.assertEqual(ast.dump(node), "Raise(exc=Name(id='e', ctx=Load()))") + self.assertEqual( + ast.dump(node, annotate_fields=False), "Raise(Name('e', Load()))" ) - node = ast.Raise(exc=ast.Name(id='e', ctx=ast.Load()), lineno=3, 
col_offset=4) - self.assertEqual(ast.dump(node), - "Raise(exc=Name(id='e', ctx=Load()))" + self.assertEqual( + ast.dump(node, include_attributes=True), + "Raise(exc=Name(id='e', ctx=Load()), lineno=3, col_offset=4)", ) - self.assertEqual(ast.dump(node, annotate_fields=False), - "Raise(Name('e', Load()))" + self.assertEqual( + ast.dump(node, annotate_fields=False, include_attributes=True), + "Raise(Name('e', Load()), lineno=3, col_offset=4)", ) - self.assertEqual(ast.dump(node, include_attributes=True), - "Raise(exc=Name(id='e', ctx=Load()), lineno=3, col_offset=4)" - ) - self.assertEqual(ast.dump(node, annotate_fields=False, include_attributes=True), - "Raise(Name('e', Load()), lineno=3, col_offset=4)" - ) - node = ast.Raise(cause=ast.Name(id='e', ctx=ast.Load())) - self.assertEqual(ast.dump(node), - "Raise(cause=Name(id='e', ctx=Load()))" - ) - self.assertEqual(ast.dump(node, annotate_fields=False), - "Raise(cause=Name('e', Load()))" + node = ast.Raise(cause=ast.Name(id="e", ctx=ast.Load())) + self.assertEqual(ast.dump(node), "Raise(cause=Name(id='e', ctx=Load()))") + self.assertEqual( + ast.dump(node, annotate_fields=False), "Raise(cause=Name('e', Load()))" ) def test_copy_location(self): - src = ast.parse('1 + 1', mode='eval') + src = ast.parse("1 + 1", mode="eval") src.body.right = ast.copy_location(ast.Constant(2), src.body.right) - self.assertEqual(ast.dump(src, include_attributes=True), - 'Expression(body=BinOp(left=Constant(value=1, lineno=1, col_offset=0, ' - 'end_lineno=1, end_col_offset=1), op=Add(), right=Constant(value=2, ' - 'lineno=1, col_offset=4, end_lineno=1, end_col_offset=5), lineno=1, ' - 'col_offset=0, end_lineno=1, end_col_offset=5))' + self.assertEqual( + ast.dump(src, include_attributes=True), + "Expression(body=BinOp(left=Constant(value=1, lineno=1, col_offset=0, " + "end_lineno=1, end_col_offset=1), op=Add(), right=Constant(value=2, " + "lineno=1, col_offset=4, end_lineno=1, end_col_offset=5), lineno=1, " + "col_offset=0, end_lineno=1, end_col_offset=5))", ) src = ast.Call(col_offset=1, lineno=1, end_lineno=1, end_col_offset=1) new = ast.copy_location(src, ast.Call(col_offset=None, lineno=None)) @@ -1279,11 +1341,13 @@ def test_copy_location(self): def test_fix_missing_locations(self): src = ast.parse('write("spam")') - src.body.append(ast.Expr(ast.Call(ast.Name('spam', ast.Load()), - [ast.Constant('eggs')], []))) + src.body.append( + ast.Expr(ast.Call(ast.Name("spam", ast.Load()), [ast.Constant("eggs")], [])) + ) self.assertEqual(src, ast.fix_missing_locations(src)) self.maxDiff = None - self.assertEqual(ast.dump(src, include_attributes=True), + self.assertEqual( + ast.dump(src, include_attributes=True), "Module(body=[Expr(value=Call(func=Name(id='write', ctx=Load(), " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=5), " "args=[Constant(value='spam', lineno=1, col_offset=6, end_lineno=1, " @@ -1294,26 +1358,28 @@ def test_fix_missing_locations(self): "args=[Constant(value='eggs', lineno=1, col_offset=0, end_lineno=1, " "end_col_offset=0)], keywords=[], lineno=1, col_offset=0, end_lineno=1, " "end_col_offset=0), lineno=1, col_offset=0, end_lineno=1, end_col_offset=0)], " - "type_ignores=[])" + "type_ignores=[])", ) def test_increment_lineno(self): - src = ast.parse('1 + 1', mode='eval') + src = ast.parse("1 + 1", mode="eval") self.assertEqual(ast.increment_lineno(src, n=3), src) - self.assertEqual(ast.dump(src, include_attributes=True), - 'Expression(body=BinOp(left=Constant(value=1, lineno=4, col_offset=0, ' - 'end_lineno=4, end_col_offset=1), op=Add(), 
right=Constant(value=1, ' - 'lineno=4, col_offset=4, end_lineno=4, end_col_offset=5), lineno=4, ' - 'col_offset=0, end_lineno=4, end_col_offset=5))' + self.assertEqual( + ast.dump(src, include_attributes=True), + "Expression(body=BinOp(left=Constant(value=1, lineno=4, col_offset=0, " + "end_lineno=4, end_col_offset=1), op=Add(), right=Constant(value=1, " + "lineno=4, col_offset=4, end_lineno=4, end_col_offset=5), lineno=4, " + "col_offset=0, end_lineno=4, end_col_offset=5))", ) # issue10869: do not increment lineno of root twice - src = ast.parse('1 + 1', mode='eval') + src = ast.parse("1 + 1", mode="eval") self.assertEqual(ast.increment_lineno(src.body, n=3), src.body) - self.assertEqual(ast.dump(src, include_attributes=True), - 'Expression(body=BinOp(left=Constant(value=1, lineno=4, col_offset=0, ' - 'end_lineno=4, end_col_offset=1), op=Add(), right=Constant(value=1, ' - 'lineno=4, col_offset=4, end_lineno=4, end_col_offset=5), lineno=4, ' - 'col_offset=0, end_lineno=4, end_col_offset=5))' + self.assertEqual( + ast.dump(src, include_attributes=True), + "Expression(body=BinOp(left=Constant(value=1, lineno=4, col_offset=0, " + "end_lineno=4, end_col_offset=1), op=Add(), right=Constant(value=1, " + "lineno=4, col_offset=4, end_lineno=4, end_col_offset=5), lineno=4, " + "col_offset=0, end_lineno=4, end_col_offset=5))", ) src = ast.Call( func=ast.Name("test", ast.Load()), args=[], keywords=[], lineno=1 @@ -1322,81 +1388,82 @@ def test_increment_lineno(self): self.assertIsNone(ast.increment_lineno(src).end_lineno) def test_increment_lineno_on_module(self): - src = ast.parse(dedent("""\ + src = ast.parse( + dedent("""\ a = 1 b = 2 # type: ignore c = 3 d = 4 # type: ignore@tag - """), type_comments=True) + """), + type_comments=True, + ) ast.increment_lineno(src, n=5) self.assertEqual(src.type_ignores[0].lineno, 7) self.assertEqual(src.type_ignores[1].lineno, 9) - self.assertEqual(src.type_ignores[1].tag, '@tag') + self.assertEqual(src.type_ignores[1].tag, "@tag") def test_iter_fields(self): - node = ast.parse('foo()', mode='eval') + node = ast.parse("foo()", mode="eval") d = dict(ast.iter_fields(node.body)) - self.assertEqual(d.pop('func').id, 'foo') - self.assertEqual(d, {'keywords': [], 'args': []}) + self.assertEqual(d.pop("func").id, "foo") + self.assertEqual(d, {"keywords": [], "args": []}) def test_iter_child_nodes(self): - node = ast.parse("spam(23, 42, eggs='leek')", mode='eval') + node = ast.parse("spam(23, 42, eggs='leek')", mode="eval") self.assertEqual(len(list(ast.iter_child_nodes(node.body))), 4) iterator = ast.iter_child_nodes(node.body) - self.assertEqual(next(iterator).id, 'spam') + self.assertEqual(next(iterator).id, "spam") self.assertEqual(next(iterator).value, 23) self.assertEqual(next(iterator).value, 42) - self.assertEqual(ast.dump(next(iterator)), - "keyword(arg='eggs', value=Constant(value='leek'))" + self.assertEqual( + ast.dump(next(iterator)), + "keyword(arg='eggs', value=Constant(value='leek'))", ) def test_get_docstring(self): node = ast.parse('"""line one\n line two"""') - self.assertEqual(ast.get_docstring(node), - 'line one\nline two') + self.assertEqual(ast.get_docstring(node), "line one\nline two") node = ast.parse('class foo:\n """line one\n line two"""') - self.assertEqual(ast.get_docstring(node.body[0]), - 'line one\nline two') + self.assertEqual(ast.get_docstring(node.body[0]), "line one\nline two") node = ast.parse('def foo():\n """line one\n line two"""') - self.assertEqual(ast.get_docstring(node.body[0]), - 'line one\nline two') + 
self.assertEqual(ast.get_docstring(node.body[0]), "line one\nline two") node = ast.parse('async def foo():\n """spam\n ham"""') - self.assertEqual(ast.get_docstring(node.body[0]), 'spam\nham') + self.assertEqual(ast.get_docstring(node.body[0]), "spam\nham") node = ast.parse('async def foo():\n """spam\n ham"""') - self.assertEqual(ast.get_docstring(node.body[0], clean=False), 'spam\n ham') + self.assertEqual(ast.get_docstring(node.body[0], clean=False), "spam\n ham") - node = ast.parse('x') + node = ast.parse("x") self.assertRaises(TypeError, ast.get_docstring, node.body[0]) def test_get_docstring_none(self): - self.assertIsNone(ast.get_docstring(ast.parse(''))) + self.assertIsNone(ast.get_docstring(ast.parse(""))) node = ast.parse('x = "not docstring"') self.assertIsNone(ast.get_docstring(node)) - node = ast.parse('def foo():\n pass') + node = ast.parse("def foo():\n pass") self.assertIsNone(ast.get_docstring(node)) - node = ast.parse('class foo:\n pass') + node = ast.parse("class foo:\n pass") self.assertIsNone(ast.get_docstring(node.body[0])) node = ast.parse('class foo:\n x = "not docstring"') self.assertIsNone(ast.get_docstring(node.body[0])) - node = ast.parse('class foo:\n def bar(self): pass') + node = ast.parse("class foo:\n def bar(self): pass") self.assertIsNone(ast.get_docstring(node.body[0])) - node = ast.parse('def foo():\n pass') + node = ast.parse("def foo():\n pass") self.assertIsNone(ast.get_docstring(node.body[0])) node = ast.parse('def foo():\n x = "not docstring"') self.assertIsNone(ast.get_docstring(node.body[0])) - node = ast.parse('async def foo():\n pass') + node = ast.parse("async def foo():\n pass") self.assertIsNone(ast.get_docstring(node.body[0])) node = ast.parse('async def foo():\n x = "not docstring"') self.assertIsNone(ast.get_docstring(node.body[0])) - node = ast.parse('async def foo():\n 42') + node = ast.parse("async def foo():\n 42") self.assertIsNone(ast.get_docstring(node.body[0])) def test_multi_line_docstring_col_offset_and_lineno_issue16806(self): @@ -1419,75 +1486,79 @@ def test_multi_line_docstring_col_offset_and_lineno_issue16806(self): self.assertEqual(node.body[2].lineno, 13) def test_elif_stmt_start_position(self): - node = ast.parse('if a:\n pass\nelif b:\n pass\n') + node = ast.parse("if a:\n pass\nelif b:\n pass\n") elif_stmt = node.body[0].orelse[0] self.assertEqual(elif_stmt.lineno, 3) self.assertEqual(elif_stmt.col_offset, 0) def test_elif_stmt_start_position_with_else(self): - node = ast.parse('if a:\n pass\nelif b:\n pass\nelse:\n pass\n') + node = ast.parse("if a:\n pass\nelif b:\n pass\nelse:\n pass\n") elif_stmt = node.body[0].orelse[0] self.assertEqual(elif_stmt.lineno, 3) self.assertEqual(elif_stmt.col_offset, 0) def test_starred_expr_end_position_within_call(self): - node = ast.parse('f(*[0, 1])') + node = ast.parse("f(*[0, 1])") starred_expr = node.body[0].value.args[0] self.assertEqual(starred_expr.end_lineno, 1) self.assertEqual(starred_expr.end_col_offset, 9) def test_literal_eval(self): - self.assertEqual(ast.literal_eval('[1, 2, 3]'), [1, 2, 3]) + self.assertEqual(ast.literal_eval("[1, 2, 3]"), [1, 2, 3]) self.assertEqual(ast.literal_eval('{"foo": 42}'), {"foo": 42}) - self.assertEqual(ast.literal_eval('(True, False, None)'), (True, False, None)) - self.assertEqual(ast.literal_eval('{1, 2, 3}'), {1, 2, 3}) + self.assertEqual(ast.literal_eval("(True, False, None)"), (True, False, None)) + self.assertEqual(ast.literal_eval("{1, 2, 3}"), {1, 2, 3}) self.assertEqual(ast.literal_eval('b"hi"'), b"hi") - 
self.assertEqual(ast.literal_eval('set()'), set()) - self.assertRaises(ValueError, ast.literal_eval, 'foo()') - self.assertEqual(ast.literal_eval('6'), 6) - self.assertEqual(ast.literal_eval('+6'), 6) - self.assertEqual(ast.literal_eval('-6'), -6) - self.assertEqual(ast.literal_eval('3.25'), 3.25) - self.assertEqual(ast.literal_eval('+3.25'), 3.25) - self.assertEqual(ast.literal_eval('-3.25'), -3.25) - self.assertEqual(repr(ast.literal_eval('-0.0')), '-0.0') - self.assertRaises(ValueError, ast.literal_eval, '++6') - self.assertRaises(ValueError, ast.literal_eval, '+True') - self.assertRaises(ValueError, ast.literal_eval, '2+3') + self.assertEqual(ast.literal_eval("set()"), set()) + self.assertRaises(ValueError, ast.literal_eval, "foo()") + self.assertEqual(ast.literal_eval("6"), 6) + self.assertEqual(ast.literal_eval("+6"), 6) + self.assertEqual(ast.literal_eval("-6"), -6) + self.assertEqual(ast.literal_eval("3.25"), 3.25) + self.assertEqual(ast.literal_eval("+3.25"), 3.25) + self.assertEqual(ast.literal_eval("-3.25"), -3.25) + self.assertEqual(repr(ast.literal_eval("-0.0")), "-0.0") + self.assertRaises(ValueError, ast.literal_eval, "++6") + self.assertRaises(ValueError, ast.literal_eval, "+True") + self.assertRaises(ValueError, ast.literal_eval, "2+3") def test_literal_eval_str_int_limit(self): with support.adjust_int_max_str_digits(4000): - ast.literal_eval('3'*4000) # no error + ast.literal_eval("3" * 4000) # no error with self.assertRaises(SyntaxError) as err_ctx: - ast.literal_eval('3'*4001) - self.assertIn('Exceeds the limit ', str(err_ctx.exception)) - self.assertIn(' Consider hexadecimal ', str(err_ctx.exception)) + ast.literal_eval("3" * 4001) + self.assertIn("Exceeds the limit ", str(err_ctx.exception)) + self.assertIn(" Consider hexadecimal ", str(err_ctx.exception)) def test_literal_eval_complex(self): # Issue #4907 - self.assertEqual(ast.literal_eval('6j'), 6j) - self.assertEqual(ast.literal_eval('-6j'), -6j) - self.assertEqual(ast.literal_eval('6.75j'), 6.75j) - self.assertEqual(ast.literal_eval('-6.75j'), -6.75j) - self.assertEqual(ast.literal_eval('3+6j'), 3+6j) - self.assertEqual(ast.literal_eval('-3+6j'), -3+6j) - self.assertEqual(ast.literal_eval('3-6j'), 3-6j) - self.assertEqual(ast.literal_eval('-3-6j'), -3-6j) - self.assertEqual(ast.literal_eval('3.25+6.75j'), 3.25+6.75j) - self.assertEqual(ast.literal_eval('-3.25+6.75j'), -3.25+6.75j) - self.assertEqual(ast.literal_eval('3.25-6.75j'), 3.25-6.75j) - self.assertEqual(ast.literal_eval('-3.25-6.75j'), -3.25-6.75j) - self.assertEqual(ast.literal_eval('(3+6j)'), 3+6j) - self.assertRaises(ValueError, ast.literal_eval, '-6j+3') - self.assertRaises(ValueError, ast.literal_eval, '-6j+3j') - self.assertRaises(ValueError, ast.literal_eval, '3+-6j') - self.assertRaises(ValueError, ast.literal_eval, '3+(0+6j)') - self.assertRaises(ValueError, ast.literal_eval, '-(3+6j)') + self.assertEqual(ast.literal_eval("6j"), 6j) + self.assertEqual(ast.literal_eval("-6j"), -6j) + self.assertEqual(ast.literal_eval("6.75j"), 6.75j) + self.assertEqual(ast.literal_eval("-6.75j"), -6.75j) + self.assertEqual(ast.literal_eval("3+6j"), 3 + 6j) + self.assertEqual(ast.literal_eval("-3+6j"), -3 + 6j) + self.assertEqual(ast.literal_eval("3-6j"), 3 - 6j) + self.assertEqual(ast.literal_eval("-3-6j"), -3 - 6j) + self.assertEqual(ast.literal_eval("3.25+6.75j"), 3.25 + 6.75j) + self.assertEqual(ast.literal_eval("-3.25+6.75j"), -3.25 + 6.75j) + self.assertEqual(ast.literal_eval("3.25-6.75j"), 3.25 - 6.75j) + self.assertEqual(ast.literal_eval("-3.25-6.75j"), 
-3.25 - 6.75j) + self.assertEqual(ast.literal_eval("(3+6j)"), 3 + 6j) + self.assertRaises(ValueError, ast.literal_eval, "-6j+3") + self.assertRaises(ValueError, ast.literal_eval, "-6j+3j") + self.assertRaises(ValueError, ast.literal_eval, "3+-6j") + self.assertRaises(ValueError, ast.literal_eval, "3+(0+6j)") + self.assertRaises(ValueError, ast.literal_eval, "-(3+6j)") def test_literal_eval_malformed_dict_nodes(self): - malformed = ast.Dict(keys=[ast.Constant(1), ast.Constant(2)], values=[ast.Constant(3)]) + malformed = ast.Dict( + keys=[ast.Constant(1), ast.Constant(2)], values=[ast.Constant(3)] + ) self.assertRaises(ValueError, ast.literal_eval, malformed) - malformed = ast.Dict(keys=[ast.Constant(1)], values=[ast.Constant(2), ast.Constant(3)]) + malformed = ast.Dict( + keys=[ast.Constant(1)], values=[ast.Constant(2), ast.Constant(3)] + ) self.assertRaises(ValueError, ast.literal_eval, malformed) def test_literal_eval_trailing_ws(self): @@ -1497,46 +1568,54 @@ def test_literal_eval_trailing_ws(self): self.assertRaises(IndentationError, ast.literal_eval, "\n -1") def test_literal_eval_malformed_lineno(self): - msg = r'malformed node or string on line 3:' + msg = r"malformed node or string on line 3:" with self.assertRaisesRegex(ValueError, msg): ast.literal_eval("{'a': 1,\n'b':2,\n'c':++3,\n'd':4}") - node = ast.UnaryOp( - ast.UAdd(), ast.UnaryOp(ast.UAdd(), ast.Constant(6))) - self.assertIsNone(getattr(node, 'lineno', None)) - msg = r'malformed node or string:' + node = ast.UnaryOp(ast.UAdd(), ast.UnaryOp(ast.UAdd(), ast.Constant(6))) + self.assertIsNone(getattr(node, "lineno", None)) + msg = r"malformed node or string:" with self.assertRaisesRegex(ValueError, msg): ast.literal_eval(node) def test_literal_eval_syntax_errors(self): with self.assertRaisesRegex(SyntaxError, "unexpected indent"): - ast.literal_eval(r''' + ast.literal_eval(r""" \ (\ - \ ''') + \ """) def test_bad_integer(self): # issue13436: Bad error message with invalid numeric values - body = [ast.ImportFrom(module='time', - names=[ast.alias(name='sleep')], - level=None, - lineno=None, col_offset=None)] + body = [ + ast.ImportFrom( + module="time", + names=[ast.alias(name="sleep")], + level=None, + lineno=None, + col_offset=None, + ) + ] mod = ast.Module(body, []) with self.assertRaises(ValueError) as cm: - compile(mod, 'test', 'exec') + compile(mod, "test", "exec") self.assertIn("invalid integer value: None", str(cm.exception)) def test_level_as_none(self): - body = [ast.ImportFrom(module='time', - names=[ast.alias(name='sleep', - lineno=0, col_offset=0)], - level=None, - lineno=0, col_offset=0)] + body = [ + ast.ImportFrom( + module="time", + names=[ast.alias(name="sleep", lineno=0, col_offset=0)], + level=None, + lineno=0, + col_offset=0, + ) + ] mod = ast.Module(body, []) - code = compile(mod, 'test', 'exec') + code = compile(mod, "test", "exec") ns = {} exec(code, ns) - self.assertIn('sleep', ns) + self.assertIn("sleep", ns) def test_recursion_direct(self): e = ast.UnaryOp(op=ast.Not(), lineno=0, col_offset=0) @@ -1556,7 +1635,6 @@ def test_recursion_indirect(self): class ASTValidatorTests(unittest.TestCase): - def mod(self, mod, msg=None, mode="exec", *, exc=ValueError): mod.lineno = mod.col_offset = 0 ast.fix_missing_locations(mod) @@ -1582,9 +1660,15 @@ def test_module(self): self.mod(m, "must have Load context", "eval") def _check_arguments(self, fac, check): - def arguments(args=None, posonlyargs=None, vararg=None, - kwonlyargs=None, kwarg=None, - defaults=None, kw_defaults=None): + def arguments( + args=None, + 
posonlyargs=None, + vararg=None, + kwonlyargs=None, + kwarg=None, + defaults=None, + kw_defaults=None, + ): if args is None: args = [] if posonlyargs is None: @@ -1595,49 +1679,67 @@ def arguments(args=None, posonlyargs=None, vararg=None, defaults = [] if kw_defaults is None: kw_defaults = [] - args = ast.arguments(args, posonlyargs, vararg, kwonlyargs, - kw_defaults, kwarg, defaults) + args = ast.arguments( + args, posonlyargs, vararg, kwonlyargs, kw_defaults, kwarg, defaults + ) return fac(args) + args = [ast.arg("x", ast.Name("x", ast.Store()))] check(arguments(args=args), "must have Load context") check(arguments(posonlyargs=args), "must have Load context") check(arguments(kwonlyargs=args), "must have Load context") - check(arguments(defaults=[ast.Constant(3)]), - "more positional defaults than args") - check(arguments(kw_defaults=[ast.Constant(4)]), - "length of kwonlyargs is not the same as kw_defaults") + check( + arguments(defaults=[ast.Constant(3)]), "more positional defaults than args" + ) + check( + arguments(kw_defaults=[ast.Constant(4)]), + "length of kwonlyargs is not the same as kw_defaults", + ) args = [ast.arg("x", ast.Name("x", ast.Load()))] - check(arguments(args=args, defaults=[ast.Name("x", ast.Store())]), - "must have Load context") - args = [ast.arg("a", ast.Name("x", ast.Load())), - ast.arg("b", ast.Name("y", ast.Load()))] - check(arguments(kwonlyargs=args, - kw_defaults=[None, ast.Name("x", ast.Store())]), - "must have Load context") + check( + arguments(args=args, defaults=[ast.Name("x", ast.Store())]), + "must have Load context", + ) + args = [ + ast.arg("a", ast.Name("x", ast.Load())), + ast.arg("b", ast.Name("y", ast.Load())), + ] + check( + arguments(kwonlyargs=args, kw_defaults=[None, ast.Name("x", ast.Store())]), + "must have Load context", + ) def test_funcdef(self): a = ast.arguments([], [], None, [], [], None, []) f = ast.FunctionDef("x", a, [], [], None, None, []) self.stmt(f, "empty body on FunctionDef") - f = ast.FunctionDef("x", a, [ast.Pass()], [ast.Name("x", ast.Store())], None, None, []) + f = ast.FunctionDef( + "x", a, [ast.Pass()], [ast.Name("x", ast.Store())], None, None, [] + ) self.stmt(f, "must have Load context") - f = ast.FunctionDef("x", a, [ast.Pass()], [], - ast.Name("x", ast.Store()), None, []) + f = ast.FunctionDef( + "x", a, [ast.Pass()], [], ast.Name("x", ast.Store()), None, [] + ) self.stmt(f, "must have Load context") f = ast.FunctionDef("x", ast.arguments(), [ast.Pass()]) self.stmt(f) + def fac(args): return ast.FunctionDef("x", args, [ast.Pass()], [], None, None, []) + self._check_arguments(fac, self.stmt) def test_funcdef_pattern_matching(self): # gh-104799: New fields on FunctionDef should be added at the end def matcher(node): match node: - case ast.FunctionDef("foo", ast.arguments(args=[ast.arg("bar")]), - [ast.Pass()], - [ast.Name("capybara", ast.Load())], - ast.Name("pacarana", ast.Load())): + case ast.FunctionDef( + "foo", + ast.arguments(args=[ast.arg("bar")]), + [ast.Pass()], + [ast.Name("capybara", ast.Load())], + ast.Name("pacarana", ast.Load()), + ): return True case _: return False @@ -1653,7 +1755,9 @@ def foo(bar) -> pacarana: self.assertTrue(matcher(funcdef)) def test_classdef(self): - def cls(bases=None, keywords=None, body=None, decorator_list=None, type_params=None): + def cls( + bases=None, keywords=None, body=None, decorator_list=None, type_params=None + ): if bases is None: bases = [] if keywords is None: @@ -1664,38 +1768,46 @@ def cls(bases=None, keywords=None, body=None, decorator_list=None, type_params=N 
decorator_list = [] if type_params is None: type_params = [] - return ast.ClassDef("myclass", bases, keywords, - body, decorator_list, type_params) - self.stmt(cls(bases=[ast.Name("x", ast.Store())]), - "must have Load context") - self.stmt(cls(keywords=[ast.keyword("x", ast.Name("x", ast.Store()))]), - "must have Load context") + return ast.ClassDef( + "myclass", bases, keywords, body, decorator_list, type_params + ) + + self.stmt(cls(bases=[ast.Name("x", ast.Store())]), "must have Load context") + self.stmt( + cls(keywords=[ast.keyword("x", ast.Name("x", ast.Store()))]), + "must have Load context", + ) self.stmt(cls(body=[]), "empty body on ClassDef") self.stmt(cls(body=[None]), "None disallowed") - self.stmt(cls(decorator_list=[ast.Name("x", ast.Store())]), - "must have Load context") + self.stmt( + cls(decorator_list=[ast.Name("x", ast.Store())]), "must have Load context" + ) def test_delete(self): self.stmt(ast.Delete([]), "empty targets on Delete") self.stmt(ast.Delete([None]), "None disallowed") - self.stmt(ast.Delete([ast.Name("x", ast.Load())]), - "must have Del context") + self.stmt(ast.Delete([ast.Name("x", ast.Load())]), "must have Del context") def test_assign(self): self.stmt(ast.Assign([], ast.Constant(3)), "empty targets on Assign") self.stmt(ast.Assign([None], ast.Constant(3)), "None disallowed") - self.stmt(ast.Assign([ast.Name("x", ast.Load())], ast.Constant(3)), - "must have Store context") - self.stmt(ast.Assign([ast.Name("x", ast.Store())], - ast.Name("y", ast.Store())), - "must have Load context") + self.stmt( + ast.Assign([ast.Name("x", ast.Load())], ast.Constant(3)), + "must have Store context", + ) + self.stmt( + ast.Assign([ast.Name("x", ast.Store())], ast.Name("y", ast.Store())), + "must have Load context", + ) def test_augassign(self): - aug = ast.AugAssign(ast.Name("x", ast.Load()), ast.Add(), - ast.Name("y", ast.Load())) + aug = ast.AugAssign( + ast.Name("x", ast.Load()), ast.Add(), ast.Name("y", ast.Load()) + ) self.stmt(aug, "must have Store context") - aug = ast.AugAssign(ast.Name("x", ast.Store()), ast.Add(), - ast.Name("y", ast.Store())) + aug = ast.AugAssign( + ast.Name("x", ast.Store()), ast.Add(), ast.Name("y", ast.Store()) + ) self.stmt(aug, "must have Load context") def test_for(self): @@ -1703,21 +1815,28 @@ def test_for(self): y = ast.Name("y", ast.Load()) p = ast.Pass() self.stmt(ast.For(x, y, [], []), "empty body on For") - self.stmt(ast.For(ast.Name("x", ast.Load()), y, [p], []), - "must have Store context") - self.stmt(ast.For(x, ast.Name("y", ast.Store()), [p], []), - "must have Load context") + self.stmt( + ast.For(ast.Name("x", ast.Load()), y, [p], []), "must have Store context" + ) + self.stmt( + ast.For(x, ast.Name("y", ast.Store()), [p], []), "must have Load context" + ) e = ast.Expr(ast.Name("x", ast.Store())) self.stmt(ast.For(x, y, [e], []), "must have Load context") self.stmt(ast.For(x, y, [p], [e]), "must have Load context") def test_while(self): self.stmt(ast.While(ast.Constant(3), [], []), "empty body on While") - self.stmt(ast.While(ast.Name("x", ast.Store()), [ast.Pass()], []), - "must have Load context") - self.stmt(ast.While(ast.Constant(3), [ast.Pass()], - [ast.Expr(ast.Name("x", ast.Store()))]), - "must have Load context") + self.stmt( + ast.While(ast.Name("x", ast.Store()), [ast.Pass()], []), + "must have Load context", + ) + self.stmt( + ast.While( + ast.Constant(3), [ast.Pass()], [ast.Expr(ast.Name("x", ast.Store()))] + ), + "must have Load context", + ) def test_if(self): self.stmt(ast.If(ast.Constant(3), [], []), "empty 
body on If") @@ -1725,8 +1844,9 @@ def test_if(self): self.stmt(i, "must have Load context") i = ast.If(ast.Constant(3), [ast.Expr(ast.Name("x", ast.Store()))], []) self.stmt(i, "must have Load context") - i = ast.If(ast.Constant(3), [ast.Pass()], - [ast.Expr(ast.Name("x", ast.Store()))]) + i = ast.If( + ast.Constant(3), [ast.Pass()], [ast.Expr(ast.Name("x", ast.Store()))] + ) self.stmt(i, "must have Load context") def test_with(self): @@ -1788,10 +1908,10 @@ def test_try_star(self): self.stmt(t, "must have Load context") def test_assert(self): - self.stmt(ast.Assert(ast.Name("x", ast.Store()), None), - "must have Load context") - assrt = ast.Assert(ast.Name("x", ast.Load()), - ast.Name("y", ast.Store())) + self.stmt( + ast.Assert(ast.Name("x", ast.Store()), None), "must have Load context" + ) + assrt = ast.Assert(ast.Name("x", ast.Load()), ast.Name("y", ast.Store())) self.stmt(assrt, "must have Load context") def test_import(self): @@ -1828,10 +1948,11 @@ def test_unaryop(self): def test_lambda(self): a = ast.arguments([], [], None, [], [], None, []) - self.expr(ast.Lambda(a, ast.Name("x", ast.Store())), - "must have Load context") + self.expr(ast.Lambda(a, ast.Name("x", ast.Store())), "must have Load context") + def fac(args): return ast.Lambda(args, ast.Name("x", ast.Load())) + self._check_arguments(fac, self.expr) def test_ifexp(self): @@ -1853,11 +1974,13 @@ def test_set(self): def _check_comprehension(self, fac): self.expr(fac([]), "comprehension with no generators") - g = ast.comprehension(ast.Name("x", ast.Load()), - ast.Name("x", ast.Load()), [], 0) + g = ast.comprehension( + ast.Name("x", ast.Load()), ast.Name("x", ast.Load()), [], 0 + ) self.expr(fac([g]), "must have Store context") - g = ast.comprehension(ast.Name("x", ast.Store()), - ast.Name("x", ast.Store()), [], 0) + g = ast.comprehension( + ast.Name("x", ast.Store()), ast.Name("x", ast.Store()), [], 0 + ) self.expr(fac([g]), "must have Load context") x = ast.Name("x", ast.Store()) y = ast.Name("y", ast.Load()) @@ -1867,12 +1990,14 @@ def _check_comprehension(self, fac): self.expr(fac([g]), "must have Load context") def _simple_comp(self, fac): - g = ast.comprehension(ast.Name("x", ast.Store()), - ast.Name("x", ast.Load()), [], 0) - self.expr(fac(ast.Name("x", ast.Store()), [g]), - "must have Load context") + g = ast.comprehension( + ast.Name("x", ast.Store()), ast.Name("x", ast.Load()), [], 0 + ) + self.expr(fac(ast.Name("x", ast.Store()), [g]), "must have Load context") + def wrap(gens): return fac(ast.Name("x", ast.Store()), gens) + self._check_comprehension(wrap) def test_listcomp(self): @@ -1885,18 +2010,19 @@ def test_generatorexp(self): self._simple_comp(ast.GeneratorExp) def test_dictcomp(self): - g = ast.comprehension(ast.Name("y", ast.Store()), - ast.Name("p", ast.Load()), [], 0) - c = ast.DictComp(ast.Name("x", ast.Store()), - ast.Name("y", ast.Load()), [g]) + g = ast.comprehension( + ast.Name("y", ast.Store()), ast.Name("p", ast.Load()), [], 0 + ) + c = ast.DictComp(ast.Name("x", ast.Store()), ast.Name("y", ast.Load()), [g]) self.expr(c, "must have Load context") - c = ast.DictComp(ast.Name("x", ast.Load()), - ast.Name("y", ast.Store()), [g]) + c = ast.DictComp(ast.Name("x", ast.Load()), ast.Name("y", ast.Store()), [g]) self.expr(c, "must have Load context") + def factory(comps): k = ast.Name("x", ast.Load()) v = ast.Name("y", ast.Load()) return ast.DictComp(k, v, comps) + self._check_comprehension(factory) def test_yield(self): @@ -1928,62 +2054,68 @@ def test_call(self): def test_num(self): with 
warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import Num with warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('always', '', DeprecationWarning) + warnings.filterwarnings("always", "", DeprecationWarning) + class subint(int): pass + class subfloat(float): pass + class subcomplex(complex): pass + for obj in "0", "hello": self.expr(ast.Num(obj)) for obj in subint(), subfloat(), subcomplex(): self.expr(ast.Num(obj), "invalid type", exc=TypeError) - self.assertEqual([str(w.message) for w in wlog], [ - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - 'ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead', - ]) + self.assertEqual( + [str(w.message) for w in wlog], + [ + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + "ast.Num is deprecated and will be removed in Python 3.14; use ast.Constant instead", + ], + ) def test_attribute(self): attr = ast.Attribute(ast.Name("x", ast.Store()), "y", ast.Load()) self.expr(attr, "must have Load context") def test_subscript(self): - sub = ast.Subscript(ast.Name("x", ast.Store()), ast.Constant(3), - ast.Load()) + sub = ast.Subscript(ast.Name("x", ast.Store()), ast.Constant(3), ast.Load()) self.expr(sub, "must have Load context") x = ast.Name("x", ast.Load()) - sub = ast.Subscript(x, ast.Name("y", ast.Store()), - ast.Load()) + sub = ast.Subscript(x, ast.Name("y", ast.Store()), ast.Load()) self.expr(sub, "must have Load context") s = ast.Name("x", ast.Store()) for args in (s, None, None), (None, s, None), (None, None, s): sl = ast.Slice(*args) - self.expr(ast.Subscript(x, sl, ast.Load()), - "must have Load context") + self.expr(ast.Subscript(x, sl, ast.Load()), "must have Load context") sl = ast.Tuple([], ast.Load()) self.expr(ast.Subscript(x, sl, ast.Load())) sl = ast.Tuple([s], ast.Load()) self.expr(ast.Subscript(x, sl, ast.Load()), "must have Load context") def test_starred(self): - left = ast.List([ast.Starred(ast.Name("x", ast.Load()), ast.Store())], - ast.Store()) + left = ast.List( + [ast.Starred(ast.Name("x", ast.Load()), ast.Store())], ast.Store() + ) assign = ast.Assign([left], ast.Constant(4)) self.stmt(assign, "must have Store context") def _sequence(self, fac): self.expr(fac([None], ast.Load()), "None disallowed") - self.expr(fac([ast.Name("x", ast.Store())], ast.Load()), - "must have Load context") + self.expr( + fac([ast.Name("x", ast.Store())], ast.Load()), "must have Load context" + ) def test_list(self): self._sequence(ast.List) @@ -1993,18 +2125,21 @@ def test_tuple(self): def test_nameconstant(self): with warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('ignore', '', DeprecationWarning) + warnings.filterwarnings("ignore", "", DeprecationWarning) from ast import NameConstant with warnings.catch_warnings(record=True) as wlog: - 
warnings.filterwarnings('always', '', DeprecationWarning) + warnings.filterwarnings("always", "", DeprecationWarning) self.expr(ast.NameConstant(4)) - self.assertEqual([str(w.message) for w in wlog], [ - 'ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead', - ]) + self.assertEqual( + [str(w.message) for w in wlog], + [ + "ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead", + ], + ) - @support.requires_resource('cpu') + @support.requires_resource("cpu") def test_stdlib_validates(self): stdlib = os.path.dirname(ast.__file__) tests = [fn for fn in os.listdir(stdlib) if fn.endswith(".py")] @@ -2020,130 +2155,79 @@ def test_stdlib_validates(self): constant_1 = ast.Constant(1) pattern_1 = ast.MatchValue(constant_1) - constant_x = ast.Constant('x') + constant_x = ast.Constant("x") pattern_x = ast.MatchValue(constant_x) constant_true = ast.Constant(True) pattern_true = ast.MatchSingleton(True) - name_carter = ast.Name('carter', ast.Load()) + name_carter = ast.Name("carter", ast.Load()) _MATCH_PATTERNS = [ ast.MatchValue( ast.Attribute( - ast.Attribute( - ast.Name('x', ast.Store()), - 'y', ast.Load() - ), - 'z', ast.Load() + ast.Attribute(ast.Name("x", ast.Store()), "y", ast.Load()), + "z", + ast.Load(), ) ), ast.MatchValue( ast.Attribute( - ast.Attribute( - ast.Name('x', ast.Load()), - 'y', ast.Store() - ), - 'z', ast.Load() + ast.Attribute(ast.Name("x", ast.Load()), "y", ast.Store()), + "z", + ast.Load(), ) ), - ast.MatchValue( - ast.Constant(...) - ), - ast.MatchValue( - ast.Constant(True) - ), - ast.MatchValue( - ast.Constant((1,2,3)) - ), - ast.MatchSingleton('string'), - ast.MatchSequence([ - ast.MatchSingleton('string') - ]), - ast.MatchSequence( - [ - ast.MatchSequence( - [ - ast.MatchSingleton('string') - ] - ) - ] - ), - ast.MatchMapping( - [constant_1, constant_true], - [pattern_x] - ), + ast.MatchValue(ast.Constant(...)), + ast.MatchValue(ast.Constant(True)), + ast.MatchValue(ast.Constant((1, 2, 3))), + ast.MatchSingleton("string"), + ast.MatchSequence([ast.MatchSingleton("string")]), + ast.MatchSequence([ast.MatchSequence([ast.MatchSingleton("string")])]), + ast.MatchMapping([constant_1, constant_true], [pattern_x]), ast.MatchMapping( - [constant_true, constant_1], - [pattern_x, pattern_1], - rest='True' + [constant_true, constant_1], [pattern_x, pattern_1], rest="True" ), ast.MatchMapping( - [constant_true, ast.Starred(ast.Name('lol', ast.Load()), ast.Load())], + [constant_true, ast.Starred(ast.Name("lol", ast.Load()), ast.Load())], [pattern_x, pattern_1], - rest='legit' + rest="legit", ), ast.MatchClass( - ast.Attribute( - ast.Attribute( - constant_x, - 'y', ast.Load()), - 'z', ast.Load()), - patterns=[], kwd_attrs=[], kwd_patterns=[] + ast.Attribute(ast.Attribute(constant_x, "y", ast.Load()), "z", ast.Load()), + patterns=[], + kwd_attrs=[], + kwd_patterns=[], ), ast.MatchClass( - name_carter, - patterns=[], - kwd_attrs=['True'], - kwd_patterns=[pattern_1] + name_carter, patterns=[], kwd_attrs=["True"], kwd_patterns=[pattern_1] ), ast.MatchClass( - name_carter, - patterns=[], - kwd_attrs=[], - kwd_patterns=[pattern_1] + name_carter, patterns=[], kwd_attrs=[], kwd_patterns=[pattern_1] ), ast.MatchClass( name_carter, - patterns=[ast.MatchSingleton('string')], + patterns=[ast.MatchSingleton("string")], kwd_attrs=[], - kwd_patterns=[] + kwd_patterns=[], ), ast.MatchClass( - name_carter, - patterns=[ast.MatchStar()], - kwd_attrs=[], - kwd_patterns=[] + name_carter, patterns=[ast.MatchStar()], kwd_attrs=[], 
kwd_patterns=[] ), ast.MatchClass( - name_carter, - patterns=[], - kwd_attrs=[], - kwd_patterns=[ast.MatchStar()] + name_carter, patterns=[], kwd_attrs=[], kwd_patterns=[ast.MatchStar()] ), ast.MatchClass( constant_true, # invalid name patterns=[], - kwd_attrs=['True'], - kwd_patterns=[pattern_1] - ), - ast.MatchSequence( - [ - ast.MatchStar("True") - ] - ), - ast.MatchAs( - name='False' - ), - ast.MatchOr( - [] - ), - ast.MatchOr( - [pattern_1] - ), - ast.MatchOr( - [pattern_1, pattern_x, ast.MatchSingleton('xxx')] + kwd_attrs=["True"], + kwd_patterns=[pattern_1], ), + ast.MatchSequence([ast.MatchStar("True")]), + ast.MatchAs(name="False"), + ast.MatchOr([]), + ast.MatchOr([pattern_1]), + ast.MatchOr([pattern_1, pattern_x, ast.MatchSingleton("xxx")]), ast.MatchAs(name="_"), ast.MatchStar(name="x"), ast.MatchSequence([ast.MatchStar("_")]), @@ -2151,17 +2235,12 @@ def test_stdlib_validates(self): ] def test_match_validation_pattern(self): - name_x = ast.Name('x', ast.Load()) + name_x = ast.Name("x", ast.Load()) for pattern in self._MATCH_PATTERNS: with self.subTest(ast.dump(pattern, indent=4)): node = ast.Match( subject=name_x, - cases = [ - ast.match_case( - pattern=pattern, - body = [ast.Pass()] - ) - ] + cases=[ast.match_case(pattern=pattern, body=[ast.Pass()])], ) node = ast.fix_missing_locations(node) module = ast.Module([node], []) @@ -2184,16 +2263,15 @@ def compile_constant(self, value): ns = {} exec(code, ns) - return ns['x'] + return ns["x"] def test_validation(self): with self.assertRaises(TypeError) as cm: self.compile_constant([1, 2, 3]) - self.assertEqual(str(cm.exception), - "got an invalid type in Constant: list") + self.assertEqual(str(cm.exception), "got an invalid type in Constant: list") def test_singletons(self): - for const in (None, False, True, Ellipsis, b'', frozenset()): + for const in (None, False, True, Ellipsis, b"", frozenset()): with self.subTest(const=const): value = self.compile_constant(const) self.assertIs(value, const) @@ -2204,10 +2282,17 @@ def test_values(self): for level in range(3): nested_tuple = (nested_tuple, 2) nested_frozenset = frozenset({nested_frozenset, 2}) - values = (123, 123.0, 123j, - "unicode", b'bytes', - tuple("tuple"), frozenset("frozenset"), - nested_tuple, nested_frozenset) + values = ( + 123, + 123.0, + 123j, + "unicode", + b"bytes", + tuple("tuple"), + frozenset("frozenset"), + nested_tuple, + nested_frozenset, + ) for value in values: with self.subTest(value=value): result = self.compile_constant(value) @@ -2223,42 +2308,35 @@ def test_assign_to_constant(self): with self.assertRaises(ValueError) as cm: compile(tree, "string", "exec") - self.assertEqual(str(cm.exception), - "expression which can't be assigned " - "to in Store context") + self.assertEqual( + str(cm.exception), + "expression which can't be assigned " "to in Store context", + ) def test_get_docstring(self): tree = ast.parse("'docstring'\nx = 1") - self.assertEqual(ast.get_docstring(tree), 'docstring') + self.assertEqual(ast.get_docstring(tree), "docstring") def get_load_const(self, tree): # Compile to bytecode, disassemble and get parameter of LOAD_CONST # instructions - co = compile(tree, '', 'exec') + co = compile(tree, "", "exec") consts = [] for instr in dis.get_instructions(co): - if instr.opname == 'LOAD_CONST' or instr.opname == 'RETURN_CONST': + if instr.opname == "LOAD_CONST" or instr.opname == "RETURN_CONST": consts.append(instr.argval) return consts @support.cpython_only def test_load_const(self): - consts = [None, - True, False, - 124, - 2.0, - 3j, - 
"unicode", - b'bytes', - (1, 2, 3)] - - code = '\n'.join(['x={!r}'.format(const) for const in consts]) - code += '\nx = ...' + consts = [None, True, False, 124, 2.0, 3j, "unicode", b"bytes", (1, 2, 3)] + + code = "\n".join(["x={!r}".format(const) for const in consts]) + code += "\nx = ..." consts.extend((Ellipsis, None)) tree = ast.parse(code) - self.assertEqual(self.get_load_const(tree), - consts) + self.assertEqual(self.get_load_const(tree), consts) # Replace expression nodes with constants for assign, const in zip(tree.body, consts): @@ -2267,8 +2345,7 @@ def test_load_const(self): ast.copy_location(new_node, assign.value) assign.value = new_node - self.assertEqual(self.get_load_const(tree), - consts) + self.assertEqual(self.get_load_const(tree), consts) def test_literal_eval(self): tree = ast.parse("1 + 2") @@ -2282,22 +2359,22 @@ def test_literal_eval(self): ast.copy_location(new_right, binop.right) binop.right = new_right - self.assertEqual(ast.literal_eval(binop), 10+20j) + self.assertEqual(ast.literal_eval(binop), 10 + 20j) def test_string_kind(self): - c = ast.parse('"x"', mode='eval').body + c = ast.parse('"x"', mode="eval").body self.assertEqual(c.value, "x") self.assertEqual(c.kind, None) - c = ast.parse('u"x"', mode='eval').body + c = ast.parse('u"x"', mode="eval").body self.assertEqual(c.value, "x") self.assertEqual(c.kind, "u") - c = ast.parse('r"x"', mode='eval').body + c = ast.parse('r"x"', mode="eval").body self.assertEqual(c.value, "x") self.assertEqual(c.kind, None) - c = ast.parse('b"x"', mode='eval').body + c = ast.parse('b"x"', mode="eval").body self.assertEqual(c.value, b"x") self.assertEqual(c.kind, None) @@ -2308,6 +2385,7 @@ class EndPositionTests(unittest.TestCase): Testing end positions of nodes requires a bit of extra care because of how LL parsers work. 
""" + def _check_end_pos(self, ast_node, end_lineno, end_col_offset): self.assertEqual(ast_node.end_lineno, end_lineno) self.assertEqual(ast_node.end_col_offset, end_col_offset) @@ -2321,55 +2399,55 @@ def _parse_value(self, s): return ast.parse(s).body[0].value def test_lambda(self): - s = 'lambda x, *y: None' + s = "lambda x, *y: None" lam = self._parse_value(s) - self._check_content(s, lam.body, 'None') - self._check_content(s, lam.args.args[0], 'x') - self._check_content(s, lam.args.vararg, 'y') + self._check_content(s, lam.body, "None") + self._check_content(s, lam.args.args[0], "x") + self._check_content(s, lam.args.vararg, "y") def test_func_def(self): - s = dedent(''' + s = dedent(""" def func(x: int, *args: str, z: float = 0, **kwargs: Any) -> bool: return True - ''').strip() + """).strip() fdef = ast.parse(s).body[0] self._check_end_pos(fdef, 5, 15) - self._check_content(s, fdef.body[0], 'return True') - self._check_content(s, fdef.args.args[0], 'x: int') - self._check_content(s, fdef.args.args[0].annotation, 'int') - self._check_content(s, fdef.args.kwarg, 'kwargs: Any') - self._check_content(s, fdef.args.kwarg.annotation, 'Any') + self._check_content(s, fdef.body[0], "return True") + self._check_content(s, fdef.args.args[0], "x: int") + self._check_content(s, fdef.args.args[0].annotation, "int") + self._check_content(s, fdef.args.kwarg, "kwargs: Any") + self._check_content(s, fdef.args.kwarg.annotation, "Any") def test_call(self): - s = 'func(x, y=2, **kw)' + s = "func(x, y=2, **kw)" call = self._parse_value(s) - self._check_content(s, call.func, 'func') - self._check_content(s, call.keywords[0].value, '2') - self._check_content(s, call.keywords[1].value, 'kw') + self._check_content(s, call.func, "func") + self._check_content(s, call.keywords[0].value, "2") + self._check_content(s, call.keywords[1].value, "kw") def test_call_noargs(self): - s = 'x[0]()' + s = "x[0]()" call = self._parse_value(s) - self._check_content(s, call.func, 'x[0]') + self._check_content(s, call.func, "x[0]") self._check_end_pos(call, 1, 6) def test_class_def(self): - s = dedent(''' + s = dedent(""" class C(A, B): x: int = 0 - ''').strip() + """).strip() cdef = ast.parse(s).body[0] self._check_end_pos(cdef, 2, 14) - self._check_content(s, cdef.bases[1], 'B') - self._check_content(s, cdef.body[0], 'x: int = 0') + self._check_content(s, cdef.bases[1], "B") + self._check_content(s, cdef.body[0], "x: int = 0") def test_class_kw(self): - s = 'class S(metaclass=abc.ABCMeta): pass' + s = "class S(metaclass=abc.ABCMeta): pass" cdef = ast.parse(s).body[0] - self._check_content(s, cdef.keywords[0].value, 'abc.ABCMeta') + self._check_content(s, cdef.keywords[0].value, "abc.ABCMeta") def test_multi_line_str(self): s = dedent(''' @@ -2382,10 +2460,10 @@ def test_multi_line_str(self): self._check_end_pos(assign.value, 3, 40) def test_continued_str(self): - s = dedent(''' + s = dedent(""" x = "first part" \\ "second part" - ''').strip() + """).strip() assign = ast.parse(s).body[0] self._check_end_pos(assign, 2, 13) self._check_end_pos(assign.value, 2, 13) @@ -2393,7 +2471,7 @@ def test_continued_str(self): def test_suites(self): # We intentionally put these into the same string to check # that empty lines are not part of the suite. 
- s = dedent(''' + s = dedent(""" while True: pass @@ -2413,7 +2491,7 @@ def test_suites(self): pass pass - ''').strip() + """).strip() mod = ast.parse(s) while_loop = mod.body[0] if_stmt = mod.body[1] @@ -2427,18 +2505,18 @@ def test_suites(self): self._check_end_pos(try_stmt, 17, 8) self._check_end_pos(pass_stmt, 19, 4) - self._check_content(s, while_loop.test, 'True') - self._check_content(s, if_stmt.body[0], 'x = None') - self._check_content(s, if_stmt.orelse[0].test, 'other()') - self._check_content(s, for_loop.target, 'x, y') - self._check_content(s, try_stmt.body[0], 'raise RuntimeError') - self._check_content(s, try_stmt.handlers[0].type, 'TypeError') + self._check_content(s, while_loop.test, "True") + self._check_content(s, if_stmt.body[0], "x = None") + self._check_content(s, if_stmt.orelse[0].test, "other()") + self._check_content(s, for_loop.target, "x, y") + self._check_content(s, try_stmt.body[0], "raise RuntimeError") + self._check_content(s, try_stmt.handlers[0].type, "TypeError") def test_fstring(self): s = 'x = f"abc {x + y} abc"' fstr = self._parse_value(s) binop = fstr.values[1].value - self._check_content(s, binop, 'x + y') + self._check_content(s, binop, "x + y") def test_fstring_multi_line(self): s = dedent(''' @@ -2453,200 +2531,198 @@ def test_fstring_multi_line(self): fstr = self._parse_value(s) binop = fstr.values[1].value self._check_end_pos(binop, 5, 7) - self._check_content(s, binop.left, 'arg_one') - self._check_content(s, binop.right, 'arg_two') + self._check_content(s, binop.left, "arg_one") + self._check_content(s, binop.right, "arg_two") def test_import_from_multi_line(self): - s = dedent(''' + s = dedent(""" from x.y.z import ( a, b, c as c ) - ''').strip() + """).strip() imp = ast.parse(s).body[0] self._check_end_pos(imp, 3, 1) self._check_end_pos(imp.names[2], 2, 16) def test_slices(self): - s1 = 'f()[1, 2] [0]' - s2 = 'x[ a.b: c.d]' - sm = dedent(''' + s1 = "f()[1, 2] [0]" + s2 = "x[ a.b: c.d]" + sm = dedent(""" x[ a.b: f () , g () : c.d ] - ''').strip() + """).strip() i1, i2, im = map(self._parse_value, (s1, s2, sm)) - self._check_content(s1, i1.value, 'f()[1, 2]') - self._check_content(s1, i1.value.slice, '1, 2') - self._check_content(s2, i2.slice.lower, 'a.b') - self._check_content(s2, i2.slice.upper, 'c.d') - self._check_content(sm, im.slice.elts[0].upper, 'f ()') - self._check_content(sm, im.slice.elts[1].lower, 'g ()') + self._check_content(s1, i1.value, "f()[1, 2]") + self._check_content(s1, i1.value.slice, "1, 2") + self._check_content(s2, i2.slice.lower, "a.b") + self._check_content(s2, i2.slice.upper, "c.d") + self._check_content(sm, im.slice.elts[0].upper, "f ()") + self._check_content(sm, im.slice.elts[1].lower, "g ()") self._check_end_pos(im, 3, 3) def test_binop(self): - s = dedent(''' + s = dedent(""" (1 * 2 + (3 ) + 4 ) - ''').strip() + """).strip() binop = self._parse_value(s) self._check_end_pos(binop, 2, 6) - self._check_content(s, binop.right, '4') - self._check_content(s, binop.left, '1 * 2 + (3 )') - self._check_content(s, binop.left.right, '3') + self._check_content(s, binop.right, "4") + self._check_content(s, binop.left, "1 * 2 + (3 )") + self._check_content(s, binop.left.right, "3") def test_boolop(self): - s = dedent(''' + s = dedent(""" if (one_condition and (other_condition or yet_another_one)): pass - ''').strip() + """).strip() bop = ast.parse(s).body[0].test self._check_end_pos(bop, 2, 44) - self._check_content(s, bop.values[1], - 'other_condition or yet_another_one') + self._check_content(s, bop.values[1], 
"other_condition or yet_another_one") def test_tuples(self): - s1 = 'x = () ;' - s2 = 'x = 1 , ;' - s3 = 'x = (1 , 2 ) ;' - sm = dedent(''' + s1 = "x = () ;" + s2 = "x = 1 , ;" + s3 = "x = (1 , 2 ) ;" + sm = dedent(""" x = ( a, b, ) - ''').strip() + """).strip() t1, t2, t3, tm = map(self._parse_value, (s1, s2, s3, sm)) - self._check_content(s1, t1, '()') - self._check_content(s2, t2, '1 ,') - self._check_content(s3, t3, '(1 , 2 )') + self._check_content(s1, t1, "()") + self._check_content(s2, t2, "1 ,") + self._check_content(s3, t3, "(1 , 2 )") self._check_end_pos(tm, 3, 1) def test_attribute_spaces(self): - s = 'func(x. y .z)' + s = "func(x. y .z)" call = self._parse_value(s) self._check_content(s, call, s) - self._check_content(s, call.args[0], 'x. y .z') + self._check_content(s, call.args[0], "x. y .z") def test_redundant_parenthesis(self): - s = '( ( ( a + b ) ) )' + s = "( ( ( a + b ) ) )" v = ast.parse(s).body[0].value - self.assertEqual(type(v).__name__, 'BinOp') - self._check_content(s, v, 'a + b') - s2 = 'await ' + s + self.assertEqual(type(v).__name__, "BinOp") + self._check_content(s, v, "a + b") + s2 = "await " + s v = ast.parse(s2).body[0].value.value - self.assertEqual(type(v).__name__, 'BinOp') - self._check_content(s2, v, 'a + b') + self.assertEqual(type(v).__name__, "BinOp") + self._check_content(s2, v, "a + b") def test_trailers_with_redundant_parenthesis(self): tests = ( - ('( ( ( a ) ) ) ( )', 'Call'), - ('( ( ( a ) ) ) ( b )', 'Call'), - ('( ( ( a ) ) ) [ b ]', 'Subscript'), - ('( ( ( a ) ) ) . b', 'Attribute'), + ("( ( ( a ) ) ) ( )", "Call"), + ("( ( ( a ) ) ) ( b )", "Call"), + ("( ( ( a ) ) ) [ b ]", "Subscript"), + ("( ( ( a ) ) ) . b", "Attribute"), ) for s, t in tests: with self.subTest(s): v = ast.parse(s).body[0].value self.assertEqual(type(v).__name__, t) self._check_content(s, v, s) - s2 = 'await ' + s + s2 = "await " + s v = ast.parse(s2).body[0].value.value self.assertEqual(type(v).__name__, t) self._check_content(s2, v, s) def test_displays(self): - s1 = '[{}, {1, }, {1, 2,} ]' - s2 = '{a: b, f (): g () ,}' + s1 = "[{}, {1, }, {1, 2,} ]" + s2 = "{a: b, f (): g () ,}" c1 = self._parse_value(s1) c2 = self._parse_value(s2) - self._check_content(s1, c1.elts[0], '{}') - self._check_content(s1, c1.elts[1], '{1, }') - self._check_content(s1, c1.elts[2], '{1, 2,}') - self._check_content(s2, c2.keys[1], 'f ()') - self._check_content(s2, c2.values[1], 'g ()') + self._check_content(s1, c1.elts[0], "{}") + self._check_content(s1, c1.elts[1], "{1, }") + self._check_content(s1, c1.elts[2], "{1, 2,}") + self._check_content(s2, c2.keys[1], "f ()") + self._check_content(s2, c2.values[1], "g ()") def test_comprehensions(self): - s = dedent(''' + s = dedent(""" x = [{x for x, y in stuff if cond.x} for stuff in things] - ''').strip() + """).strip() cmp = self._parse_value(s) self._check_end_pos(cmp, 2, 37) - self._check_content(s, cmp.generators[0].iter, 'things') - self._check_content(s, cmp.elt.generators[0].iter, 'stuff') - self._check_content(s, cmp.elt.generators[0].ifs[0], 'cond.x') - self._check_content(s, cmp.elt.generators[0].target, 'x, y') + self._check_content(s, cmp.generators[0].iter, "things") + self._check_content(s, cmp.elt.generators[0].iter, "stuff") + self._check_content(s, cmp.elt.generators[0].ifs[0], "cond.x") + self._check_content(s, cmp.elt.generators[0].target, "x, y") def test_yield_await(self): - s = dedent(''' + s = dedent(""" async def f(): yield x await y - ''').strip() + """).strip() fdef = ast.parse(s).body[0] - self._check_content(s, 
fdef.body[0].value, 'yield x') - self._check_content(s, fdef.body[1].value, 'await y') + self._check_content(s, fdef.body[0].value, "yield x") + self._check_content(s, fdef.body[1].value, "await y") def test_source_segment_multi(self): - s_orig = dedent(''' + s_orig = dedent(""" x = ( a, b, ) + () - ''').strip() - s_tuple = dedent(''' + """).strip() + s_tuple = dedent(""" ( a, b, ) - ''').strip() + """).strip() binop = self._parse_value(s_orig) self.assertEqual(ast.get_source_segment(s_orig, binop.left), s_tuple) def test_source_segment_padded(self): - s_orig = dedent(''' + s_orig = dedent(""" class C: def fun(self) -> None: "ЖЖЖЖЖ" - ''').strip() - s_method = ' def fun(self) -> None:\n' \ - ' "ЖЖЖЖЖ"' + """).strip() + s_method = " def fun(self) -> None:\n" ' "ЖЖЖЖЖ"' cdef = ast.parse(s_orig).body[0] - self.assertEqual(ast.get_source_segment(s_orig, cdef.body[0], padded=True), - s_method) + self.assertEqual( + ast.get_source_segment(s_orig, cdef.body[0], padded=True), s_method + ) def test_source_segment_endings(self): - s = 'v = 1\r\nw = 1\nx = 1\n\ry = 1\rz = 1\r\n' + s = "v = 1\r\nw = 1\nx = 1\n\ry = 1\rz = 1\r\n" v, w, x, y, z = ast.parse(s).body - self._check_content(s, v, 'v = 1') - self._check_content(s, w, 'w = 1') - self._check_content(s, x, 'x = 1') - self._check_content(s, y, 'y = 1') - self._check_content(s, z, 'z = 1') + self._check_content(s, v, "v = 1") + self._check_content(s, w, "w = 1") + self._check_content(s, x, "x = 1") + self._check_content(s, y, "y = 1") + self._check_content(s, z, "z = 1") def test_source_segment_tabs(self): - s = dedent(''' + s = dedent(""" class C: \t\f def fun(self) -> None: \t\f pass - ''').strip() - s_method = ' \t\f def fun(self) -> None:\n' \ - ' \t\f pass' + """).strip() + s_method = " \t\f def fun(self) -> None:\n" " \t\f pass" cdef = ast.parse(s).body[0] self.assertEqual(ast.get_source_segment(s, cdef.body[0], padded=True), s_method) def test_source_segment_newlines(self): - s = 'def f():\n pass\ndef g():\r pass\r\ndef h():\r\n pass\r\n' + s = "def f():\n pass\ndef g():\r pass\r\ndef h():\r\n pass\r\n" f, g, h = ast.parse(s).body - self._check_content(s, f, 'def f():\n pass') - self._check_content(s, g, 'def g():\r pass') - self._check_content(s, h, 'def h():\r\n pass') + self._check_content(s, f, "def f():\n pass") + self._check_content(s, g, "def g():\r pass") + self._check_content(s, h, "def h():\r\n pass") - s = 'def f():\n a = 1\r b = 2\r\n c = 3\n' + s = "def f():\n a = 1\r b = 2\r\n c = 3\n" f = ast.parse(s).body[0] self._check_content(s, f, s.rstrip()) def test_source_segment_missing_info(self): - s = 'v = 1\r\nw = 1\nx = 1\n\ry = 1\r\n' + s = "v = 1\r\nw = 1\nx = 1\n\ry = 1\r\n" v, w, x, y = ast.parse(s).body del v.lineno del w.end_lineno @@ -2657,21 +2733,28 @@ def test_source_segment_missing_info(self): self.assertIsNone(ast.get_source_segment(s, x)) self.assertIsNone(ast.get_source_segment(s, y)) + class BaseNodeVisitorCases: # Both `NodeVisitor` and `NodeTranformer` must raise these warnings: def test_old_constant_nodes(self): class Visitor(self.visitor_class): def visit_Num(self, node): - log.append((node.lineno, 'Num', node.n)) + log.append((node.lineno, "Num", node.n)) + def visit_Str(self, node): - log.append((node.lineno, 'Str', node.s)) + log.append((node.lineno, "Str", node.s)) + def visit_Bytes(self, node): - log.append((node.lineno, 'Bytes', node.s)) + log.append((node.lineno, "Bytes", node.s)) + def visit_NameConstant(self, node): - log.append((node.lineno, 'NameConstant', node.value)) + log.append((node.lineno, 
"NameConstant", node.value)) + def visit_Ellipsis(self, node): - log.append((node.lineno, 'Ellipsis', ...)) - mod = ast.parse(dedent('''\ + log.append((node.lineno, "Ellipsis", ...)) + + mod = ast.parse( + dedent("""\ i = 42 f = 4.25 c = 4.25j @@ -2680,37 +2763,44 @@ def visit_Ellipsis(self, node): t = True n = None e = ... - ''')) + """) + ) visitor = Visitor() log = [] with warnings.catch_warnings(record=True) as wlog: - warnings.filterwarnings('always', '', DeprecationWarning) + warnings.filterwarnings("always", "", DeprecationWarning) visitor.visit(mod) - self.assertEqual(log, [ - (1, 'Num', 42), - (2, 'Num', 4.25), - (3, 'Num', 4.25j), - (4, 'Str', 'string'), - (5, 'Bytes', b'bytes'), - (6, 'NameConstant', True), - (7, 'NameConstant', None), - (8, 'Ellipsis', ...), - ]) - self.assertEqual([str(w.message) for w in wlog], [ - 'visit_Num is deprecated; add visit_Constant', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'visit_Num is deprecated; add visit_Constant', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'visit_Num is deprecated; add visit_Constant', - 'Attribute n is deprecated and will be removed in Python 3.14; use value instead', - 'visit_Str is deprecated; add visit_Constant', - 'Attribute s is deprecated and will be removed in Python 3.14; use value instead', - 'visit_Bytes is deprecated; add visit_Constant', - 'Attribute s is deprecated and will be removed in Python 3.14; use value instead', - 'visit_NameConstant is deprecated; add visit_Constant', - 'visit_NameConstant is deprecated; add visit_Constant', - 'visit_Ellipsis is deprecated; add visit_Constant', - ]) + self.assertEqual( + log, + [ + (1, "Num", 42), + (2, "Num", 4.25), + (3, "Num", 4.25j), + (4, "Str", "string"), + (5, "Bytes", b"bytes"), + (6, "NameConstant", True), + (7, "NameConstant", None), + (8, "Ellipsis", ...), + ], + ) + self.assertEqual( + [str(w.message) for w in wlog], + [ + "visit_Num is deprecated; add visit_Constant", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "visit_Num is deprecated; add visit_Constant", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "visit_Num is deprecated; add visit_Constant", + "Attribute n is deprecated and will be removed in Python 3.14; use value instead", + "visit_Str is deprecated; add visit_Constant", + "Attribute s is deprecated and will be removed in Python 3.14; use value instead", + "visit_Bytes is deprecated; add visit_Constant", + "Attribute s is deprecated and will be removed in Python 3.14; use value instead", + "visit_NameConstant is deprecated; add visit_Constant", + "visit_NameConstant is deprecated; add visit_Constant", + "visit_Ellipsis is deprecated; add visit_Constant", + ], + ) class NodeVisitorTests(BaseNodeVisitorCases, unittest.TestCase): @@ -2720,8 +2810,7 @@ class NodeVisitorTests(BaseNodeVisitorCases, unittest.TestCase): class NodeTransformerTests(ASTTestMixin, BaseNodeVisitorCases, unittest.TestCase): visitor_class = ast.NodeTransformer - def assertASTTransformation(self, tranformer_class, - initial_code, expected_code): + def assertASTTransformation(self, tranformer_class, initial_code, expected_code): initial_ast = ast.parse(dedent(initial_code)) expected_ast = ast.parse(dedent(expected_code)) @@ -2731,15 +2820,15 @@ def assertASTTransformation(self, tranformer_class, self.assertASTEqual(result_ast, expected_ast) def test_node_remove_single(self): - code = 'def func(arg) -> SomeType: 
...' - expected = 'def func(arg): ...' + code = "def func(arg) -> SomeType: ..." + expected = "def func(arg): ..." # Since `FunctionDef.returns` is defined as a single value, we test # the `if isinstance(old_value, AST):` branch here. class SomeTypeRemover(ast.NodeTransformer): def visit_Name(self, node: ast.Name): self.generic_visit(node) - if node.id == 'SomeType': + if node.id == "SomeType": return None return node @@ -2778,11 +2867,11 @@ class DSL(Base, kw1=True, kw2=True, kw3=False): ... class ExtendKeywords(ast.NodeTransformer): def visit_keyword(self, node: ast.keyword): self.generic_visit(node) - if node.arg == 'kw1': + if node.arg == "kw1": return [ node, - ast.keyword('kw2', ast.Constant(True)), - ast.keyword('kw3', ast.Constant(False)), + ast.keyword("kw2", ast.Constant(True)), + ast.keyword("kw3", ast.Constant(False)), ] return node @@ -2801,8 +2890,8 @@ def func(arg): class PrintToLog(ast.NodeTransformer): def visit_Call(self, node: ast.Call): self.generic_visit(node) - if isinstance(node.func, ast.Name) and node.func.id == 'print': - node.func.id = 'log' + if isinstance(node.func, ast.Name) and node.func.id == "print": + node.func.id = "log" return node self.assertASTTransformation(PrintToLog, code, expected) @@ -2820,15 +2909,15 @@ def func(arg): class PrintToLog(ast.NodeTransformer): def visit_Call(self, node: ast.Call): self.generic_visit(node) - if isinstance(node.func, ast.Name) and node.func.id == 'print': + if isinstance(node.func, ast.Name) and node.func.id == "print": return ast.Call( func=ast.Attribute( - ast.Name('logger', ctx=ast.Load()), - attr='log', + ast.Name("logger", ctx=ast.Load()), + attr="log", ctx=ast.Load(), ), args=node.args, - keywords=[ast.keyword('debug', ast.Constant(True))], + keywords=[ast.keyword("debug", ast.Constant(True))], ) return node @@ -2841,9 +2930,9 @@ class ModuleStateTests(unittest.TestCase): def check_ast_module(self): # Check that the _ast module still works as expected - code = 'x + 1' - filename = '' - mode = 'eval' + code = "x + 1" + filename = "" + mode = "eval" # Create _ast.AST subclasses instances ast_tree = compile(code, filename, mode, flags=ast.PyCF_ONLY_AST) @@ -2854,11 +2943,11 @@ def check_ast_module(self): def test_reload_module(self): # bpo-41194: Importing the _ast module twice must not crash. - with support.swap_item(sys.modules, '_ast', None): - del sys.modules['_ast'] + with support.swap_item(sys.modules, "_ast", None): + del sys.modules["_ast"] import _ast as ast1 - del sys.modules['_ast'] + del sys.modules["_ast"] import _ast as ast2 self.check_ast_module() @@ -2879,22 +2968,23 @@ def my_import(name, *args, **kw): sys.modules[name] = lazy_mod return lazy_mod - with support.swap_item(sys.modules, '_ast', None): - del sys.modules['_ast'] + with support.swap_item(sys.modules, "_ast", None): + del sys.modules["_ast"] - with support.swap_attr(builtins, '__import__', my_import): + with support.swap_attr(builtins, "__import__", my_import): # Test that compile() does not import the _ast module self.check_ast_module() - self.assertNotIn('_ast', sys.modules) + self.assertNotIn("_ast", sys.modules) # Sanity check of the test itself import _ast + self.assertIs(_ast, lazy_mod) def test_subinterpreter(self): # bpo-41631: Importing and using the _ast module in a subinterpreter # must not crash. 
- code = dedent(''' + code = dedent(""" import _ast import ast import gc @@ -2912,7 +3002,7 @@ def test_subinterpreter(self): del ast, _ast del sys.modules['ast'], sys.modules['_ast'] gc.collect() - ''') + """) res = support.run_in_subinterp(code) self.assertEqual(res, 0) @@ -2926,23 +3016,25 @@ def test_cli_file_input(self): with os_helper.temp_dir() as tmp_dir: filename = os.path.join(tmp_dir, "test_module.py") - with open(filename, 'w', encoding='utf-8') as f: + with open(filename, "w", encoding="utf-8") as f: f.write(code) res, _ = script_helper.run_python_until_end("-m", "ast", filename) self.assertEqual(res.err, b"") - self.assertEqual(expected.splitlines(), - res.out.decode("utf8").splitlines()) + self.assertEqual(expected.splitlines(), res.out.decode("utf8").splitlines()) self.assertEqual(res.rc, 0) def main(): - if __name__ != '__main__': + if __name__ != "__main__": return - if sys.argv[1:] == ['-g']: - for statements, kind in ((exec_tests, "exec"), (single_tests, "single"), - (eval_tests, "eval")): - print(kind+"_results = [") + if sys.argv[1:] == ["-g"]: + for statements, kind in ( + (exec_tests, "exec"), + (single_tests, "single"), + (eval_tests, "eval"), + ): + print(kind + "_results = [") for statement in statements: tree = ast.parse(statement, "?", kind) print("%r," % (to_tuple(tree),)) @@ -2951,137 +3043,2836 @@ def main(): raise SystemExit unittest.main() + #### EVERYTHING BELOW IS GENERATED BY python Lib/test/test_ast.py -g ##### exec_results = [ -('Module', [('Expr', (1, 0, 1, 4), ('Constant', (1, 0, 1, 4), None, None))], []), -('Module', [('Expr', (1, 0, 1, 18), ('Constant', (1, 0, 1, 18), 'module docstring', None))], []), -('Module', [('FunctionDef', (1, 0, 1, 13), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 9, 1, 13))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 29), 'f', ('arguments', [], [], None, [], [], None, []), [('Expr', (1, 9, 1, 29), ('Constant', (1, 9, 1, 29), 'function docstring', None))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 14), 'f', ('arguments', [], [('arg', (1, 6, 1, 7), 'a', None, None)], None, [], [], None, []), [('Pass', (1, 10, 1, 14))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 16), 'f', ('arguments', [], [('arg', (1, 6, 1, 7), 'a', None, None)], None, [], [], None, [('Constant', (1, 8, 1, 9), 0, None)]), [('Pass', (1, 12, 1, 16))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 18), 'f', ('arguments', [], [], ('arg', (1, 7, 1, 11), 'args', None, None), [], [], None, []), [('Pass', (1, 14, 1, 18))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 23), 'f', ('arguments', [], [], ('arg', (1, 7, 1, 16), 'args', ('Starred', (1, 13, 1, 16), ('Name', (1, 14, 1, 16), 'Ts', ('Load',)), ('Load',)), None), [], [], None, []), [('Pass', (1, 19, 1, 23))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 36), 'f', ('arguments', [], [], ('arg', (1, 7, 1, 29), 'args', ('Starred', (1, 13, 1, 29), ('Subscript', (1, 14, 1, 29), ('Name', (1, 14, 1, 19), 'tuple', ('Load',)), ('Tuple', (1, 20, 1, 28), [('Name', (1, 20, 1, 23), 'int', ('Load',)), ('Constant', (1, 25, 1, 28), Ellipsis, None)], ('Load',)), ('Load',)), ('Load',)), None), [], [], None, []), [('Pass', (1, 32, 1, 36))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 36), 'f', ('arguments', [], [], ('arg', (1, 7, 1, 29), 'args', ('Starred', (1, 13, 1, 29), ('Subscript', (1, 14, 1, 29), ('Name', (1, 14, 1, 19), 'tuple', ('Load',)), ('Tuple', 
(1, 20, 1, 28), [('Name', (1, 20, 1, 23), 'int', ('Load',)), ('Starred', (1, 25, 1, 28), ('Name', (1, 26, 1, 28), 'Ts', ('Load',)), ('Load',))], ('Load',)), ('Load',)), ('Load',)), None), [], [], None, []), [('Pass', (1, 32, 1, 36))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 21), 'f', ('arguments', [], [], None, [], [], ('arg', (1, 8, 1, 14), 'kwargs', None, None), []), [('Pass', (1, 17, 1, 21))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 71), 'f', ('arguments', [], [('arg', (1, 6, 1, 7), 'a', None, None), ('arg', (1, 9, 1, 10), 'b', None, None), ('arg', (1, 14, 1, 15), 'c', None, None), ('arg', (1, 22, 1, 23), 'd', None, None), ('arg', (1, 28, 1, 29), 'e', None, None)], ('arg', (1, 35, 1, 39), 'args', None, None), [('arg', (1, 41, 1, 42), 'f', None, None)], [('Constant', (1, 43, 1, 45), 42, None)], ('arg', (1, 49, 1, 55), 'kwargs', None, None), [('Constant', (1, 11, 1, 12), 1, None), ('Constant', (1, 16, 1, 20), None, None), ('List', (1, 24, 1, 26), [], ('Load',)), ('Dict', (1, 30, 1, 32), [], [])]), [('Expr', (1, 58, 1, 71), ('Constant', (1, 58, 1, 71), 'doc for f()', None))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 27), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 23, 1, 27))], [], ('Subscript', (1, 11, 1, 21), ('Name', (1, 11, 1, 16), 'tuple', ('Load',)), ('Tuple', (1, 17, 1, 20), [('Starred', (1, 17, 1, 20), ('Name', (1, 18, 1, 20), 'Ts', ('Load',)), ('Load',))], ('Load',)), ('Load',)), None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 32), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 28, 1, 32))], [], ('Subscript', (1, 11, 1, 26), ('Name', (1, 11, 1, 16), 'tuple', ('Load',)), ('Tuple', (1, 17, 1, 25), [('Name', (1, 17, 1, 20), 'int', ('Load',)), ('Starred', (1, 22, 1, 25), ('Name', (1, 23, 1, 25), 'Ts', ('Load',)), ('Load',))], ('Load',)), ('Load',)), None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 45), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 41, 1, 45))], [], ('Subscript', (1, 11, 1, 39), ('Name', (1, 11, 1, 16), 'tuple', ('Load',)), ('Tuple', (1, 17, 1, 38), [('Name', (1, 17, 1, 20), 'int', ('Load',)), ('Starred', (1, 22, 1, 38), ('Subscript', (1, 23, 1, 38), ('Name', (1, 23, 1, 28), 'tuple', ('Load',)), ('Tuple', (1, 29, 1, 37), [('Name', (1, 29, 1, 32), 'int', ('Load',)), ('Constant', (1, 34, 1, 37), Ellipsis, None)], ('Load',)), ('Load',)), ('Load',))], ('Load',)), ('Load',)), None, [])], []), -('Module', [('ClassDef', (1, 0, 1, 12), 'C', [], [], [('Pass', (1, 8, 1, 12))], [], [])], []), -('Module', [('ClassDef', (1, 0, 1, 32), 'C', [], [], [('Expr', (1, 9, 1, 32), ('Constant', (1, 9, 1, 32), 'docstring for class C', None))], [], [])], []), -('Module', [('ClassDef', (1, 0, 1, 21), 'C', [('Name', (1, 8, 1, 14), 'object', ('Load',))], [], [('Pass', (1, 17, 1, 21))], [], [])], []), -('Module', [('FunctionDef', (1, 0, 1, 16), 'f', ('arguments', [], [], None, [], [], None, []), [('Return', (1, 8, 1, 16), ('Constant', (1, 15, 1, 16), 1, None))], [], None, None, [])], []), -('Module', [('Delete', (1, 0, 1, 5), [('Name', (1, 4, 1, 5), 'v', ('Del',))])], []), -('Module', [('Assign', (1, 0, 1, 5), [('Name', (1, 0, 1, 1), 'v', ('Store',))], ('Constant', (1, 4, 1, 5), 1, None), None)], []), -('Module', [('Assign', (1, 0, 1, 7), [('Tuple', (1, 0, 1, 3), [('Name', (1, 0, 1, 1), 'a', ('Store',)), ('Name', (1, 2, 1, 3), 'b', ('Store',))], ('Store',))], ('Name', (1, 6, 1, 7), 'c', ('Load',)), None)], []), -('Module', [('Assign', (1, 0, 1, 
9), [('Tuple', (1, 0, 1, 5), [('Name', (1, 1, 1, 2), 'a', ('Store',)), ('Name', (1, 3, 1, 4), 'b', ('Store',))], ('Store',))], ('Name', (1, 8, 1, 9), 'c', ('Load',)), None)], []), -('Module', [('Assign', (1, 0, 1, 9), [('List', (1, 0, 1, 5), [('Name', (1, 1, 1, 2), 'a', ('Store',)), ('Name', (1, 3, 1, 4), 'b', ('Store',))], ('Store',))], ('Name', (1, 8, 1, 9), 'c', ('Load',)), None)], []), -('Module', [('AnnAssign', (1, 0, 1, 13), ('Name', (1, 0, 1, 1), 'x', ('Store',)), ('Subscript', (1, 3, 1, 13), ('Name', (1, 3, 1, 8), 'tuple', ('Load',)), ('Tuple', (1, 9, 1, 12), [('Starred', (1, 9, 1, 12), ('Name', (1, 10, 1, 12), 'Ts', ('Load',)), ('Load',))], ('Load',)), ('Load',)), None, 1)], []), -('Module', [('AnnAssign', (1, 0, 1, 18), ('Name', (1, 0, 1, 1), 'x', ('Store',)), ('Subscript', (1, 3, 1, 18), ('Name', (1, 3, 1, 8), 'tuple', ('Load',)), ('Tuple', (1, 9, 1, 17), [('Name', (1, 9, 1, 12), 'int', ('Load',)), ('Starred', (1, 14, 1, 17), ('Name', (1, 15, 1, 17), 'Ts', ('Load',)), ('Load',))], ('Load',)), ('Load',)), None, 1)], []), -('Module', [('AnnAssign', (1, 0, 1, 31), ('Name', (1, 0, 1, 1), 'x', ('Store',)), ('Subscript', (1, 3, 1, 31), ('Name', (1, 3, 1, 8), 'tuple', ('Load',)), ('Tuple', (1, 9, 1, 30), [('Name', (1, 9, 1, 12), 'int', ('Load',)), ('Starred', (1, 14, 1, 30), ('Subscript', (1, 15, 1, 30), ('Name', (1, 15, 1, 20), 'tuple', ('Load',)), ('Tuple', (1, 21, 1, 29), [('Name', (1, 21, 1, 24), 'str', ('Load',)), ('Constant', (1, 26, 1, 29), Ellipsis, None)], ('Load',)), ('Load',)), ('Load',))], ('Load',)), ('Load',)), None, 1)], []), -('Module', [('AugAssign', (1, 0, 1, 6), ('Name', (1, 0, 1, 1), 'v', ('Store',)), ('Add',), ('Constant', (1, 5, 1, 6), 1, None))], []), -('Module', [('For', (1, 0, 1, 15), ('Name', (1, 4, 1, 5), 'v', ('Store',)), ('Name', (1, 9, 1, 10), 'v', ('Load',)), [('Pass', (1, 11, 1, 15))], [], None)], []), -('Module', [('While', (1, 0, 1, 12), ('Name', (1, 6, 1, 7), 'v', ('Load',)), [('Pass', (1, 8, 1, 12))], [])], []), -('Module', [('If', (1, 0, 1, 9), ('Name', (1, 3, 1, 4), 'v', ('Load',)), [('Pass', (1, 5, 1, 9))], [])], []), -('Module', [('If', (1, 0, 4, 6), ('Name', (1, 3, 1, 4), 'a', ('Load',)), [('Pass', (2, 2, 2, 6))], [('If', (3, 0, 4, 6), ('Name', (3, 5, 3, 6), 'b', ('Load',)), [('Pass', (4, 2, 4, 6))], [])])], []), -('Module', [('If', (1, 0, 6, 6), ('Name', (1, 3, 1, 4), 'a', ('Load',)), [('Pass', (2, 2, 2, 6))], [('If', (3, 0, 6, 6), ('Name', (3, 5, 3, 6), 'b', ('Load',)), [('Pass', (4, 2, 4, 6))], [('Pass', (6, 2, 6, 6))])])], []), -('Module', [('With', (1, 0, 1, 17), [('withitem', ('Name', (1, 5, 1, 6), 'x', ('Load',)), ('Name', (1, 10, 1, 11), 'y', ('Store',)))], [('Pass', (1, 13, 1, 17))], None)], []), -('Module', [('With', (1, 0, 1, 25), [('withitem', ('Name', (1, 5, 1, 6), 'x', ('Load',)), ('Name', (1, 10, 1, 11), 'y', ('Store',))), ('withitem', ('Name', (1, 13, 1, 14), 'z', ('Load',)), ('Name', (1, 18, 1, 19), 'q', ('Store',)))], [('Pass', (1, 21, 1, 25))], None)], []), -('Module', [('Raise', (1, 0, 1, 25), ('Call', (1, 6, 1, 25), ('Name', (1, 6, 1, 15), 'Exception', ('Load',)), [('Constant', (1, 16, 1, 24), 'string', None)], []), None)], []), -('Module', [('Try', (1, 0, 4, 6), [('Pass', (2, 2, 2, 6))], [('ExceptHandler', (3, 0, 4, 6), ('Name', (3, 7, 3, 16), 'Exception', ('Load',)), None, [('Pass', (4, 2, 4, 6))])], [], [])], []), -('Module', [('Try', (1, 0, 4, 6), [('Pass', (2, 2, 2, 6))], [], [], [('Pass', (4, 2, 4, 6))])], []), -('Module', [('TryStar', (1, 0, 4, 6), [('Pass', (2, 2, 2, 6))], [('ExceptHandler', (3, 0, 4, 6), ('Name', 
(3, 8, 3, 17), 'Exception', ('Load',)), None, [('Pass', (4, 2, 4, 6))])], [], [])], []), -('Module', [('Assert', (1, 0, 1, 8), ('Name', (1, 7, 1, 8), 'v', ('Load',)), None)], []), -('Module', [('Import', (1, 0, 1, 10), [('alias', (1, 7, 1, 10), 'sys', None)])], []), -('Module', [('ImportFrom', (1, 0, 1, 17), 'sys', [('alias', (1, 16, 1, 17), 'v', None)], 0)], []), -('Module', [('Global', (1, 0, 1, 8), ['v'])], []), -('Module', [('Expr', (1, 0, 1, 1), ('Constant', (1, 0, 1, 1), 1, None))], []), -('Module', [('Pass', (1, 0, 1, 4))], []), -('Module', [('For', (1, 0, 1, 16), ('Name', (1, 4, 1, 5), 'v', ('Store',)), ('Name', (1, 9, 1, 10), 'v', ('Load',)), [('Break', (1, 11, 1, 16))], [], None)], []), -('Module', [('For', (1, 0, 1, 19), ('Name', (1, 4, 1, 5), 'v', ('Store',)), ('Name', (1, 9, 1, 10), 'v', ('Load',)), [('Continue', (1, 11, 1, 19))], [], None)], []), -('Module', [('For', (1, 0, 1, 18), ('Tuple', (1, 4, 1, 7), [('Name', (1, 4, 1, 5), 'a', ('Store',)), ('Name', (1, 6, 1, 7), 'b', ('Store',))], ('Store',)), ('Name', (1, 11, 1, 12), 'c', ('Load',)), [('Pass', (1, 14, 1, 18))], [], None)], []), -('Module', [('For', (1, 0, 1, 20), ('Tuple', (1, 4, 1, 9), [('Name', (1, 5, 1, 6), 'a', ('Store',)), ('Name', (1, 7, 1, 8), 'b', ('Store',))], ('Store',)), ('Name', (1, 13, 1, 14), 'c', ('Load',)), [('Pass', (1, 16, 1, 20))], [], None)], []), -('Module', [('For', (1, 0, 1, 20), ('List', (1, 4, 1, 9), [('Name', (1, 5, 1, 6), 'a', ('Store',)), ('Name', (1, 7, 1, 8), 'b', ('Store',))], ('Store',)), ('Name', (1, 13, 1, 14), 'c', ('Load',)), [('Pass', (1, 16, 1, 20))], [], None)], []), -('Module', [('Expr', (1, 0, 11, 5), ('GeneratorExp', (1, 0, 11, 5), ('Tuple', (2, 4, 6, 5), [('Name', (3, 4, 3, 6), 'Aa', ('Load',)), ('Name', (5, 7, 5, 9), 'Bb', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (8, 4, 10, 6), [('Name', (8, 4, 8, 6), 'Aa', ('Store',)), ('Name', (10, 4, 10, 6), 'Bb', ('Store',))], ('Store',)), ('Name', (10, 10, 10, 12), 'Cc', ('Load',)), [], 0)]))], []), -('Module', [('Expr', (1, 0, 1, 34), ('DictComp', (1, 0, 1, 34), ('Name', (1, 1, 1, 2), 'a', ('Load',)), ('Name', (1, 5, 1, 6), 'b', ('Load',)), [('comprehension', ('Name', (1, 11, 1, 12), 'w', ('Store',)), ('Name', (1, 16, 1, 17), 'x', ('Load',)), [], 0), ('comprehension', ('Name', (1, 22, 1, 23), 'm', ('Store',)), ('Name', (1, 27, 1, 28), 'p', ('Load',)), [('Name', (1, 32, 1, 33), 'g', ('Load',))], 0)]))], []), -('Module', [('Expr', (1, 0, 1, 20), ('DictComp', (1, 0, 1, 20), ('Name', (1, 1, 1, 2), 'a', ('Load',)), ('Name', (1, 5, 1, 6), 'b', ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 14), [('Name', (1, 11, 1, 12), 'v', ('Store',)), ('Name', (1, 13, 1, 14), 'w', ('Store',))], ('Store',)), ('Name', (1, 18, 1, 19), 'x', ('Load',)), [], 0)]))], []), -('Module', [('Expr', (1, 0, 1, 19), ('SetComp', (1, 0, 1, 19), ('Name', (1, 1, 1, 2), 'r', ('Load',)), [('comprehension', ('Name', (1, 7, 1, 8), 'l', ('Store',)), ('Name', (1, 12, 1, 13), 'x', ('Load',)), [('Name', (1, 17, 1, 18), 'g', ('Load',))], 0)]))], []), -('Module', [('Expr', (1, 0, 1, 16), ('SetComp', (1, 0, 1, 16), ('Name', (1, 1, 1, 2), 'r', ('Load',)), [('comprehension', ('Tuple', (1, 7, 1, 10), [('Name', (1, 7, 1, 8), 'l', ('Store',)), ('Name', (1, 9, 1, 10), 'm', ('Store',))], ('Store',)), ('Name', (1, 14, 1, 15), 'x', ('Load',)), [], 0)]))], []), -('Module', [('AsyncFunctionDef', (1, 0, 3, 18), 'f', ('arguments', [], [], None, [], [], None, []), [('Expr', (2, 1, 2, 17), ('Constant', (2, 1, 2, 17), 'async function', None)), ('Expr', (3, 1, 3, 18), ('Await', (3, 
1, 3, 18), ('Call', (3, 7, 3, 18), ('Name', (3, 7, 3, 16), 'something', ('Load',)), [], [])))], [], None, None, [])], []), -('Module', [('AsyncFunctionDef', (1, 0, 3, 8), 'f', ('arguments', [], [], None, [], [], None, []), [('AsyncFor', (2, 1, 3, 8), ('Name', (2, 11, 2, 12), 'e', ('Store',)), ('Name', (2, 16, 2, 17), 'i', ('Load',)), [('Expr', (2, 19, 2, 20), ('Constant', (2, 19, 2, 20), 1, None))], [('Expr', (3, 7, 3, 8), ('Constant', (3, 7, 3, 8), 2, None))], None)], [], None, None, [])], []), -('Module', [('AsyncFunctionDef', (1, 0, 2, 21), 'f', ('arguments', [], [], None, [], [], None, []), [('AsyncWith', (2, 1, 2, 21), [('withitem', ('Name', (2, 12, 2, 13), 'a', ('Load',)), ('Name', (2, 17, 2, 18), 'b', ('Store',)))], [('Expr', (2, 20, 2, 21), ('Constant', (2, 20, 2, 21), 1, None))], None)], [], None, None, [])], []), -('Module', [('Expr', (1, 0, 1, 14), ('Dict', (1, 0, 1, 14), [None, ('Constant', (1, 10, 1, 11), 2, None)], [('Dict', (1, 3, 1, 8), [('Constant', (1, 4, 1, 5), 1, None)], [('Constant', (1, 6, 1, 7), 2, None)]), ('Constant', (1, 12, 1, 13), 3, None)]))], []), -('Module', [('Expr', (1, 0, 1, 12), ('Set', (1, 0, 1, 12), [('Starred', (1, 1, 1, 8), ('Set', (1, 2, 1, 8), [('Constant', (1, 3, 1, 4), 1, None), ('Constant', (1, 6, 1, 7), 2, None)]), ('Load',)), ('Constant', (1, 10, 1, 11), 3, None)]))], []), -('Module', [('AsyncFunctionDef', (1, 0, 2, 21), 'f', ('arguments', [], [], None, [], [], None, []), [('Expr', (2, 1, 2, 21), ('ListComp', (2, 1, 2, 21), ('Name', (2, 2, 2, 3), 'i', ('Load',)), [('comprehension', ('Name', (2, 14, 2, 15), 'b', ('Store',)), ('Name', (2, 19, 2, 20), 'c', ('Load',)), [], 1)]))], [], None, None, [])], []), -('Module', [('FunctionDef', (4, 0, 4, 13), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (4, 9, 4, 13))], [('Name', (1, 1, 1, 6), 'deco1', ('Load',)), ('Call', (2, 1, 2, 8), ('Name', (2, 1, 2, 6), 'deco2', ('Load',)), [], []), ('Call', (3, 1, 3, 9), ('Name', (3, 1, 3, 6), 'deco3', ('Load',)), [('Constant', (3, 7, 3, 8), 1, None)], [])], None, None, [])], []), -('Module', [('AsyncFunctionDef', (4, 0, 4, 19), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (4, 15, 4, 19))], [('Name', (1, 1, 1, 6), 'deco1', ('Load',)), ('Call', (2, 1, 2, 8), ('Name', (2, 1, 2, 6), 'deco2', ('Load',)), [], []), ('Call', (3, 1, 3, 9), ('Name', (3, 1, 3, 6), 'deco3', ('Load',)), [('Constant', (3, 7, 3, 8), 1, None)], [])], None, None, [])], []), -('Module', [('ClassDef', (4, 0, 4, 13), 'C', [], [], [('Pass', (4, 9, 4, 13))], [('Name', (1, 1, 1, 6), 'deco1', ('Load',)), ('Call', (2, 1, 2, 8), ('Name', (2, 1, 2, 6), 'deco2', ('Load',)), [], []), ('Call', (3, 1, 3, 9), ('Name', (3, 1, 3, 6), 'deco3', ('Load',)), [('Constant', (3, 7, 3, 8), 1, None)], [])], [])], []), -('Module', [('FunctionDef', (2, 0, 2, 13), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (2, 9, 2, 13))], [('Call', (1, 1, 1, 19), ('Name', (1, 1, 1, 5), 'deco', ('Load',)), [('GeneratorExp', (1, 5, 1, 19), ('Name', (1, 6, 1, 7), 'a', ('Load',)), [('comprehension', ('Name', (1, 12, 1, 13), 'a', ('Store',)), ('Name', (1, 17, 1, 18), 'b', ('Load',)), [], 0)])], [])], None, None, [])], []), -('Module', [('FunctionDef', (2, 0, 2, 13), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (2, 9, 2, 13))], [('Attribute', (1, 1, 1, 6), ('Attribute', (1, 1, 1, 4), ('Name', (1, 1, 1, 2), 'a', ('Load',)), 'b', ('Load',)), 'c', ('Load',))], None, None, [])], []), -('Module', [('Expr', (1, 0, 1, 8), ('NamedExpr', (1, 1, 1, 7), ('Name', (1, 1, 1, 2), 'a', 
('Store',)), ('Constant', (1, 6, 1, 7), 1, None)))], []), -('Module', [('FunctionDef', (1, 0, 1, 18), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [], None, [], [], None, []), [('Pass', (1, 14, 1, 18))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 26), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 12, 1, 13), 'c', None, None), ('arg', (1, 15, 1, 16), 'd', None, None), ('arg', (1, 18, 1, 19), 'e', None, None)], None, [], [], None, []), [('Pass', (1, 22, 1, 26))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 29), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 12, 1, 13), 'c', None, None)], None, [('arg', (1, 18, 1, 19), 'd', None, None), ('arg', (1, 21, 1, 22), 'e', None, None)], [None, None], None, []), [('Pass', (1, 25, 1, 29))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 39), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 12, 1, 13), 'c', None, None)], None, [('arg', (1, 18, 1, 19), 'd', None, None), ('arg', (1, 21, 1, 22), 'e', None, None)], [None, None], ('arg', (1, 26, 1, 32), 'kwargs', None, None), []), [('Pass', (1, 35, 1, 39))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 20), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [], None, [], [], None, [('Constant', (1, 8, 1, 9), 1, None)]), [('Pass', (1, 16, 1, 20))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 29), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 14, 1, 15), 'b', None, None), ('arg', (1, 19, 1, 20), 'c', None, None)], None, [], [], None, [('Constant', (1, 8, 1, 9), 1, None), ('Constant', (1, 16, 1, 17), 2, None), ('Constant', (1, 21, 1, 22), 4, None)]), [('Pass', (1, 25, 1, 29))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 32), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 14, 1, 15), 'b', None, None)], None, [('arg', (1, 22, 1, 23), 'c', None, None)], [('Constant', (1, 24, 1, 25), 4, None)], None, [('Constant', (1, 8, 1, 9), 1, None), ('Constant', (1, 16, 1, 17), 2, None)]), [('Pass', (1, 28, 1, 32))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 30), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 14, 1, 15), 'b', None, None)], None, [('arg', (1, 22, 1, 23), 'c', None, None)], [None], None, [('Constant', (1, 8, 1, 9), 1, None), ('Constant', (1, 16, 1, 17), 2, None)]), [('Pass', (1, 26, 1, 30))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 42), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 14, 1, 15), 'b', None, None)], None, [('arg', (1, 22, 1, 23), 'c', None, None)], [('Constant', (1, 24, 1, 25), 4, None)], ('arg', (1, 29, 1, 35), 'kwargs', None, None), [('Constant', (1, 8, 1, 9), 1, None), ('Constant', (1, 16, 1, 17), 2, None)]), [('Pass', (1, 38, 1, 42))], [], None, None, [])], []), -('Module', [('FunctionDef', (1, 0, 1, 40), 'f', ('arguments', [('arg', (1, 6, 1, 7), 'a', None, None)], [('arg', (1, 14, 1, 15), 'b', None, None)], None, [('arg', (1, 22, 1, 23), 'c', None, None)], [None], ('arg', (1, 27, 1, 33), 'kwargs', None, None), [('Constant', (1, 8, 1, 9), 1, None), ('Constant', (1, 16, 1, 17), 2, None)]), [('Pass', (1, 36, 1, 40))], [], None, None, [])], []), -('Module', [('TypeAlias', (1, 0, 1, 12), ('Name', (1, 5, 1, 6), 'X', ('Store',)), [], ('Name', (1, 9, 1, 12), 'int', ('Load',)))], []), -('Module', [('TypeAlias', (1, 
0, 1, 15), ('Name', (1, 5, 1, 6), 'X', ('Store',)), [('TypeVar', (1, 7, 1, 8), 'T', None)], ('Name', (1, 12, 1, 15), 'int', ('Load',)))], []), -('Module', [('TypeAlias', (1, 0, 1, 32), ('Name', (1, 5, 1, 6), 'X', ('Store',)), [('TypeVar', (1, 7, 1, 8), 'T', None), ('TypeVarTuple', (1, 10, 1, 13), 'Ts'), ('ParamSpec', (1, 15, 1, 18), 'P')], ('Tuple', (1, 22, 1, 32), [('Name', (1, 23, 1, 24), 'T', ('Load',)), ('Name', (1, 26, 1, 28), 'Ts', ('Load',)), ('Name', (1, 30, 1, 31), 'P', ('Load',))], ('Load',)))], []), -('Module', [('TypeAlias', (1, 0, 1, 37), ('Name', (1, 5, 1, 6), 'X', ('Store',)), [('TypeVar', (1, 7, 1, 13), 'T', ('Name', (1, 10, 1, 13), 'int', ('Load',))), ('TypeVarTuple', (1, 15, 1, 18), 'Ts'), ('ParamSpec', (1, 20, 1, 23), 'P')], ('Tuple', (1, 27, 1, 37), [('Name', (1, 28, 1, 29), 'T', ('Load',)), ('Name', (1, 31, 1, 33), 'Ts', ('Load',)), ('Name', (1, 35, 1, 36), 'P', ('Load',))], ('Load',)))], []), -('Module', [('TypeAlias', (1, 0, 1, 44), ('Name', (1, 5, 1, 6), 'X', ('Store',)), [('TypeVar', (1, 7, 1, 20), 'T', ('Tuple', (1, 10, 1, 20), [('Name', (1, 11, 1, 14), 'int', ('Load',)), ('Name', (1, 16, 1, 19), 'str', ('Load',))], ('Load',))), ('TypeVarTuple', (1, 22, 1, 25), 'Ts'), ('ParamSpec', (1, 27, 1, 30), 'P')], ('Tuple', (1, 34, 1, 44), [('Name', (1, 35, 1, 36), 'T', ('Load',)), ('Name', (1, 38, 1, 40), 'Ts', ('Load',)), ('Name', (1, 42, 1, 43), 'P', ('Load',))], ('Load',)))], []), -('Module', [('ClassDef', (1, 0, 1, 16), 'X', [], [], [('Pass', (1, 12, 1, 16))], [], [('TypeVar', (1, 8, 1, 9), 'T', None)])], []), -('Module', [('ClassDef', (1, 0, 1, 26), 'X', [], [], [('Pass', (1, 22, 1, 26))], [], [('TypeVar', (1, 8, 1, 9), 'T', None), ('TypeVarTuple', (1, 11, 1, 14), 'Ts'), ('ParamSpec', (1, 16, 1, 19), 'P')])], []), -('Module', [('ClassDef', (1, 0, 1, 31), 'X', [], [], [('Pass', (1, 27, 1, 31))], [], [('TypeVar', (1, 8, 1, 14), 'T', ('Name', (1, 11, 1, 14), 'int', ('Load',))), ('TypeVarTuple', (1, 16, 1, 19), 'Ts'), ('ParamSpec', (1, 21, 1, 24), 'P')])], []), -('Module', [('ClassDef', (1, 0, 1, 38), 'X', [], [], [('Pass', (1, 34, 1, 38))], [], [('TypeVar', (1, 8, 1, 21), 'T', ('Tuple', (1, 11, 1, 21), [('Name', (1, 12, 1, 15), 'int', ('Load',)), ('Name', (1, 17, 1, 20), 'str', ('Load',))], ('Load',))), ('TypeVarTuple', (1, 23, 1, 26), 'Ts'), ('ParamSpec', (1, 28, 1, 31), 'P')])], []), -('Module', [('FunctionDef', (1, 0, 1, 16), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 12, 1, 16))], [], None, None, [('TypeVar', (1, 6, 1, 7), 'T', None)])], []), -('Module', [('FunctionDef', (1, 0, 1, 26), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 22, 1, 26))], [], None, None, [('TypeVar', (1, 6, 1, 7), 'T', None), ('TypeVarTuple', (1, 9, 1, 12), 'Ts'), ('ParamSpec', (1, 14, 1, 17), 'P')])], []), -('Module', [('FunctionDef', (1, 0, 1, 31), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 27, 1, 31))], [], None, None, [('TypeVar', (1, 6, 1, 12), 'T', ('Name', (1, 9, 1, 12), 'int', ('Load',))), ('TypeVarTuple', (1, 14, 1, 17), 'Ts'), ('ParamSpec', (1, 19, 1, 22), 'P')])], []), -('Module', [('FunctionDef', (1, 0, 1, 38), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 34, 1, 38))], [], None, None, [('TypeVar', (1, 6, 1, 19), 'T', ('Tuple', (1, 9, 1, 19), [('Name', (1, 10, 1, 13), 'int', ('Load',)), ('Name', (1, 15, 1, 18), 'str', ('Load',))], ('Load',))), ('TypeVarTuple', (1, 21, 1, 24), 'Ts'), ('ParamSpec', (1, 26, 1, 29), 'P')])], []), -] -single_results = [ -('Interactive', [('Expr', (1, 0, 1, 3), ('BinOp', 
(1, 0, 1, 3), ('Constant', (1, 0, 1, 1), 1, None), ('Add',), ('Constant', (1, 2, 1, 3), 2, None)))]), -] -eval_results = [ -('Expression', ('Constant', (1, 0, 1, 4), None, None)), -('Expression', ('BoolOp', (1, 0, 1, 7), ('And',), [('Name', (1, 0, 1, 1), 'a', ('Load',)), ('Name', (1, 6, 1, 7), 'b', ('Load',))])), -('Expression', ('BinOp', (1, 0, 1, 5), ('Name', (1, 0, 1, 1), 'a', ('Load',)), ('Add',), ('Name', (1, 4, 1, 5), 'b', ('Load',)))), -('Expression', ('UnaryOp', (1, 0, 1, 5), ('Not',), ('Name', (1, 4, 1, 5), 'v', ('Load',)))), -('Expression', ('Lambda', (1, 0, 1, 11), ('arguments', [], [], None, [], [], None, []), ('Constant', (1, 7, 1, 11), None, None))), -('Expression', ('Dict', (1, 0, 1, 7), [('Constant', (1, 2, 1, 3), 1, None)], [('Constant', (1, 4, 1, 5), 2, None)])), -('Expression', ('Dict', (1, 0, 1, 2), [], [])), -('Expression', ('Set', (1, 0, 1, 7), [('Constant', (1, 1, 1, 5), None, None)])), -('Expression', ('Dict', (1, 0, 5, 6), [('Constant', (2, 6, 2, 7), 1, None)], [('Constant', (4, 10, 4, 11), 2, None)])), -('Expression', ('ListComp', (1, 0, 1, 19), ('Name', (1, 1, 1, 2), 'a', ('Load',)), [('comprehension', ('Name', (1, 7, 1, 8), 'b', ('Store',)), ('Name', (1, 12, 1, 13), 'c', ('Load',)), [('Name', (1, 17, 1, 18), 'd', ('Load',))], 0)])), -('Expression', ('GeneratorExp', (1, 0, 1, 19), ('Name', (1, 1, 1, 2), 'a', ('Load',)), [('comprehension', ('Name', (1, 7, 1, 8), 'b', ('Store',)), ('Name', (1, 12, 1, 13), 'c', ('Load',)), [('Name', (1, 17, 1, 18), 'd', ('Load',))], 0)])), -('Expression', ('ListComp', (1, 0, 1, 20), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 14), [('Name', (1, 11, 1, 12), 'a', ('Store',)), ('Name', (1, 13, 1, 14), 'b', ('Store',))], ('Store',)), ('Name', (1, 18, 1, 19), 'c', ('Load',)), [], 0)])), -('Expression', ('ListComp', (1, 0, 1, 22), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 16), [('Name', (1, 12, 1, 13), 'a', ('Store',)), ('Name', (1, 14, 1, 15), 'b', ('Store',))], ('Store',)), ('Name', (1, 20, 1, 21), 'c', ('Load',)), [], 0)])), -('Expression', ('ListComp', (1, 0, 1, 22), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('List', (1, 11, 1, 16), [('Name', (1, 12, 1, 13), 'a', ('Store',)), ('Name', (1, 14, 1, 15), 'b', ('Store',))], ('Store',)), ('Name', (1, 20, 1, 21), 'c', ('Load',)), [], 0)])), -('Expression', ('SetComp', (1, 0, 1, 20), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 14), [('Name', (1, 11, 1, 12), 'a', ('Store',)), ('Name', (1, 13, 1, 14), 'b', ('Store',))], ('Store',)), ('Name', (1, 18, 1, 19), 'c', ('Load',)), [], 0)])), -('Expression', ('SetComp', (1, 0, 1, 22), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 16), [('Name', (1, 12, 1, 13), 'a', ('Store',)), ('Name', (1, 14, 1, 15), 'b', ('Store',))], ('Store',)), ('Name', (1, 20, 1, 21), 'c', ('Load',)), [], 0)])), -('Expression', ('SetComp', (1, 0, 1, 22), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('List', (1, 11, 1, 16), [('Name', (1, 12, 1, 13), 
'a', ('Store',)), ('Name', (1, 14, 1, 15), 'b', ('Store',))], ('Store',)), ('Name', (1, 20, 1, 21), 'c', ('Load',)), [], 0)])), -('Expression', ('GeneratorExp', (1, 0, 1, 20), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 14), [('Name', (1, 11, 1, 12), 'a', ('Store',)), ('Name', (1, 13, 1, 14), 'b', ('Store',))], ('Store',)), ('Name', (1, 18, 1, 19), 'c', ('Load',)), [], 0)])), -('Expression', ('GeneratorExp', (1, 0, 1, 22), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('Tuple', (1, 11, 1, 16), [('Name', (1, 12, 1, 13), 'a', ('Store',)), ('Name', (1, 14, 1, 15), 'b', ('Store',))], ('Store',)), ('Name', (1, 20, 1, 21), 'c', ('Load',)), [], 0)])), -('Expression', ('GeneratorExp', (1, 0, 1, 22), ('Tuple', (1, 1, 1, 6), [('Name', (1, 2, 1, 3), 'a', ('Load',)), ('Name', (1, 4, 1, 5), 'b', ('Load',))], ('Load',)), [('comprehension', ('List', (1, 11, 1, 16), [('Name', (1, 12, 1, 13), 'a', ('Store',)), ('Name', (1, 14, 1, 15), 'b', ('Store',))], ('Store',)), ('Name', (1, 20, 1, 21), 'c', ('Load',)), [], 0)])), -('Expression', ('Compare', (1, 0, 1, 9), ('Constant', (1, 0, 1, 1), 1, None), [('Lt',), ('Lt',)], [('Constant', (1, 4, 1, 5), 2, None), ('Constant', (1, 8, 1, 9), 3, None)])), -('Expression', ('Call', (1, 0, 1, 17), ('Name', (1, 0, 1, 1), 'f', ('Load',)), [('Constant', (1, 2, 1, 3), 1, None), ('Constant', (1, 4, 1, 5), 2, None), ('Starred', (1, 10, 1, 12), ('Name', (1, 11, 1, 12), 'd', ('Load',)), ('Load',))], [('keyword', (1, 6, 1, 9), 'c', ('Constant', (1, 8, 1, 9), 3, None)), ('keyword', (1, 13, 1, 16), None, ('Name', (1, 15, 1, 16), 'e', ('Load',)))])), -('Expression', ('Call', (1, 0, 1, 10), ('Name', (1, 0, 1, 1), 'f', ('Load',)), [('Starred', (1, 2, 1, 9), ('List', (1, 3, 1, 9), [('Constant', (1, 4, 1, 5), 0, None), ('Constant', (1, 7, 1, 8), 1, None)], ('Load',)), ('Load',))], [])), -('Expression', ('Call', (1, 0, 1, 15), ('Name', (1, 0, 1, 1), 'f', ('Load',)), [('GeneratorExp', (1, 1, 1, 15), ('Name', (1, 2, 1, 3), 'a', ('Load',)), [('comprehension', ('Name', (1, 8, 1, 9), 'a', ('Store',)), ('Name', (1, 13, 1, 14), 'b', ('Load',)), [], 0)])], [])), -('Expression', ('Constant', (1, 0, 1, 2), 10, None)), -('Expression', ('Constant', (1, 0, 1, 8), 'string', None)), -('Expression', ('Attribute', (1, 0, 1, 3), ('Name', (1, 0, 1, 1), 'a', ('Load',)), 'b', ('Load',))), -('Expression', ('Subscript', (1, 0, 1, 6), ('Name', (1, 0, 1, 1), 'a', ('Load',)), ('Slice', (1, 2, 1, 5), ('Name', (1, 2, 1, 3), 'b', ('Load',)), ('Name', (1, 4, 1, 5), 'c', ('Load',)), None), ('Load',))), -('Expression', ('Name', (1, 0, 1, 1), 'v', ('Load',))), -('Expression', ('List', (1, 0, 1, 7), [('Constant', (1, 1, 1, 2), 1, None), ('Constant', (1, 3, 1, 4), 2, None), ('Constant', (1, 5, 1, 6), 3, None)], ('Load',))), -('Expression', ('List', (1, 0, 1, 2), [], ('Load',))), -('Expression', ('Tuple', (1, 0, 1, 5), [('Constant', (1, 0, 1, 1), 1, None), ('Constant', (1, 2, 1, 3), 2, None), ('Constant', (1, 4, 1, 5), 3, None)], ('Load',))), -('Expression', ('Tuple', (1, 0, 1, 7), [('Constant', (1, 1, 1, 2), 1, None), ('Constant', (1, 3, 1, 4), 2, None), ('Constant', (1, 5, 1, 6), 3, None)], ('Load',))), -('Expression', ('Tuple', (1, 0, 1, 2), [], ('Load',))), -('Expression', ('Call', (1, 0, 1, 17), ('Attribute', (1, 0, 1, 7), ('Attribute', (1, 0, 1, 5), ('Attribute', (1, 0, 1, 3), ('Name', (1, 0, 1, 1), 'a', ('Load',)), 
'b', ('Load',)), 'c', ('Load',)), 'd', ('Load',)), [('Subscript', (1, 8, 1, 16), ('Attribute', (1, 8, 1, 11), ('Name', (1, 8, 1, 9), 'a', ('Load',)), 'b', ('Load',)), ('Slice', (1, 12, 1, 15), ('Constant', (1, 12, 1, 13), 1, None), ('Constant', (1, 14, 1, 15), 2, None), None), ('Load',))], [])), + ("Module", [("Expr", (1, 0, 1, 4), ("Constant", (1, 0, 1, 4), None, None))], []), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 18), + ("Constant", (1, 0, 1, 18), "module docstring", None), + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 13), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 9, 1, 13))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 29), + "f", + ("arguments", [], [], None, [], [], None, []), + [ + ( + "Expr", + (1, 9, 1, 29), + ("Constant", (1, 9, 1, 29), "function docstring", None), + ) + ], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 14), + "f", + ( + "arguments", + [], + [("arg", (1, 6, 1, 7), "a", None, None)], + None, + [], + [], + None, + [], + ), + [("Pass", (1, 10, 1, 14))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 16), + "f", + ( + "arguments", + [], + [("arg", (1, 6, 1, 7), "a", None, None)], + None, + [], + [], + None, + [("Constant", (1, 8, 1, 9), 0, None)], + ), + [("Pass", (1, 12, 1, 16))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 18), + "f", + ( + "arguments", + [], + [], + ("arg", (1, 7, 1, 11), "args", None, None), + [], + [], + None, + [], + ), + [("Pass", (1, 14, 1, 18))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 23), + "f", + ( + "arguments", + [], + [], + ( + "arg", + (1, 7, 1, 16), + "args", + ( + "Starred", + (1, 13, 1, 16), + ("Name", (1, 14, 1, 16), "Ts", ("Load",)), + ("Load",), + ), + None, + ), + [], + [], + None, + [], + ), + [("Pass", (1, 19, 1, 23))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 36), + "f", + ( + "arguments", + [], + [], + ( + "arg", + (1, 7, 1, 29), + "args", + ( + "Starred", + (1, 13, 1, 29), + ( + "Subscript", + (1, 14, 1, 29), + ("Name", (1, 14, 1, 19), "tuple", ("Load",)), + ( + "Tuple", + (1, 20, 1, 28), + [ + ("Name", (1, 20, 1, 23), "int", ("Load",)), + ("Constant", (1, 25, 1, 28), Ellipsis, None), + ], + ("Load",), + ), + ("Load",), + ), + ("Load",), + ), + None, + ), + [], + [], + None, + [], + ), + [("Pass", (1, 32, 1, 36))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 36), + "f", + ( + "arguments", + [], + [], + ( + "arg", + (1, 7, 1, 29), + "args", + ( + "Starred", + (1, 13, 1, 29), + ( + "Subscript", + (1, 14, 1, 29), + ("Name", (1, 14, 1, 19), "tuple", ("Load",)), + ( + "Tuple", + (1, 20, 1, 28), + [ + ("Name", (1, 20, 1, 23), "int", ("Load",)), + ( + "Starred", + (1, 25, 1, 28), + ("Name", (1, 26, 1, 28), "Ts", ("Load",)), + ("Load",), + ), + ], + ("Load",), + ), + ("Load",), + ), + ("Load",), + ), + None, + ), + [], + [], + None, + [], + ), + [("Pass", (1, 32, 1, 36))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 21), + "f", + ( + "arguments", + [], + [], + None, + [], + [], + ("arg", (1, 8, 1, 14), "kwargs", None, None), + [], + ), + [("Pass", (1, 17, 1, 21))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + 
"Module", + [ + ( + "FunctionDef", + (1, 0, 1, 71), + "f", + ( + "arguments", + [], + [ + ("arg", (1, 6, 1, 7), "a", None, None), + ("arg", (1, 9, 1, 10), "b", None, None), + ("arg", (1, 14, 1, 15), "c", None, None), + ("arg", (1, 22, 1, 23), "d", None, None), + ("arg", (1, 28, 1, 29), "e", None, None), + ], + ("arg", (1, 35, 1, 39), "args", None, None), + [("arg", (1, 41, 1, 42), "f", None, None)], + [("Constant", (1, 43, 1, 45), 42, None)], + ("arg", (1, 49, 1, 55), "kwargs", None, None), + [ + ("Constant", (1, 11, 1, 12), 1, None), + ("Constant", (1, 16, 1, 20), None, None), + ("List", (1, 24, 1, 26), [], ("Load",)), + ("Dict", (1, 30, 1, 32), [], []), + ], + ), + [ + ( + "Expr", + (1, 58, 1, 71), + ("Constant", (1, 58, 1, 71), "doc for f()", None), + ) + ], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 27), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 23, 1, 27))], + [], + ( + "Subscript", + (1, 11, 1, 21), + ("Name", (1, 11, 1, 16), "tuple", ("Load",)), + ( + "Tuple", + (1, 17, 1, 20), + [ + ( + "Starred", + (1, 17, 1, 20), + ("Name", (1, 18, 1, 20), "Ts", ("Load",)), + ("Load",), + ) + ], + ("Load",), + ), + ("Load",), + ), + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 32), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 28, 1, 32))], + [], + ( + "Subscript", + (1, 11, 1, 26), + ("Name", (1, 11, 1, 16), "tuple", ("Load",)), + ( + "Tuple", + (1, 17, 1, 25), + [ + ("Name", (1, 17, 1, 20), "int", ("Load",)), + ( + "Starred", + (1, 22, 1, 25), + ("Name", (1, 23, 1, 25), "Ts", ("Load",)), + ("Load",), + ), + ], + ("Load",), + ), + ("Load",), + ), + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 45), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 41, 1, 45))], + [], + ( + "Subscript", + (1, 11, 1, 39), + ("Name", (1, 11, 1, 16), "tuple", ("Load",)), + ( + "Tuple", + (1, 17, 1, 38), + [ + ("Name", (1, 17, 1, 20), "int", ("Load",)), + ( + "Starred", + (1, 22, 1, 38), + ( + "Subscript", + (1, 23, 1, 38), + ("Name", (1, 23, 1, 28), "tuple", ("Load",)), + ( + "Tuple", + (1, 29, 1, 37), + [ + ("Name", (1, 29, 1, 32), "int", ("Load",)), + ( + "Constant", + (1, 34, 1, 37), + Ellipsis, + None, + ), + ], + ("Load",), + ), + ("Load",), + ), + ("Load",), + ), + ], + ("Load",), + ), + ("Load",), + ), + None, + [], + ) + ], + [], + ), + ( + "Module", + [("ClassDef", (1, 0, 1, 12), "C", [], [], [("Pass", (1, 8, 1, 12))], [], [])], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (1, 0, 1, 32), + "C", + [], + [], + [ + ( + "Expr", + (1, 9, 1, 32), + ("Constant", (1, 9, 1, 32), "docstring for class C", None), + ) + ], + [], + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (1, 0, 1, 21), + "C", + [("Name", (1, 8, 1, 14), "object", ("Load",))], + [], + [("Pass", (1, 17, 1, 21))], + [], + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 16), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Return", (1, 8, 1, 16), ("Constant", (1, 15, 1, 16), 1, None))], + [], + None, + None, + [], + ) + ], + [], + ), + ("Module", [("Delete", (1, 0, 1, 5), [("Name", (1, 4, 1, 5), "v", ("Del",))])], []), + ( + "Module", + [ + ( + "Assign", + (1, 0, 1, 5), + [("Name", (1, 0, 1, 1), "v", ("Store",))], + ("Constant", (1, 4, 1, 5), 1, None), + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "Assign", + (1, 0, 1, 7), + [ + ( + "Tuple", + (1, 0, 1, 3), + [ + 
("Name", (1, 0, 1, 1), "a", ("Store",)), + ("Name", (1, 2, 1, 3), "b", ("Store",)), + ], + ("Store",), + ) + ], + ("Name", (1, 6, 1, 7), "c", ("Load",)), + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "Assign", + (1, 0, 1, 9), + [ + ( + "Tuple", + (1, 0, 1, 5), + [ + ("Name", (1, 1, 1, 2), "a", ("Store",)), + ("Name", (1, 3, 1, 4), "b", ("Store",)), + ], + ("Store",), + ) + ], + ("Name", (1, 8, 1, 9), "c", ("Load",)), + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "Assign", + (1, 0, 1, 9), + [ + ( + "List", + (1, 0, 1, 5), + [ + ("Name", (1, 1, 1, 2), "a", ("Store",)), + ("Name", (1, 3, 1, 4), "b", ("Store",)), + ], + ("Store",), + ) + ], + ("Name", (1, 8, 1, 9), "c", ("Load",)), + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "AnnAssign", + (1, 0, 1, 13), + ("Name", (1, 0, 1, 1), "x", ("Store",)), + ( + "Subscript", + (1, 3, 1, 13), + ("Name", (1, 3, 1, 8), "tuple", ("Load",)), + ( + "Tuple", + (1, 9, 1, 12), + [ + ( + "Starred", + (1, 9, 1, 12), + ("Name", (1, 10, 1, 12), "Ts", ("Load",)), + ("Load",), + ) + ], + ("Load",), + ), + ("Load",), + ), + None, + 1, + ) + ], + [], + ), + ( + "Module", + [ + ( + "AnnAssign", + (1, 0, 1, 18), + ("Name", (1, 0, 1, 1), "x", ("Store",)), + ( + "Subscript", + (1, 3, 1, 18), + ("Name", (1, 3, 1, 8), "tuple", ("Load",)), + ( + "Tuple", + (1, 9, 1, 17), + [ + ("Name", (1, 9, 1, 12), "int", ("Load",)), + ( + "Starred", + (1, 14, 1, 17), + ("Name", (1, 15, 1, 17), "Ts", ("Load",)), + ("Load",), + ), + ], + ("Load",), + ), + ("Load",), + ), + None, + 1, + ) + ], + [], + ), + ( + "Module", + [ + ( + "AnnAssign", + (1, 0, 1, 31), + ("Name", (1, 0, 1, 1), "x", ("Store",)), + ( + "Subscript", + (1, 3, 1, 31), + ("Name", (1, 3, 1, 8), "tuple", ("Load",)), + ( + "Tuple", + (1, 9, 1, 30), + [ + ("Name", (1, 9, 1, 12), "int", ("Load",)), + ( + "Starred", + (1, 14, 1, 30), + ( + "Subscript", + (1, 15, 1, 30), + ("Name", (1, 15, 1, 20), "tuple", ("Load",)), + ( + "Tuple", + (1, 21, 1, 29), + [ + ("Name", (1, 21, 1, 24), "str", ("Load",)), + ( + "Constant", + (1, 26, 1, 29), + Ellipsis, + None, + ), + ], + ("Load",), + ), + ("Load",), + ), + ("Load",), + ), + ], + ("Load",), + ), + ("Load",), + ), + None, + 1, + ) + ], + [], + ), + ( + "Module", + [ + ( + "AugAssign", + (1, 0, 1, 6), + ("Name", (1, 0, 1, 1), "v", ("Store",)), + ("Add",), + ("Constant", (1, 5, 1, 6), 1, None), + ) + ], + [], + ), + ( + "Module", + [ + ( + "For", + (1, 0, 1, 15), + ("Name", (1, 4, 1, 5), "v", ("Store",)), + ("Name", (1, 9, 1, 10), "v", ("Load",)), + [("Pass", (1, 11, 1, 15))], + [], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "While", + (1, 0, 1, 12), + ("Name", (1, 6, 1, 7), "v", ("Load",)), + [("Pass", (1, 8, 1, 12))], + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "If", + (1, 0, 1, 9), + ("Name", (1, 3, 1, 4), "v", ("Load",)), + [("Pass", (1, 5, 1, 9))], + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "If", + (1, 0, 4, 6), + ("Name", (1, 3, 1, 4), "a", ("Load",)), + [("Pass", (2, 2, 2, 6))], + [ + ( + "If", + (3, 0, 4, 6), + ("Name", (3, 5, 3, 6), "b", ("Load",)), + [("Pass", (4, 2, 4, 6))], + [], + ) + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "If", + (1, 0, 6, 6), + ("Name", (1, 3, 1, 4), "a", ("Load",)), + [("Pass", (2, 2, 2, 6))], + [ + ( + "If", + (3, 0, 6, 6), + ("Name", (3, 5, 3, 6), "b", ("Load",)), + [("Pass", (4, 2, 4, 6))], + [("Pass", (6, 2, 6, 6))], + ) + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "With", + (1, 0, 1, 17), + [ + ( + "withitem", + ("Name", (1, 5, 1, 6), "x", ("Load",)), + ("Name", (1, 10, 1, 11), "y", 
("Store",)), + ) + ], + [("Pass", (1, 13, 1, 17))], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "With", + (1, 0, 1, 25), + [ + ( + "withitem", + ("Name", (1, 5, 1, 6), "x", ("Load",)), + ("Name", (1, 10, 1, 11), "y", ("Store",)), + ), + ( + "withitem", + ("Name", (1, 13, 1, 14), "z", ("Load",)), + ("Name", (1, 18, 1, 19), "q", ("Store",)), + ), + ], + [("Pass", (1, 21, 1, 25))], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "Raise", + (1, 0, 1, 25), + ( + "Call", + (1, 6, 1, 25), + ("Name", (1, 6, 1, 15), "Exception", ("Load",)), + [("Constant", (1, 16, 1, 24), "string", None)], + [], + ), + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "Try", + (1, 0, 4, 6), + [("Pass", (2, 2, 2, 6))], + [ + ( + "ExceptHandler", + (3, 0, 4, 6), + ("Name", (3, 7, 3, 16), "Exception", ("Load",)), + None, + [("Pass", (4, 2, 4, 6))], + ) + ], + [], + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "Try", + (1, 0, 4, 6), + [("Pass", (2, 2, 2, 6))], + [], + [], + [("Pass", (4, 2, 4, 6))], + ) + ], + [], + ), + ( + "Module", + [ + ( + "TryStar", + (1, 0, 4, 6), + [("Pass", (2, 2, 2, 6))], + [ + ( + "ExceptHandler", + (3, 0, 4, 6), + ("Name", (3, 8, 3, 17), "Exception", ("Load",)), + None, + [("Pass", (4, 2, 4, 6))], + ) + ], + [], + [], + ) + ], + [], + ), + ( + "Module", + [("Assert", (1, 0, 1, 8), ("Name", (1, 7, 1, 8), "v", ("Load",)), None)], + [], + ), + ( + "Module", + [("Import", (1, 0, 1, 10), [("alias", (1, 7, 1, 10), "sys", None)])], + [], + ), + ( + "Module", + [ + ( + "ImportFrom", + (1, 0, 1, 17), + "sys", + [("alias", (1, 16, 1, 17), "v", None)], + 0, + ) + ], + [], + ), + ("Module", [("Global", (1, 0, 1, 8), ["v"])], []), + ("Module", [("Expr", (1, 0, 1, 1), ("Constant", (1, 0, 1, 1), 1, None))], []), + ("Module", [("Pass", (1, 0, 1, 4))], []), + ( + "Module", + [ + ( + "For", + (1, 0, 1, 16), + ("Name", (1, 4, 1, 5), "v", ("Store",)), + ("Name", (1, 9, 1, 10), "v", ("Load",)), + [("Break", (1, 11, 1, 16))], + [], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "For", + (1, 0, 1, 19), + ("Name", (1, 4, 1, 5), "v", ("Store",)), + ("Name", (1, 9, 1, 10), "v", ("Load",)), + [("Continue", (1, 11, 1, 19))], + [], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "For", + (1, 0, 1, 18), + ( + "Tuple", + (1, 4, 1, 7), + [ + ("Name", (1, 4, 1, 5), "a", ("Store",)), + ("Name", (1, 6, 1, 7), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 11, 1, 12), "c", ("Load",)), + [("Pass", (1, 14, 1, 18))], + [], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "For", + (1, 0, 1, 20), + ( + "Tuple", + (1, 4, 1, 9), + [ + ("Name", (1, 5, 1, 6), "a", ("Store",)), + ("Name", (1, 7, 1, 8), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 13, 1, 14), "c", ("Load",)), + [("Pass", (1, 16, 1, 20))], + [], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "For", + (1, 0, 1, 20), + ( + "List", + (1, 4, 1, 9), + [ + ("Name", (1, 5, 1, 6), "a", ("Store",)), + ("Name", (1, 7, 1, 8), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 13, 1, 14), "c", ("Load",)), + [("Pass", (1, 16, 1, 20))], + [], + None, + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 11, 5), + ( + "GeneratorExp", + (1, 0, 11, 5), + ( + "Tuple", + (2, 4, 6, 5), + [ + ("Name", (3, 4, 3, 6), "Aa", ("Load",)), + ("Name", (5, 7, 5, 9), "Bb", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (8, 4, 10, 6), + [ + ("Name", (8, 4, 8, 6), "Aa", ("Store",)), + ("Name", (10, 4, 10, 6), "Bb", ("Store",)), + ], + ("Store",), + ), + ("Name", (10, 10, 10, 12), "Cc", 
("Load",)), + [], + 0, + ) + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 34), + ( + "DictComp", + (1, 0, 1, 34), + ("Name", (1, 1, 1, 2), "a", ("Load",)), + ("Name", (1, 5, 1, 6), "b", ("Load",)), + [ + ( + "comprehension", + ("Name", (1, 11, 1, 12), "w", ("Store",)), + ("Name", (1, 16, 1, 17), "x", ("Load",)), + [], + 0, + ), + ( + "comprehension", + ("Name", (1, 22, 1, 23), "m", ("Store",)), + ("Name", (1, 27, 1, 28), "p", ("Load",)), + [("Name", (1, 32, 1, 33), "g", ("Load",))], + 0, + ), + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 20), + ( + "DictComp", + (1, 0, 1, 20), + ("Name", (1, 1, 1, 2), "a", ("Load",)), + ("Name", (1, 5, 1, 6), "b", ("Load",)), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 14), + [ + ("Name", (1, 11, 1, 12), "v", ("Store",)), + ("Name", (1, 13, 1, 14), "w", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 18, 1, 19), "x", ("Load",)), + [], + 0, + ) + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 19), + ( + "SetComp", + (1, 0, 1, 19), + ("Name", (1, 1, 1, 2), "r", ("Load",)), + [ + ( + "comprehension", + ("Name", (1, 7, 1, 8), "l", ("Store",)), + ("Name", (1, 12, 1, 13), "x", ("Load",)), + [("Name", (1, 17, 1, 18), "g", ("Load",))], + 0, + ) + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 16), + ( + "SetComp", + (1, 0, 1, 16), + ("Name", (1, 1, 1, 2), "r", ("Load",)), + [ + ( + "comprehension", + ( + "Tuple", + (1, 7, 1, 10), + [ + ("Name", (1, 7, 1, 8), "l", ("Store",)), + ("Name", (1, 9, 1, 10), "m", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 14, 1, 15), "x", ("Load",)), + [], + 0, + ) + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "AsyncFunctionDef", + (1, 0, 3, 18), + "f", + ("arguments", [], [], None, [], [], None, []), + [ + ( + "Expr", + (2, 1, 2, 17), + ("Constant", (2, 1, 2, 17), "async function", None), + ), + ( + "Expr", + (3, 1, 3, 18), + ( + "Await", + (3, 1, 3, 18), + ( + "Call", + (3, 7, 3, 18), + ("Name", (3, 7, 3, 16), "something", ("Load",)), + [], + [], + ), + ), + ), + ], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "AsyncFunctionDef", + (1, 0, 3, 8), + "f", + ("arguments", [], [], None, [], [], None, []), + [ + ( + "AsyncFor", + (2, 1, 3, 8), + ("Name", (2, 11, 2, 12), "e", ("Store",)), + ("Name", (2, 16, 2, 17), "i", ("Load",)), + [ + ( + "Expr", + (2, 19, 2, 20), + ("Constant", (2, 19, 2, 20), 1, None), + ) + ], + [("Expr", (3, 7, 3, 8), ("Constant", (3, 7, 3, 8), 2, None))], + None, + ) + ], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "AsyncFunctionDef", + (1, 0, 2, 21), + "f", + ("arguments", [], [], None, [], [], None, []), + [ + ( + "AsyncWith", + (2, 1, 2, 21), + [ + ( + "withitem", + ("Name", (2, 12, 2, 13), "a", ("Load",)), + ("Name", (2, 17, 2, 18), "b", ("Store",)), + ) + ], + [ + ( + "Expr", + (2, 20, 2, 21), + ("Constant", (2, 20, 2, 21), 1, None), + ) + ], + None, + ) + ], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 14), + ( + "Dict", + (1, 0, 1, 14), + [None, ("Constant", (1, 10, 1, 11), 2, None)], + [ + ( + "Dict", + (1, 3, 1, 8), + [("Constant", (1, 4, 1, 5), 1, None)], + [("Constant", (1, 6, 1, 7), 2, None)], + ), + ("Constant", (1, 12, 1, 13), 3, None), + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 12), + ( + "Set", + (1, 0, 1, 12), + [ + ( + "Starred", + (1, 1, 1, 8), + ( + "Set", + (1, 2, 1, 8), + [ + ("Constant", (1, 3, 1, 4), 1, None), + 
("Constant", (1, 6, 1, 7), 2, None), + ], + ), + ("Load",), + ), + ("Constant", (1, 10, 1, 11), 3, None), + ], + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "AsyncFunctionDef", + (1, 0, 2, 21), + "f", + ("arguments", [], [], None, [], [], None, []), + [ + ( + "Expr", + (2, 1, 2, 21), + ( + "ListComp", + (2, 1, 2, 21), + ("Name", (2, 2, 2, 3), "i", ("Load",)), + [ + ( + "comprehension", + ("Name", (2, 14, 2, 15), "b", ("Store",)), + ("Name", (2, 19, 2, 20), "c", ("Load",)), + [], + 1, + ) + ], + ), + ) + ], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (4, 0, 4, 13), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (4, 9, 4, 13))], + [ + ("Name", (1, 1, 1, 6), "deco1", ("Load",)), + ( + "Call", + (2, 1, 2, 8), + ("Name", (2, 1, 2, 6), "deco2", ("Load",)), + [], + [], + ), + ( + "Call", + (3, 1, 3, 9), + ("Name", (3, 1, 3, 6), "deco3", ("Load",)), + [("Constant", (3, 7, 3, 8), 1, None)], + [], + ), + ], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "AsyncFunctionDef", + (4, 0, 4, 19), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (4, 15, 4, 19))], + [ + ("Name", (1, 1, 1, 6), "deco1", ("Load",)), + ( + "Call", + (2, 1, 2, 8), + ("Name", (2, 1, 2, 6), "deco2", ("Load",)), + [], + [], + ), + ( + "Call", + (3, 1, 3, 9), + ("Name", (3, 1, 3, 6), "deco3", ("Load",)), + [("Constant", (3, 7, 3, 8), 1, None)], + [], + ), + ], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (4, 0, 4, 13), + "C", + [], + [], + [("Pass", (4, 9, 4, 13))], + [ + ("Name", (1, 1, 1, 6), "deco1", ("Load",)), + ( + "Call", + (2, 1, 2, 8), + ("Name", (2, 1, 2, 6), "deco2", ("Load",)), + [], + [], + ), + ( + "Call", + (3, 1, 3, 9), + ("Name", (3, 1, 3, 6), "deco3", ("Load",)), + [("Constant", (3, 7, 3, 8), 1, None)], + [], + ), + ], + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (2, 0, 2, 13), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (2, 9, 2, 13))], + [ + ( + "Call", + (1, 1, 1, 19), + ("Name", (1, 1, 1, 5), "deco", ("Load",)), + [ + ( + "GeneratorExp", + (1, 5, 1, 19), + ("Name", (1, 6, 1, 7), "a", ("Load",)), + [ + ( + "comprehension", + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 17, 1, 18), "b", ("Load",)), + [], + 0, + ) + ], + ) + ], + [], + ) + ], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (2, 0, 2, 13), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (2, 9, 2, 13))], + [ + ( + "Attribute", + (1, 1, 1, 6), + ( + "Attribute", + (1, 1, 1, 4), + ("Name", (1, 1, 1, 2), "a", ("Load",)), + "b", + ("Load",), + ), + "c", + ("Load",), + ) + ], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "Expr", + (1, 0, 1, 8), + ( + "NamedExpr", + (1, 1, 1, 7), + ("Name", (1, 1, 1, 2), "a", ("Store",)), + ("Constant", (1, 6, 1, 7), 1, None), + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 18), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [], + None, + [], + [], + None, + [], + ), + [("Pass", (1, 14, 1, 18))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 26), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [ + ("arg", (1, 12, 1, 13), "c", None, None), + ("arg", (1, 15, 1, 16), "d", None, None), + ("arg", (1, 18, 1, 19), "e", None, None), + ], + None, + [], + [], + None, + [], + ), + [("Pass", (1, 22, 1, 26))], + [], + 
None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 29), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [("arg", (1, 12, 1, 13), "c", None, None)], + None, + [ + ("arg", (1, 18, 1, 19), "d", None, None), + ("arg", (1, 21, 1, 22), "e", None, None), + ], + [None, None], + None, + [], + ), + [("Pass", (1, 25, 1, 29))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 39), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [("arg", (1, 12, 1, 13), "c", None, None)], + None, + [ + ("arg", (1, 18, 1, 19), "d", None, None), + ("arg", (1, 21, 1, 22), "e", None, None), + ], + [None, None], + ("arg", (1, 26, 1, 32), "kwargs", None, None), + [], + ), + [("Pass", (1, 35, 1, 39))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 20), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [], + None, + [], + [], + None, + [("Constant", (1, 8, 1, 9), 1, None)], + ), + [("Pass", (1, 16, 1, 20))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 29), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [ + ("arg", (1, 14, 1, 15), "b", None, None), + ("arg", (1, 19, 1, 20), "c", None, None), + ], + None, + [], + [], + None, + [ + ("Constant", (1, 8, 1, 9), 1, None), + ("Constant", (1, 16, 1, 17), 2, None), + ("Constant", (1, 21, 1, 22), 4, None), + ], + ), + [("Pass", (1, 25, 1, 29))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 32), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [("arg", (1, 14, 1, 15), "b", None, None)], + None, + [("arg", (1, 22, 1, 23), "c", None, None)], + [("Constant", (1, 24, 1, 25), 4, None)], + None, + [ + ("Constant", (1, 8, 1, 9), 1, None), + ("Constant", (1, 16, 1, 17), 2, None), + ], + ), + [("Pass", (1, 28, 1, 32))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 30), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [("arg", (1, 14, 1, 15), "b", None, None)], + None, + [("arg", (1, 22, 1, 23), "c", None, None)], + [None], + None, + [ + ("Constant", (1, 8, 1, 9), 1, None), + ("Constant", (1, 16, 1, 17), 2, None), + ], + ), + [("Pass", (1, 26, 1, 30))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 42), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [("arg", (1, 14, 1, 15), "b", None, None)], + None, + [("arg", (1, 22, 1, 23), "c", None, None)], + [("Constant", (1, 24, 1, 25), 4, None)], + ("arg", (1, 29, 1, 35), "kwargs", None, None), + [ + ("Constant", (1, 8, 1, 9), 1, None), + ("Constant", (1, 16, 1, 17), 2, None), + ], + ), + [("Pass", (1, 38, 1, 42))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 40), + "f", + ( + "arguments", + [("arg", (1, 6, 1, 7), "a", None, None)], + [("arg", (1, 14, 1, 15), "b", None, None)], + None, + [("arg", (1, 22, 1, 23), "c", None, None)], + [None], + ("arg", (1, 27, 1, 33), "kwargs", None, None), + [ + ("Constant", (1, 8, 1, 9), 1, None), + ("Constant", (1, 16, 1, 17), 2, None), + ], + ), + [("Pass", (1, 36, 1, 40))], + [], + None, + None, + [], + ) + ], + [], + ), + ( + "Module", + [ + ( + "TypeAlias", + (1, 0, 1, 12), + ("Name", (1, 5, 1, 6), "X", ("Store",)), + [], + ("Name", 
(1, 9, 1, 12), "int", ("Load",)), + ) + ], + [], + ), + ( + "Module", + [ + ( + "TypeAlias", + (1, 0, 1, 15), + ("Name", (1, 5, 1, 6), "X", ("Store",)), + [("TypeVar", (1, 7, 1, 8), "T", None)], + ("Name", (1, 12, 1, 15), "int", ("Load",)), + ) + ], + [], + ), + ( + "Module", + [ + ( + "TypeAlias", + (1, 0, 1, 32), + ("Name", (1, 5, 1, 6), "X", ("Store",)), + [ + ("TypeVar", (1, 7, 1, 8), "T", None), + ("TypeVarTuple", (1, 10, 1, 13), "Ts"), + ("ParamSpec", (1, 15, 1, 18), "P"), + ], + ( + "Tuple", + (1, 22, 1, 32), + [ + ("Name", (1, 23, 1, 24), "T", ("Load",)), + ("Name", (1, 26, 1, 28), "Ts", ("Load",)), + ("Name", (1, 30, 1, 31), "P", ("Load",)), + ], + ("Load",), + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "TypeAlias", + (1, 0, 1, 37), + ("Name", (1, 5, 1, 6), "X", ("Store",)), + [ + ( + "TypeVar", + (1, 7, 1, 13), + "T", + ("Name", (1, 10, 1, 13), "int", ("Load",)), + ), + ("TypeVarTuple", (1, 15, 1, 18), "Ts"), + ("ParamSpec", (1, 20, 1, 23), "P"), + ], + ( + "Tuple", + (1, 27, 1, 37), + [ + ("Name", (1, 28, 1, 29), "T", ("Load",)), + ("Name", (1, 31, 1, 33), "Ts", ("Load",)), + ("Name", (1, 35, 1, 36), "P", ("Load",)), + ], + ("Load",), + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "TypeAlias", + (1, 0, 1, 44), + ("Name", (1, 5, 1, 6), "X", ("Store",)), + [ + ( + "TypeVar", + (1, 7, 1, 20), + "T", + ( + "Tuple", + (1, 10, 1, 20), + [ + ("Name", (1, 11, 1, 14), "int", ("Load",)), + ("Name", (1, 16, 1, 19), "str", ("Load",)), + ], + ("Load",), + ), + ), + ("TypeVarTuple", (1, 22, 1, 25), "Ts"), + ("ParamSpec", (1, 27, 1, 30), "P"), + ], + ( + "Tuple", + (1, 34, 1, 44), + [ + ("Name", (1, 35, 1, 36), "T", ("Load",)), + ("Name", (1, 38, 1, 40), "Ts", ("Load",)), + ("Name", (1, 42, 1, 43), "P", ("Load",)), + ], + ("Load",), + ), + ) + ], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (1, 0, 1, 16), + "X", + [], + [], + [("Pass", (1, 12, 1, 16))], + [], + [("TypeVar", (1, 8, 1, 9), "T", None)], + ) + ], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (1, 0, 1, 26), + "X", + [], + [], + [("Pass", (1, 22, 1, 26))], + [], + [ + ("TypeVar", (1, 8, 1, 9), "T", None), + ("TypeVarTuple", (1, 11, 1, 14), "Ts"), + ("ParamSpec", (1, 16, 1, 19), "P"), + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (1, 0, 1, 31), + "X", + [], + [], + [("Pass", (1, 27, 1, 31))], + [], + [ + ( + "TypeVar", + (1, 8, 1, 14), + "T", + ("Name", (1, 11, 1, 14), "int", ("Load",)), + ), + ("TypeVarTuple", (1, 16, 1, 19), "Ts"), + ("ParamSpec", (1, 21, 1, 24), "P"), + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "ClassDef", + (1, 0, 1, 38), + "X", + [], + [], + [("Pass", (1, 34, 1, 38))], + [], + [ + ( + "TypeVar", + (1, 8, 1, 21), + "T", + ( + "Tuple", + (1, 11, 1, 21), + [ + ("Name", (1, 12, 1, 15), "int", ("Load",)), + ("Name", (1, 17, 1, 20), "str", ("Load",)), + ], + ("Load",), + ), + ), + ("TypeVarTuple", (1, 23, 1, 26), "Ts"), + ("ParamSpec", (1, 28, 1, 31), "P"), + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 16), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 12, 1, 16))], + [], + None, + None, + [("TypeVar", (1, 6, 1, 7), "T", None)], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 26), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 22, 1, 26))], + [], + None, + None, + [ + ("TypeVar", (1, 6, 1, 7), "T", None), + ("TypeVarTuple", (1, 9, 1, 12), "Ts"), + ("ParamSpec", (1, 14, 1, 17), "P"), + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 31), + 
"f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 27, 1, 31))], + [], + None, + None, + [ + ( + "TypeVar", + (1, 6, 1, 12), + "T", + ("Name", (1, 9, 1, 12), "int", ("Load",)), + ), + ("TypeVarTuple", (1, 14, 1, 17), "Ts"), + ("ParamSpec", (1, 19, 1, 22), "P"), + ], + ) + ], + [], + ), + ( + "Module", + [ + ( + "FunctionDef", + (1, 0, 1, 38), + "f", + ("arguments", [], [], None, [], [], None, []), + [("Pass", (1, 34, 1, 38))], + [], + None, + None, + [ + ( + "TypeVar", + (1, 6, 1, 19), + "T", + ( + "Tuple", + (1, 9, 1, 19), + [ + ("Name", (1, 10, 1, 13), "int", ("Load",)), + ("Name", (1, 15, 1, 18), "str", ("Load",)), + ], + ("Load",), + ), + ), + ("TypeVarTuple", (1, 21, 1, 24), "Ts"), + ("ParamSpec", (1, 26, 1, 29), "P"), + ], + ) + ], + [], + ), +] +single_results = [ + ( + "Interactive", + [ + ( + "Expr", + (1, 0, 1, 3), + ( + "BinOp", + (1, 0, 1, 3), + ("Constant", (1, 0, 1, 1), 1, None), + ("Add",), + ("Constant", (1, 2, 1, 3), 2, None), + ), + ) + ], + ), +] +eval_results = [ + ("Expression", ("Constant", (1, 0, 1, 4), None, None)), + ( + "Expression", + ( + "BoolOp", + (1, 0, 1, 7), + ("And",), + [ + ("Name", (1, 0, 1, 1), "a", ("Load",)), + ("Name", (1, 6, 1, 7), "b", ("Load",)), + ], + ), + ), + ( + "Expression", + ( + "BinOp", + (1, 0, 1, 5), + ("Name", (1, 0, 1, 1), "a", ("Load",)), + ("Add",), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ), + ), + ( + "Expression", + ("UnaryOp", (1, 0, 1, 5), ("Not",), ("Name", (1, 4, 1, 5), "v", ("Load",))), + ), + ( + "Expression", + ( + "Lambda", + (1, 0, 1, 11), + ("arguments", [], [], None, [], [], None, []), + ("Constant", (1, 7, 1, 11), None, None), + ), + ), + ( + "Expression", + ( + "Dict", + (1, 0, 1, 7), + [("Constant", (1, 2, 1, 3), 1, None)], + [("Constant", (1, 4, 1, 5), 2, None)], + ), + ), + ("Expression", ("Dict", (1, 0, 1, 2), [], [])), + ("Expression", ("Set", (1, 0, 1, 7), [("Constant", (1, 1, 1, 5), None, None)])), + ( + "Expression", + ( + "Dict", + (1, 0, 5, 6), + [("Constant", (2, 6, 2, 7), 1, None)], + [("Constant", (4, 10, 4, 11), 2, None)], + ), + ), + ( + "Expression", + ( + "ListComp", + (1, 0, 1, 19), + ("Name", (1, 1, 1, 2), "a", ("Load",)), + [ + ( + "comprehension", + ("Name", (1, 7, 1, 8), "b", ("Store",)), + ("Name", (1, 12, 1, 13), "c", ("Load",)), + [("Name", (1, 17, 1, 18), "d", ("Load",))], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "GeneratorExp", + (1, 0, 1, 19), + ("Name", (1, 1, 1, 2), "a", ("Load",)), + [ + ( + "comprehension", + ("Name", (1, 7, 1, 8), "b", ("Store",)), + ("Name", (1, 12, 1, 13), "c", ("Load",)), + [("Name", (1, 17, 1, 18), "d", ("Load",))], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "ListComp", + (1, 0, 1, 20), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 14), + [ + ("Name", (1, 11, 1, 12), "a", ("Store",)), + ("Name", (1, 13, 1, 14), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 18, 1, 19), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "ListComp", + (1, 0, 1, 22), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 16), + [ + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 14, 1, 15), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 20, 1, 21), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + 
"Expression", + ( + "ListComp", + (1, 0, 1, 22), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "List", + (1, 11, 1, 16), + [ + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 14, 1, 15), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 20, 1, 21), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "SetComp", + (1, 0, 1, 20), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 14), + [ + ("Name", (1, 11, 1, 12), "a", ("Store",)), + ("Name", (1, 13, 1, 14), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 18, 1, 19), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "SetComp", + (1, 0, 1, 22), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 16), + [ + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 14, 1, 15), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 20, 1, 21), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "SetComp", + (1, 0, 1, 22), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "List", + (1, 11, 1, 16), + [ + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 14, 1, 15), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 20, 1, 21), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "GeneratorExp", + (1, 0, 1, 20), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 14), + [ + ("Name", (1, 11, 1, 12), "a", ("Store",)), + ("Name", (1, 13, 1, 14), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 18, 1, 19), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "GeneratorExp", + (1, 0, 1, 22), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "Tuple", + (1, 11, 1, 16), + [ + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 14, 1, 15), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 20, 1, 21), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "GeneratorExp", + (1, 0, 1, 22), + ( + "Tuple", + (1, 1, 1, 6), + [ + ("Name", (1, 2, 1, 3), "a", ("Load",)), + ("Name", (1, 4, 1, 5), "b", ("Load",)), + ], + ("Load",), + ), + [ + ( + "comprehension", + ( + "List", + (1, 11, 1, 16), + [ + ("Name", (1, 12, 1, 13), "a", ("Store",)), + ("Name", (1, 14, 1, 15), "b", ("Store",)), + ], + ("Store",), + ), + ("Name", (1, 20, 1, 21), "c", ("Load",)), + [], + 0, + ) + ], + ), + ), + ( + "Expression", + ( + "Compare", + (1, 0, 1, 9), + ("Constant", (1, 0, 1, 1), 1, None), + [("Lt",), ("Lt",)], + [("Constant", (1, 4, 1, 5), 2, None), ("Constant", (1, 8, 1, 9), 3, None)], + ), + ), + ( + "Expression", + ( + "Call", + (1, 0, 1, 17), + ("Name", (1, 0, 1, 1), "f", ("Load",)), + [ + ("Constant", (1, 2, 1, 3), 1, None), + ("Constant", (1, 4, 1, 5), 2, None), + ( + "Starred", + (1, 10, 1, 12), 
+ ("Name", (1, 11, 1, 12), "d", ("Load",)), + ("Load",), + ), + ], + [ + ("keyword", (1, 6, 1, 9), "c", ("Constant", (1, 8, 1, 9), 3, None)), + ( + "keyword", + (1, 13, 1, 16), + None, + ("Name", (1, 15, 1, 16), "e", ("Load",)), + ), + ], + ), + ), + ( + "Expression", + ( + "Call", + (1, 0, 1, 10), + ("Name", (1, 0, 1, 1), "f", ("Load",)), + [ + ( + "Starred", + (1, 2, 1, 9), + ( + "List", + (1, 3, 1, 9), + [ + ("Constant", (1, 4, 1, 5), 0, None), + ("Constant", (1, 7, 1, 8), 1, None), + ], + ("Load",), + ), + ("Load",), + ) + ], + [], + ), + ), + ( + "Expression", + ( + "Call", + (1, 0, 1, 15), + ("Name", (1, 0, 1, 1), "f", ("Load",)), + [ + ( + "GeneratorExp", + (1, 1, 1, 15), + ("Name", (1, 2, 1, 3), "a", ("Load",)), + [ + ( + "comprehension", + ("Name", (1, 8, 1, 9), "a", ("Store",)), + ("Name", (1, 13, 1, 14), "b", ("Load",)), + [], + 0, + ) + ], + ) + ], + [], + ), + ), + ("Expression", ("Constant", (1, 0, 1, 2), 10, None)), + ("Expression", ("Constant", (1, 0, 1, 8), "string", None)), + ( + "Expression", + ( + "Attribute", + (1, 0, 1, 3), + ("Name", (1, 0, 1, 1), "a", ("Load",)), + "b", + ("Load",), + ), + ), + ( + "Expression", + ( + "Subscript", + (1, 0, 1, 6), + ("Name", (1, 0, 1, 1), "a", ("Load",)), + ( + "Slice", + (1, 2, 1, 5), + ("Name", (1, 2, 1, 3), "b", ("Load",)), + ("Name", (1, 4, 1, 5), "c", ("Load",)), + None, + ), + ("Load",), + ), + ), + ("Expression", ("Name", (1, 0, 1, 1), "v", ("Load",))), + ( + "Expression", + ( + "List", + (1, 0, 1, 7), + [ + ("Constant", (1, 1, 1, 2), 1, None), + ("Constant", (1, 3, 1, 4), 2, None), + ("Constant", (1, 5, 1, 6), 3, None), + ], + ("Load",), + ), + ), + ("Expression", ("List", (1, 0, 1, 2), [], ("Load",))), + ( + "Expression", + ( + "Tuple", + (1, 0, 1, 5), + [ + ("Constant", (1, 0, 1, 1), 1, None), + ("Constant", (1, 2, 1, 3), 2, None), + ("Constant", (1, 4, 1, 5), 3, None), + ], + ("Load",), + ), + ), + ( + "Expression", + ( + "Tuple", + (1, 0, 1, 7), + [ + ("Constant", (1, 1, 1, 2), 1, None), + ("Constant", (1, 3, 1, 4), 2, None), + ("Constant", (1, 5, 1, 6), 3, None), + ], + ("Load",), + ), + ), + ("Expression", ("Tuple", (1, 0, 1, 2), [], ("Load",))), + ( + "Expression", + ( + "Call", + (1, 0, 1, 17), + ( + "Attribute", + (1, 0, 1, 7), + ( + "Attribute", + (1, 0, 1, 5), + ( + "Attribute", + (1, 0, 1, 3), + ("Name", (1, 0, 1, 1), "a", ("Load",)), + "b", + ("Load",), + ), + "c", + ("Load",), + ), + "d", + ("Load",), + ), + [ + ( + "Subscript", + (1, 8, 1, 16), + ( + "Attribute", + (1, 8, 1, 11), + ("Name", (1, 8, 1, 9), "a", ("Load",)), + "b", + ("Load",), + ), + ( + "Slice", + (1, 12, 1, 15), + ("Constant", (1, 12, 1, 13), 1, None), + ("Constant", (1, 14, 1, 15), 2, None), + None, + ), + ("Load",), + ) + ], + [], + ), + ), ] main() diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index fb364e9c684bd1..76dd9e89ca3d8e 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -8,6 +8,7 @@ # Unicode identifiers in tests is allowed by PEP 3131. import ast +import datetime import os import re import types @@ -18,7 +19,7 @@ from test.support.os_helper import temp_cwd from test.support.script_helper import assert_python_failure, assert_python_ok -a_global = 'global variable' +a_global = "global variable" # You could argue that I'm too strict in looking for specific error # values with assertRaisesRegex, but without it it's way too easy to @@ -27,6 +28,7 @@ # worthwhile tradeoff. When I switched to this method, I found many # examples where I wasn't testing what I thought I was. 
+ class TestCase(unittest.TestCase): def assertAllRaise(self, exception_type, regex, error_strings): for str in error_strings: @@ -38,43 +40,45 @@ def test__format__lookup(self): # Make sure __format__ is looked up on the type, not the instance. class X: def __format__(self, spec): - return 'class' + return "class" x = X() # Add a bound __format__ method to the 'y' instance, but not # the 'x' instance. y = X() - y.__format__ = types.MethodType(lambda self, spec: 'instance', y) + y.__format__ = types.MethodType(lambda self, spec: "instance", y) - self.assertEqual(f'{y}', format(y)) - self.assertEqual(f'{y}', 'class') + self.assertEqual(f"{y}", format(y)) + self.assertEqual(f"{y}", "class") self.assertEqual(format(x), format(y)) # __format__ is not called this way, but still make sure it # returns what we expect (so we can make sure we're bypassing # it). - self.assertEqual(x.__format__(''), 'class') - self.assertEqual(y.__format__(''), 'instance') + self.assertEqual(x.__format__(""), "class") + self.assertEqual(y.__format__(""), "instance") # This is how __format__ is actually called. - self.assertEqual(type(x).__format__(x, ''), 'class') - self.assertEqual(type(y).__format__(y, ''), 'class') + self.assertEqual(type(x).__format__(x, ""), "class") + self.assertEqual(type(y).__format__(y, ""), "class") def test_ast(self): # Inspired by http://bugs.python.org/issue24975 class X: def __init__(self): self.called = False + def __call__(self): self.called = True return 4 + x = X() expr = """ a = 10 f'{a * x()}'""" t = ast.parse(expr) - c = compile(t, '', 'exec') + c = compile(t, "", "exec") # Make sure x was not called. self.assertFalse(x.called) @@ -280,7 +284,6 @@ def test_ast_line_numbers_duplicate_expression(self): self.assertEqual(binop.right.col_offset, 27) def test_ast_numbers_fstring_with_formatting(self): - t = ast.parse('f"Here is that pesky {xxx:.3f} again"') self.assertEqual(len(t.body), 1) self.assertEqual(t.body[0].lineno, 1) @@ -436,24 +439,12 @@ def test_ast_line_numbers_with_parentheses(self): x, y = t.body # Check the single quoted string offsets first. - offsets = [ - (elt.col_offset, elt.end_col_offset) - for elt in x.value.elts - ] - self.assertTrue(all( - offset == (4, 10) - for offset in offsets - )) + offsets = [(elt.col_offset, elt.end_col_offset) for elt in x.value.elts] + self.assertTrue(all(offset == (4, 10) for offset in offsets)) # Check the triple quoted string offsets. 
- offsets = [ - (elt.col_offset, elt.end_col_offset) - for elt in y.value.elts - ] - self.assertTrue(all( - offset == (4, 14) - for offset in offsets - )) + offsets = [(elt.col_offset, elt.end_col_offset) for elt in y.value.elts] + self.assertTrue(all(offset == (4, 14) for offset in offsets)) expr = """ x = ( @@ -516,463 +507,572 @@ def test_ast_fstring_empty_format_spec(self): def test_docstring(self): def f(): - f'''Not a docstring''' + f"""Not a docstring""" + self.assertIsNone(f.__doc__) + def g(): - '''Not a docstring''' \ - f'' + """Not a docstring""" f"" + self.assertIsNone(g.__doc__) def test_literal_eval(self): - with self.assertRaisesRegex(ValueError, 'malformed node or string'): + with self.assertRaisesRegex(ValueError, "malformed node or string"): ast.literal_eval("f'x'") def test_ast_compile_time_concat(self): - x = [''] + x = [""] expr = """x[0] = 'foo' f'{3}'""" t = ast.parse(expr) - c = compile(t, '', 'exec') + c = compile(t, "", "exec") exec(c) - self.assertEqual(x[0], 'foo3') + self.assertEqual(x[0], "foo3") def test_compile_time_concat_errors(self): - self.assertAllRaise(SyntaxError, - 'cannot mix bytes and nonbytes literals', - [r"""f'' b''""", - r"""b'' f''""", - ]) + self.assertAllRaise( + SyntaxError, + "cannot mix bytes and nonbytes literals", + [ + r"""f'' b''""", + r"""b'' f''""", + ], + ) def test_literal(self): - self.assertEqual(f'', '') - self.assertEqual(f'a', 'a') - self.assertEqual(f' ', ' ') + self.assertEqual(f"", "") + self.assertEqual(f"a", "a") + self.assertEqual(f" ", " ") def test_unterminated_string(self): - self.assertAllRaise(SyntaxError, 'unterminated string', - [r"""f'{"x'""", - r"""f'{"x}'""", - r"""f'{("x'""", - r"""f'{("x}'""", - ]) + self.assertAllRaise( + SyntaxError, + "unterminated string", + [ + r"""f'{"x'""", + r"""f'{"x}'""", + r"""f'{("x'""", + r"""f'{("x}'""", + ], + ) @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") def test_mismatched_parens(self): - self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' " - r"does not match opening parenthesis '\('", - ["f'{((}'", - ]) - self.assertAllRaise(SyntaxError, r"closing parenthesis '\)' " - r"does not match opening parenthesis '\['", - ["f'{a[4)}'", - ]) - self.assertAllRaise(SyntaxError, r"closing parenthesis '\]' " - r"does not match opening parenthesis '\('", - ["f'{a(4]}'", - ]) - self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' " - r"does not match opening parenthesis '\['", - ["f'{a[4}'", - ]) - self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' " - r"does not match opening parenthesis '\('", - ["f'{a(4}'", - ]) - self.assertRaises(SyntaxError, eval, "f'{" + "("*500 + "}'") + self.assertAllRaise( + SyntaxError, + r"closing parenthesis '\}' " r"does not match opening parenthesis '\('", + [ + "f'{((}'", + ], + ) + self.assertAllRaise( + SyntaxError, + r"closing parenthesis '\)' " r"does not match opening parenthesis '\['", + [ + "f'{a[4)}'", + ], + ) + self.assertAllRaise( + SyntaxError, + r"closing parenthesis '\]' " r"does not match opening parenthesis '\('", + [ + "f'{a(4]}'", + ], + ) + self.assertAllRaise( + SyntaxError, + r"closing parenthesis '\}' " r"does not match opening parenthesis '\['", + [ + "f'{a[4}'", + ], + ) + self.assertAllRaise( + SyntaxError, + r"closing parenthesis '\}' " r"does not match opening parenthesis '\('", + [ + "f'{a(4}'", + ], + ) + self.assertRaises(SyntaxError, eval, "f'{" + "(" * 500 + "}'") @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") def test_fstring_nested_too_deeply(self): - 
self.assertAllRaise(SyntaxError, - "f-string: expressions nested too deeply", - ['f"{1+2:{1+2:{1+1:{1}}}}"']) + self.assertAllRaise( + SyntaxError, + "f-string: expressions nested too deeply", + ['f"{1+2:{1+2:{1+1:{1}}}}"'], + ) def create_nested_fstring(n): if n == 0: return "1+1" - prev = create_nested_fstring(n-1) + prev = create_nested_fstring(n - 1) return f'f"{{{prev}}}"' - self.assertAllRaise(SyntaxError, - "too many nested f-strings", - [create_nested_fstring(160)]) + self.assertAllRaise( + SyntaxError, "too many nested f-strings", [create_nested_fstring(160)] + ) def test_syntax_error_in_nested_fstring(self): # See gh-104016 for more information on this crash - self.assertAllRaise(SyntaxError, - "invalid syntax", - ['f"{1 1:' + ('{f"1:' * 199)]) + self.assertAllRaise( + SyntaxError, "invalid syntax", ['f"{1 1:' + ('{f"1:' * 199)] + ) def test_double_braces(self): - self.assertEqual(f'{{', '{') - self.assertEqual(f'a{{', 'a{') - self.assertEqual(f'{{b', '{b') - self.assertEqual(f'a{{b', 'a{b') - self.assertEqual(f'}}', '}') - self.assertEqual(f'a}}', 'a}') - self.assertEqual(f'}}b', '}b') - self.assertEqual(f'a}}b', 'a}b') - self.assertEqual(f'{{}}', '{}') - self.assertEqual(f'a{{}}', 'a{}') - self.assertEqual(f'{{b}}', '{b}') - self.assertEqual(f'{{}}c', '{}c') - self.assertEqual(f'a{{b}}', 'a{b}') - self.assertEqual(f'a{{}}c', 'a{}c') - self.assertEqual(f'{{b}}c', '{b}c') - self.assertEqual(f'a{{b}}c', 'a{b}c') - - self.assertEqual(f'{{{10}', '{10') - self.assertEqual(f'}}{10}', '}10') - self.assertEqual(f'}}{{{10}', '}{10') - self.assertEqual(f'}}a{{{10}', '}a{10') - - self.assertEqual(f'{10}{{', '10{') - self.assertEqual(f'{10}}}', '10}') - self.assertEqual(f'{10}}}{{', '10}{') - self.assertEqual(f'{10}}}a{{' '}', '10}a{}') + self.assertEqual(f"{{", "{") + self.assertEqual(f"a{{", "a{") + self.assertEqual(f"{{b", "{b") + self.assertEqual(f"a{{b", "a{b") + self.assertEqual(f"}}", "}") + self.assertEqual(f"a}}", "a}") + self.assertEqual(f"}}b", "}b") + self.assertEqual(f"a}}b", "a}b") + self.assertEqual(f"{{}}", "{}") + self.assertEqual(f"a{{}}", "a{}") + self.assertEqual(f"{{b}}", "{b}") + self.assertEqual(f"{{}}c", "{}c") + self.assertEqual(f"a{{b}}", "a{b}") + self.assertEqual(f"a{{}}c", "a{}c") + self.assertEqual(f"{{b}}c", "{b}c") + self.assertEqual(f"a{{b}}c", "a{b}c") + + self.assertEqual(f"{{{10}", "{10") + self.assertEqual(f"}}{10}", "}10") + self.assertEqual(f"}}{{{10}", "}{10") + self.assertEqual(f"}}a{{{10}", "}a{10") + + self.assertEqual(f"{10}{{", "10{") + self.assertEqual(f"{10}}}", "10}") + self.assertEqual(f"{10}}}{{", "10}{") + self.assertEqual(f"{10}}}a{{" "}", "10}a{}") # Inside of strings, don't interpret doubled brackets. 
- self.assertEqual(f'{"{{}}"}', '{{}}') + self.assertEqual(f'{"{{}}"}', "{{}}") - self.assertAllRaise(TypeError, 'unhashable type', - ["f'{ {{}} }'", # dict in a set - ]) + self.assertAllRaise( + TypeError, + "unhashable type", + [ + "f'{ {{}} }'", # dict in a set + ], + ) def test_compile_time_concat(self): - x = 'def' - self.assertEqual('abc' f'## {x}ghi', 'abc## defghi') - self.assertEqual('abc' f'{x}' 'ghi', 'abcdefghi') - self.assertEqual('abc' f'{x}' 'gh' f'i{x:4}', 'abcdefghidef ') - self.assertEqual('{x}' f'{x}', '{x}def') - self.assertEqual('{x' f'{x}', '{xdef') - self.assertEqual('{x}' f'{x}', '{x}def') - self.assertEqual('{{x}}' f'{x}', '{{x}}def') - self.assertEqual('{{x' f'{x}', '{{xdef') - self.assertEqual('x}}' f'{x}', 'x}}def') - self.assertEqual(f'{x}' 'x}}', 'defx}}') - self.assertEqual(f'{x}' '', 'def') - self.assertEqual('' f'{x}' '', 'def') - self.assertEqual('' f'{x}', 'def') - self.assertEqual(f'{x}' '2', 'def2') - self.assertEqual('1' f'{x}' '2', '1def2') - self.assertEqual('1' f'{x}', '1def') - self.assertEqual(f'{x}' f'-{x}', 'def-def') - self.assertEqual('' f'', '') - self.assertEqual('' f'' '', '') - self.assertEqual('' f'' '' f'', '') - self.assertEqual(f'', '') - self.assertEqual(f'' '', '') - self.assertEqual(f'' '' f'', '') - self.assertEqual(f'' '' f'' '', '') + x = "def" + self.assertEqual("abc" f"## {x}ghi", "abc## defghi") + self.assertEqual("abc" f"{x}" "ghi", "abcdefghi") + self.assertEqual("abc" f"{x}" "gh" f"i{x:4}", "abcdefghidef ") + self.assertEqual("{x}" f"{x}", "{x}def") + self.assertEqual("{x" f"{x}", "{xdef") + self.assertEqual("{x}" f"{x}", "{x}def") + self.assertEqual("{{x}}" f"{x}", "{{x}}def") + self.assertEqual("{{x" f"{x}", "{{xdef") + self.assertEqual("x}}" f"{x}", "x}}def") + self.assertEqual(f"{x}" "x}}", "defx}}") + self.assertEqual(f"{x}" "", "def") + self.assertEqual("" f"{x}" "", "def") + self.assertEqual("" f"{x}", "def") + self.assertEqual(f"{x}" "2", "def2") + self.assertEqual("1" f"{x}" "2", "1def2") + self.assertEqual("1" f"{x}", "1def") + self.assertEqual(f"{x}" f"-{x}", "def-def") + self.assertEqual("" f"", "") + self.assertEqual("" f"" "", "") + self.assertEqual("" f"" "" f"", "") + self.assertEqual(f"", "") + self.assertEqual(f"" "", "") + self.assertEqual(f"" "" f"", "") + self.assertEqual(f"" "" f"" "", "") # This is not really [f'{'] + [f'}'] since we treat the inside # of braces as a purely new context, so it is actually f'{ and # then eval(' f') (a valid expression) and then }' which would # constitute a valid f-string. - self.assertEqual(f'{' f'}', ' f') + self.assertEqual(f'{' f'}', " f") - self.assertAllRaise(SyntaxError, "expecting '}'", - ['''f'{3' f"}"''', # can't concat to get a valid f-string - ]) + self.assertAllRaise( + SyntaxError, + "expecting '}'", + [ + '''f'{3' f"}"''', # can't concat to get a valid f-string + ], + ) def test_comments(self): # These aren't comments, since they're in strings. - d = {'#': 'hash'} - self.assertEqual(f'{"#"}', '#') - self.assertEqual(f'{d["#"]}', 'hash') - - self.assertAllRaise(SyntaxError, "'{' was never closed", - ["f'{1#}'", # error because everything after '#' is a comment - "f'{#}'", - "f'one: {1#}'", - "f'{1# one} {2 this is a comment still#}'", - ]) - self.assertAllRaise(SyntaxError, r"f-string: unmatched '\)'", - ["f'{)#}'", # When wrapped in parens, this becomes - # '()#)'. Make sure that doesn't compile. 
- ]) - self.assertEqual(f'''A complex trick: { + d = {"#": "hash"} + self.assertEqual(f'{"#"}', "#") + self.assertEqual(f'{d["#"]}', "hash") + + self.assertAllRaise( + SyntaxError, + "'{' was never closed", + [ + "f'{1#}'", # error because everything after '#' is a comment + "f'{#}'", + "f'one: {1#}'", + "f'{1# one} {2 this is a comment still#}'", + ], + ) + self.assertAllRaise( + SyntaxError, + r"f-string: unmatched '\)'", + [ + "f'{)#}'", # When wrapped in parens, this becomes + # '()#)'. Make sure that doesn't compile. + ], + ) + self.assertEqual( + f"""A complex trick: { 2 # two -}''', 'A complex trick: 2') - self.assertEqual(f''' +}""", + "A complex trick: 2", + ) + self.assertEqual( + f""" { 40 # fourty + # plus 2 # two -}''', '\n42') - self.assertEqual(f''' +}""", + "\n42", + ) + self.assertEqual( + f""" { 40 # fourty + # plus 2 # two -}''', '\n42') +}""", + "\n42", + ) - self.assertEqual(f''' + self.assertEqual( + f""" # this is not a comment { # the following operation it's 3 # this is a number -* 2}''', '\n# this is not a comment\n6') - self.assertEqual(f''' +* 2}""", + "\n# this is not a comment\n6", + ) + self.assertEqual( + f""" {# f'a {comment}' 86 # constant # nothing more -}''', '\n86') - - self.assertAllRaise(SyntaxError, r"f-string: valid expression required before '}'", - ["""f''' +}""", + "\n86", + ) + + self.assertAllRaise( + SyntaxError, + r"f-string: valid expression required before '}'", + [ + """f''' { # only a comment }''' -""", # this is equivalent to f'{}' - ]) +""", # this is equivalent to f'{}' + ], + ) def test_many_expressions(self): # Create a string with many expressions in it. Note that # because we have a space in here as a literal, we're actually # going to use twice as many ast nodes: one for each literal # plus one for each expression. - def build_fstr(n, extra=''): - return "f'" + ('{x} ' * n) + extra + "'" + def build_fstr(n, extra=""): + return "f'" + ("{x} " * n) + extra + "'" - x = 'X' + x = "X" width = 1 # Test around 256. for i in range(250, 260): - self.assertEqual(eval(build_fstr(i)), (x+' ')*i) + self.assertEqual(eval(build_fstr(i)), (x + " ") * i) # Test concatenating 2 largs fstrings. - self.assertEqual(eval(build_fstr(255)*256), (x+' ')*(255*256)) + self.assertEqual(eval(build_fstr(255) * 256), (x + " ") * (255 * 256)) - s = build_fstr(253, '{x:{width}} ') - self.assertEqual(eval(s), (x+' ')*254) + s = build_fstr(253, "{x:{width}} ") + self.assertEqual(eval(s), (x + " ") * 254) # Test lots of expressions and constants, concatenated. 
s = "f'{1}' 'x' 'y'" * 1024 - self.assertEqual(eval(s), '1xy' * 1024) + self.assertEqual(eval(s), "1xy" * 1024) def test_format_specifier_expressions(self): width = 10 precision = 4 - value = decimal.Decimal('12.34567') - self.assertEqual(f'result: {value:{width}.{precision}}', 'result: 12.35') - self.assertEqual(f'result: {value:{width!r}.{precision}}', 'result: 12.35') - self.assertEqual(f'result: {value:{width:0}.{precision:1}}', 'result: 12.35') - self.assertEqual(f'result: {value:{1}{0:0}.{precision:1}}', 'result: 12.35') - self.assertEqual(f'result: {value:{ 1}{ 0:0}.{ precision:1}}', 'result: 12.35') - self.assertEqual(f'{10:#{1}0x}', ' 0xa') - self.assertEqual(f'{10:{"#"}1{0}{"x"}}', ' 0xa') - self.assertEqual(f'{-10:-{"#"}1{0}x}', ' -0xa') - self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', ' -0xa') - self.assertEqual(f'{10:#{3 != {4:5} and width}x}', ' 0xa') - self.assertEqual(f'result: {value:{width:{0}}.{precision:1}}', 'result: 12.35') - - self.assertAllRaise(SyntaxError, "f-string: expecting ':' or '}'", - ["""f'{"s"!r{":10"}}'""", - # This looks like a nested format spec. - ]) - - self.assertAllRaise(SyntaxError, - "f-string: expecting a valid expression after '{'", - [# Invalid syntax inside a nested spec. - "f'{4:{/5}}'", - ]) - - self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character', - [# No expansion inside conversion or for - # the : or ! itself. - """f'{"s"!{"r"}}'""", - ]) + value = decimal.Decimal("12.34567") + self.assertEqual(f"result: {value:{width}.{precision}}", "result: 12.35") + self.assertEqual(f"result: {value:{width!r}.{precision}}", "result: 12.35") + self.assertEqual( + f"result: {value:{width:0}.{precision:1}}", "result: 12.35" + ) + self.assertEqual( + f"result: {value:{1}{0:0}.{precision:1}}", "result: 12.35" + ) + self.assertEqual( + f"result: {value:{ 1}{ 0:0}.{ precision:1}}", "result: 12.35" + ) + self.assertEqual(f"{10:#{1}0x}", " 0xa") + self.assertEqual(f'{10:{"#"}1{0}{"x"}}', " 0xa") + self.assertEqual(f'{-10:-{"#"}1{0}x}', " -0xa") + self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', " -0xa") + self.assertEqual(f"{10:#{3 != {4:5} and width}x}", " 0xa") + self.assertEqual( + f"result: {value:{width:{0}}.{precision:1}}", "result: 12.35" + ) + + self.assertAllRaise( + SyntaxError, + "f-string: expecting ':' or '}'", + [ + """f'{"s"!r{":10"}}'""", + # This looks like a nested format spec. + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: expecting a valid expression after '{'", + [ # Invalid syntax inside a nested spec. + "f'{4:{/5}}'", + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: invalid conversion character", + [ # No expansion inside conversion or for + # the : or ! itself. 
+ """f'{"s"!{"r"}}'""", + ], + ) def test_custom_format_specifier(self): class CustomFormat: def __format__(self, format_spec): return format_spec - self.assertEqual(f'{CustomFormat():\n}', '\n') - self.assertEqual(f'{CustomFormat():\u2603}', '☃') + self.assertEqual(f"{CustomFormat():\n}", "\n") + self.assertEqual(f"{CustomFormat():\u2603}", "☃") with self.assertWarns(SyntaxWarning): - exec(r'f"{F():¯\_(ツ)_/¯}"', {'F': CustomFormat}) + exec(r'f"{F():¯\_(ツ)_/¯}"', {"F": CustomFormat}) def test_side_effect_order(self): class X: def __init__(self): self.i = 0 + def __format__(self, spec): self.i += 1 return str(self.i) x = X() - self.assertEqual(f'{x} {x}', '1 2') + self.assertEqual(f"{x} {x}", "1 2") def test_missing_expression(self): - self.assertAllRaise(SyntaxError, - "f-string: valid expression required before '}'", - ["f'{}'", - "f'{ }'" - "f' {} '", - "f'{10:{ }}'", - "f' { } '", - - # The Python parser ignores also the following - # whitespace characters in additional to a space. - "f'''{\t\f\r\n}'''", - ]) - - self.assertAllRaise(SyntaxError, - "f-string: valid expression required before '!'", - ["f'{!r}'", - "f'{ !r}'", - "f'{!}'", - "f'''{\t\f\r\n!a}'''", - - # Catch empty expression before the - # missing closing brace. - "f'{!'", - "f'{!s:'", - - # Catch empty expression before the - # invalid conversion. - "f'{!x}'", - "f'{ !xr}'", - "f'{!x:}'", - "f'{!x:a}'", - "f'{ !xr:}'", - "f'{ !xr:a}'", - ]) - - self.assertAllRaise(SyntaxError, - "f-string: valid expression required before ':'", - ["f'{:}'", - "f'{ :!}'", - "f'{:2}'", - "f'''{\t\f\r\n:a}'''", - "f'{:'", - ]) - - self.assertAllRaise(SyntaxError, - "f-string: valid expression required before '='", - ["f'{=}'", - "f'{ =}'", - "f'{ =:}'", - "f'{ =!}'", - "f'''{\t\f\r\n=}'''", - "f'{='", - ]) + self.assertAllRaise( + SyntaxError, + "f-string: valid expression required before '}'", + [ + "f'{}'", + "f'{ }'" "f' {} '", + "f'{10:{ }}'", + "f' { } '", + # The Python parser ignores also the following + # whitespace characters in additional to a space. + "f'''{\t\f\r\n}'''", + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: valid expression required before '!'", + [ + "f'{!r}'", + "f'{ !r}'", + "f'{!}'", + "f'''{\t\f\r\n!a}'''", + # Catch empty expression before the + # missing closing brace. + "f'{!'", + "f'{!s:'", + # Catch empty expression before the + # invalid conversion. + "f'{!x}'", + "f'{ !xr}'", + "f'{!x:}'", + "f'{!x:a}'", + "f'{ !xr:}'", + "f'{ !xr:a}'", + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: valid expression required before ':'", + [ + "f'{:}'", + "f'{ :!}'", + "f'{:2}'", + "f'''{\t\f\r\n:a}'''", + "f'{:'", + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: valid expression required before '='", + [ + "f'{=}'", + "f'{ =}'", + "f'{ =:}'", + "f'{ =!}'", + "f'''{\t\f\r\n=}'''", + "f'{='", + ], + ) # Different error message is raised for other whitespace characters. 
- self.assertAllRaise(SyntaxError, r"invalid non-printable character U\+00A0", - ["f'''{\xa0}'''", - "\xa0", - ]) + self.assertAllRaise( + SyntaxError, + r"invalid non-printable character U\+00A0", + [ + "f'''{\xa0}'''", + "\xa0", + ], + ) def test_parens_in_expressions(self): - self.assertEqual(f'{3,}', '(3,)') - - self.assertAllRaise(SyntaxError, - "f-string: expecting a valid expression after '{'", - ["f'{,}'", - ]) - - self.assertAllRaise(SyntaxError, r"f-string: unmatched '\)'", - ["f'{3)+(4}'", - ]) + self.assertEqual(f"{3,}", "(3,)") + + self.assertAllRaise( + SyntaxError, + "f-string: expecting a valid expression after '{'", + [ + "f'{,}'", + ], + ) + + self.assertAllRaise( + SyntaxError, + r"f-string: unmatched '\)'", + [ + "f'{3)+(4}'", + ], + ) def test_newlines_before_syntax_error(self): - self.assertAllRaise(SyntaxError, - "f-string: expecting a valid expression after '{'", - ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"]) + self.assertAllRaise( + SyntaxError, + "f-string: expecting a valid expression after '{'", + ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"], + ) def test_backslashes_in_string_part(self): - self.assertEqual(f'\t', '\t') - self.assertEqual(r'\t', '\\t') - self.assertEqual(rf'\t', '\\t') - self.assertEqual(f'{2}\t', '2\t') - self.assertEqual(f'{2}\t{3}', '2\t3') - self.assertEqual(f'\t{3}', '\t3') - - self.assertEqual(f'\u0394', '\u0394') - self.assertEqual(r'\u0394', '\\u0394') - self.assertEqual(rf'\u0394', '\\u0394') - self.assertEqual(f'{2}\u0394', '2\u0394') - self.assertEqual(f'{2}\u0394{3}', '2\u03943') - self.assertEqual(f'\u0394{3}', '\u03943') - - self.assertEqual(f'\U00000394', '\u0394') - self.assertEqual(r'\U00000394', '\\U00000394') - self.assertEqual(rf'\U00000394', '\\U00000394') - self.assertEqual(f'{2}\U00000394', '2\u0394') - self.assertEqual(f'{2}\U00000394{3}', '2\u03943') - self.assertEqual(f'\U00000394{3}', '\u03943') - - self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394') - self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}', '2\u0394') - self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}', '2\u03943') - self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}{3}', '\u03943') - self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}', '2\u0394') - self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}3', '2\u03943') - self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}3', '\u03943') - - self.assertEqual(f'\x20', ' ') - self.assertEqual(r'\x20', '\\x20') - self.assertEqual(rf'\x20', '\\x20') - self.assertEqual(f'{2}\x20', '2 ') - self.assertEqual(f'{2}\x20{3}', '2 3') - self.assertEqual(f'\x20{3}', ' 3') - - self.assertEqual(f'2\x20', '2 ') - self.assertEqual(f'2\x203', '2 3') - self.assertEqual(f'\x203', ' 3') + self.assertEqual(f"\t", "\t") + self.assertEqual(r"\t", "\\t") + self.assertEqual(rf"\t", "\\t") + self.assertEqual(f"{2}\t", "2\t") + self.assertEqual(f"{2}\t{3}", "2\t3") + self.assertEqual(f"\t{3}", "\t3") + + self.assertEqual(f"\u0394", "\u0394") + self.assertEqual(r"\u0394", "\\u0394") + self.assertEqual(rf"\u0394", "\\u0394") + self.assertEqual(f"{2}\u0394", "2\u0394") + self.assertEqual(f"{2}\u0394{3}", "2\u03943") + self.assertEqual(f"\u0394{3}", "\u03943") + + self.assertEqual(f"\U00000394", "\u0394") + self.assertEqual(r"\U00000394", "\\U00000394") + self.assertEqual(rf"\U00000394", "\\U00000394") + self.assertEqual(f"{2}\U00000394", "2\u0394") + self.assertEqual(f"{2}\U00000394{3}", "2\u03943") + self.assertEqual(f"\U00000394{3}", "\u03943") + + self.assertEqual(f"\N{GREEK CAPITAL LETTER DELTA}", "\u0394") + 
self.assertEqual(f"{2}\N{GREEK CAPITAL LETTER DELTA}", "2\u0394") + self.assertEqual(f"{2}\N{GREEK CAPITAL LETTER DELTA}{3}", "2\u03943") + self.assertEqual(f"\N{GREEK CAPITAL LETTER DELTA}{3}", "\u03943") + self.assertEqual(f"2\N{GREEK CAPITAL LETTER DELTA}", "2\u0394") + self.assertEqual(f"2\N{GREEK CAPITAL LETTER DELTA}3", "2\u03943") + self.assertEqual(f"\N{GREEK CAPITAL LETTER DELTA}3", "\u03943") + + self.assertEqual(f"\x20", " ") + self.assertEqual(r"\x20", "\\x20") + self.assertEqual(rf"\x20", "\\x20") + self.assertEqual(f"{2}\x20", "2 ") + self.assertEqual(f"{2}\x20{3}", "2 3") + self.assertEqual(f"\x20{3}", " 3") + + self.assertEqual(f"2\x20", "2 ") + self.assertEqual(f"2\x203", "2 3") + self.assertEqual(f"\x203", " 3") with self.assertWarns(SyntaxWarning): # invalid escape sequence value = eval(r"f'\{6*7}'") - self.assertEqual(value, '\\42') + self.assertEqual(value, "\\42") with self.assertWarns(SyntaxWarning): # invalid escape sequence value = eval(r"f'\g'") - self.assertEqual(value, '\\g') - self.assertEqual(f'\\{6*7}', '\\42') - self.assertEqual(fr'\{6*7}', '\\42') + self.assertEqual(value, "\\g") + self.assertEqual(f"\\{6*7}", "\\42") + self.assertEqual(rf"\{6*7}", "\\42") - AMPERSAND = 'spam' + AMPERSAND = "spam" # Get the right unicode character (&), or pick up local variable # depending on the number of backslashes. - self.assertEqual(f'\N{AMPERSAND}', '&') - self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam') - self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam') - self.assertEqual(f'\\\N{AMPERSAND}', '\\&') + self.assertEqual(f"\N{AMPERSAND}", "&") + self.assertEqual(f"\\N{AMPERSAND}", "\\Nspam") + self.assertEqual(rf"\N{AMPERSAND}", "\\Nspam") + self.assertEqual(f"\\\N{AMPERSAND}", "\\&") def test_misformed_unicode_character_name(self): # These test are needed because unicode names are parsed # differently inside f-strings. - self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape", - [r"f'\N'", - r"f'\N '", - r"f'\N '", # See bpo-46503. - r"f'\N{'", - r"f'\N{GREEK CAPITAL LETTER DELTA'", - - # Here are the non-f-string versions, - # which should give the same errors. - r"'\N'", - r"'\N '", - r"'\N '", - r"'\N{'", - r"'\N{GREEK CAPITAL LETTER DELTA'", - ]) + self.assertAllRaise( + SyntaxError, + r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape", + [ + r"f'\N'", + r"f'\N '", + r"f'\N '", # See bpo-46503. + r"f'\N{'", + r"f'\N{GREEK CAPITAL LETTER DELTA'", + # Here are the non-f-string versions, + # which should give the same errors. 
+ r"'\N'", + r"'\N '", + r"'\N '", + r"'\N{'", + r"'\N{GREEK CAPITAL LETTER DELTA'", + ], + ) def test_backslashes_in_expression_part(self): - self.assertEqual(f"{( + self.assertEqual( + f"{( 1 + 2 - )}", "3") - - self.assertEqual("\N{LEFT CURLY BRACKET}", '{') - self.assertEqual(f'{"\N{LEFT CURLY BRACKET}"}', '{') - self.assertEqual(rf'{"\N{LEFT CURLY BRACKET}"}', '{') - - self.assertAllRaise(SyntaxError, - "f-string: valid expression required before '}'", - ["f'{\n}'", - ]) + )}", + "3", + ) + + self.assertEqual("\N{LEFT CURLY BRACKET}", "{") + self.assertEqual(f'{"\N{LEFT CURLY BRACKET}"}', "{") + self.assertEqual(rf'{"\N{LEFT CURLY BRACKET}"}', "{") + + self.assertAllRaise( + SyntaxError, + "f-string: valid expression required before '}'", + [ + "f'{\n}'", + ], + ) def test_invalid_backslashes_inside_fstring_context(self): # All of these variations are invalid python syntax, @@ -990,23 +1090,27 @@ def test_invalid_backslashes_inside_fstring_context(self): r"\\"[0], ] ] - self.assertAllRaise(SyntaxError, 'unexpected character after line continuation', - cases) + self.assertAllRaise( + SyntaxError, "unexpected character after line continuation", cases + ) def test_no_escapes_for_braces(self): """ Only literal curly braces begin an expression. """ # \x7b is '{'. - self.assertEqual(f'\x7b1+1}}', '{1+1}') - self.assertEqual(f'\x7b1+1', '{1+1') - self.assertEqual(f'\u007b1+1', '{1+1') - self.assertEqual(f'\N{LEFT CURLY BRACKET}1+1\N{RIGHT CURLY BRACKET}', '{1+1}') + self.assertEqual(f"\x7b1+1}}", "{1+1}") + self.assertEqual(f"\x7b1+1", "{1+1") + self.assertEqual(f"\u007b1+1", "{1+1") + self.assertEqual(f"\N{LEFT CURLY BRACKET}1+1\N{RIGHT CURLY BRACKET}", "{1+1}") def test_newlines_in_expressions(self): - self.assertEqual(f'{0}', '0') - self.assertEqual(rf'''{3+ -4}''', '7') + self.assertEqual(f"{0}", "0") + self.assertEqual( + rf"""{3+ +4}""", + "7", + ) def test_lambda(self): x = 5 @@ -1017,57 +1121,61 @@ def test_lambda(self): # lambda doesn't work without parens, because the colon # makes the parser think it's a format_spec # emit warning if we can match a format_spec - self.assertAllRaise(SyntaxError, - "f-string: lambda expressions are not allowed " - "without parentheses", - ["f'{lambda x:x}'", - "f'{lambda :x}'", - "f'{lambda *arg, :x}'", - "f'{1, lambda:x}'", - "f'{lambda x:}'", - "f'{lambda :}'", - ]) + self.assertAllRaise( + SyntaxError, + "f-string: lambda expressions are not allowed " "without parentheses", + [ + "f'{lambda x:x}'", + "f'{lambda :x}'", + "f'{lambda *arg, :x}'", + "f'{1, lambda:x}'", + "f'{lambda x:}'", + "f'{lambda :}'", + ], + ) # Ensure the detection of invalid lambdas doesn't trigger detection # for valid lambdas in the second error pass with self.assertRaisesRegex(SyntaxError, "invalid syntax"): compile("lambda name_3=f'{name_4}': {name_3}\n1 $ 1", "", "exec") # but don't emit the paren warning in general cases - with self.assertRaisesRegex(SyntaxError, "f-string: expecting a valid expression after '{'"): + with self.assertRaisesRegex( + SyntaxError, "f-string: expecting a valid expression after '{'" + ): eval("f'{+ lambda:None}'") def test_valid_prefixes(self): - self.assertEqual(F'{1}', "1") - self.assertEqual(FR'{2}', "2") - self.assertEqual(fR'{3}', "3") + self.assertEqual(f"{1}", "1") + self.assertEqual(Rf"{2}", "2") + self.assertEqual(Rf"{3}", "3") def test_roundtrip_raw_quotes(self): - self.assertEqual(fr"\'", "\\'") - self.assertEqual(fr'\"', '\\"') - self.assertEqual(fr'\"\'', '\\"\\\'') - self.assertEqual(fr'\'\"', '\\\'\\"') - 
self.assertEqual(fr'\"\'\"', '\\"\\\'\\"') - self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'') - self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'') + self.assertEqual(rf"\'", "\\'") + self.assertEqual(rf"\"", '\\"') + self.assertEqual(rf"\"\'", "\\\"\\'") + self.assertEqual(rf"\'\"", "\\'\\\"") + self.assertEqual(rf"\"\'\"", '\\"\\\'\\"') + self.assertEqual(rf"\'\"\'", "\\'\\\"\\'") + self.assertEqual(rf"\"\'\"\'", "\\\"\\'\\\"\\'") def test_fstring_backslash_before_double_bracket(self): deprecated_cases = [ - (r"f'\{{\}}'", '\\{\\}'), - (r"f'\{{'", '\\{'), - (r"f'\{{{1+1}'", '\\{2'), - (r"f'\}}{1+1}'", '\\}2'), - (r"f'{1+1}\}}'", '2\\}') + (r"f'\{{\}}'", "\\{\\}"), + (r"f'\{{'", "\\{"), + (r"f'\{{{1+1}'", "\\{2"), + (r"f'\}}{1+1}'", "\\}2"), + (r"f'{1+1}\}}'", "2\\}"), ] for case, expected_result in deprecated_cases: with self.subTest(case=case, expected_result=expected_result): with self.assertWarns(SyntaxWarning): result = eval(case) self.assertEqual(result, expected_result) - self.assertEqual(fr'\{{\}}', '\\{\\}') - self.assertEqual(fr'\{{', '\\{') - self.assertEqual(fr'\{{{1+1}', '\\{2') - self.assertEqual(fr'\}}{1+1}', '\\}2') - self.assertEqual(fr'{1+1}\}}', '2\\}') + self.assertEqual(rf"\{{\}}", "\\{\\}") + self.assertEqual(rf"\{{", "\\{") + self.assertEqual(rf"\{{{1+1}", "\\{2") + self.assertEqual(rf"\}}{1+1}", "\\}2") + self.assertEqual(rf"{1+1}\}}", "2\\}") def test_fstring_backslash_before_double_bracket_warns_once(self): with self.assertWarns(SyntaxWarning) as w: @@ -1076,18 +1184,18 @@ def test_fstring_backslash_before_double_bracket_warns_once(self): self.assertEqual(w.warnings[0].category, SyntaxWarning) def test_fstring_backslash_prefix_raw(self): - self.assertEqual(f'\\', '\\') - self.assertEqual(f'\\\\', '\\\\') - self.assertEqual(fr'\\', r'\\') - self.assertEqual(fr'\\\\', r'\\\\') - self.assertEqual(rf'\\', r'\\') - self.assertEqual(rf'\\\\', r'\\\\') - self.assertEqual(Rf'\\', R'\\') - self.assertEqual(Rf'\\\\', R'\\\\') - self.assertEqual(fR'\\', R'\\') - self.assertEqual(fR'\\\\', R'\\\\') - self.assertEqual(FR'\\', R'\\') - self.assertEqual(FR'\\\\', R'\\\\') + self.assertEqual(f"\\", "\\") + self.assertEqual(f"\\\\", "\\\\") + self.assertEqual(rf"\\", r"\\") + self.assertEqual(rf"\\\\", r"\\\\") + self.assertEqual(rf"\\", r"\\") + self.assertEqual(rf"\\\\", r"\\\\") + self.assertEqual(Rf"\\", R"\\") + self.assertEqual(Rf"\\\\", R"\\\\") + self.assertEqual(Rf"\\", R"\\") + self.assertEqual(Rf"\\\\", R"\\\\") + self.assertEqual(Rf"\\", R"\\") + self.assertEqual(Rf"\\\\", R"\\\\") def test_fstring_format_spec_greedy_matching(self): self.assertEqual(f"{1:}}}", "1}") @@ -1097,8 +1205,8 @@ def test_yield(self): # Not terribly useful, but make sure the yield turns # a function into a generator def fn(y): - f'y:{yield y*2}' - f'{yield}' + f"y:{yield y*2}" + f"{yield}" g = fn(4) self.assertEqual(next(g), 8) @@ -1106,15 +1214,15 @@ def fn(y): def test_yield_send(self): def fn(x): - yield f'x:{yield (lambda i: x * i)}' + yield f"x:{yield (lambda i: x * i)}" g = fn(10) the_lambda = next(g) self.assertEqual(the_lambda(4), 40) - self.assertEqual(g.send('string'), 'x:string') + self.assertEqual(g.send("string"), "x:string") def test_expressions_with_triple_quoted_strings(self): - self.assertEqual(f"{'''x'''}", 'x') + self.assertEqual(f"{'''x'''}", "x") self.assertEqual(f"{'''eric's'''}", "eric's") # Test concatenation within an expression @@ -1127,263 +1235,302 @@ def test_expressions_with_triple_quoted_strings(self): def test_multiple_vars(self): x = 98 - y = 'abc' - 
self.assertEqual(f'{x}{y}', '98abc') + y = "abc" + self.assertEqual(f"{x}{y}", "98abc") - self.assertEqual(f'X{x}{y}', 'X98abc') - self.assertEqual(f'{x}X{y}', '98Xabc') - self.assertEqual(f'{x}{y}X', '98abcX') + self.assertEqual(f"X{x}{y}", "X98abc") + self.assertEqual(f"{x}X{y}", "98Xabc") + self.assertEqual(f"{x}{y}X", "98abcX") - self.assertEqual(f'X{x}Y{y}', 'X98Yabc') - self.assertEqual(f'X{x}{y}Y', 'X98abcY') - self.assertEqual(f'{x}X{y}Y', '98XabcY') + self.assertEqual(f"X{x}Y{y}", "X98Yabc") + self.assertEqual(f"X{x}{y}Y", "X98abcY") + self.assertEqual(f"{x}X{y}Y", "98XabcY") - self.assertEqual(f'X{x}Y{y}Z', 'X98YabcZ') + self.assertEqual(f"X{x}Y{y}Z", "X98YabcZ") def test_closure(self): def outer(x): def inner(): - return f'x:{x}' + return f"x:{x}" + return inner - self.assertEqual(outer('987')(), 'x:987') - self.assertEqual(outer(7)(), 'x:7') + self.assertEqual(outer("987")(), "x:987") + self.assertEqual(outer(7)(), "x:7") def test_arguments(self): y = 2 + def f(x, width): - return f'x={x*y:{width}}' + return f"x={x*y:{width}}" - self.assertEqual(f('foo', 10), 'x=foofoo ') - x = 'bar' - self.assertEqual(f(10, 10), 'x= 20') + self.assertEqual(f("foo", 10), "x=foofoo ") + x = "bar" + self.assertEqual(f(10, 10), "x= 20") def test_locals(self): value = 123 - self.assertEqual(f'v:{value}', 'v:123') + self.assertEqual(f"v:{value}", "v:123") def test_missing_variable(self): with self.assertRaises(NameError): - f'v:{value}' + f"v:{value}" def test_missing_format_spec(self): class O: def __format__(self, spec): if not spec: - return '*' + return "*" return spec - self.assertEqual(f'{O():x}', 'x') - self.assertEqual(f'{O()}', '*') - self.assertEqual(f'{O():}', '*') + self.assertEqual(f"{O():x}", "x") + self.assertEqual(f"{O()}", "*") + self.assertEqual(f"{O():}", "*") - self.assertEqual(f'{3:}', '3') - self.assertEqual(f'{3!s:}', '3') + self.assertEqual(f"{3:}", "3") + self.assertEqual(f"{3!s:}", "3") def test_global(self): - self.assertEqual(f'g:{a_global}', 'g:global variable') - self.assertEqual(f'g:{a_global!r}', "g:'global variable'") + self.assertEqual(f"g:{a_global}", "g:global variable") + self.assertEqual(f"g:{a_global!r}", "g:'global variable'") - a_local = 'local variable' - self.assertEqual(f'g:{a_global} l:{a_local}', - 'g:global variable l:local variable') - self.assertEqual(f'g:{a_global!r}', - "g:'global variable'") - self.assertEqual(f'g:{a_global} l:{a_local!r}', - "g:global variable l:'local variable'") + a_local = "local variable" + self.assertEqual( + f"g:{a_global} l:{a_local}", "g:global variable l:local variable" + ) + self.assertEqual(f"g:{a_global!r}", "g:'global variable'") + self.assertEqual( + f"g:{a_global} l:{a_local!r}", "g:global variable l:'local variable'" + ) - self.assertIn("module 'unittest' from", f'{unittest}') + self.assertIn("module 'unittest' from", f"{unittest}") def test_shadowed_global(self): - a_global = 'really a local' - self.assertEqual(f'g:{a_global}', 'g:really a local') - self.assertEqual(f'g:{a_global!r}', "g:'really a local'") - - a_local = 'local variable' - self.assertEqual(f'g:{a_global} l:{a_local}', - 'g:really a local l:local variable') - self.assertEqual(f'g:{a_global!r}', - "g:'really a local'") - self.assertEqual(f'g:{a_global} l:{a_local!r}', - "g:really a local l:'local variable'") + a_global = "really a local" + self.assertEqual(f"g:{a_global}", "g:really a local") + self.assertEqual(f"g:{a_global!r}", "g:'really a local'") + + a_local = "local variable" + self.assertEqual( + f"g:{a_global} l:{a_local}", "g:really a local 
l:local variable" + ) + self.assertEqual(f"g:{a_global!r}", "g:'really a local'") + self.assertEqual( + f"g:{a_global} l:{a_local!r}", "g:really a local l:'local variable'" + ) def test_call(self): def foo(x): - return 'x=' + str(x) + return "x=" + str(x) - self.assertEqual(f'{foo(10)}', 'x=10') + self.assertEqual(f"{foo(10)}", "x=10") def test_nested_fstrings(self): y = 5 - self.assertEqual(f'{f"{0}"*3}', '000') - self.assertEqual(f'{f"{y}"*3}', '555') + self.assertEqual(f'{f"{0}"*3}', "000") + self.assertEqual(f'{f"{y}"*3}', "555") def test_invalid_string_prefixes(self): - single_quote_cases = ["fu''", - "uf''", - "Fu''", - "fU''", - "Uf''", - "uF''", - "ufr''", - "urf''", - "fur''", - "fru''", - "rfu''", - "ruf''", - "FUR''", - "Fur''", - "fb''", - "fB''", - "Fb''", - "FB''", - "bf''", - "bF''", - "Bf''", - "BF''",] + single_quote_cases = [ + "fu''", + "uf''", + "Fu''", + "fU''", + "Uf''", + "uF''", + "ufr''", + "urf''", + "fur''", + "fru''", + "rfu''", + "ruf''", + "FUR''", + "Fur''", + "fb''", + "fB''", + "Fb''", + "FB''", + "bf''", + "bF''", + "Bf''", + "BF''", + ] double_quote_cases = [case.replace("'", '"') for case in single_quote_cases] - self.assertAllRaise(SyntaxError, 'invalid syntax', - single_quote_cases + double_quote_cases) + self.assertAllRaise( + SyntaxError, "invalid syntax", single_quote_cases + double_quote_cases + ) def test_leading_trailing_spaces(self): - self.assertEqual(f'{ 3}', '3') - self.assertEqual(f'{ 3}', '3') - self.assertEqual(f'{3 }', '3') - self.assertEqual(f'{3 }', '3') + self.assertEqual(f"{ 3}", "3") + self.assertEqual(f"{ 3}", "3") + self.assertEqual(f"{3 }", "3") + self.assertEqual(f"{3 }", "3") - self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]}}', - 'expr={1: 2}') - self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]} }', - 'expr={1: 2}') + self.assertEqual(f"expr={ {x: y for x, y in [(1, 2), ]}}", "expr={1: 2}") + self.assertEqual(f"expr={ {x: y for x, y in [(1, 2), ]} }", "expr={1: 2}") def test_not_equal(self): # There's a special test for this because there's a special # case in the f-string parser to look for != as not ending an # expression. Normally it would, while looking for !s or !r. - self.assertEqual(f'{3!=4}', 'True') - self.assertEqual(f'{3!=4:}', 'True') - self.assertEqual(f'{3!=4!s}', 'True') - self.assertEqual(f'{3!=4!s:.3}', 'Tru') + self.assertEqual(f"{3!=4}", "True") + self.assertEqual(f"{3!=4:}", "True") + self.assertEqual(f"{3!=4!s}", "True") + self.assertEqual(f"{3!=4!s:.3}", "Tru") def test_equal_equal(self): # Because an expression ending in = has special meaning, # there's a special test for ==. Make sure it works. - self.assertEqual(f'{0==1}', 'False') + self.assertEqual(f"{0==1}", "False") def test_conversions(self): - self.assertEqual(f'{3.14:10.10}', ' 3.14') - self.assertEqual(f'{3.14!s:10.10}', '3.14 ') - self.assertEqual(f'{3.14!r:10.10}', '3.14 ') - self.assertEqual(f'{3.14!a:10.10}', '3.14 ') + self.assertEqual(f"{3.14:10.10}", " 3.14") + self.assertEqual(f"{3.14!s:10.10}", "3.14 ") + self.assertEqual(f"{3.14!r:10.10}", "3.14 ") + self.assertEqual(f"{3.14!a:10.10}", "3.14 ") - self.assertEqual(f'{"a"}', 'a') + self.assertEqual(f'{"a"}', "a") self.assertEqual(f'{"a"!r}', "'a'") self.assertEqual(f'{"a"!a}', "'a'") # Conversions can have trailing whitespace after them since it # does not provide any significance self.assertEqual(f"{3!s }", "3") - self.assertEqual(f'{3.14!s :10.10}', '3.14 ') + self.assertEqual(f"{3.14!s :10.10}", "3.14 ") # Not a conversion. 
self.assertEqual(f'{"a!r"}', "a!r") # Not a conversion, but show that ! is allowed in a format spec. - self.assertEqual(f'{3.14:!<10.10}', '3.14!!!!!!') - - self.assertAllRaise(SyntaxError, "f-string: expecting '}'", - ["f'{3!'", - "f'{3!s'", - "f'{3!g'", - ]) - - self.assertAllRaise(SyntaxError, 'f-string: missing conversion character', - ["f'{3!}'", - "f'{3!:'", - "f'{3!:}'", - ]) - - for conv_identifier in 'g', 'A', 'G', 'ä', 'ɐ': - self.assertAllRaise(SyntaxError, - "f-string: invalid conversion character %r: " - "expected 's', 'r', or 'a'" % conv_identifier, - ["f'{3!" + conv_identifier + "}'"]) - - for conv_non_identifier in '3', '!': - self.assertAllRaise(SyntaxError, - "f-string: invalid conversion character", - ["f'{3!" + conv_non_identifier + "}'"]) - - for conv in ' s', ' s ': - self.assertAllRaise(SyntaxError, - "f-string: conversion type must come right after the" - " exclamanation mark", - ["f'{3!" + conv + "}'"]) - - self.assertAllRaise(SyntaxError, - "f-string: invalid conversion character 'ss': " - "expected 's', 'r', or 'a'", - ["f'{3!ss}'", - "f'{3!ss:}'", - "f'{3!ss:s}'", - ]) + self.assertEqual(f"{3.14:!<10.10}", "3.14!!!!!!") + + self.assertAllRaise( + SyntaxError, + "f-string: expecting '}'", + [ + "f'{3!'", + "f'{3!s'", + "f'{3!g'", + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: missing conversion character", + [ + "f'{3!}'", + "f'{3!:'", + "f'{3!:}'", + ], + ) + + for conv_identifier in "g", "A", "G", "ä", "ɐ": + self.assertAllRaise( + SyntaxError, + "f-string: invalid conversion character %r: " + "expected 's', 'r', or 'a'" % conv_identifier, + ["f'{3!" + conv_identifier + "}'"], + ) + + for conv_non_identifier in "3", "!": + self.assertAllRaise( + SyntaxError, + "f-string: invalid conversion character", + ["f'{3!" + conv_non_identifier + "}'"], + ) + + for conv in " s", " s ": + self.assertAllRaise( + SyntaxError, + "f-string: conversion type must come right after the" + " exclamanation mark", + ["f'{3!" + conv + "}'"], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: invalid conversion character 'ss': " "expected 's', 'r', or 'a'", + [ + "f'{3!ss}'", + "f'{3!ss:}'", + "f'{3!ss:s}'", + ], + ) def test_assignment(self): - self.assertAllRaise(SyntaxError, r'invalid syntax', - ["f'' = 3", - "f'{0}' = x", - "f'{x}' = x", - ]) + self.assertAllRaise( + SyntaxError, + r"invalid syntax", + [ + "f'' = 3", + "f'{0}' = x", + "f'{x}' = x", + ], + ) def test_del(self): - self.assertAllRaise(SyntaxError, 'invalid syntax', - ["del f''", - "del '' f''", - ]) + self.assertAllRaise( + SyntaxError, + "invalid syntax", + [ + "del f''", + "del '' f''", + ], + ) def test_mismatched_braces(self): - self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed", - ["f'{{}'", - "f'{{}}}'", - "f'}'", - "f'x}'", - "f'x}x'", - r"f'\u007b}'", - - # Can't have { or } in a format spec. - "f'{3:}>10}'", - "f'{3:}}>10}'", - ]) - - self.assertAllRaise(SyntaxError, "f-string: expecting '}'", - ["f'{3'", - "f'{3!'", - "f'{3:'", - "f'{3!s'", - "f'{3!s:'", - "f'{3!s:3'", - "f'x{'", - "f'x{x'", - "f'{x'", - "f'{3:s'", - "f'{{{'", - "f'{{}}{'", - "f'{'", - "f'{i='", # See gh-93418. - ]) - - self.assertAllRaise(SyntaxError, - "f-string: expecting a valid expression after '{'", - ["f'{3:{{>10}'", - ]) + self.assertAllRaise( + SyntaxError, + "f-string: single '}' is not allowed", + [ + "f'{{}'", + "f'{{}}}'", + "f'}'", + "f'x}'", + "f'x}x'", + r"f'\u007b}'", + # Can't have { or } in a format spec. 
+ "f'{3:}>10}'", + "f'{3:}}>10}'", + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: expecting '}'", + [ + "f'{3'", + "f'{3!'", + "f'{3:'", + "f'{3!s'", + "f'{3!s:'", + "f'{3!s:3'", + "f'x{'", + "f'x{x'", + "f'{x'", + "f'{3:s'", + "f'{{{'", + "f'{{}}{'", + "f'{'", + "f'{i='", # See gh-93418. + ], + ) + + self.assertAllRaise( + SyntaxError, + "f-string: expecting a valid expression after '{'", + [ + "f'{3:{{>10}'", + ], + ) # But these are just normal strings. - self.assertEqual(f'{"{"}', '{') - self.assertEqual(f'{"}"}', '}') - self.assertEqual(f'{3:{"}"}>10}', '}}}}}}}}}3') - self.assertEqual(f'{2:{"{"}>10}', '{{{{{{{{{2') + self.assertEqual(f'{"{"}', "{") + self.assertEqual(f'{"}"}', "}") + self.assertEqual(f'{3:{"}"}>10}', "}}}}}}}}}3") + self.assertEqual(f'{2:{"{"}>10}', "{{{{{{{{{2") def test_if_conditional(self): # There's special logic in compile.c to test if the @@ -1392,7 +1539,7 @@ def test_if_conditional(self): def test_fstring(x, expected): flag = 0 - if f'{x}': + if f"{x}": flag = 1 else: flag = 2 @@ -1400,7 +1547,7 @@ def test_fstring(x, expected): def test_concat_empty(x, expected): flag = 0 - if '' f'{x}': + if "" f"{x}": flag = 1 else: flag = 2 @@ -1408,137 +1555,149 @@ def test_concat_empty(x, expected): def test_concat_non_empty(x, expected): flag = 0 - if ' ' f'{x}': + if " " f"{x}": flag = 1 else: flag = 2 self.assertEqual(flag, expected) - test_fstring('', 2) - test_fstring(' ', 1) + test_fstring("", 2) + test_fstring(" ", 1) - test_concat_empty('', 2) - test_concat_empty(' ', 1) + test_concat_empty("", 2) + test_concat_empty(" ", 1) - test_concat_non_empty('', 1) - test_concat_non_empty(' ', 1) + test_concat_non_empty("", 1) + test_concat_non_empty(" ", 1) def test_empty_format_specifier(self): - x = 'test' - self.assertEqual(f'{x}', 'test') - self.assertEqual(f'{x:}', 'test') - self.assertEqual(f'{x!s:}', 'test') - self.assertEqual(f'{x!r:}', "'test'") + x = "test" + self.assertEqual(f"{x}", "test") + self.assertEqual(f"{x:}", "test") + self.assertEqual(f"{x!s:}", "test") + self.assertEqual(f"{x!r:}", "'test'") def test_str_format_differences(self): - d = {'a': 'string', - 0: 'integer', - } + d = { + "a": "string", + 0: "integer", + } a = 0 - self.assertEqual(f'{d[0]}', 'integer') - self.assertEqual(f'{d["a"]}', 'string') - self.assertEqual(f'{d[a]}', 'integer') - self.assertEqual('{d[a]}'.format(d=d), 'string') - self.assertEqual('{d[0]}'.format(d=d), 'integer') + self.assertEqual(f"{d[0]}", "integer") + self.assertEqual(f'{d["a"]}', "string") + self.assertEqual(f"{d[a]}", "integer") + self.assertEqual("{d[a]}".format(d=d), "string") + self.assertEqual("{d[0]}".format(d=d), "integer") def test_errors(self): # see issue 26287 - self.assertAllRaise(TypeError, 'unsupported', - [r"f'{(lambda: 0):x}'", - r"f'{(0,):x}'", - ]) - self.assertAllRaise(ValueError, 'Unknown format code', - [r"f'{1000:j}'", - r"f'{1000:j}'", - ]) + self.assertAllRaise( + TypeError, + "unsupported", + [ + r"f'{(lambda: 0):x}'", + r"f'{(0,):x}'", + ], + ) + self.assertAllRaise( + ValueError, + "Unknown format code", + [ + r"f'{1000:j}'", + r"f'{1000:j}'", + ], + ) def test_filename_in_syntaxerror(self): # see issue 38964 with temp_cwd() as cwd: - file_path = os.path.join(cwd, 't.py') - with open(file_path, 'w', encoding="utf-8") as f: - f.write('f"{a b}"') # This generates a SyntaxError - _, _, stderr = assert_python_failure(file_path, - PYTHONIOENCODING='ascii') - self.assertIn(file_path.encode('ascii', 'backslashreplace'), stderr) + file_path = os.path.join(cwd, "t.py") + with 
open(file_path, "w", encoding="utf-8") as f: + f.write('f"{a b}"') # This generates a SyntaxError + _, _, stderr = assert_python_failure(file_path, PYTHONIOENCODING="ascii") + self.assertIn(file_path.encode("ascii", "backslashreplace"), stderr) def test_loop(self): for i in range(1000): - self.assertEqual(f'i:{i}', 'i:' + str(i)) + self.assertEqual(f"i:{i}", "i:" + str(i)) def test_dict(self): - d = {'"': 'dquote', - "'": 'squote', - 'foo': 'bar', - } - self.assertEqual(f'''{d["'"]}''', 'squote') - self.assertEqual(f"""{d['"']}""", 'dquote') + d = { + '"': "dquote", + "'": "squote", + "foo": "bar", + } + self.assertEqual(f"""{d["'"]}""", "squote") + self.assertEqual(f"""{d['"']}""", "dquote") - self.assertEqual(f'{d["foo"]}', 'bar') - self.assertEqual(f"{d['foo']}", 'bar') + self.assertEqual(f'{d["foo"]}', "bar") + self.assertEqual(f"{d['foo']}", "bar") def test_backslash_char(self): # Check eval of a backslash followed by a control char. # See bpo-30682: this used to raise an assert in pydebug mode. - self.assertEqual(eval('f"\\\n"'), '') - self.assertEqual(eval('f"\\\r"'), '') + self.assertEqual(eval('f"\\\n"'), "") + self.assertEqual(eval('f"\\\r"'), "") def test_debug_conversion(self): - x = 'A string' - self.assertEqual(f'{x=}', 'x=' + repr(x)) - self.assertEqual(f'{x =}', 'x =' + repr(x)) - self.assertEqual(f'{x=!s}', 'x=' + str(x)) - self.assertEqual(f'{x=!r}', 'x=' + repr(x)) - self.assertEqual(f'{x=!a}', 'x=' + ascii(x)) + x = "A string" + self.assertEqual(f"{x=}", "x=" + repr(x)) + self.assertEqual(f"{x =}", "x =" + repr(x)) + self.assertEqual(f"{x=!s}", "x=" + str(x)) + self.assertEqual(f"{x=!r}", "x=" + repr(x)) + self.assertEqual(f"{x=!a}", "x=" + ascii(x)) x = 2.71828 - self.assertEqual(f'{x=:.2f}', 'x=' + format(x, '.2f')) - self.assertEqual(f'{x=:}', 'x=' + format(x, '')) - self.assertEqual(f'{x=!r:^20}', 'x=' + format(repr(x), '^20')) - self.assertEqual(f'{x=!s:^20}', 'x=' + format(str(x), '^20')) - self.assertEqual(f'{x=!a:^20}', 'x=' + format(ascii(x), '^20')) + self.assertEqual(f"{x=:.2f}", "x=" + format(x, ".2f")) + self.assertEqual(f"{x=:}", "x=" + format(x, "")) + self.assertEqual(f"{x=!r:^20}", "x=" + format(repr(x), "^20")) + self.assertEqual(f"{x=!s:^20}", "x=" + format(str(x), "^20")) + self.assertEqual(f"{x=!a:^20}", "x=" + format(ascii(x), "^20")) x = 9 - self.assertEqual(f'{3*x+15=}', '3*x+15=42') + self.assertEqual(f"{3*x+15=}", "3*x+15=42") # There is code in ast.c that deals with non-ascii expression values. So, # use a unicode identifier to trigger that. tenπ = 31.4 - self.assertEqual(f'{tenπ=:.2f}', 'tenπ=31.40') + self.assertEqual(f"{tenπ=:.2f}", "tenπ=31.40") # Also test with Unicode in non-identifiers. - self.assertEqual(f'{"Σ"=}', '"Σ"=\'Σ\'') + self.assertEqual(f'{"Σ"=}', "\"Σ\"='Σ'") # Make sure nested fstrings still work. - self.assertEqual(f'{f"{3.1415=:.1f}":*^20}', '*****3.1415=3.1*****') + self.assertEqual(f'{f"{3.1415=:.1f}":*^20}', "*****3.1415=3.1*****") # Make sure text before and after an expression with = works # correctly. - pi = 'π' - self.assertEqual(f'alpha α {pi=} ω omega', "alpha α pi='π' ω omega") + pi = "π" + self.assertEqual(f"alpha α {pi=} ω omega", "alpha α pi='π' ω omega") # Check multi-line expressions. - self.assertEqual(f'''{ + self.assertEqual( + f"""{ 3 -=}''', '\n3\n=3') +=}""", + "\n3\n=3", + ) # Since = is handled specially, make sure all existing uses of # it still work. 
- self.assertEqual(f'{0==1}', 'False') - self.assertEqual(f'{0!=1}', 'True') - self.assertEqual(f'{0<=1}', 'True') - self.assertEqual(f'{0>=1}', 'False') - self.assertEqual(f'{(x:="5")}', '5') - self.assertEqual(x, '5') - self.assertEqual(f'{(x:=5)}', '5') + self.assertEqual(f"{0==1}", "False") + self.assertEqual(f"{0!=1}", "True") + self.assertEqual(f"{0<=1}", "True") + self.assertEqual(f"{0>=1}", "False") + self.assertEqual(f'{(x:="5")}', "5") + self.assertEqual(x, "5") + self.assertEqual(f"{(x:=5)}", "5") self.assertEqual(x, 5) - self.assertEqual(f'{"="}', '=') + self.assertEqual(f'{"="}', "=") x = 20 # This isn't an assignment expression, it's 'x', with a format # spec of '=10'. See test_walrus: you need to use parens. - self.assertEqual(f'{x:=10}', ' 20') + self.assertEqual(f"{x:=10}", " 20") # Test named function parameters, to make sure '=' parsing works # there. @@ -1547,40 +1706,53 @@ def f(a): oldx = x x = a return oldx + x = 0 - self.assertEqual(f'{f(a="3=")}', '0') - self.assertEqual(x, '3=') - self.assertEqual(f'{f(a=4)}', '3=') + self.assertEqual(f'{f(a="3=")}', "0") + self.assertEqual(x, "3=") + self.assertEqual(f"{f(a=4)}", "3=") self.assertEqual(x, 4) + # Check debug expressions in format spec + y = 20 + self.assertEqual(f"{2:{y=}}", "yyyyyyyyyyyyyyyyyyy2") + self.assertEqual( + f"{datetime.datetime.now():h1{y=}h2{y=}h3{y=}}", "h1y=20h2y=20h3y=20" + ) + # Make sure __format__ is being called. class C: def __format__(self, s): - return f'FORMAT-{s}' + return f"FORMAT-{s}" + def __repr__(self): - return 'REPR' + return "REPR" - self.assertEqual(f'{C()=}', 'C()=REPR') - self.assertEqual(f'{C()=!r}', 'C()=REPR') - self.assertEqual(f'{C()=:}', 'C()=FORMAT-') - self.assertEqual(f'{C()=: }', 'C()=FORMAT- ') - self.assertEqual(f'{C()=:x}', 'C()=FORMAT-x') - self.assertEqual(f'{C()=!r:*^20}', 'C()=********REPR********') + self.assertEqual(f"{C()=}", "C()=REPR") + self.assertEqual(f"{C()=!r}", "C()=REPR") + self.assertEqual(f"{C()=:}", "C()=FORMAT-") + self.assertEqual(f"{C()=: }", "C()=FORMAT- ") + self.assertEqual(f"{C()=:x}", "C()=FORMAT-x") + self.assertEqual(f"{C()=!r:*^20}", "C()=********REPR********") + self.assertEqual(f"{C():{20=}}", "FORMAT-20=20") self.assertRaises(SyntaxError, eval, "f'{C=]'") # Make sure leading and following text works. - x = 'foo' - self.assertEqual(f'X{x=}Y', 'Xx='+repr(x)+'Y') + x = "foo" + self.assertEqual(f"X{x=}Y", "Xx=" + repr(x) + "Y") # Make sure whitespace around the = works. - self.assertEqual(f'X{x =}Y', 'Xx ='+repr(x)+'Y') - self.assertEqual(f'X{x= }Y', 'Xx= '+repr(x)+'Y') - self.assertEqual(f'X{x = }Y', 'Xx = '+repr(x)+'Y') + self.assertEqual(f"X{x =}Y", "Xx =" + repr(x) + "Y") + self.assertEqual(f"X{x= }Y", "Xx= " + repr(x) + "Y") + self.assertEqual(f"X{x = }Y", "Xx = " + repr(x) + "Y") self.assertEqual(f"sadsd {1 + 1 = :{1 + 1:1d}f}", "sadsd 1 + 1 = 2.000000") - self.assertEqual(f"{1+2 = # my comment - }", '1+2 = \n 3') + self.assertEqual( + f"{1+2 = # my comment + }", + "1+2 = \n 3", + ) # These next lines contains tabs. Backslash escapes don't # work in f-strings. @@ -1588,56 +1760,59 @@ def __repr__(self): # this will be to dynamically created and exec the f-strings. But # that's such a hassle I'll save it for another day. For now, convert # the tabs to spaces just to shut up patchcheck. 
- #self.assertEqual(f'X{x =}Y', 'Xx\t='+repr(x)+'Y') - #self.assertEqual(f'X{x = }Y', 'Xx\t=\t'+repr(x)+'Y') + # self.assertEqual(f'X{x =}Y', 'Xx\t='+repr(x)+'Y') + # self.assertEqual(f'X{x = }Y', 'Xx\t=\t'+repr(x)+'Y') def test_walrus(self): x = 20 # This isn't an assignment expression, it's 'x', with a format # spec of '=10'. - self.assertEqual(f'{x:=10}', ' 20') + self.assertEqual(f"{x:=10}", " 20") # This is an assignment expression, which requires parens. - self.assertEqual(f'{(x:=10)}', '10') + self.assertEqual(f"{(x:=10)}", "10") self.assertEqual(x, 10) def test_invalid_syntax_error_message(self): - with self.assertRaisesRegex(SyntaxError, - "f-string: expecting '=', or '!', or ':', or '}'"): + with self.assertRaisesRegex( + SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'" + ): compile("f'{a $ b}'", "?", "exec") def test_with_two_commas_in_format_specifier(self): error_msg = re.escape("Cannot specify ',' with ','.") with self.assertRaisesRegex(ValueError, error_msg): - f'{1:,,}' + f"{1:,,}" def test_with_two_underscore_in_format_specifier(self): error_msg = re.escape("Cannot specify '_' with '_'.") with self.assertRaisesRegex(ValueError, error_msg): - f'{1:__}' + f"{1:__}" def test_with_a_commas_and_an_underscore_in_format_specifier(self): error_msg = re.escape("Cannot specify both ',' and '_'.") with self.assertRaisesRegex(ValueError, error_msg): - f'{1:,_}' + f"{1:,_}" def test_with_an_underscore_and_a_comma_in_format_specifier(self): error_msg = re.escape("Cannot specify both ',' and '_'.") with self.assertRaisesRegex(ValueError, error_msg): - f'{1:_,}' + f"{1:_,}" def test_syntax_error_for_starred_expressions(self): with self.assertRaisesRegex(SyntaxError, "can't use starred expression here"): compile("f'{*a}'", "?", "exec") - with self.assertRaisesRegex(SyntaxError, - "f-string: expecting a valid expression after '{'"): + with self.assertRaisesRegex( + SyntaxError, "f-string: expecting a valid expression after '{'" + ): compile("f'{**a}'", "?", "exec") def test_not_closing_quotes(self): self.assertAllRaise(SyntaxError, "unterminated f-string literal", ['f"', "f'"]) - self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal", - ['f"""', "f'''"]) + self.assertAllRaise( + SyntaxError, "unterminated triple-quoted f-string literal", ['f"""', "f'''"] + ) # Ensure that the errors are reported at the correct line number. 
data = '''\ x = 1 + 1 @@ -1653,42 +1828,52 @@ def test_not_closing_quotes(self): except SyntaxError as e: self.assertEqual(e.text, 'z = f"""') self.assertEqual(e.lineno, 3) + def test_syntax_error_after_debug(self): - self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'", - [ - "f'{1=}{;'", - "f'{1=}{+;'", - "f'{1=}{2}{;'", - "f'{1=}{3}{;'", - ]) - self.assertAllRaise(SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'", - [ - "f'{1=}{1;'", - "f'{1=}{1;}'", - ]) + self.assertAllRaise( + SyntaxError, + "f-string: expecting a valid expression after '{'", + [ + "f'{1=}{;'", + "f'{1=}{+;'", + "f'{1=}{2}{;'", + "f'{1=}{3}{;'", + ], + ) + self.assertAllRaise( + SyntaxError, + "f-string: expecting '=', or '!', or ':', or '}'", + [ + "f'{1=}{1;'", + "f'{1=}{1;}'", + ], + ) def test_debug_in_file(self): with temp_cwd(): - script = 'script.py' - with open('script.py', 'w') as f: + script = "script.py" + with open("script.py", "w") as f: f.write(f"""\ print(f'''{{ 3 =}}''')""") _, stdout, _ = assert_python_ok(script) - self.assertEqual(stdout.decode('utf-8').strip().replace('\r\n', '\n').replace('\r', '\n'), - "3\n=3") + self.assertEqual( + stdout.decode("utf-8").strip().replace("\r\n", "\n").replace("\r", "\n"), + "3\n=3", + ) def test_syntax_warning_infinite_recursion_in_file(self): with temp_cwd(): - script = 'script.py' - with open(script, 'w') as f: + script = "script.py" + with open(script, "w") as f: f.write(r"print(f'\{1}')") _, stdout, stderr = assert_python_ok(script) - self.assertIn(rb'\1', stdout) + self.assertIn(rb"\1", stdout) self.assertEqual(len(stderr.strip().splitlines()), 2) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst new file mode 100644 index 00000000000000..7084f0cbebbb73 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst @@ -0,0 +1,2 @@ +Fix f-strings with debug expressions in format specifiers. Patch by Pablo +Galindo diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index e68a9cac25908c..78b306afa6c19f 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1,1680 +1,1599 @@ #include #include "pegen.h" -#include "tokenizer.h" +#include "pycore_runtime.h" // _PyRuntime #include "string_parser.h" -#include "pycore_runtime.h" // _PyRuntime +#include "tokenizer.h" -void * -_PyPegen_dummy_name(Parser *p, ...) -{ - return &_PyRuntime.parser.dummy_name; +void *_PyPegen_dummy_name(Parser *p, ...) 
{ + return &_PyRuntime.parser.dummy_name; } /* Creates a single-element asdl_seq* that contains a */ -asdl_seq * -_PyPegen_singleton_seq(Parser *p, void *a) -{ - assert(a != NULL); - asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena); - if (!seq) { - return NULL; - } - asdl_seq_SET_UNTYPED(seq, 0, a); - return seq; +asdl_seq *_PyPegen_singleton_seq(Parser *p, void *a) { + assert(a != NULL); + asdl_seq *seq = (asdl_seq *)_Py_asdl_generic_seq_new(1, p->arena); + if (!seq) { + return NULL; + } + asdl_seq_SET_UNTYPED(seq, 0, a); + return seq; } /* Creates a copy of seq and prepends a to it */ -asdl_seq * -_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq) -{ - assert(a != NULL); - if (!seq) { - return _PyPegen_singleton_seq(p, a); - } +asdl_seq *_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq) { + assert(a != NULL); + if (!seq) { + return _PyPegen_singleton_seq(p, a); + } - asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); - if (!new_seq) { - return NULL; - } + asdl_seq *new_seq = + (asdl_seq *)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); + if (!new_seq) { + return NULL; + } - asdl_seq_SET_UNTYPED(new_seq, 0, a); - for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) { - asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1)); - } - return new_seq; + asdl_seq_SET_UNTYPED(new_seq, 0, a); + for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) { + asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1)); + } + return new_seq; } /* Creates a copy of seq and appends a to it */ -asdl_seq * -_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a) -{ - assert(a != NULL); - if (!seq) { - return _PyPegen_singleton_seq(p, a); - } - - asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); - if (!new_seq) { - return NULL; - } - - for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) { - asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i)); - } - asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a); - return new_seq; -} - -static Py_ssize_t -_get_flattened_seq_size(asdl_seq *seqs) -{ - Py_ssize_t size = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { - asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); - size += asdl_seq_LEN(inner_seq); - } - return size; +asdl_seq *_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a) { + assert(a != NULL); + if (!seq) { + return _PyPegen_singleton_seq(p, a); + } + + asdl_seq *new_seq = + (asdl_seq *)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); + if (!new_seq) { + return NULL; + } + + for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) { + asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i)); + } + asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a); + return new_seq; +} + +static Py_ssize_t _get_flattened_seq_size(asdl_seq *seqs) { + Py_ssize_t size = 0; + for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { + asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); + size += asdl_seq_LEN(inner_seq); + } + return size; } /* Flattens an asdl_seq* of asdl_seq*s */ -asdl_seq * -_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs) -{ - Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs); - assert(flattened_seq_size > 0); - - asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena); - if (!flattened_seq) { - return NULL; - } +asdl_seq 
*_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs) { + Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs); + assert(flattened_seq_size > 0); - int flattened_seq_idx = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { - asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); - for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) { - asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j)); - } + asdl_seq *flattened_seq = + (asdl_seq *)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena); + if (!flattened_seq) { + return NULL; + } + + int flattened_seq_idx = 0; + for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { + asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); + for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) { + asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, + asdl_seq_GET_UNTYPED(inner_seq, j)); } - assert(flattened_seq_idx == flattened_seq_size); + } + assert(flattened_seq_idx == flattened_seq_size); - return flattened_seq; + return flattened_seq; } -void * -_PyPegen_seq_last_item(asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - return asdl_seq_GET_UNTYPED(seq, len - 1); +void *_PyPegen_seq_last_item(asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + return asdl_seq_GET_UNTYPED(seq, len - 1); } -void * -_PyPegen_seq_first_item(asdl_seq *seq) -{ - return asdl_seq_GET_UNTYPED(seq, 0); +void *_PyPegen_seq_first_item(asdl_seq *seq) { + return asdl_seq_GET_UNTYPED(seq, 0); } /* Creates a new name of the form . */ -expr_ty -_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name) -{ - assert(first_name != NULL && second_name != NULL); - PyObject *first_identifier = first_name->v.Name.id; - PyObject *second_identifier = second_name->v.Name.id; - - if (PyUnicode_READY(first_identifier) == -1) { - return NULL; - } - if (PyUnicode_READY(second_identifier) == -1) { - return NULL; - } - const char *first_str = PyUnicode_AsUTF8(first_identifier); - if (!first_str) { - return NULL; - } - const char *second_str = PyUnicode_AsUTF8(second_identifier); - if (!second_str) { - return NULL; - } - Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot - - PyObject *str = PyBytes_FromStringAndSize(NULL, len); - if (!str) { - return NULL; - } - - char *s = PyBytes_AS_STRING(str); - if (!s) { - return NULL; - } - - strcpy(s, first_str); - s += strlen(first_str); - *s++ = '.'; - strcpy(s, second_str); - s += strlen(second_str); - *s = '\0'; - - PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL); - Py_DECREF(str); - if (!uni) { - return NULL; - } - PyUnicode_InternInPlace(&uni); - if (_PyArena_AddPyObject(p->arena, uni) < 0) { - Py_DECREF(uni); - return NULL; - } - - return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name)); +expr_ty _PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, + expr_ty second_name) { + assert(first_name != NULL && second_name != NULL); + PyObject *first_identifier = first_name->v.Name.id; + PyObject *second_identifier = second_name->v.Name.id; + + if (PyUnicode_READY(first_identifier) == -1) { + return NULL; + } + if (PyUnicode_READY(second_identifier) == -1) { + return NULL; + } + const char *first_str = PyUnicode_AsUTF8(first_identifier); + if (!first_str) { + return NULL; + } + const char *second_str = PyUnicode_AsUTF8(second_identifier); + if (!second_str) { + return NULL; + } + Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the 
dot + + PyObject *str = PyBytes_FromStringAndSize(NULL, len); + if (!str) { + return NULL; + } + + char *s = PyBytes_AS_STRING(str); + if (!s) { + return NULL; + } + + strcpy(s, first_str); + s += strlen(first_str); + *s++ = '.'; + strcpy(s, second_str); + s += strlen(second_str); + *s = '\0'; + + PyObject *uni = + PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL); + Py_DECREF(str); + if (!uni) { + return NULL; + } + PyUnicode_InternInPlace(&uni); + if (_PyArena_AddPyObject(p->arena, uni) < 0) { + Py_DECREF(uni); + return NULL; + } + + return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name)); } /* Counts the total number of dots in seq's tokens */ -int -_PyPegen_seq_count_dots(asdl_seq *seq) -{ - int number_of_dots = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { - Token *current_expr = asdl_seq_GET_UNTYPED(seq, i); - switch (current_expr->type) { - case ELLIPSIS: - number_of_dots += 3; - break; - case DOT: - number_of_dots += 1; - break; - default: - Py_UNREACHABLE(); - } +int _PyPegen_seq_count_dots(asdl_seq *seq) { + int number_of_dots = 0; + for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { + Token *current_expr = asdl_seq_GET_UNTYPED(seq, i); + switch (current_expr->type) { + case ELLIPSIS: + number_of_dots += 3; + break; + case DOT: + number_of_dots += 1; + break; + default: + Py_UNREACHABLE(); } + } - return number_of_dots; + return number_of_dots; } /* Creates an alias with '*' as the identifier name */ -alias_ty -_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno, - int end_col_offset, PyArena *arena) { - PyObject *str = PyUnicode_InternFromString("*"); - if (!str) { - return NULL; - } - if (_PyArena_AddPyObject(p->arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena); +alias_ty _PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, + int end_lineno, int end_col_offset, + PyArena *arena) { + PyObject *str = PyUnicode_InternFromString("*"); + if (!str) { + return NULL; + } + if (_PyArena_AddPyObject(p->arena, str) < 0) { + Py_DECREF(str); + return NULL; + } + return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, + arena); } /* Creates a new asdl_seq* with the identifiers of all the names in seq */ -asdl_identifier_seq * -_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - assert(len > 0); - - asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - expr_ty e = asdl_seq_GET(seq, i); - asdl_seq_SET(new_seq, i, e->v.Name.id); - } - return new_seq; +asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + assert(len > 0); + + asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + expr_ty e = asdl_seq_GET(seq, i); + asdl_seq_SET(new_seq, i, e->v.Name.id); + } + return new_seq; } /* Constructs a CmpopExprPair */ -CmpopExprPair * -_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr) -{ - assert(expr != NULL); - CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair)); - if (!a) { - return NULL; - } - a->cmpop = cmpop; - a->expr = expr; - return a; +CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, + expr_ty expr) { + assert(expr != 
NULL); + CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair)); + if (!a) { + return NULL; + } + a->cmpop = cmpop; + a->expr = expr; + return a; } -asdl_int_seq * -_PyPegen_get_cmpops(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - assert(len > 0); +asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + assert(len > 0); - asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->cmpop); - } - return new_seq; + asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); + asdl_seq_SET(new_seq, i, pair->cmpop); + } + return new_seq; } -asdl_expr_seq * -_PyPegen_get_exprs(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - assert(len > 0); +asdl_expr_seq *_PyPegen_get_exprs(Parser *p, asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + assert(len > 0); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->expr); - } - return new_seq; + asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); + asdl_seq_SET(new_seq, i, pair->expr); + } + return new_seq; } -/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */ -static asdl_expr_seq * -_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - if (len == 0) { - return NULL; - } +/* Creates an asdl_seq* where all the elements have been changed to have ctx as + * context */ +static asdl_expr_seq *_set_seq_context(Parser *p, asdl_expr_seq *seq, + expr_context_ty ctx) { + Py_ssize_t len = asdl_seq_LEN(seq); + if (len == 0) { + return NULL; + } - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - expr_ty e = asdl_seq_GET(seq, i); - asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx)); - } - return new_seq; + asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + expr_ty e = asdl_seq_GET(seq, i); + asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx)); + } + return new_seq; } -static expr_ty -_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e)); +static expr_ty _set_name_context(Parser *p, expr_ty e, expr_context_ty ctx) { + return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e)); } -static expr_ty -_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Tuple( - _set_seq_context(p, e->v.Tuple.elts, ctx), - ctx, - EXTRA_EXPR(e, e)); +static expr_ty _set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx) { + return _PyAST_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, + EXTRA_EXPR(e, e)); } -static expr_ty -_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_List( - _set_seq_context(p, e->v.List.elts, ctx), - ctx, - EXTRA_EXPR(e, 
e)); +static expr_ty _set_list_context(Parser *p, expr_ty e, expr_context_ty ctx) { + return _PyAST_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, + EXTRA_EXPR(e, e)); } -static expr_ty -_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice, - ctx, EXTRA_EXPR(e, e)); +static expr_ty _set_subscript_context(Parser *p, expr_ty e, + expr_context_ty ctx) { + return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, + EXTRA_EXPR(e, e)); } -static expr_ty -_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr, - ctx, EXTRA_EXPR(e, e)); +static expr_ty _set_attribute_context(Parser *p, expr_ty e, + expr_context_ty ctx) { + return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, + EXTRA_EXPR(e, e)); } -static expr_ty -_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), - ctx, EXTRA_EXPR(e, e)); +static expr_ty _set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx) { + return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), + ctx, EXTRA_EXPR(e, e)); } /* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */ -expr_ty -_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx) -{ - assert(expr != NULL); - - expr_ty new = NULL; - switch (expr->kind) { - case Name_kind: - new = _set_name_context(p, expr, ctx); - break; - case Tuple_kind: - new = _set_tuple_context(p, expr, ctx); - break; - case List_kind: - new = _set_list_context(p, expr, ctx); - break; - case Subscript_kind: - new = _set_subscript_context(p, expr, ctx); - break; - case Attribute_kind: - new = _set_attribute_context(p, expr, ctx); - break; - case Starred_kind: - new = _set_starred_context(p, expr, ctx); - break; - default: - new = expr; - } - return new; -} - -/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */ -KeyValuePair * -_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value) -{ - KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair)); - if (!a) { - return NULL; - } - a->key = key; - a->value = value; - return a; +expr_ty _PyPegen_set_expr_context(Parser *p, expr_ty expr, + expr_context_ty ctx) { + assert(expr != NULL); + + expr_ty new = NULL; + switch (expr->kind) { + case Name_kind: + new = _set_name_context(p, expr, ctx); + break; + case Tuple_kind: + new = _set_tuple_context(p, expr, ctx); + break; + case List_kind: + new = _set_list_context(p, expr, ctx); + break; + case Subscript_kind: + new = _set_subscript_context(p, expr, ctx); + break; + case Attribute_kind: + new = _set_attribute_context(p, expr, ctx); + break; + case Starred_kind: + new = _set_starred_context(p, expr, ctx); + break; + default: + new = expr; + } + return new; +} + +/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs + */ +KeyValuePair *_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value) { + KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair)); + if (!a) { + return NULL; + } + a->key = key; + a->value = value; + return a; } /* Extracts all keys from an asdl_seq* of KeyValuePair*'s */ -asdl_expr_seq * -_PyPegen_get_keys(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t 
i = 0; i < len; i++) { - KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->key); - } - return new_seq; +asdl_expr_seq *_PyPegen_get_keys(Parser *p, asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); + asdl_seq_SET(new_seq, i, pair->key); + } + return new_seq; } /* Extracts all values from an asdl_seq* of KeyValuePair*'s */ -asdl_expr_seq * -_PyPegen_get_values(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->value); - } - return new_seq; -} - -/* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */ -KeyPatternPair * -_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern) -{ - KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair)); - if (!a) { - return NULL; - } - a->key = key; - a->pattern = pattern; - return a; +asdl_expr_seq *_PyPegen_get_values(Parser *p, asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); + asdl_seq_SET(new_seq, i, pair->value); + } + return new_seq; +} + +/* Constructs a KeyPatternPair that is used when parsing mapping & class + * patterns */ +KeyPatternPair *_PyPegen_key_pattern_pair(Parser *p, expr_ty key, + pattern_ty pattern) { + KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair)); + if (!a) { + return NULL; + } + a->key = key; + a->pattern = pattern; + return a; } /* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */ -asdl_expr_seq * -_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->key); - } - return new_seq; +asdl_expr_seq *_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); + asdl_seq_SET(new_seq, i, pair->key); + } + return new_seq; } /* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */ -asdl_pattern_seq * -_PyPegen_get_patterns(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->pattern); - } - return new_seq; +asdl_pattern_seq *_PyPegen_get_patterns(Parser *p, asdl_seq *seq) { + Py_ssize_t len = asdl_seq_LEN(seq); + asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena); + if (!new_seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); + asdl_seq_SET(new_seq, i, 
pair->pattern); + } + return new_seq; } /* Constructs a NameDefaultPair */ -NameDefaultPair * -_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc) -{ - NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair)); - if (!a) { - return NULL; - } - a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc); - a->value = value; - return a; +NameDefaultPair *_PyPegen_name_default_pair(Parser *p, arg_ty arg, + expr_ty value, Token *tc) { + NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair)); + if (!a) { + return NULL; + } + a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc); + a->value = value; + return a; } /* Constructs a SlashWithDefault */ -SlashWithDefault * -_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults) -{ - SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault)); - if (!a) { - return NULL; - } - a->plain_names = plain_names; - a->names_with_defaults = names_with_defaults; - return a; +SlashWithDefault *_PyPegen_slash_with_default(Parser *p, + asdl_arg_seq *plain_names, + asdl_seq *names_with_defaults) { + SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault)); + if (!a) { + return NULL; + } + a->plain_names = plain_names; + a->names_with_defaults = names_with_defaults; + return a; } /* Constructs a StarEtc */ -StarEtc * -_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg) -{ - StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc)); - if (!a) { - return NULL; - } - a->vararg = vararg; - a->kwonlyargs = kwonlyargs; - a->kwarg = kwarg; - return a; -} - -asdl_seq * -_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b) -{ - Py_ssize_t first_len = asdl_seq_LEN(a); - Py_ssize_t second_len = asdl_seq_LEN(b); - asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena); - if (!new_seq) { - return NULL; - } - - int k = 0; - for (Py_ssize_t i = 0; i < first_len; i++) { - asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i)); - } - for (Py_ssize_t i = 0; i < second_len; i++) { - asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i)); - } - - return new_seq; -} - -static asdl_arg_seq* -_get_names(Parser *p, asdl_seq *names_with_defaults) -{ - Py_ssize_t len = asdl_seq_LEN(names_with_defaults); - asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena); - if (!seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); - asdl_seq_SET(seq, i, pair->arg); - } - return seq; -} - -static asdl_expr_seq * -_get_defaults(Parser *p, asdl_seq *names_with_defaults) -{ - Py_ssize_t len = asdl_seq_LEN(names_with_defaults); - asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); - asdl_seq_SET(seq, i, pair->value); - } - return seq; -} - -static int -_make_posonlyargs(Parser *p, - asdl_arg_seq *slash_without_default, - SlashWithDefault *slash_with_default, - asdl_arg_seq **posonlyargs) { - if (slash_without_default != NULL) { - *posonlyargs = slash_without_default; - } - else if (slash_with_default != NULL) { - asdl_arg_seq *slash_with_default_names = - _get_names(p, slash_with_default->names_with_defaults); - if (!slash_with_default_names) { - return -1; - } - *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences( - p, - (asdl_seq*)slash_with_default->plain_names, - 
(asdl_seq*)slash_with_default_names); - } - else { - *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - } - return *posonlyargs == NULL ? -1 : 0; -} - -static int -_make_posargs(Parser *p, - asdl_arg_seq *plain_names, - asdl_seq *names_with_default, - asdl_arg_seq **posargs) { - if (plain_names != NULL && names_with_default != NULL) { - asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); - if (!names_with_default_names) { - return -1; - } - *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( - p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); - } - else if (plain_names == NULL && names_with_default != NULL) { - *posargs = _get_names(p, names_with_default); - } - else if (plain_names != NULL && names_with_default == NULL) { - *posargs = plain_names; - } - else { - *posargs = _Py_asdl_arg_seq_new(0, p->arena); - } - return *posargs == NULL ? -1 : 0; -} - -static int -_make_posdefaults(Parser *p, - SlashWithDefault *slash_with_default, - asdl_seq *names_with_default, - asdl_expr_seq **posdefaults) { - if (slash_with_default != NULL && names_with_default != NULL) { - asdl_expr_seq *slash_with_default_values = - _get_defaults(p, slash_with_default->names_with_defaults); - if (!slash_with_default_values) { - return -1; - } - asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default); - if (!names_with_default_values) { - return -1; - } - *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences( - p, - (asdl_seq*)slash_with_default_values, - (asdl_seq*)names_with_default_values); - } - else if (slash_with_default == NULL && names_with_default != NULL) { - *posdefaults = _get_defaults(p, names_with_default); - } - else if (slash_with_default != NULL && names_with_default == NULL) { - *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults); - } - else { - *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); - } - return *posdefaults == NULL ? 
-1 : 0; -} - -static int -_make_kwargs(Parser *p, StarEtc *star_etc, - asdl_arg_seq **kwonlyargs, - asdl_expr_seq **kwdefaults) { - if (star_etc != NULL && star_etc->kwonlyargs != NULL) { - *kwonlyargs = _get_names(p, star_etc->kwonlyargs); - } - else { - *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - } - - if (*kwonlyargs == NULL) { - return -1; - } - - if (star_etc != NULL && star_etc->kwonlyargs != NULL) { - *kwdefaults = _get_defaults(p, star_etc->kwonlyargs); - } - else { - *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); - } - - if (*kwdefaults == NULL) { - return -1; - } - - return 0; -} - -/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */ -arguments_ty -_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default, - SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names, - asdl_seq *names_with_default, StarEtc *star_etc) -{ - asdl_arg_seq *posonlyargs; - if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) { - return NULL; - } - - asdl_arg_seq *posargs; - if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) { - return NULL; - } - - asdl_expr_seq *posdefaults; - if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) { - return NULL; - } - - arg_ty vararg = NULL; - if (star_etc != NULL && star_etc->vararg != NULL) { - vararg = star_etc->vararg; - } - - asdl_arg_seq *kwonlyargs; - asdl_expr_seq *kwdefaults; - if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) { - return NULL; - } - - arg_ty kwarg = NULL; - if (star_etc != NULL && star_etc->kwarg != NULL) { - kwarg = star_etc->kwarg; - } - - return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs, - kwdefaults, kwarg, posdefaults, p->arena); -} - - -/* Constructs an empty arguments_ty object, that gets used when a function accepts no - * arguments. 
*/ -arguments_ty -_PyPegen_empty_arguments(Parser *p) -{ - asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - if (!posonlyargs) { - return NULL; - } - asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena); - if (!posargs) { - return NULL; - } - asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); - if (!posdefaults) { - return NULL; - } - asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - if (!kwonlyargs) { - return NULL; - } - asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); - if (!kwdefaults) { - return NULL; - } - - return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs, - kwdefaults, NULL, posdefaults, p->arena); +StarEtc *_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, + arg_ty kwarg) { + StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc)); + if (!a) { + return NULL; + } + a->vararg = vararg; + a->kwonlyargs = kwonlyargs; + a->kwarg = kwarg; + return a; +} + +asdl_seq *_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b) { + Py_ssize_t first_len = asdl_seq_LEN(a); + Py_ssize_t second_len = asdl_seq_LEN(b); + asdl_seq *new_seq = + (asdl_seq *)_Py_asdl_generic_seq_new(first_len + second_len, p->arena); + if (!new_seq) { + return NULL; + } + + int k = 0; + for (Py_ssize_t i = 0; i < first_len; i++) { + asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i)); + } + for (Py_ssize_t i = 0; i < second_len; i++) { + asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i)); + } + + return new_seq; +} + +static asdl_arg_seq *_get_names(Parser *p, asdl_seq *names_with_defaults) { + Py_ssize_t len = asdl_seq_LEN(names_with_defaults); + asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena); + if (!seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); + asdl_seq_SET(seq, i, pair->arg); + } + return seq; +} + +static asdl_expr_seq *_get_defaults(Parser *p, asdl_seq *names_with_defaults) { + Py_ssize_t len = asdl_seq_LEN(names_with_defaults); + asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena); + if (!seq) { + return NULL; + } + for (Py_ssize_t i = 0; i < len; i++) { + NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); + asdl_seq_SET(seq, i, pair->value); + } + return seq; +} + +static int _make_posonlyargs(Parser *p, asdl_arg_seq *slash_without_default, + SlashWithDefault *slash_with_default, + asdl_arg_seq **posonlyargs) { + if (slash_without_default != NULL) { + *posonlyargs = slash_without_default; + } else if (slash_with_default != NULL) { + asdl_arg_seq *slash_with_default_names = + _get_names(p, slash_with_default->names_with_defaults); + if (!slash_with_default_names) { + return -1; + } + *posonlyargs = (asdl_arg_seq *)_PyPegen_join_sequences( + p, (asdl_seq *)slash_with_default->plain_names, + (asdl_seq *)slash_with_default_names); + } else { + *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); + } + return *posonlyargs == NULL ? 
-1 : 0; +} + +static int _make_posargs(Parser *p, asdl_arg_seq *plain_names, + asdl_seq *names_with_default, asdl_arg_seq **posargs) { + if (plain_names != NULL && names_with_default != NULL) { + asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); + if (!names_with_default_names) { + return -1; + } + *posargs = (asdl_arg_seq *)_PyPegen_join_sequences( + p, (asdl_seq *)plain_names, (asdl_seq *)names_with_default_names); + } else if (plain_names == NULL && names_with_default != NULL) { + *posargs = _get_names(p, names_with_default); + } else if (plain_names != NULL && names_with_default == NULL) { + *posargs = plain_names; + } else { + *posargs = _Py_asdl_arg_seq_new(0, p->arena); + } + return *posargs == NULL ? -1 : 0; +} + +static int _make_posdefaults(Parser *p, SlashWithDefault *slash_with_default, + asdl_seq *names_with_default, + asdl_expr_seq **posdefaults) { + if (slash_with_default != NULL && names_with_default != NULL) { + asdl_expr_seq *slash_with_default_values = + _get_defaults(p, slash_with_default->names_with_defaults); + if (!slash_with_default_values) { + return -1; + } + asdl_expr_seq *names_with_default_values = + _get_defaults(p, names_with_default); + if (!names_with_default_values) { + return -1; + } + *posdefaults = (asdl_expr_seq *)_PyPegen_join_sequences( + p, (asdl_seq *)slash_with_default_values, + (asdl_seq *)names_with_default_values); + } else if (slash_with_default == NULL && names_with_default != NULL) { + *posdefaults = _get_defaults(p, names_with_default); + } else if (slash_with_default != NULL && names_with_default == NULL) { + *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults); + } else { + *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); + } + return *posdefaults == NULL ? 
-1 : 0; +} + +static int _make_kwargs(Parser *p, StarEtc *star_etc, asdl_arg_seq **kwonlyargs, + asdl_expr_seq **kwdefaults) { + if (star_etc != NULL && star_etc->kwonlyargs != NULL) { + *kwonlyargs = _get_names(p, star_etc->kwonlyargs); + } else { + *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); + } + + if (*kwonlyargs == NULL) { + return -1; + } + + if (star_etc != NULL && star_etc->kwonlyargs != NULL) { + *kwdefaults = _get_defaults(p, star_etc->kwonlyargs); + } else { + *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); + } + + if (*kwdefaults == NULL) { + return -1; + } + + return 0; +} + +/* Constructs an arguments_ty object out of all the parsed constructs in the + * parameters rule */ +arguments_ty _PyPegen_make_arguments(Parser *p, + asdl_arg_seq *slash_without_default, + SlashWithDefault *slash_with_default, + asdl_arg_seq *plain_names, + asdl_seq *names_with_default, + StarEtc *star_etc) { + asdl_arg_seq *posonlyargs; + if (_make_posonlyargs(p, slash_without_default, slash_with_default, + &posonlyargs) == -1) { + return NULL; + } + + asdl_arg_seq *posargs; + if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) { + return NULL; + } + + asdl_expr_seq *posdefaults; + if (_make_posdefaults(p, slash_with_default, names_with_default, + &posdefaults) == -1) { + return NULL; + } + + arg_ty vararg = NULL; + if (star_etc != NULL && star_etc->vararg != NULL) { + vararg = star_etc->vararg; + } + + asdl_arg_seq *kwonlyargs; + asdl_expr_seq *kwdefaults; + if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) { + return NULL; + } + + arg_ty kwarg = NULL; + if (star_etc != NULL && star_etc->kwarg != NULL) { + kwarg = star_etc->kwarg; + } + + return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, + kwarg, posdefaults, p->arena); +} + +/* Constructs an empty arguments_ty object, that gets used when a function + * accepts no arguments. 
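How the pieces assembled by _PyPegen_make_arguments surface in the Python AST can be checked with the ast module; a small sketch (the signature is illustrative):

    import ast

    fn = ast.parse("def f(a, b=0, /, c=1, *args, d, e=2, **kw): pass").body[0]
    sig = fn.args
    print([p.arg for p in sig.posonlyargs])   # ['a', 'b']  (the group before '/')
    print([p.arg for p in sig.args])          # ['c']
    print(sig.vararg.arg, sig.kwarg.arg)      # args kw
    print([p.arg for p in sig.kwonlyargs])    # ['d', 'e']
    print(len(sig.defaults))                  # 2: positional defaults of both groups, joined
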
*/ +arguments_ty _PyPegen_empty_arguments(Parser *p) { + asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); + if (!posonlyargs) { + return NULL; + } + asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena); + if (!posargs) { + return NULL; + } + asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); + if (!posdefaults) { + return NULL; + } + asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); + if (!kwonlyargs) { + return NULL; + } + asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); + if (!kwdefaults) { + return NULL; + } + + return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, + NULL, posdefaults, p->arena); } /* Encapsulates the value of an operator_ty into an AugOperator struct */ -AugOperator * -_PyPegen_augoperator(Parser *p, operator_ty kind) -{ - AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator)); - if (!a) { - return NULL; - } - a->kind = kind; - return a; +AugOperator *_PyPegen_augoperator(Parser *p, operator_ty kind) { + AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator)); + if (!a) { + return NULL; + } + a->kind = kind; + return a; } /* Construct a FunctionDef equivalent to function_def, but with decorators */ -stmt_ty -_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def) -{ - assert(function_def != NULL); - if (function_def->kind == AsyncFunctionDef_kind) { - return _PyAST_AsyncFunctionDef( - function_def->v.AsyncFunctionDef.name, - function_def->v.AsyncFunctionDef.args, - function_def->v.AsyncFunctionDef.body, decorators, - function_def->v.AsyncFunctionDef.returns, - function_def->v.AsyncFunctionDef.type_comment, - function_def->v.AsyncFunctionDef.type_params, - function_def->lineno, function_def->col_offset, - function_def->end_lineno, function_def->end_col_offset, p->arena); - } - - return _PyAST_FunctionDef( - function_def->v.FunctionDef.name, - function_def->v.FunctionDef.args, - function_def->v.FunctionDef.body, decorators, - function_def->v.FunctionDef.returns, - function_def->v.FunctionDef.type_comment, - function_def->v.FunctionDef.type_params, - function_def->lineno, function_def->col_offset, - function_def->end_lineno, function_def->end_col_offset, p->arena); +stmt_ty _PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, + stmt_ty function_def) { + assert(function_def != NULL); + if (function_def->kind == AsyncFunctionDef_kind) { + return _PyAST_AsyncFunctionDef( + function_def->v.AsyncFunctionDef.name, + function_def->v.AsyncFunctionDef.args, + function_def->v.AsyncFunctionDef.body, decorators, + function_def->v.AsyncFunctionDef.returns, + function_def->v.AsyncFunctionDef.type_comment, + function_def->v.AsyncFunctionDef.type_params, function_def->lineno, + function_def->col_offset, function_def->end_lineno, + function_def->end_col_offset, p->arena); + } + + return _PyAST_FunctionDef( + function_def->v.FunctionDef.name, function_def->v.FunctionDef.args, + function_def->v.FunctionDef.body, decorators, + function_def->v.FunctionDef.returns, + function_def->v.FunctionDef.type_comment, + function_def->v.FunctionDef.type_params, function_def->lineno, + function_def->col_offset, function_def->end_lineno, + function_def->end_col_offset, p->arena); } /* Construct a ClassDef equivalent to class_def, but with decorators */ -stmt_ty -_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def) -{ - assert(class_def != NULL); - return _PyAST_ClassDef( - class_def->v.ClassDef.name, - 
class_def->v.ClassDef.bases, class_def->v.ClassDef.keywords, - class_def->v.ClassDef.body, decorators, - class_def->v.ClassDef.type_params, - class_def->lineno, class_def->col_offset, class_def->end_lineno, - class_def->end_col_offset, p->arena); +stmt_ty _PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, + stmt_ty class_def) { + assert(class_def != NULL); + return _PyAST_ClassDef( + class_def->v.ClassDef.name, class_def->v.ClassDef.bases, + class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators, + class_def->v.ClassDef.type_params, class_def->lineno, + class_def->col_offset, class_def->end_lineno, class_def->end_col_offset, + p->arena); } /* Construct a KeywordOrStarred */ -KeywordOrStarred * -_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword) -{ - KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred)); - if (!a) { - return NULL; - } - a->element = element; - a->is_keyword = is_keyword; - return a; -} - -/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */ -static int -_seq_number_of_starred_exprs(asdl_seq *seq) -{ - int n = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { - KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i); - if (!k->is_keyword) { - n++; - } - } - return n; +KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *p, void *element, + int is_keyword) { + KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred)); + if (!a) { + return NULL; + } + a->element = element; + a->is_keyword = is_keyword; + return a; +} + +/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s + */ +static int _seq_number_of_starred_exprs(asdl_seq *seq) { + int n = 0; + for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { + KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i); + if (!k->is_keyword) { + n++; + } + } + return n; } /* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */ -asdl_expr_seq * -_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs) -{ - int new_len = _seq_number_of_starred_exprs(kwargs); - if (new_len == 0) { - return NULL; - } - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena); - if (!new_seq) { - return NULL; - } - - int idx = 0; - for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) { - KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); - if (!k->is_keyword) { - asdl_seq_SET(new_seq, idx++, k->element); - } - } - return new_seq; +asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs) { + int new_len = _seq_number_of_starred_exprs(kwargs); + if (new_len == 0) { + return NULL; + } + asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena); + if (!new_seq) { + return NULL; + } + + int idx = 0; + for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) { + KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); + if (!k->is_keyword) { + asdl_seq_SET(new_seq, idx++, k->element); + } + } + return new_seq; } /* Return a new asdl_seq* with only the keywords in kwargs */ -asdl_keyword_seq* -_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) -{ - Py_ssize_t len = asdl_seq_LEN(kwargs); - Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs); - if (new_len == 0) { - return NULL; - } - asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena); - if (!new_seq) { - return NULL; - } - - int idx = 0; - for (Py_ssize_t i = 0; i < len; i++) { - KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); 
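These KeywordOrStarred helpers split a parsed argument sequence back into its starred/positional part and its keyword part, mirroring how the resulting Call node stores them; an illustrative ast-level snippet:

    import ast

    call = ast.parse("f(a, *b, x=1, **c)", mode="eval").body
    # *-unpackings stay with the positional arguments ...
    print([type(n).__name__ for n in call.args])   # ['Name', 'Starred']
    # ... while keywords and **-unpackings end up in .keywords (arg is None for **)
    print([kw.arg for kw in call.keywords])        # ['x', None]
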
- if (k->is_keyword) { - asdl_seq_SET(new_seq, idx++, k->element); - } - } - return new_seq; -} - -expr_ty -_PyPegen_ensure_imaginary(Parser *p, expr_ty exp) -{ - if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal"); - return NULL; - } - return exp; -} - -expr_ty -_PyPegen_ensure_real(Parser *p, expr_ty exp) -{ - if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal"); - return NULL; - } - return exp; -} - -mod_ty -_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) { - asdl_type_ignore_seq *type_ignores = NULL; - Py_ssize_t num = p->type_ignore_comments.num_items; - if (num > 0) { - // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena - type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena); - if (type_ignores == NULL) { - return NULL; - } - for (int i = 0; i < num; i++) { - PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment); - if (tag == NULL) { - return NULL; - } - type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno, - tag, p->arena); - if (ti == NULL) { - return NULL; - } - asdl_seq_SET(type_ignores, i, ti); - } - } - return _PyAST_Module(a, type_ignores, p->arena); -} - -PyObject * -_PyPegen_new_type_comment(Parser *p, const char *s) -{ - PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL); - if (res == NULL) { - return NULL; - } - if (_PyArena_AddPyObject(p->arena, res) < 0) { - Py_DECREF(res); - return NULL; - } - return res; -} - -arg_ty -_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc) -{ - if (tc == NULL) { - return a; - } - const char *bytes = PyBytes_AsString(tc->bytes); - if (bytes == NULL) { - return NULL; - } - PyObject *tco = _PyPegen_new_type_comment(p, bytes); - if (tco == NULL) { - return NULL; - } - return _PyAST_arg(a->arg, a->annotation, tco, - a->lineno, a->col_offset, a->end_lineno, a->end_col_offset, - p->arena); +asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *p, + asdl_seq *kwargs) { + Py_ssize_t len = asdl_seq_LEN(kwargs); + Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs); + if (new_len == 0) { + return NULL; + } + asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena); + if (!new_seq) { + return NULL; + } + + int idx = 0; + for (Py_ssize_t i = 0; i < len; i++) { + KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); + if (k->is_keyword) { + asdl_seq_SET(new_seq, idx++, k->element); + } + } + return new_seq; +} + +expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty exp) { + if (exp->kind != Constant_kind || + !PyComplex_CheckExact(exp->v.Constant.value)) { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION( + exp, "imaginary number required in complex literal"); + return NULL; + } + return exp; +} + +expr_ty _PyPegen_ensure_real(Parser *p, expr_ty exp) { + if (exp->kind != Constant_kind || + PyComplex_CheckExact(exp->v.Constant.value)) { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION( + exp, "real number required in complex literal"); + return NULL; + } + return exp; +} + +mod_ty _PyPegen_make_module(Parser *p, asdl_stmt_seq *a) { + asdl_type_ignore_seq *type_ignores = NULL; + Py_ssize_t num = p->type_ignore_comments.num_items; + if (num > 0) { + // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena + type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena); + if (type_ignores == NULL) 
{ + return NULL; + } + for (int i = 0; i < num; i++) { + PyObject *tag = _PyPegen_new_type_comment( + p, p->type_ignore_comments.items[i].comment); + if (tag == NULL) { + return NULL; + } + type_ignore_ty ti = _PyAST_TypeIgnore( + p->type_ignore_comments.items[i].lineno, tag, p->arena); + if (ti == NULL) { + return NULL; + } + asdl_seq_SET(type_ignores, i, ti); + } + } + return _PyAST_Module(a, type_ignores, p->arena); +} + +PyObject *_PyPegen_new_type_comment(Parser *p, const char *s) { + PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL); + if (res == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(p->arena, res) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} + +arg_ty _PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc) { + if (tc == NULL) { + return a; + } + const char *bytes = PyBytes_AsString(tc->bytes); + if (bytes == NULL) { + return NULL; + } + PyObject *tco = _PyPegen_new_type_comment(p, bytes); + if (tco == NULL) { + return NULL; + } + return _PyAST_arg(a->arg, a->annotation, tco, a->lineno, a->col_offset, + a->end_lineno, a->end_col_offset, p->arena); } /* Checks if the NOTEQUAL token is valid given the current parser flags 0 indicates success and nonzero indicates failure (an exception may be set) */ -int -_PyPegen_check_barry_as_flufl(Parser *p, Token* t) { - assert(t->bytes != NULL); - assert(t->type == NOTEQUAL); - - const char* tok_str = PyBytes_AS_STRING(t->bytes); - if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) { - RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='"); - return -1; - } - if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) { - return strcmp(tok_str, "!="); - } +int _PyPegen_check_barry_as_flufl(Parser *p, Token *t) { + assert(t->bytes != NULL); + assert(t->type == NOTEQUAL); + + const char *tok_str = PyBytes_AS_STRING(t->bytes); + if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) { + RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='"); + return -1; + } + if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) { + return strcmp(tok_str, "!="); + } + return 0; +} + +int _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) { + if (name->kind != Name_kind) { return 0; -} - -int -_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) { - if (name->kind != Name_kind) { - return 0; - } - const char* candidates[2] = {"print", "exec"}; - for (int i=0; i<2; i++) { - if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) { - return 1; - } + } + const char *candidates[2] = {"print", "exec"}; + for (int i = 0; i < 2; i++) { + if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) { + return 1; } - return 0; + } + return 0; } static ResultTokenWithMetadata * -result_token_with_metadata(Parser *p, void *result, PyObject *metadata) -{ - ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata)); - if (res == NULL) { - return NULL; - } - res->metadata = metadata; - res->result = result; - return res; +result_token_with_metadata(Parser *p, void *result, PyObject *metadata) { + ResultTokenWithMetadata *res = + _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata)); + if (res == NULL) { + return NULL; + } + res->metadata = metadata; + res->result = result; + return res; } ResultTokenWithMetadata * -_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv) -{ - if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) { - return RAISE_SYNTAX_ERROR_KNOWN_RANGE( - conv_token, 
conv, - "f-string: conversion type must come right after the exclamanation mark" - ); - } - return result_token_with_metadata(p, conv, conv_token->metadata); +_PyPegen_check_fstring_conversion(Parser *p, Token *conv_token, expr_ty conv) { + if (conv_token->lineno != conv->lineno || + conv_token->end_col_offset != conv->col_offset) { + return RAISE_SYNTAX_ERROR_KNOWN_RANGE(conv_token, conv, + "f-string: conversion type must come " + "right after the exclamanation mark"); + } + return result_token_with_metadata(p, conv, conv_token->metadata); } +static asdl_expr_seq * +unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions); ResultTokenWithMetadata * -_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset, - int end_lineno, int end_col_offset, PyArena *arena) -{ - if (!spec) { - return NULL; - } - - // This is needed to keep compatibility with 3.11, where an empty format spec is parsed - // as an *empty* JoinedStr node, instead of having an empty constant in it. - if (asdl_seq_LEN(spec) == 1) { - expr_ty e = asdl_seq_GET(spec, 0); - if (e->kind == Constant_kind - && PyUnicode_Check(e->v.Constant.value) - && PyUnicode_GetLength(e->v.Constant.value) == 0) { - spec = _Py_asdl_expr_seq_new(0, arena); - } - } - - expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena); - if (!res) { - return NULL; - } - return result_token_with_metadata(p, res, colon->metadata); -} - -const char * -_PyPegen_get_expr_name(expr_ty e) -{ - assert(e != NULL); - switch (e->kind) { - case Attribute_kind: - return "attribute"; - case Subscript_kind: - return "subscript"; - case Starred_kind: - return "starred"; - case Name_kind: - return "name"; - case List_kind: - return "list"; - case Tuple_kind: - return "tuple"; - case Lambda_kind: - return "lambda"; - case Call_kind: - return "function call"; - case BoolOp_kind: - case BinOp_kind: - case UnaryOp_kind: - return "expression"; - case GeneratorExp_kind: - return "generator expression"; - case Yield_kind: - case YieldFrom_kind: - return "yield expression"; - case Await_kind: - return "await expression"; - case ListComp_kind: - return "list comprehension"; - case SetComp_kind: - return "set comprehension"; - case DictComp_kind: - return "dict comprehension"; - case Dict_kind: - return "dict literal"; - case Set_kind: - return "set display"; - case JoinedStr_kind: - case FormattedValue_kind: - return "f-string expression"; - case Constant_kind: { - PyObject *value = e->v.Constant.value; - if (value == Py_None) { - return "None"; - } - if (value == Py_False) { - return "False"; - } - if (value == Py_True) { - return "True"; - } - if (value == Py_Ellipsis) { - return "ellipsis"; - } - return "literal"; - } - case Compare_kind: - return "comparison"; - case IfExp_kind: - return "conditional expression"; - case NamedExpr_kind: - return "named expression"; - default: - PyErr_Format(PyExc_SystemError, - "unexpected expression in assignment %d (line %d)", - e->kind, e->lineno); - return NULL; - } -} - -expr_ty -_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) { - if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) { - return comprehension->iter; - } - return PyPegen_last_item(comprehension->ifs, expr_ty); +_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, + int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena) { + if (!spec) { + return NULL; + } + + // This is needed to keep 
compatibility with 3.11, where an empty format spec + // is parsed as an *empty* JoinedStr node, instead of having an empty constant + // in it. + if (asdl_seq_LEN(spec) == 1) { + expr_ty e = asdl_seq_GET(spec, 0); + if (e->kind == Constant_kind && PyUnicode_Check(e->v.Constant.value) && + PyUnicode_GetLength(e->v.Constant.value) == 0) { + spec = _Py_asdl_expr_seq_new(0, arena); + } + } + expr_ty res; + if (asdl_seq_LEN(spec) == 0) { + res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, + p->arena); + } else { + res = _PyPegen_concatenate_strings(p, spec, lineno, col_offset, end_lineno, + end_col_offset, arena); + } + if (!res) { + return NULL; + } + return result_token_with_metadata(p, res, colon->metadata); +} + +const char *_PyPegen_get_expr_name(expr_ty e) { + assert(e != NULL); + switch (e->kind) { + case Attribute_kind: + return "attribute"; + case Subscript_kind: + return "subscript"; + case Starred_kind: + return "starred"; + case Name_kind: + return "name"; + case List_kind: + return "list"; + case Tuple_kind: + return "tuple"; + case Lambda_kind: + return "lambda"; + case Call_kind: + return "function call"; + case BoolOp_kind: + case BinOp_kind: + case UnaryOp_kind: + return "expression"; + case GeneratorExp_kind: + return "generator expression"; + case Yield_kind: + case YieldFrom_kind: + return "yield expression"; + case Await_kind: + return "await expression"; + case ListComp_kind: + return "list comprehension"; + case SetComp_kind: + return "set comprehension"; + case DictComp_kind: + return "dict comprehension"; + case Dict_kind: + return "dict literal"; + case Set_kind: + return "set display"; + case JoinedStr_kind: + case FormattedValue_kind: + return "f-string expression"; + case Constant_kind: { + PyObject *value = e->v.Constant.value; + if (value == Py_None) { + return "None"; + } + if (value == Py_False) { + return "False"; + } + if (value == Py_True) { + return "True"; + } + if (value == Py_Ellipsis) { + return "ellipsis"; + } + return "literal"; + } + case Compare_kind: + return "comparison"; + case IfExp_kind: + return "conditional expression"; + case NamedExpr_kind: + return "named expression"; + default: + PyErr_Format(PyExc_SystemError, + "unexpected expression in assignment %d (line %d)", e->kind, + e->lineno); + return NULL; + } +} + +expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) { + if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) { + return comprehension->iter; + } + return PyPegen_last_item(comprehension->ifs, expr_ty); } expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b, - int lineno, int col_offset, int end_lineno, - int end_col_offset, PyArena *arena) { - Py_ssize_t args_len = asdl_seq_LEN(a); - Py_ssize_t total_len = args_len; + int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena) { + Py_ssize_t args_len = asdl_seq_LEN(a); + Py_ssize_t total_len = args_len; - if (b == NULL) { - return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset, - end_lineno, end_col_offset, arena); + if (b == NULL) { + return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset, + end_lineno, end_col_offset, arena); + } - } + asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b); + asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b); - asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b); - asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b); + if (starreds) { + 
total_len += asdl_seq_LEN(starreds); + } - if (starreds) { - total_len += asdl_seq_LEN(starreds); - } + asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena); - asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena); - - Py_ssize_t i = 0; - for (i = 0; i < args_len; i++) { - asdl_seq_SET(args, i, asdl_seq_GET(a, i)); - } - for (; i < total_len; i++) { - asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len)); - } + Py_ssize_t i = 0; + for (i = 0; i < args_len; i++) { + asdl_seq_SET(args, i, asdl_seq_GET(a, i)); + } + for (; i < total_len; i++) { + asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len)); + } - return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno, - col_offset, end_lineno, end_col_offset, arena); + return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno, col_offset, + end_lineno, end_col_offset, arena); } // AST Error reporting helpers -expr_ty -_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type) -{ - if (e == NULL) { - return NULL; - } - -#define VISIT_CONTAINER(CONTAINER, TYPE) do { \ - Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\ - for (Py_ssize_t i = 0; i < len; i++) {\ - expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\ - expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\ - if (child != NULL) {\ - return child;\ - }\ - }\ - } while (0) - - // We only need to visit List and Tuple nodes recursively as those - // are the only ones that can contain valid names in targets when - // they are parsed as expressions. Any other kind of expression - // that is a container (like Sets or Dicts) is directly invalid and - // we don't need to visit it recursively. - - switch (e->kind) { - case List_kind: - VISIT_CONTAINER(e, List); - return NULL; - case Tuple_kind: - VISIT_CONTAINER(e, Tuple); - return NULL; - case Starred_kind: - if (targets_type == DEL_TARGETS) { - return e; - } - return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type); - case Compare_kind: - // This is needed, because the `a in b` in `for a in b` gets parsed - // as a comparison, and so we need to search the left side of the comparison - // for invalid targets. - if (targets_type == FOR_TARGETS) { - cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0); - if (cmpop == In) { - return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type); - } - return NULL; - } - return e; - case Name_kind: - case Subscript_kind: - case Attribute_kind: - return NULL; - default: - return e; - } +expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type) { + if (e == NULL) { + return NULL; + } + +#define VISIT_CONTAINER(CONTAINER, TYPE) \ + do { \ + Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts); \ + for (Py_ssize_t i = 0; i < len; i++) { \ + expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i); \ + expr_ty child = _PyPegen_get_invalid_target(other, targets_type); \ + if (child != NULL) { \ + return child; \ + } \ + } \ + } while (0) + + // We only need to visit List and Tuple nodes recursively as those + // are the only ones that can contain valid names in targets when + // they are parsed as expressions. Any other kind of expression + // that is a container (like Sets or Dicts) is directly invalid and + // we don't need to visit it recursively. 
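A Python-level illustration of what this helper locates: the first sub-expression inside a list or tuple target that cannot be assigned to, which then becomes the location reported by the SyntaxError (the snippets are illustrative; the exact message depends on the offending node):

    for src in ("[a, b + 1] = value", "(x, *y, 2) = value"):
        try:
            compile(src, "<invalid-target>", "exec")
        except SyntaxError as exc:
            print(f"{src!r}: {exc.msg}")
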
+ + switch (e->kind) { + case List_kind: + VISIT_CONTAINER(e, List); + return NULL; + case Tuple_kind: + VISIT_CONTAINER(e, Tuple); + return NULL; + case Starred_kind: + if (targets_type == DEL_TARGETS) { + return e; + } + return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type); + case Compare_kind: + // This is needed, because the `a in b` in `for a in b` gets parsed + // as a comparison, and so we need to search the left side of the comparison + // for invalid targets. + if (targets_type == FOR_TARGETS) { + cmpop_ty cmpop = (cmpop_ty)asdl_seq_GET(e->v.Compare.ops, 0); + if (cmpop == In) { + return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type); + } + return NULL; + } + return e; + case Name_kind: + case Subscript_kind: + case Attribute_kind: + return NULL; + default: + return e; + } } void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) { - int kwarg_unpacking = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) { - keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i); - if (!keyword->arg) { - kwarg_unpacking = 1; - } - } - - const char *msg = NULL; - if (kwarg_unpacking) { - msg = "positional argument follows keyword argument unpacking"; - } else { - msg = "positional argument follows keyword argument"; - } - - return RAISE_SYNTAX_ERROR(msg); -} - -void * -_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions) -{ - /* The rule that calls this function is 'args for_if_clauses'. - For the input f(L, x for x in y), L and x are in args and - the for is parsed as a for_if_clause. We have to check if - len <= 1, so that input like dict((a, b) for a, b in x) - gets successfully parsed and then we pass the last - argument (x in the above example) as the location of the - error */ - Py_ssize_t len = asdl_seq_LEN(args->v.Call.args); - if (len <= 1) { - return NULL; - } - - comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty); - - return RAISE_SYNTAX_ERROR_KNOWN_RANGE( - (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1), - _PyPegen_get_last_comprehension_item(last_comprehension), - "Generator expression must be parenthesized" - ); + int kwarg_unpacking = 0; + for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) { + keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i); + if (!keyword->arg) { + kwarg_unpacking = 1; + } + } + + const char *msg = NULL; + if (kwarg_unpacking) { + msg = "positional argument follows keyword argument unpacking"; + } else { + msg = "positional argument follows keyword argument"; + } + + return RAISE_SYNTAX_ERROR(msg); +} + +void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, + asdl_comprehension_seq *comprehensions) { + /* The rule that calls this function is 'args for_if_clauses'. + For the input f(L, x for x in y), L and x are in args and + the for is parsed as a for_if_clause. 
We have to check if + len <= 1, so that input like dict((a, b) for a, b in x) + gets successfully parsed and then we pass the last + argument (x in the above example) as the location of the + error */ + Py_ssize_t len = asdl_seq_LEN(args->v.Call.args); + if (len <= 1) { + return NULL; + } + + comprehension_ty last_comprehension = + PyPegen_last_item(comprehensions, comprehension_ty); + + return RAISE_SYNTAX_ERROR_KNOWN_RANGE( + (expr_ty)asdl_seq_GET(args->v.Call.args, len - 1), + _PyPegen_get_last_comprehension_item(last_comprehension), + "Generator expression must be parenthesized"); } // Fstring stuff -static expr_ty -_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) { - assert(PyUnicode_CheckExact(constant->v.Constant.value)); - - const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value); - if (bstr == NULL) { - return NULL; - } - - size_t len; - if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) { - len = 1; - } else { - len = strlen(bstr); - } - - is_raw = is_raw || strchr(bstr, '\\') == NULL; - PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token); - if (str == NULL) { - _Pypegen_raise_decode_error(p); - return NULL; - } - if (_PyArena_AddPyObject(p->arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset, - constant->end_lineno, constant->end_col_offset, - p->arena); +static expr_ty _PyPegen_decode_fstring_part(Parser *p, int is_raw, + expr_ty constant, Token *token) { + assert(PyUnicode_CheckExact(constant->v.Constant.value)); + + const char *bstr = PyUnicode_AsUTF8(constant->v.Constant.value); + if (bstr == NULL) { + return NULL; + } + + size_t len; + if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) { + len = 1; + } else { + len = strlen(bstr); + } + + is_raw = is_raw || strchr(bstr, '\\') == NULL; + PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token); + if (str == NULL) { + _Pypegen_raise_decode_error(p); + return NULL; + } + if (_PyArena_AddPyObject(p->arena, str) < 0) { + Py_DECREF(str); + return NULL; + } + return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset, + constant->end_lineno, constant->end_col_offset, + p->arena); } static asdl_expr_seq * -unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions) -{ - /* The parser might put multiple f-string values into an individual - * JoinedStr node at the top level due to stuff like f-string debugging - * expressions. This function flattens those and promotes them to the - * upper level. Only simplifies AST, but the compiler already takes care - * of the regular output, so this is not necessary if you are not going - * to expose the output AST to Python level. 
*/ - - Py_ssize_t i, req_size, raw_size; - - req_size = raw_size = asdl_seq_LEN(raw_expressions); - expr_ty expr; - for (i = 0; i < raw_size; i++) { - expr = asdl_seq_GET(raw_expressions, i); - if (expr->kind == JoinedStr_kind) { - req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1; - } - } - - asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena); - - Py_ssize_t raw_index, req_index = 0; - for (raw_index = 0; raw_index < raw_size; raw_index++) { - expr = asdl_seq_GET(raw_expressions, raw_index); - if (expr->kind == JoinedStr_kind) { - asdl_expr_seq *values = expr->v.JoinedStr.values; - for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) { - asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n)); - req_index++; - } - } else { - asdl_seq_SET(expressions, req_index, expr); - req_index++; - } - } - return expressions; -} - -expr_ty -_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) { - asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions); - Py_ssize_t n_items = asdl_seq_LEN(expr); - - const char* quote_str = PyBytes_AsString(a->bytes); - if (quote_str == NULL) { - return NULL; - } - int is_raw = strpbrk(quote_str, "rR") != NULL; - - asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena); - if (seq == NULL) { - return NULL; - } - - Py_ssize_t index = 0; - for (Py_ssize_t i = 0; i < n_items; i++) { - expr_ty item = asdl_seq_GET(expr, i); - if (item->kind == Constant_kind) { - item = _PyPegen_decode_fstring_part(p, is_raw, item, b); - if (item == NULL) { - return NULL; - } - - /* Tokenizer emits string parts even when the underlying string - might become an empty value (e.g. FSTRING_MIDDLE with the value \\n) - so we need to check for them and simplify it here. */ - if (PyUnicode_CheckExact(item->v.Constant.value) - && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) { - continue; - } - } - asdl_seq_SET(seq, index++, item); - } - - asdl_expr_seq *resized_exprs; - if (index != n_items) { - resized_exprs = _Py_asdl_expr_seq_new(index, p->arena); - if (resized_exprs == NULL) { - return NULL; - } - for (Py_ssize_t i = 0; i < index; i++) { - asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i)); - } - } - else { - resized_exprs = seq; - } - - return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset, - b->end_lineno, b->end_col_offset, - p->arena); -} - -expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) { - Py_ssize_t bsize; - char* bstr; - if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) { - return NULL; - } - PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok); - if (str == NULL) { - return NULL; - } - if (_PyArena_AddPyObject(p->arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset, - tok->end_lineno, tok->end_col_offset, - p->arena); -} - -expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) { - char* bstr = PyBytes_AsString(tok->bytes); - if (bstr == NULL) { - return NULL; - } - PyObject* str = PyUnicode_FromString(bstr); - if (str == NULL) { - return NULL; - } - if (_PyArena_AddPyObject(p->arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset, - tok->end_lineno, tok->end_col_offset, - p->arena); -} - -expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) { - char* the_str = PyBytes_AsString(tok->bytes); - if (the_str == NULL) { - return NULL; - } - PyObject *s = _PyPegen_parse_string(p, tok); - if (s == NULL) { - 
_Pypegen_raise_decode_error(p); - return NULL; - } - if (_PyArena_AddPyObject(p->arena, s) < 0) { - Py_DECREF(s); - return NULL; - } - PyObject *kind = NULL; - if (the_str && the_str[0] == 'u') { - kind = _PyPegen_new_identifier(p, "u"); - if (kind == NULL) { - return NULL; - } - } - return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena); -} - -expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion, - ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset, - int end_lineno, int end_col_offset, PyArena *arena) { - int conversion_val = -1; - if (conversion != NULL) { - expr_ty conversion_expr = (expr_ty) conversion->result; - assert(conversion_expr->kind == Name_kind); - Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0); - - if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 || - !(first == 's' || first == 'r' || first == 'a')) { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr, - "f-string: invalid conversion character %R: expected 's', 'r', or 'a'", - conversion_expr->v.Name.id); - return NULL; - } - - conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int); - } - else if (debug && !format) { - /* If no conversion is specified, use !r for debug expressions */ - conversion_val = (int)'r'; - } - - expr_ty formatted_value = _PyAST_FormattedValue( - expression, conversion_val, format ? (expr_ty) format->result : NULL, - lineno, col_offset, end_lineno, - end_col_offset, arena - ); - - if (debug) { - /* Find the non whitespace token after the "=" */ - int debug_end_line, debug_end_offset; - PyObject *debug_metadata; - - if (conversion) { - debug_end_line = ((expr_ty) conversion->result)->lineno; - debug_end_offset = ((expr_ty) conversion->result)->col_offset; - debug_metadata = conversion->metadata; - } - else if (format) { - debug_end_line = ((expr_ty) format->result)->lineno; - debug_end_offset = ((expr_ty) format->result)->col_offset + 1; - debug_metadata = format->metadata; - } - else { - debug_end_line = end_lineno; - debug_end_offset = end_col_offset; - debug_metadata = closing_brace->metadata; - } - - expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line, - debug_end_offset - 1, p->arena); - if (!debug_text) { - return NULL; - } - - asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena); - asdl_seq_SET(values, 0, debug_text); - asdl_seq_SET(values, 1, formatted_value); - return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena); - } - else { - return formatted_value; - } -} - -expr_ty -_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, - int lineno, int col_offset, int end_lineno, - int end_col_offset, PyArena *arena) -{ - Py_ssize_t len = asdl_seq_LEN(strings); - assert(len > 0); - - int f_string_found = 0; - int unicode_string_found = 0; - int bytes_found = 0; - - Py_ssize_t i = 0; - Py_ssize_t n_flattened_elements = 0; +unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions) { + /* The parser might put multiple f-string values into an individual + * JoinedStr node at the top level due to stuff like f-string debugging + * expressions. This function flattens those and promotes them to the + * upper level. Only simplifies AST, but the compiler already takes care + * of the regular output, so this is not necessary if you are not going + * to expose the output AST to Python level. 
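As a Python-level illustration of why this flattening exists (a sketch, assuming CPython 3.12's AST shape): a debug expression expands into a text Constant plus a FormattedValue, which the grammar first wraps in a JoinedStr of its own.

    import ast

    # f"{x=}" carries both the literal text "x=" and the formatted value.
    tree = ast.parse('f"{x=}"', mode="eval")
    print(ast.dump(tree.body))
    # roughly: JoinedStr(values=[Constant(value='x='),
    #                            FormattedValue(value=Name(id='x'), conversion=114)])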
*/ + + Py_ssize_t i, req_size, raw_size; + + req_size = raw_size = asdl_seq_LEN(raw_expressions); + expr_ty expr; + for (i = 0; i < raw_size; i++) { + expr = asdl_seq_GET(raw_expressions, i); + if (expr->kind == JoinedStr_kind) { + req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1; + } + } + + asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena); + + Py_ssize_t raw_index, req_index = 0; + for (raw_index = 0; raw_index < raw_size; raw_index++) { + expr = asdl_seq_GET(raw_expressions, raw_index); + if (expr->kind == JoinedStr_kind) { + asdl_expr_seq *values = expr->v.JoinedStr.values; + for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) { + asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n)); + req_index++; + } + } else { + asdl_seq_SET(expressions, req_index, expr); + req_index++; + } + } + return expressions; +} + +expr_ty _PyPegen_joined_str(Parser *p, Token *a, asdl_expr_seq *raw_expressions, + Token *b) { + + asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions); + Py_ssize_t n_items = asdl_seq_LEN(expr); + + const char *quote_str = PyBytes_AsString(a->bytes); + if (quote_str == NULL) { + return NULL; + } + int is_raw = strpbrk(quote_str, "rR") != NULL; + + asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena); + if (seq == NULL) { + return NULL; + } + + Py_ssize_t index = 0; + for (Py_ssize_t i = 0; i < n_items; i++) { + expr_ty item = asdl_seq_GET(expr, i); + if (item->kind == Constant_kind) { + item = _PyPegen_decode_fstring_part(p, is_raw, item, b); + if (item == NULL) { + return NULL; + } + + /* Tokenizer emits string parts even when the underlying string + might become an empty value (e.g. FSTRING_MIDDLE with the value \\n) + so we need to check for them and simplify it here. */ + if (PyUnicode_CheckExact(item->v.Constant.value) && + PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) { + continue; + } + } + asdl_seq_SET(seq, index++, item); + } + + asdl_expr_seq *resized_exprs; + if (index != n_items) { + resized_exprs = _Py_asdl_expr_seq_new(index, p->arena); + if (resized_exprs == NULL) { + return NULL; + } + for (Py_ssize_t i = 0; i < index; i++) { + asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i)); + } + } else { + resized_exprs = seq; + } + + return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset, + b->end_lineno, b->end_col_offset, p->arena); +} + +expr_ty _PyPegen_decoded_constant_from_token(Parser *p, Token *tok) { + Py_ssize_t bsize; + char *bstr; + if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) { + return NULL; + } + PyObject *str = _PyPegen_decode_string(p, 0, bstr, bsize, tok); + if (str == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(p->arena, str) < 0) { + Py_DECREF(str); + return NULL; + } + return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset, + tok->end_lineno, tok->end_col_offset, p->arena); +} + +expr_ty _PyPegen_constant_from_token(Parser *p, Token *tok) { + char *bstr = PyBytes_AsString(tok->bytes); + if (bstr == NULL) { + return NULL; + } + PyObject *str = PyUnicode_FromString(bstr); + if (str == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(p->arena, str) < 0) { + Py_DECREF(str); + return NULL; + } + return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset, + tok->end_lineno, tok->end_col_offset, p->arena); +} + +expr_ty _PyPegen_constant_from_string(Parser *p, Token *tok) { + char *the_str = PyBytes_AsString(tok->bytes); + if (the_str == NULL) { + return NULL; + } + PyObject *s = _PyPegen_parse_string(p, tok); + if (s == NULL) { + 
_Pypegen_raise_decode_error(p); + return NULL; + } + if (_PyArena_AddPyObject(p->arena, s) < 0) { + Py_DECREF(s); + return NULL; + } + PyObject *kind = NULL; + if (the_str && the_str[0] == 'u') { + kind = _PyPegen_new_identifier(p, "u"); + if (kind == NULL) { + return NULL; + } + } + return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, + tok->end_col_offset, p->arena); +} + +expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, + ResultTokenWithMetadata *conversion, + ResultTokenWithMetadata *format, + Token *closing_brace, int lineno, + int col_offset, int end_lineno, + int end_col_offset, PyArena *arena) { + int conversion_val = -1; + if (conversion != NULL) { + expr_ty conversion_expr = (expr_ty)conversion->result; + assert(conversion_expr->kind == Name_kind); + Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0); + + if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 || + !(first == 's' || first == 'r' || first == 'a')) { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION( + conversion_expr, + "f-string: invalid conversion character %R: expected 's', 'r', or " + "'a'", + conversion_expr->v.Name.id); + return NULL; + } + + conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int); + } else if (debug && !format) { + /* If no conversion is specified, use !r for debug expressions */ + conversion_val = (int)'r'; + } + + expr_ty formatted_value = _PyAST_FormattedValue( + expression, conversion_val, format ? (expr_ty)format->result : NULL, + lineno, col_offset, end_lineno, end_col_offset, arena); + + if (debug) { + /* Find the non whitespace token after the "=" */ + int debug_end_line, debug_end_offset; + PyObject *debug_metadata; + + if (conversion) { + debug_end_line = ((expr_ty)conversion->result)->lineno; + debug_end_offset = ((expr_ty)conversion->result)->col_offset; + debug_metadata = conversion->metadata; + } else if (format) { + debug_end_line = ((expr_ty)format->result)->lineno; + debug_end_offset = ((expr_ty)format->result)->col_offset + 1; + debug_metadata = format->metadata; + } else { + debug_end_line = end_lineno; + debug_end_offset = end_col_offset; + debug_metadata = closing_brace->metadata; + } + expr_ty debug_text = + _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, + debug_end_line, debug_end_offset - 1, p->arena); + if (!debug_text) { + return NULL; + } + + asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena); + asdl_seq_SET(values, 0, debug_text); + asdl_seq_SET(values, 1, formatted_value); + return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, + debug_end_offset, p->arena); + } else { + return formatted_value; + } +} + +expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, + int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena) { + Py_ssize_t len = asdl_seq_LEN(strings); + assert(len > 0); + + int f_string_found = 0; + int unicode_string_found = 0; + int bytes_found = 0; + + Py_ssize_t i = 0; + Py_ssize_t n_flattened_elements = 0; + for (i = 0; i < len; i++) { + expr_ty elem = asdl_seq_GET(strings, i); + switch (elem->kind) { + case Constant_kind: + if (PyBytes_CheckExact(elem->v.Constant.value)) { + bytes_found = 1; + } else { + unicode_string_found = 1; + } + n_flattened_elements++; + break; + case JoinedStr_kind: + n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values); + f_string_found = 1; + break; + default: + n_flattened_elements++; + f_string_found = 1; + break; + } + } + + if ((unicode_string_found || f_string_found) && 
bytes_found) { + RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals"); + return NULL; + } + + if (bytes_found) { + PyObject *res = PyBytes_FromString(""); + + /* Bytes literals never get a kind, but just for consistency + since they are represented as Constant nodes, we'll mirror + the same behavior as unicode strings for determining the + kind. */ + PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind; for (i = 0; i < len; i++) { - expr_ty elem = asdl_seq_GET(strings, i); - if (elem->kind == Constant_kind) { - if (PyBytes_CheckExact(elem->v.Constant.value)) { - bytes_found = 1; - } else { - unicode_string_found = 1; + expr_ty elem = asdl_seq_GET(strings, i); + PyBytes_Concat(&res, elem->v.Constant.value); + } + if (!res || _PyArena_AddPyObject(arena, res) < 0) { + Py_XDECREF(res); + return NULL; + } + return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, + end_col_offset, p->arena); + } + + if (!f_string_found && len == 1) { + return asdl_seq_GET(strings, 0); + } + + asdl_expr_seq *flattened = + _Py_asdl_expr_seq_new(n_flattened_elements, p->arena); + if (flattened == NULL) { + return NULL; + } + + /* build flattened list */ + Py_ssize_t current_pos = 0; + Py_ssize_t j = 0; + for (i = 0; i < len; i++) { + expr_ty elem = asdl_seq_GET(strings, i); + switch (elem->kind) { + case JoinedStr_kind: + for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) { + expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j); + if (subvalue == NULL) { + return NULL; + } + asdl_seq_SET(flattened, current_pos++, subvalue); + } + break; + default: + asdl_seq_SET(flattened, current_pos++, elem); + break; + } + } + + /* calculate folded element count */ + Py_ssize_t n_elements = 0; + int prev_is_constant = 0; + for (i = 0; i < n_flattened_elements; i++) { + expr_ty elem = asdl_seq_GET(flattened, i); + + /* The concatenation of a FormattedValue and an empty Contant should + lead to the FormattedValue itself. Thus, we will not take any empty + constants into account, just as in `_PyPegen_joined_str` */ + if (f_string_found && elem->kind == Constant_kind && + PyUnicode_CheckExact(elem->v.Constant.value) && + PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) + continue; + + if (!prev_is_constant || elem->kind != Constant_kind) { + n_elements++; + } + prev_is_constant = elem->kind == Constant_kind; + } + + asdl_expr_seq *values = _Py_asdl_expr_seq_new(n_elements, p->arena); + if (values == NULL) { + return NULL; + } + + /* build folded list */ + _PyUnicodeWriter writer; + current_pos = 0; + for (i = 0; i < n_flattened_elements; i++) { + expr_ty elem = asdl_seq_GET(flattened, i); + + /* if the current elem and the following are constants, + fold them and all consequent constants */ + if (elem->kind == Constant_kind) { + if (i + 1 < n_flattened_elements && + asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) { + expr_ty first_elem = elem; + + /* When a string is getting concatenated, the kind of the string + is determined by the first string in the concatenation + sequence. 
+ + u"abc" "def" -> u"abcdef" + "abc" u"abc" -> "abcabc" */ + PyObject *kind = elem->v.Constant.kind; + + _PyUnicodeWriter_Init(&writer); + expr_ty last_elem = elem; + for (j = i; j < n_flattened_elements; j++) { + expr_ty current_elem = asdl_seq_GET(flattened, j); + if (current_elem->kind == Constant_kind) { + if (_PyUnicodeWriter_WriteStr(&writer, + current_elem->v.Constant.value)) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; } - n_flattened_elements++; - } else { - n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values); - f_string_found = 1; + last_elem = current_elem; + } else { + break; + } } - } - - if ((unicode_string_found || f_string_found) && bytes_found) { - RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals"); - return NULL; - } + i = j - 1; - if (bytes_found) { - PyObject* res = PyBytes_FromString(""); - - /* Bytes literals never get a kind, but just for consistency - since they are represented as Constant nodes, we'll mirror - the same behavior as unicode strings for determining the - kind. */ - PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind; - for (i = 0; i < len; i++) { - expr_ty elem = asdl_seq_GET(strings, i); - PyBytes_Concat(&res, elem->v.Constant.value); + PyObject *concat_str = _PyUnicodeWriter_Finish(&writer); + if (concat_str == NULL) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; } - if (!res || _PyArena_AddPyObject(arena, res) < 0) { - Py_XDECREF(res); - return NULL; + if (_PyArena_AddPyObject(p->arena, concat_str) < 0) { + Py_DECREF(concat_str); + return NULL; } - return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena); - } - - if (!f_string_found && len == 1) { - return asdl_seq_GET(strings, 0); - } - - asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena); - if (flattened == NULL) { - return NULL; - } - - /* build flattened list */ - Py_ssize_t current_pos = 0; - Py_ssize_t j = 0; - for (i = 0; i < len; i++) { - expr_ty elem = asdl_seq_GET(strings, i); - if (elem->kind == Constant_kind) { - asdl_seq_SET(flattened, current_pos++, elem); - } else { - for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) { - expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j); - if (subvalue == NULL) { - return NULL; - } - asdl_seq_SET(flattened, current_pos++, subvalue); - } + elem = _PyAST_Constant(concat_str, kind, first_elem->lineno, + first_elem->col_offset, last_elem->end_lineno, + last_elem->end_col_offset, p->arena); + if (elem == NULL) { + return NULL; } - } - - /* calculate folded element count */ - Py_ssize_t n_elements = 0; - int prev_is_constant = 0; - for (i = 0; i < n_flattened_elements; i++) { - expr_ty elem = asdl_seq_GET(flattened, i); - - /* The concatenation of a FormattedValue and an empty Contant should - lead to the FormattedValue itself. 
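A quick sketch of the folding described here (assuming CPython 3.12): implicit concatenation with an empty literal leaves only the FormattedValue behind.

    import ast

    # The empty Constant contributed by "" is dropped during folding.
    print(ast.dump(ast.parse('f"{a}" ""', mode="eval").body))
    # roughly: JoinedStr(values=[FormattedValue(value=Name(id='a'), conversion=-1)])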
Thus, we will not take any empty - constants into account, just as in `_PyPegen_joined_str` */ - if (f_string_found && elem->kind == Constant_kind && - PyUnicode_CheckExact(elem->v.Constant.value) && - PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) - continue; - - if (!prev_is_constant || elem->kind != Constant_kind) { - n_elements++; - } - prev_is_constant = elem->kind == Constant_kind; - } + } - asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena); - if (values == NULL) { - return NULL; + /* Drop all empty contanst strings */ + if (f_string_found && PyUnicode_CheckExact(elem->v.Constant.value) && + PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) { + continue; + } } - /* build folded list */ - _PyUnicodeWriter writer; - current_pos = 0; - for (i = 0; i < n_flattened_elements; i++) { - expr_ty elem = asdl_seq_GET(flattened, i); - - /* if the current elem and the following are constants, - fold them and all consequent constants */ - if (elem->kind == Constant_kind) { - if (i + 1 < n_flattened_elements && - asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) { - expr_ty first_elem = elem; - - /* When a string is getting concatenated, the kind of the string - is determined by the first string in the concatenation - sequence. - - u"abc" "def" -> u"abcdef" - "abc" u"abc" -> "abcabc" */ - PyObject *kind = elem->v.Constant.kind; - - _PyUnicodeWriter_Init(&writer); - expr_ty last_elem = elem; - for (j = i; j < n_flattened_elements; j++) { - expr_ty current_elem = asdl_seq_GET(flattened, j); - if (current_elem->kind == Constant_kind) { - if (_PyUnicodeWriter_WriteStr( - &writer, current_elem->v.Constant.value)) { - _PyUnicodeWriter_Dealloc(&writer); - return NULL; - } - last_elem = current_elem; - } else { - break; - } - } - i = j - 1; - - PyObject *concat_str = _PyUnicodeWriter_Finish(&writer); - if (concat_str == NULL) { - _PyUnicodeWriter_Dealloc(&writer); - return NULL; - } - if (_PyArena_AddPyObject(p->arena, concat_str) < 0) { - Py_DECREF(concat_str); - return NULL; - } - elem = _PyAST_Constant(concat_str, kind, first_elem->lineno, - first_elem->col_offset, - last_elem->end_lineno, - last_elem->end_col_offset, p->arena); - if (elem == NULL) { - return NULL; - } - } - - /* Drop all empty contanst strings */ - if (f_string_found && - PyUnicode_CheckExact(elem->v.Constant.value) && - PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) { - continue; - } - } - - asdl_seq_SET(values, current_pos++, elem); - } + asdl_seq_SET(values, current_pos++, elem); + } - if (!f_string_found) { - assert(n_elements == 1); - expr_ty elem = asdl_seq_GET(values, 0); - assert(elem->kind == Constant_kind); - return elem; - } + if (!f_string_found) { + assert(n_elements == 1); + expr_ty elem = asdl_seq_GET(values, 0); + assert(elem->kind == Constant_kind); + return elem; + } - assert(current_pos == n_elements); - return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena); + assert(current_pos == n_elements); + return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, + end_col_offset, p->arena); } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 04ba04428fef62..703b3ec7a6b661 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -3,53 +3,48 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" -#include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_call.h" // _PyObject_CallNoArgs() -#include #include +#include -#include "tokenizer.h" #include "errcode.h" +#include "tokenizer.h" /* Alternate tab spacing */ #define ALTTABSIZE 1 -#define 
is_potential_identifier_start(c) (\ - (c >= 'a' && c <= 'z')\ - || (c >= 'A' && c <= 'Z')\ - || c == '_'\ - || (c >= 128)) - -#define is_potential_identifier_char(c) (\ - (c >= 'a' && c <= 'z')\ - || (c >= 'A' && c <= 'Z')\ - || (c >= '0' && c <= '9')\ - || c == '_'\ - || (c >= 128)) +#define is_potential_identifier_start(c) \ + ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= 128)) +#define is_potential_identifier_char(c) \ + ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || \ + (c >= '0' && c <= '9') || c == '_' || (c >= 128)) /* Don't ever change this -- it would break the portability of Python code */ #define TABSIZE 8 -#define MAKE_TOKEN(token_type) token_setup(tok, token, token_type, p_start, p_end) -#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\ - type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end)) -#define ADVANCE_LINENO() \ - tok->lineno++; \ - tok->col_offset = 0; +#define MAKE_TOKEN(token_type) \ + token_setup(tok, token, token_type, p_start, p_end) +#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) \ + (type_comment_token_setup(tok, token, token_type, col_offset, \ + end_col_offset, p_start, p_end)) +#define ADVANCE_LINENO() \ + tok->lineno++; \ + tok->col_offset = 0; #define INSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0) #define INSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0) #ifdef Py_DEBUG -static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) { - assert(tok->tok_mode_stack_index >= 0); - assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL); - return &(tok->tok_mode_stack[tok->tok_mode_stack_index]); +static inline tokenizer_mode *TOK_GET_MODE(struct tok_state *tok) { + assert(tok->tok_mode_stack_index >= 0); + assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL); + return &(tok->tok_mode_stack[tok->tok_mode_stack_index]); } -static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) { - assert(tok->tok_mode_stack_index >= 0); - assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); - return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]); +static inline tokenizer_mode *TOK_NEXT_MODE(struct tok_state *tok) { + assert(tok->tok_mode_stack_index >= 0); + assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); + return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]); } #else #define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index])) @@ -64,171 +59,164 @@ static int syntaxerror(struct tok_state *tok, const char *format, ...); /* Spaces in this constant are treated as "zero or more spaces or tabs" when tokenizing. 
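For illustration, a sketch of how this surfaces at the Python level (assuming the ast module's documented behaviour): type comments are only reported when the caller opts in, and per the note above the spaces in the prefix match any run of spaces or tabs.

    import ast

    tree = ast.parse("x = []  # type: list[int]\n", type_comments=True)
    print(tree.body[0].type_comment)   # -> 'list[int]'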
*/ -static const char* type_comment_prefix = "# type: "; +static const char *type_comment_prefix = "# type: "; /* Create and initialize a new tok_state structure */ -static struct tok_state * -tok_new(void) -{ - struct tok_state *tok = (struct tok_state *)PyMem_Malloc( - sizeof(struct tok_state)); - if (tok == NULL) - return NULL; - tok->buf = tok->cur = tok->inp = NULL; - tok->fp_interactive = 0; - tok->interactive_src_start = NULL; - tok->interactive_src_end = NULL; - tok->start = NULL; - tok->end = NULL; - tok->done = E_OK; - tok->fp = NULL; - tok->input = NULL; - tok->tabsize = TABSIZE; - tok->indent = 0; - tok->indstack[0] = 0; - tok->atbol = 1; - tok->pendin = 0; - tok->prompt = tok->nextprompt = NULL; - tok->lineno = 0; - tok->starting_col_offset = -1; - tok->col_offset = -1; - tok->level = 0; - tok->altindstack[0] = 0; - tok->decoding_state = STATE_INIT; - tok->decoding_erred = 0; - tok->enc = NULL; - tok->encoding = NULL; - tok->cont_line = 0; - tok->filename = NULL; - tok->decoding_readline = NULL; - tok->decoding_buffer = NULL; - tok->readline = NULL; - tok->type_comments = 0; - tok->async_hacks = 0; - tok->async_def = 0; - tok->async_def_indent = 0; - tok->async_def_nl = 0; - tok->interactive_underflow = IUNDERFLOW_NORMAL; - tok->str = NULL; - tok->report_warnings = 1; - tok->tok_extra_tokens = 0; - tok->comment_newline = 0; - tok->implicit_newline = 0; - tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0}; - tok->tok_mode_stack_index = 0; +static struct tok_state *tok_new(void) { + struct tok_state *tok = + (struct tok_state *)PyMem_Malloc(sizeof(struct tok_state)); + if (tok == NULL) + return NULL; + tok->buf = tok->cur = tok->inp = NULL; + tok->fp_interactive = 0; + tok->interactive_src_start = NULL; + tok->interactive_src_end = NULL; + tok->start = NULL; + tok->end = NULL; + tok->done = E_OK; + tok->fp = NULL; + tok->input = NULL; + tok->tabsize = TABSIZE; + tok->indent = 0; + tok->indstack[0] = 0; + tok->atbol = 1; + tok->pendin = 0; + tok->prompt = tok->nextprompt = NULL; + tok->lineno = 0; + tok->starting_col_offset = -1; + tok->col_offset = -1; + tok->level = 0; + tok->altindstack[0] = 0; + tok->decoding_state = STATE_INIT; + tok->decoding_erred = 0; + tok->enc = NULL; + tok->encoding = NULL; + tok->cont_line = 0; + tok->filename = NULL; + tok->decoding_readline = NULL; + tok->decoding_buffer = NULL; + tok->readline = NULL; + tok->type_comments = 0; + tok->async_hacks = 0; + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; + tok->interactive_underflow = IUNDERFLOW_NORMAL; + tok->str = NULL; + tok->report_warnings = 1; + tok->tok_extra_tokens = 0; + tok->comment_newline = 0; + tok->implicit_newline = 0; + tok->tok_mode_stack[0] = (tokenizer_mode){.kind = TOK_REGULAR_MODE, + .f_string_quote = '\0', + .f_string_quote_size = 0, + .f_string_debug = 0}; + tok->tok_mode_stack_index = 0; #ifdef Py_DEBUG - tok->debug = _Py_GetConfig()->parser_debug; + tok->debug = _Py_GetConfig()->parser_debug; #endif - return tok; + return tok; } -static char * -new_string(const char *s, Py_ssize_t len, struct tok_state *tok) -{ - char* result = (char *)PyMem_Malloc(len + 1); - if (!result) { - tok->done = E_NOMEM; - return NULL; - } - memcpy(result, s, len); - result[len] = '\0'; - return result; +static char *new_string(const char *s, Py_ssize_t len, struct tok_state *tok) { + char *result = (char *)PyMem_Malloc(len + 1); + if (!result) { + tok->done = E_NOMEM; + return NULL; + } + 
memcpy(result, s, len); + result[len] = '\0'; + return result; } -static char * -error_ret(struct tok_state *tok) /* XXX */ +static char *error_ret(struct tok_state *tok) /* XXX */ { - tok->decoding_erred = 1; - if ((tok->fp != NULL || tok->readline != NULL) && tok->buf != NULL) {/* see _PyTokenizer_Free */ - PyMem_Free(tok->buf); - } - tok->buf = tok->cur = tok->inp = NULL; - tok->start = NULL; - tok->end = NULL; - tok->done = E_DECODE; - return NULL; /* as if it were EOF */ -} - - -static const char * -get_normal_name(const char *s) /* for utf-8 and latin-1 */ + tok->decoding_erred = 1; + if ((tok->fp != NULL || tok->readline != NULL) && + tok->buf != NULL) { /* see _PyTokenizer_Free */ + PyMem_Free(tok->buf); + } + tok->buf = tok->cur = tok->inp = NULL; + tok->start = NULL; + tok->end = NULL; + tok->done = E_DECODE; + return NULL; /* as if it were EOF */ +} + +static const char *get_normal_name(const char *s) /* for utf-8 and latin-1 */ { - char buf[13]; - int i; - for (i = 0; i < 12; i++) { - int c = s[i]; - if (c == '\0') - break; - else if (c == '_') - buf[i] = '-'; - else - buf[i] = tolower(c); - } - buf[i] = '\0'; - if (strcmp(buf, "utf-8") == 0 || - strncmp(buf, "utf-8-", 6) == 0) - return "utf-8"; - else if (strcmp(buf, "latin-1") == 0 || - strcmp(buf, "iso-8859-1") == 0 || - strcmp(buf, "iso-latin-1") == 0 || - strncmp(buf, "latin-1-", 8) == 0 || - strncmp(buf, "iso-8859-1-", 11) == 0 || - strncmp(buf, "iso-latin-1-", 12) == 0) - return "iso-8859-1"; + char buf[13]; + int i; + for (i = 0; i < 12; i++) { + int c = s[i]; + if (c == '\0') + break; + else if (c == '_') + buf[i] = '-'; else - return s; + buf[i] = tolower(c); + } + buf[i] = '\0'; + if (strcmp(buf, "utf-8") == 0 || strncmp(buf, "utf-8-", 6) == 0) + return "utf-8"; + else if (strcmp(buf, "latin-1") == 0 || strcmp(buf, "iso-8859-1") == 0 || + strcmp(buf, "iso-latin-1") == 0 || + strncmp(buf, "latin-1-", 8) == 0 || + strncmp(buf, "iso-8859-1-", 11) == 0 || + strncmp(buf, "iso-latin-1-", 12) == 0) + return "iso-8859-1"; + else + return s; } /* Return the coding spec in S, or NULL if none is found. */ -static int -get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok) -{ - Py_ssize_t i; - *spec = NULL; - /* Coding spec must be in a comment, and that comment must be - * the only statement on the source code line. */ - for (i = 0; i < size - 6; i++) { - if (s[i] == '#') - break; - if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') - return 1; - } - for (; i < size - 6; i++) { /* XXX inefficient search */ - const char* t = s + i; - if (memcmp(t, "coding", 6) == 0) { - const char* begin = NULL; - t += 6; - if (t[0] != ':' && t[0] != '=') - continue; - do { - t++; - } while (t[0] == ' ' || t[0] == '\t'); - - begin = t; - while (Py_ISALNUM(t[0]) || - t[0] == '-' || t[0] == '_' || t[0] == '.') - t++; - - if (begin < t) { - char* r = new_string(begin, t - begin, tok); - const char* q; - if (!r) - return 0; - q = get_normal_name(r); - if (r != q) { - PyMem_Free(r); - r = new_string(q, strlen(q), tok); - if (!r) - return 0; - } - *spec = r; - break; - } +static int get_coding_spec(const char *s, char **spec, Py_ssize_t size, + struct tok_state *tok) { + Py_ssize_t i; + *spec = NULL; + /* Coding spec must be in a comment, and that comment must be + * the only statement on the source code line. 
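A short sketch of what this function looks for (assuming standard CPython behaviour; "<demo>" and the variable names are arbitrary): a PEP 263 coding cookie on a comment-only line near the top of the source, which compile() then honours when given bytes.

    # A cookie such as "# -*- coding: latin-1 -*-" on line 1 or 2 is what
    # get_coding_spec extracts (get_normal_name maps latin-1 spellings to
    # "iso-8859-1").
    source = b"# -*- coding: latin-1 -*-\ns = '\xe9'\n"
    namespace = {}
    exec(compile(source, "<demo>", "exec"), namespace)
    print(namespace["s"])   # the 0xE9 byte decodes as U+00E9 under latin-1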
*/ + for (i = 0; i < size - 6; i++) { + if (s[i] == '#') + break; + if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') + return 1; + } + for (; i < size - 6; i++) { /* XXX inefficient search */ + const char *t = s + i; + if (memcmp(t, "coding", 6) == 0) { + const char *begin = NULL; + t += 6; + if (t[0] != ':' && t[0] != '=') + continue; + do { + t++; + } while (t[0] == ' ' || t[0] == '\t'); + + begin = t; + while (Py_ISALNUM(t[0]) || t[0] == '-' || t[0] == '_' || t[0] == '.') + t++; + + if (begin < t) { + char *r = new_string(begin, t - begin, tok); + const char *q; + if (!r) + return 0; + q = get_normal_name(r); + if (r != q) { + PyMem_Free(r); + r = new_string(q, strlen(q), tok); + if (!r) + return 0; } + *spec = r; + break; + } } - return 1; + } + return 1; } /* Check whether the line contains a coding spec. If it does, @@ -237,299 +225,288 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t Return 1 on success, 0 on failure. */ static int -check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, - int set_readline(struct tok_state *, const char *)) -{ - char *cs; - if (tok->cont_line) { - /* It's a continuation line, so it can't be a coding spec. */ - tok->decoding_state = STATE_NORMAL; - return 1; - } - if (!get_coding_spec(line, &cs, size, tok)) { - return 0; - } - if (!cs) { - Py_ssize_t i; - for (i = 0; i < size; i++) { - if (line[i] == '#' || line[i] == '\n' || line[i] == '\r') - break; - if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') { - /* Stop checking coding spec after a line containing - * anything except a comment. */ - tok->decoding_state = STATE_NORMAL; - break; - } - } - return 1; - } +check_coding_spec(const char *line, Py_ssize_t size, struct tok_state *tok, + int set_readline(struct tok_state *, const char *)) { + char *cs; + if (tok->cont_line) { + /* It's a continuation line, so it can't be a coding spec. */ tok->decoding_state = STATE_NORMAL; - if (tok->encoding == NULL) { - assert(tok->decoding_readline == NULL); - if (strcmp(cs, "utf-8") != 0 && !set_readline(tok, cs)) { - error_ret(tok); - PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); - PyMem_Free(cs); - return 0; - } - tok->encoding = cs; - } else { /* then, compare cs with BOM */ - if (strcmp(tok->encoding, cs) != 0) { - error_ret(tok); - PyErr_Format(PyExc_SyntaxError, - "encoding problem: %s with BOM", cs); - PyMem_Free(cs); - return 0; - } - PyMem_Free(cs); + return 1; + } + if (!get_coding_spec(line, &cs, size, tok)) { + return 0; + } + if (!cs) { + Py_ssize_t i; + for (i = 0; i < size; i++) { + if (line[i] == '#' || line[i] == '\n' || line[i] == '\r') + break; + if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') { + /* Stop checking coding spec after a line containing + * anything except a comment. */ + tok->decoding_state = STATE_NORMAL; + break; + } } return 1; + } + tok->decoding_state = STATE_NORMAL; + if (tok->encoding == NULL) { + assert(tok->decoding_readline == NULL); + if (strcmp(cs, "utf-8") != 0 && !set_readline(tok, cs)) { + error_ret(tok); + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + PyMem_Free(cs); + return 0; + } + tok->encoding = cs; + } else { /* then, compare cs with BOM */ + if (strcmp(tok->encoding, cs) != 0) { + error_ret(tok); + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s with BOM", cs); + PyMem_Free(cs); + return 0; + } + PyMem_Free(cs); + } + return 1; } /* See whether the file starts with a BOM. If it does, invoke the set_readline function with the new encoding. 
Return 1 on success, 0 on failure. */ -static int -check_bom(int get_char(struct tok_state *), - void unget_char(int, struct tok_state *), - int set_readline(struct tok_state *, const char *), - struct tok_state *tok) -{ - int ch1, ch2, ch3; - ch1 = get_char(tok); - tok->decoding_state = STATE_SEEK_CODING; - if (ch1 == EOF) { - return 1; - } else if (ch1 == 0xEF) { - ch2 = get_char(tok); - if (ch2 != 0xBB) { - unget_char(ch2, tok); - unget_char(ch1, tok); - return 1; - } - ch3 = get_char(tok); - if (ch3 != 0xBF) { - unget_char(ch3, tok); - unget_char(ch2, tok); - unget_char(ch1, tok); - return 1; - } - } else { - unget_char(ch1, tok); - return 1; - } - if (tok->encoding != NULL) - PyMem_Free(tok->encoding); - tok->encoding = new_string("utf-8", 5, tok); - if (!tok->encoding) - return 0; - /* No need to set_readline: input is already utf-8 */ +static int check_bom(int get_char(struct tok_state *), + void unget_char(int, struct tok_state *), + int set_readline(struct tok_state *, const char *), + struct tok_state *tok) { + int ch1, ch2, ch3; + ch1 = get_char(tok); + tok->decoding_state = STATE_SEEK_CODING; + if (ch1 == EOF) { + return 1; + } else if (ch1 == 0xEF) { + ch2 = get_char(tok); + if (ch2 != 0xBB) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + ch3 = get_char(tok); + if (ch3 != 0xBF) { + unget_char(ch3, tok); + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + } else { + unget_char(ch1, tok); return 1; + } + if (tok->encoding != NULL) + PyMem_Free(tok->encoding); + tok->encoding = new_string("utf-8", 5, tok); + if (!tok->encoding) + return 0; + /* No need to set_readline: input is already utf-8 */ + return 1; } -static int -tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) { - assert(tok->fp_interactive); - - if (!line) { - return 0; - } - - Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start; - Py_ssize_t line_size = strlen(line); - char last_char = line[line_size > 0 ? line_size - 1 : line_size]; - if (last_char != '\n') { - line_size += 1; - } - char* new_str = tok->interactive_src_start; +static int tok_concatenate_interactive_new_line(struct tok_state *tok, + const char *line) { + assert(tok->fp_interactive); - new_str = PyMem_Realloc(new_str, current_size + line_size + 1); - if (!new_str) { - if (tok->interactive_src_start) { - PyMem_Free(tok->interactive_src_start); - } - tok->interactive_src_start = NULL; - tok->interactive_src_end = NULL; - tok->done = E_NOMEM; - return -1; - } - strcpy(new_str + current_size, line); - tok->implicit_newline = 0; - if (last_char != '\n') { - /* Last line does not end in \n, fake one */ - new_str[current_size + line_size - 1] = '\n'; - new_str[current_size + line_size] = '\0'; - tok->implicit_newline = 1; - } - tok->interactive_src_start = new_str; - tok->interactive_src_end = new_str + current_size + line_size; + if (!line) { return 0; + } + + Py_ssize_t current_size = + tok->interactive_src_end - tok->interactive_src_start; + Py_ssize_t line_size = strlen(line); + char last_char = line[line_size > 0 ? 
line_size - 1 : line_size]; + if (last_char != '\n') { + line_size += 1; + } + char *new_str = tok->interactive_src_start; + + new_str = PyMem_Realloc(new_str, current_size + line_size + 1); + if (!new_str) { + if (tok->interactive_src_start) { + PyMem_Free(tok->interactive_src_start); + } + tok->interactive_src_start = NULL; + tok->interactive_src_end = NULL; + tok->done = E_NOMEM; + return -1; + } + strcpy(new_str + current_size, line); + tok->implicit_newline = 0; + if (last_char != '\n') { + /* Last line does not end in \n, fake one */ + new_str[current_size + line_size - 1] = '\n'; + new_str[current_size + line_size] = '\0'; + tok->implicit_newline = 1; + } + tok->interactive_src_start = new_str; + tok->interactive_src_end = new_str + current_size + line_size; + return 0; } /* Traverse and remember all f-string buffers, in order to be able to restore them after reallocating tok->buf */ -static void -remember_fstring_buffers(struct tok_state *tok) -{ - int index; - tokenizer_mode *mode; +static void remember_fstring_buffers(struct tok_state *tok) { + int index; + tokenizer_mode *mode; - for (index = tok->tok_mode_stack_index; index >= 0; --index) { - mode = &(tok->tok_mode_stack[index]); - mode->f_string_start_offset = mode->f_string_start - tok->buf; - mode->f_string_multi_line_start_offset = mode->f_string_multi_line_start - tok->buf; - } + for (index = tok->tok_mode_stack_index; index >= 0; --index) { + mode = &(tok->tok_mode_stack[index]); + mode->f_string_start_offset = mode->f_string_start - tok->buf; + mode->f_string_multi_line_start_offset = + mode->f_string_multi_line_start - tok->buf; + } } /* Traverse and restore all f-string buffers after reallocating tok->buf */ -static void -restore_fstring_buffers(struct tok_state *tok) -{ - int index; - tokenizer_mode *mode; +static void restore_fstring_buffers(struct tok_state *tok) { + int index; + tokenizer_mode *mode; - for (index = tok->tok_mode_stack_index; index >= 0; --index) { - mode = &(tok->tok_mode_stack[index]); - mode->f_string_start = tok->buf + mode->f_string_start_offset; - mode->f_string_multi_line_start = tok->buf + mode->f_string_multi_line_start_offset; - } + for (index = tok->tok_mode_stack_index; index >= 0; --index) { + mode = &(tok->tok_mode_stack[index]); + mode->f_string_start = tok->buf + mode->f_string_start_offset; + mode->f_string_multi_line_start = + tok->buf + mode->f_string_multi_line_start_offset; + } } -static int -set_fstring_expr(struct tok_state* tok, struct token *token, char c) { - assert(token != NULL); - assert(c == '}' || c == ':' || c == '!'); - tokenizer_mode *tok_mode = TOK_GET_MODE(tok); +static int set_fstring_expr(struct tok_state *tok, struct token *token, + char c) { + assert(token != NULL); + assert(c == '}' || c == ':' || c == '!'); + tokenizer_mode *tok_mode = TOK_GET_MODE(tok); - if (!tok_mode->f_string_debug || token->metadata) { - return 0; - } + if (!tok_mode->f_string_debug || token->metadata) { + return 0; + } - PyObject *res = NULL; + PyObject *res = NULL; - // Check if there is a # character in the expression - int hash_detected = 0; - for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { - hash_detected = 1; - break; - } + // Check if there is a # character in the expression + int hash_detected = 0; + for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; + i++) { + if (tok_mode->last_expr_buffer[i] == '#') { + hash_detected = 1; + break; } + } - if (hash_detected) { - 
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); - if (!result) { - return -1; - } - - Py_ssize_t i = 0; - Py_ssize_t j = 0; - - for (i = 0, j = 0; i < input_length; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { - // Skip characters until newline or end of string - while (tok_mode->last_expr_buffer[i] != '\0' && i < input_length) { - if (tok_mode->last_expr_buffer[i] == '\n') { - result[j++] = tok_mode->last_expr_buffer[i]; - break; - } - i++; - } - } else { - result[j++] = tok_mode->last_expr_buffer[i]; - } - } - - result[j] = '\0'; // Null-terminate the result string - res = PyUnicode_DecodeUTF8(result, j, NULL); - PyObject_Free(result); - } else { - res = PyUnicode_DecodeUTF8( - tok_mode->last_expr_buffer, - tok_mode->last_expr_size - tok_mode->last_expr_end, - NULL - ); - + if (hash_detected) { + Py_ssize_t input_length = + tok_mode->last_expr_size - tok_mode->last_expr_end; + char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); + if (!result) { + return -1; } + Py_ssize_t i = 0; + Py_ssize_t j = 0; - if (!res) { - return -1; + for (i = 0, j = 0; i < input_length; i++) { + if (tok_mode->last_expr_buffer[i] == '#') { + // Skip characters until newline or end of string + while (tok_mode->last_expr_buffer[i] != '\0' && i < input_length) { + if (tok_mode->last_expr_buffer[i] == '\n') { + result[j++] = tok_mode->last_expr_buffer[i]; + break; + } + i++; + } + } else { + result[j++] = tok_mode->last_expr_buffer[i]; + } } - token->metadata = res; - return 0; -} -static int -update_fstring_expr(struct tok_state *tok, char cur) -{ - assert(tok->cur != NULL); + result[j] = '\0'; // Null-terminate the result string + res = PyUnicode_DecodeUTF8(result, j, NULL); + PyObject_Free(result); + } else { + res = PyUnicode_DecodeUTF8( + tok_mode->last_expr_buffer, + tok_mode->last_expr_size - tok_mode->last_expr_end, NULL); + } - Py_ssize_t size = strlen(tok->cur); - tokenizer_mode *tok_mode = TOK_GET_MODE(tok); - - switch (cur) { - case 0: - if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) { - return 1; - } - char *new_buffer = PyMem_Realloc( - tok_mode->last_expr_buffer, - tok_mode->last_expr_size + size - ); - if (new_buffer == NULL) { - PyMem_Free(tok_mode->last_expr_buffer); - goto error; - } - tok_mode->last_expr_buffer = new_buffer; - strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size); - tok_mode->last_expr_size += size; - break; - case '{': - if (tok_mode->last_expr_buffer != NULL) { - PyMem_Free(tok_mode->last_expr_buffer); - } - tok_mode->last_expr_buffer = PyMem_Malloc(size); - if (tok_mode->last_expr_buffer == NULL) { - goto error; - } - tok_mode->last_expr_size = size; - tok_mode->last_expr_end = -1; - strncpy(tok_mode->last_expr_buffer, tok->cur, size); - break; - case '}': - case '!': - case ':': - if (tok_mode->last_expr_end == -1) { - tok_mode->last_expr_end = strlen(tok->start); - } - break; - default: - Py_UNREACHABLE(); - } - return 1; + if (!res) { + return -1; + } + token->metadata = res; + return 0; +} + +static int update_fstring_expr(struct tok_state *tok, char cur) { + assert(tok->cur != NULL); + + Py_ssize_t size = strlen(tok->cur); + tokenizer_mode *tok_mode = TOK_GET_MODE(tok); + + switch (cur) { + case 0: + if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) { + return 1; + } + char *new_buffer = PyMem_Realloc(tok_mode->last_expr_buffer, + tok_mode->last_expr_size + size); + if 
(new_buffer == NULL) { + PyMem_Free(tok_mode->last_expr_buffer); + goto error; + } + tok_mode->last_expr_buffer = new_buffer; + strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, + size); + tok_mode->last_expr_size += size; + break; + case '{': + if (tok_mode->last_expr_buffer != NULL) { + PyMem_Free(tok_mode->last_expr_buffer); + } + tok_mode->last_expr_buffer = PyMem_Malloc(size); + if (tok_mode->last_expr_buffer == NULL) { + goto error; + } + tok_mode->last_expr_size = size; + tok_mode->last_expr_end = -1; + strncpy(tok_mode->last_expr_buffer, tok->cur, size); + break; + case '}': + case '!': + case ':': + if (tok_mode->last_expr_end == -1) { + tok_mode->last_expr_end = strlen(tok->start); + } + break; + default: + Py_UNREACHABLE(); + } + return 1; error: - tok->done = E_NOMEM; - return 0; + tok->done = E_NOMEM; + return 0; } -static void -free_fstring_expressions(struct tok_state *tok) -{ - int index; - tokenizer_mode *mode; - - for (index = tok->tok_mode_stack_index; index >= 0; --index) { - mode = &(tok->tok_mode_stack[index]); - if (mode->last_expr_buffer != NULL) { - PyMem_Free(mode->last_expr_buffer); - mode->last_expr_buffer = NULL; - mode->last_expr_size = 0; - mode->last_expr_end = -1; - } +static void free_fstring_expressions(struct tok_state *tok) { + int index; + tokenizer_mode *mode; + + for (index = tok->tok_mode_stack_index; index >= 0; --index) { + mode = &(tok->tok_mode_stack[index]); + if (mode->last_expr_buffer != NULL) { + PyMem_Free(mode->last_expr_buffer); + mode->last_expr_buffer = NULL; + mode->last_expr_size = 0; + mode->last_expr_end = -1; + mode->in_format_spec = 0; } + } } /* Read a line of text from TOK into S, using the stream in TOK. @@ -539,88 +516,85 @@ free_fstring_expressions(struct tok_state *tok) 1) NULL: need to call tok->decoding_readline to get a new line 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and stored the result in tok->decoding_buffer - 3) PyByteArrayObject *: previous call to tok_readline_recode did not have enough room - (in the s buffer) to copy entire contents of the line read - by tok->decoding_readline. tok->decoding_buffer has the overflow. - In this case, tok_readline_recode is called in a loop (with an expanded buffer) - until the buffer ends with a '\n' (or until the end of the file is - reached): see tok_nextc and its calls to tok_reserve_buf. + 3) PyByteArrayObject *: previous call to tok_readline_recode did not have + enough room (in the s buffer) to copy entire contents of the line read by + tok->decoding_readline. tok->decoding_buffer has the overflow. In this case, + tok_readline_recode is called in a loop (with an expanded buffer) until the + buffer ends with a '\n' (or until the end of the file is reached): see + tok_nextc and its calls to tok_reserve_buf. */ -static int -tok_reserve_buf(struct tok_state *tok, Py_ssize_t size) -{ - Py_ssize_t cur = tok->cur - tok->buf; - Py_ssize_t oldsize = tok->inp - tok->buf; - Py_ssize_t newsize = oldsize + Py_MAX(size, oldsize >> 1); - if (newsize > tok->end - tok->buf) { - char *newbuf = tok->buf; - Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf; - Py_ssize_t line_start = tok->start == NULL ? 
-1 : tok->line_start - tok->buf; - Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf; - remember_fstring_buffers(tok); - newbuf = (char *)PyMem_Realloc(newbuf, newsize); - if (newbuf == NULL) { - tok->done = E_NOMEM; - return 0; - } - tok->buf = newbuf; - tok->cur = tok->buf + cur; - tok->inp = tok->buf + oldsize; - tok->end = tok->buf + newsize; - tok->start = start < 0 ? NULL : tok->buf + start; - tok->line_start = line_start < 0 ? NULL : tok->buf + line_start; - tok->multi_line_start = multi_line_start < 0 ? NULL : tok->buf + multi_line_start; - restore_fstring_buffers(tok); - } - return 1; -} - -static inline int -contains_null_bytes(const char* str, size_t size) { - return memchr(str, 0, size) != NULL; -} - -static int -tok_readline_recode(struct tok_state *tok) { - PyObject *line; - const char *buf; - Py_ssize_t buflen; - line = tok->decoding_buffer; +static int tok_reserve_buf(struct tok_state *tok, Py_ssize_t size) { + Py_ssize_t cur = tok->cur - tok->buf; + Py_ssize_t oldsize = tok->inp - tok->buf; + Py_ssize_t newsize = oldsize + Py_MAX(size, oldsize >> 1); + if (newsize > tok->end - tok->buf) { + char *newbuf = tok->buf; + Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf; + Py_ssize_t line_start = + tok->start == NULL ? -1 : tok->line_start - tok->buf; + Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf; + remember_fstring_buffers(tok); + newbuf = (char *)PyMem_Realloc(newbuf, newsize); + if (newbuf == NULL) { + tok->done = E_NOMEM; + return 0; + } + tok->buf = newbuf; + tok->cur = tok->buf + cur; + tok->inp = tok->buf + oldsize; + tok->end = tok->buf + newsize; + tok->start = start < 0 ? NULL : tok->buf + start; + tok->line_start = line_start < 0 ? NULL : tok->buf + line_start; + tok->multi_line_start = + multi_line_start < 0 ? NULL : tok->buf + multi_line_start; + restore_fstring_buffers(tok); + } + return 1; +} + +static inline int contains_null_bytes(const char *str, size_t size) { + return memchr(str, 0, size) != NULL; +} + +static int tok_readline_recode(struct tok_state *tok) { + PyObject *line; + const char *buf; + Py_ssize_t buflen; + line = tok->decoding_buffer; + if (line == NULL) { + line = PyObject_CallNoArgs(tok->decoding_readline); if (line == NULL) { - line = PyObject_CallNoArgs(tok->decoding_readline); - if (line == NULL) { - error_ret(tok); - goto error; - } - } - else { - tok->decoding_buffer = NULL; - } - buf = PyUnicode_AsUTF8AndSize(line, &buflen); - if (buf == NULL) { - error_ret(tok); - goto error; - } - // Make room for the null terminator *and* potentially - // an extra newline character that we may need to artificially - // add. - size_t buffer_size = buflen + 2; - if (!tok_reserve_buf(tok, buffer_size)) { - goto error; + error_ret(tok); + goto error; } - memcpy(tok->inp, buf, buflen); - tok->inp += buflen; - *tok->inp = '\0'; - if (tok->fp_interactive && - tok_concatenate_interactive_new_line(tok, buf) == -1) { - goto error; - } - Py_DECREF(line); - return 1; + } else { + tok->decoding_buffer = NULL; + } + buf = PyUnicode_AsUTF8AndSize(line, &buflen); + if (buf == NULL) { + error_ret(tok); + goto error; + } + // Make room for the null terminator *and* potentially + // an extra newline character that we may need to artificially + // add. 
+ size_t buffer_size = buflen + 2; + if (!tok_reserve_buf(tok, buffer_size)) { + goto error; + } + memcpy(tok->inp, buf, buflen); + tok->inp += buflen; + *tok->inp = '\0'; + if (tok->fp_interactive && + tok_concatenate_interactive_new_line(tok, buf) == -1) { + goto error; + } + Py_DECREF(line); + return 1; error: - Py_XDECREF(line); - return 0; + Py_XDECREF(line); + return 0; } /* Set the readline function for TOK to a StreamReader's @@ -633,2383 +607,2290 @@ tok_readline_recode(struct tok_state *tok) { Return 1 on success, 0 on failure. */ -static int -fp_setreadl(struct tok_state *tok, const char* enc) -{ - PyObject *readline, *open, *stream; - int fd; - long pos; - - fd = fileno(tok->fp); - /* Due to buffering the file offset for fd can be different from the file - * position of tok->fp. If tok->fp was opened in text mode on Windows, - * its file position counts CRLF as one char and can't be directly mapped - * to the file offset for fd. Instead we step back one byte and read to - * the end of line.*/ - pos = ftell(tok->fp); - if (pos == -1 || - lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) { - PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL); - return 0; - } +static int fp_setreadl(struct tok_state *tok, const char *enc) { + PyObject *readline, *open, *stream; + int fd; + long pos; + + fd = fileno(tok->fp); + /* Due to buffering the file offset for fd can be different from the file + * position of tok->fp. If tok->fp was opened in text mode on Windows, + * its file position counts CRLF as one char and can't be directly mapped + * to the file offset for fd. Instead we step back one byte and read to + * the end of line.*/ + pos = ftell(tok->fp); + if (pos == -1 || + lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL); + return 0; + } - open = _PyImport_GetModuleAttrString("io", "open"); - if (open == NULL) { - return 0; - } - stream = PyObject_CallFunction(open, "isisOOO", - fd, "r", -1, enc, Py_None, Py_None, Py_False); - Py_DECREF(open); - if (stream == NULL) { - return 0; - } + open = _PyImport_GetModuleAttrString("io", "open"); + if (open == NULL) { + return 0; + } + stream = PyObject_CallFunction(open, "isisOOO", fd, "r", -1, enc, Py_None, + Py_None, Py_False); + Py_DECREF(open); + if (stream == NULL) { + return 0; + } - readline = PyObject_GetAttr(stream, &_Py_ID(readline)); - Py_DECREF(stream); - if (readline == NULL) { - return 0; - } - Py_XSETREF(tok->decoding_readline, readline); + readline = PyObject_GetAttr(stream, &_Py_ID(readline)); + Py_DECREF(stream); + if (readline == NULL) { + return 0; + } + Py_XSETREF(tok->decoding_readline, readline); - if (pos > 0) { - PyObject *bufobj = _PyObject_CallNoArgs(readline); - if (bufobj == NULL) { - return 0; - } - Py_DECREF(bufobj); + if (pos > 0) { + PyObject *bufobj = _PyObject_CallNoArgs(readline); + if (bufobj == NULL) { + return 0; } + Py_DECREF(bufobj); + } - return 1; + return 1; } /* Fetch the next byte from TOK. */ -static int fp_getc(struct tok_state *tok) { - return getc(tok->fp); -} +static int fp_getc(struct tok_state *tok) { return getc(tok->fp); } /* Unfetch the last byte back into TOK. */ -static void fp_ungetc(int c, struct tok_state *tok) { - ungetc(c, tok->fp); -} +static void fp_ungetc(int c, struct tok_state *tok) { ungetc(c, tok->fp); } /* Check whether the characters at s start a valid UTF-8 sequence. Return the number of characters forming the sequence if yes, 0 if not. 
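For illustration, the same constraints seen from Python (a sketch): well-formed multi-byte sequences decode, while encoded surrogates are rejected just as described here.

    print(b"\xc3\xa9".decode("utf-8"))      # C3 A9: a valid 2-byte sequence (U+00E9)
    try:
        b"\xed\xa0\x80".decode("utf-8")     # ED A0 80 would be surrogate U+D800
    except UnicodeDecodeError as exc:
        print(exc)                          # rejected, matching the check above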
The special cases match those in stringlib/codecs.h:utf8_decode. */ -static int -valid_utf8(const unsigned char* s) -{ - int expected = 0; - int length; - if (*s < 0x80) { - /* single-byte code */ - return 1; - } - else if (*s < 0xE0) { - /* \xC2\x80-\xDF\xBF -- 0080-07FF */ - if (*s < 0xC2) { - /* invalid sequence - \x80-\xBF -- continuation byte - \xC0-\xC1 -- fake 0000-007F */ - return 0; - } - expected = 1; - } - else if (*s < 0xF0) { - /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */ - if (*s == 0xE0 && *(s + 1) < 0xA0) { - /* invalid sequence - \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */ - return 0; - } - else if (*s == 0xED && *(s + 1) >= 0xA0) { - /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF - will result in surrogates in range D800-DFFF. Surrogates are - not valid UTF-8 so they are rejected. - See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf - (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ - return 0; - } - expected = 2; - } - else if (*s < 0xF5) { - /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */ - if (*(s + 1) < 0x90 ? *s == 0xF0 : *s == 0xF4) { - /* invalid sequence -- one of: - \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF - \xF4\x90\x80\x80- -- 110000- overflow */ - return 0; - } - expected = 3; - } - else { - /* invalid start byte */ - return 0; - } - length = expected + 1; - for (; expected; expected--) - if (s[expected] < 0x80 || s[expected] >= 0xC0) - return 0; - return length; -} - -static int -ensure_utf8(char *line, struct tok_state *tok) -{ - int badchar = 0; - unsigned char *c; - int length; - for (c = (unsigned char *)line; *c; c += length) { - if (!(length = valid_utf8(c))) { - badchar = *c; - break; - } - } - if (badchar) { - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see https://peps.python.org/pep-0263/ for details", - badchar, tok->filename, tok->lineno); - return 0; - } +static int valid_utf8(const unsigned char *s) { + int expected = 0; + int length; + if (*s < 0x80) { + /* single-byte code */ return 1; + } else if (*s < 0xE0) { + /* \xC2\x80-\xDF\xBF -- 0080-07FF */ + if (*s < 0xC2) { + /* invalid sequence + \x80-\xBF -- continuation byte + \xC0-\xC1 -- fake 0000-007F */ + return 0; + } + expected = 1; + } else if (*s < 0xF0) { + /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */ + if (*s == 0xE0 && *(s + 1) < 0xA0) { + /* invalid sequence + \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */ + return 0; + } else if (*s == 0xED && *(s + 1) >= 0xA0) { + /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF + will result in surrogates in range D800-DFFF. Surrogates are + not valid UTF-8 so they are rejected. + See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf + (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ + return 0; + } + expected = 2; + } else if (*s < 0xF5) { + /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */ + if (*(s + 1) < 0x90 ? 
*s == 0xF0 : *s == 0xF4) { + /* invalid sequence -- one of: + \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF + \xF4\x90\x80\x80- -- 110000- overflow */ + return 0; + } + expected = 3; + } else { + /* invalid start byte */ + return 0; + } + length = expected + 1; + for (; expected; expected--) + if (s[expected] < 0x80 || s[expected] >= 0xC0) + return 0; + return length; +} + +static int ensure_utf8(char *line, struct tok_state *tok) { + int badchar = 0; + unsigned char *c; + int length; + for (c = (unsigned char *)line; *c; c += length) { + if (!(length = valid_utf8(c))) { + badchar = *c; + break; + } + } + if (badchar) { + PyErr_Format(PyExc_SyntaxError, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %U on line %i, " + "but no encoding declared; " + "see https://peps.python.org/pep-0263/ for details", + badchar, tok->filename, tok->lineno); + return 0; + } + return 1; } /* Fetch a byte from TOK, using the string buffer. */ -static int -buf_getc(struct tok_state *tok) { - return Py_CHARMASK(*tok->str++); -} +static int buf_getc(struct tok_state *tok) { return Py_CHARMASK(*tok->str++); } /* Unfetch a byte from TOK, using the string buffer. */ -static void -buf_ungetc(int c, struct tok_state *tok) { - tok->str--; - assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ +static void buf_ungetc(int c, struct tok_state *tok) { + tok->str--; + assert(Py_CHARMASK(*tok->str) == + c); /* tok->cur may point to read-only segment */ } /* Set the readline function for TOK to ENC. For the string-based tokenizer, this means to just record the encoding. */ -static int -buf_setreadl(struct tok_state *tok, const char* enc) { - tok->enc = enc; - return 1; +static int buf_setreadl(struct tok_state *tok, const char *enc) { + tok->enc = enc; + return 1; } /* Return a UTF-8 encoding Python string object from the C byte string STR, which is encoded with ENC. */ -static PyObject * -translate_into_utf8(const char* str, const char* enc) { - PyObject *utf8; - PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); - if (buf == NULL) - return NULL; - utf8 = PyUnicode_AsUTF8String(buf); - Py_DECREF(buf); - return utf8; -} - - -static char * -translate_newlines(const char *s, int exec_input, int preserve_crlf, - struct tok_state *tok) { - int skip_next_lf = 0; - size_t needed_length = strlen(s) + 2, final_length; - char *buf, *current; - char c = '\0'; - buf = PyMem_Malloc(needed_length); - if (buf == NULL) { - tok->done = E_NOMEM; - return NULL; - } - for (current = buf; *s; s++, current++) { - c = *s; - if (skip_next_lf) { - skip_next_lf = 0; - if (c == '\n') { - c = *++s; - if (!c) - break; - } - } - if (!preserve_crlf && c == '\r') { - skip_next_lf = 1; - c = '\n'; - } - *current = c; - } - /* If this is exec input, add a newline to the end of the string if - there isn't one already. 
*/ - if (exec_input && c != '\n' && c != '\0') { - *current = '\n'; - current++; - } - *current = '\0'; - final_length = current - buf + 1; - if (final_length < needed_length && final_length) { - /* should never fail */ - char* result = PyMem_Realloc(buf, final_length); - if (result == NULL) { - PyMem_Free(buf); - } - buf = result; - } - return buf; +static PyObject *translate_into_utf8(const char *str, const char *enc) { + PyObject *utf8; + PyObject *buf = PyUnicode_Decode(str, strlen(str), enc, NULL); + if (buf == NULL) + return NULL; + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + return utf8; +} + +static char *translate_newlines(const char *s, int exec_input, + int preserve_crlf, struct tok_state *tok) { + int skip_next_lf = 0; + size_t needed_length = strlen(s) + 2, final_length; + char *buf, *current; + char c = '\0'; + buf = PyMem_Malloc(needed_length); + if (buf == NULL) { + tok->done = E_NOMEM; + return NULL; + } + for (current = buf; *s; s++, current++) { + c = *s; + if (skip_next_lf) { + skip_next_lf = 0; + if (c == '\n') { + c = *++s; + if (!c) + break; + } + } + if (!preserve_crlf && c == '\r') { + skip_next_lf = 1; + c = '\n'; + } + *current = c; + } + /* If this is exec input, add a newline to the end of the string if + there isn't one already. */ + if (exec_input && c != '\n' && c != '\0') { + *current = '\n'; + current++; + } + *current = '\0'; + final_length = current - buf + 1; + if (final_length < needed_length && final_length) { + /* should never fail */ + char *result = PyMem_Realloc(buf, final_length); + if (result == NULL) { + PyMem_Free(buf); + } + buf = result; + } + return buf; } /* Decode a byte string STR for use as the buffer of TOK. Look for encoding declarations inside STR, and record them inside TOK. */ -static char * -decode_str(const char *input, int single, struct tok_state *tok, int preserve_crlf) -{ - PyObject* utf8 = NULL; - char *str; - const char *s; - const char *newl[2] = {NULL, NULL}; - int lineno = 0; - tok->input = str = translate_newlines(input, single, preserve_crlf, tok); - if (str == NULL) - return NULL; - tok->enc = NULL; - tok->str = str; - if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) - return error_ret(tok); - str = tok->str; /* string after BOM if any */ - assert(str); - if (tok->enc != NULL) { - utf8 = translate_into_utf8(str, tok->enc); - if (utf8 == NULL) - return error_ret(tok); - str = PyBytes_AsString(utf8); - } - for (s = str;; s++) { - if (*s == '\0') break; - else if (*s == '\n') { - assert(lineno < 2); - newl[lineno] = s; - lineno++; - if (lineno == 2) break; - } +static char *decode_str(const char *input, int single, struct tok_state *tok, + int preserve_crlf) { + PyObject *utf8 = NULL; + char *str; + const char *s; + const char *newl[2] = {NULL, NULL}; + int lineno = 0; + tok->input = str = translate_newlines(input, single, preserve_crlf, tok); + if (str == NULL) + return NULL; + tok->enc = NULL; + tok->str = str; + if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) + return error_ret(tok); + str = tok->str; /* string after BOM if any */ + assert(str); + if (tok->enc != NULL) { + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyBytes_AsString(utf8); + } + for (s = str;; s++) { + if (*s == '\0') + break; + else if (*s == '\n') { + assert(lineno < 2); + newl[lineno] = s; + lineno++; + if (lineno == 2) + break; } - tok->enc = NULL; - /* need to check line 1 and 2 separately since check_coding_spec - assumes a single line as input */ - if (newl[0]) { - 
if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) { - return NULL; - } - if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) { - if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], - tok, buf_setreadl)) - return NULL; - } + } + tok->enc = NULL; + /* need to check line 1 and 2 separately since check_coding_spec + assumes a single line as input */ + if (newl[0]) { + if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) { + return NULL; } - if (tok->enc != NULL) { - assert(utf8 == NULL); - utf8 = translate_into_utf8(str, tok->enc); - if (utf8 == NULL) - return error_ret(tok); - str = PyBytes_AS_STRING(utf8); + if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) { + if (!check_coding_spec(newl[0] + 1, newl[1] - newl[0], tok, buf_setreadl)) + return NULL; } - assert(tok->decoding_buffer == NULL); - tok->decoding_buffer = utf8; /* CAUTION */ - return str; + } + if (tok->enc != NULL) { + assert(utf8 == NULL); + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyBytes_AS_STRING(utf8); + } + assert(tok->decoding_buffer == NULL); + tok->decoding_buffer = utf8; /* CAUTION */ + return str; } /* Set up tokenizer for string */ -struct tok_state * -_PyTokenizer_FromString(const char *str, int exec_input, int preserve_crlf) -{ - struct tok_state *tok = tok_new(); - char *decoded; +struct tok_state *_PyTokenizer_FromString(const char *str, int exec_input, + int preserve_crlf) { + struct tok_state *tok = tok_new(); + char *decoded; - if (tok == NULL) - return NULL; - decoded = decode_str(str, exec_input, tok, preserve_crlf); - if (decoded == NULL) { - _PyTokenizer_Free(tok); - return NULL; - } + if (tok == NULL) + return NULL; + decoded = decode_str(str, exec_input, tok, preserve_crlf); + if (decoded == NULL) { + _PyTokenizer_Free(tok); + return NULL; + } - tok->buf = tok->cur = tok->inp = decoded; - tok->end = decoded; - return tok; + tok->buf = tok->cur = tok->inp = decoded; + tok->end = decoded; + return tok; } -struct tok_state * -_PyTokenizer_FromReadline(PyObject* readline, const char* enc, - int exec_input, int preserve_crlf) -{ - struct tok_state *tok = tok_new(); - if (tok == NULL) - return NULL; - if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { - _PyTokenizer_Free(tok); - return NULL; - } - tok->cur = tok->inp = tok->buf; - tok->end = tok->buf + BUFSIZ; - tok->fp = NULL; - if (enc != NULL) { - tok->encoding = new_string(enc, strlen(enc), tok); - if (!tok->encoding) { - _PyTokenizer_Free(tok); - return NULL; - } +struct tok_state *_PyTokenizer_FromReadline(PyObject *readline, const char *enc, + int exec_input, int preserve_crlf) { + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { + _PyTokenizer_Free(tok); + return NULL; + } + tok->cur = tok->inp = tok->buf; + tok->end = tok->buf + BUFSIZ; + tok->fp = NULL; + if (enc != NULL) { + tok->encoding = new_string(enc, strlen(enc), tok); + if (!tok->encoding) { + _PyTokenizer_Free(tok); + return NULL; } - tok->decoding_state = STATE_NORMAL; - Py_INCREF(readline); - tok->readline = readline; - return tok; + } + tok->decoding_state = STATE_NORMAL; + Py_INCREF(readline); + tok->readline = readline; + return tok; } /* Set up tokenizer for UTF-8 string */ -struct tok_state * -_PyTokenizer_FromUTF8(const char *str, int exec_input, int preserve_crlf) -{ - struct tok_state *tok = tok_new(); - char *translated; - if (tok == NULL) - return NULL; - tok->input = translated 
= translate_newlines(str, exec_input, preserve_crlf, tok); - if (translated == NULL) { - _PyTokenizer_Free(tok); - return NULL; - } - tok->decoding_state = STATE_NORMAL; - tok->enc = NULL; - tok->str = translated; - tok->encoding = new_string("utf-8", 5, tok); - if (!tok->encoding) { - _PyTokenizer_Free(tok); - return NULL; - } +struct tok_state *_PyTokenizer_FromUTF8(const char *str, int exec_input, + int preserve_crlf) { + struct tok_state *tok = tok_new(); + char *translated; + if (tok == NULL) + return NULL; + tok->input = translated = + translate_newlines(str, exec_input, preserve_crlf, tok); + if (translated == NULL) { + _PyTokenizer_Free(tok); + return NULL; + } + tok->decoding_state = STATE_NORMAL; + tok->enc = NULL; + tok->str = translated; + tok->encoding = new_string("utf-8", 5, tok); + if (!tok->encoding) { + _PyTokenizer_Free(tok); + return NULL; + } - tok->buf = tok->cur = tok->inp = translated; - tok->end = translated; - return tok; + tok->buf = tok->cur = tok->inp = translated; + tok->end = translated; + return tok; } /* Set up tokenizer for file */ -struct tok_state * -_PyTokenizer_FromFile(FILE *fp, const char* enc, - const char *ps1, const char *ps2) -{ - struct tok_state *tok = tok_new(); - if (tok == NULL) - return NULL; - if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { - _PyTokenizer_Free(tok); - return NULL; - } - tok->cur = tok->inp = tok->buf; - tok->end = tok->buf + BUFSIZ; - tok->fp = fp; - tok->prompt = ps1; - tok->nextprompt = ps2; - if (enc != NULL) { - /* Must copy encoding declaration since it - gets copied into the parse tree. */ - tok->encoding = new_string(enc, strlen(enc), tok); - if (!tok->encoding) { - _PyTokenizer_Free(tok); - return NULL; - } - tok->decoding_state = STATE_NORMAL; +struct tok_state *_PyTokenizer_FromFile(FILE *fp, const char *enc, + const char *ps1, const char *ps2) { + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { + _PyTokenizer_Free(tok); + return NULL; + } + tok->cur = tok->inp = tok->buf; + tok->end = tok->buf + BUFSIZ; + tok->fp = fp; + tok->prompt = ps1; + tok->nextprompt = ps2; + if (enc != NULL) { + /* Must copy encoding declaration since it + gets copied into the parse tree. 
*/ + tok->encoding = new_string(enc, strlen(enc), tok); + if (!tok->encoding) { + _PyTokenizer_Free(tok); + return NULL; } - return tok; + tok->decoding_state = STATE_NORMAL; + } + return tok; } /* Free a tok_state structure */ -void -_PyTokenizer_Free(struct tok_state *tok) -{ - if (tok->encoding != NULL) { - PyMem_Free(tok->encoding); - } - Py_XDECREF(tok->decoding_readline); - Py_XDECREF(tok->decoding_buffer); - Py_XDECREF(tok->readline); - Py_XDECREF(tok->filename); - if ((tok->readline != NULL || tok->fp != NULL ) && tok->buf != NULL) { - PyMem_Free(tok->buf); +void _PyTokenizer_Free(struct tok_state *tok) { + if (tok->encoding != NULL) { + PyMem_Free(tok->encoding); + } + Py_XDECREF(tok->decoding_readline); + Py_XDECREF(tok->decoding_buffer); + Py_XDECREF(tok->readline); + Py_XDECREF(tok->filename); + if ((tok->readline != NULL || tok->fp != NULL) && tok->buf != NULL) { + PyMem_Free(tok->buf); + } + if (tok->input) { + PyMem_Free(tok->input); + } + if (tok->interactive_src_start != NULL) { + PyMem_Free(tok->interactive_src_start); + } + free_fstring_expressions(tok); + PyMem_Free(tok); +} + +void _PyToken_Free(struct token *token) { Py_XDECREF(token->metadata); } + +void _PyToken_Init(struct token *token) { token->metadata = NULL; } + +static int tok_readline_raw(struct tok_state *tok) { + do { + if (!tok_reserve_buf(tok, BUFSIZ)) { + return 0; + } + int n_chars = (int)(tok->end - tok->inp); + size_t line_size = 0; + char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, + NULL, &line_size); + if (line == NULL) { + return 1; } - if (tok->input) { - PyMem_Free(tok->input); + if (tok->fp_interactive && + tok_concatenate_interactive_new_line(tok, line) == -1) { + return 0; + } + tok->inp += line_size; + if (tok->inp == tok->buf) { + return 0; + } + } while (tok->inp[-1] != '\n'); + return 1; +} + +static int tok_readline_string(struct tok_state *tok) { + PyObject *line = NULL; + PyObject *raw_line = PyObject_CallNoArgs(tok->readline); + if (raw_line == NULL) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) { + PyErr_Clear(); + return 1; + } + error_ret(tok); + goto error; + } + if (tok->encoding != NULL) { + if (!PyBytes_Check(raw_line)) { + PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object"); + error_ret(tok); + goto error; + } + line = + PyUnicode_Decode(PyBytes_AS_STRING(raw_line), + PyBytes_GET_SIZE(raw_line), tok->encoding, "replace"); + Py_CLEAR(raw_line); + if (line == NULL) { + error_ret(tok); + goto error; + } + } else { + if (!PyUnicode_Check(raw_line)) { + PyErr_Format(PyExc_TypeError, "readline() returned a non-string object"); + error_ret(tok); + goto error; + } + line = raw_line; + raw_line = NULL; + } + Py_ssize_t buflen; + const char *buf = PyUnicode_AsUTF8AndSize(line, &buflen); + if (buf == NULL) { + error_ret(tok); + goto error; + } + + // Make room for the null terminator *and* potentially + // an extra newline character that we may need to artificially + // add. 
+ size_t buffer_size = buflen + 2; + if (!tok_reserve_buf(tok, buffer_size)) { + goto error; + } + memcpy(tok->inp, buf, buflen); + tok->inp += buflen; + *tok->inp = '\0'; + + tok->line_start = tok->cur; + Py_DECREF(line); + return 1; +error: + Py_XDECREF(raw_line); + Py_XDECREF(line); + return 0; +} + +static int tok_underflow_string(struct tok_state *tok) { + char *end = strchr(tok->inp, '\n'); + if (end != NULL) { + end++; + } else { + end = strchr(tok->inp, '\0'); + if (end == tok->inp) { + tok->done = E_EOF; + return 0; + } + } + if (tok->start == NULL) { + tok->buf = tok->cur; + } + tok->line_start = tok->cur; + ADVANCE_LINENO(); + tok->inp = end; + return 1; +} + +static int tok_underflow_interactive(struct tok_state *tok) { + if (tok->interactive_underflow == IUNDERFLOW_STOP) { + tok->done = E_INTERACT_STOP; + return 1; + } + char *newtok = PyOS_Readline(tok->fp ? tok->fp : stdin, stdout, tok->prompt); + if (newtok != NULL) { + char *translated = translate_newlines(newtok, 0, 0, tok); + PyMem_Free(newtok); + if (translated == NULL) { + return 0; } - if (tok->interactive_src_start != NULL) { - PyMem_Free(tok->interactive_src_start); + newtok = translated; + } + if (tok->encoding && newtok && *newtok) { + /* Recode to UTF-8 */ + Py_ssize_t buflen; + const char *buf; + PyObject *u = translate_into_utf8(newtok, tok->encoding); + PyMem_Free(newtok); + if (u == NULL) { + tok->done = E_DECODE; + return 0; + } + buflen = PyBytes_GET_SIZE(u); + buf = PyBytes_AS_STRING(u); + newtok = PyMem_Malloc(buflen + 1); + if (newtok == NULL) { + Py_DECREF(u); + tok->done = E_NOMEM; + return 0; + } + strcpy(newtok, buf); + Py_DECREF(u); + } + if (tok->fp_interactive && + tok_concatenate_interactive_new_line(tok, newtok) == -1) { + PyMem_Free(newtok); + return 0; + } + if (tok->nextprompt != NULL) { + tok->prompt = tok->nextprompt; + } + if (newtok == NULL) { + tok->done = E_INTR; + } else if (*newtok == '\0') { + PyMem_Free(newtok); + tok->done = E_EOF; + } else if (tok->start != NULL) { + Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf; + remember_fstring_buffers(tok); + size_t size = strlen(newtok); + ADVANCE_LINENO(); + if (!tok_reserve_buf(tok, size + 1)) { + PyMem_Free(tok->buf); + tok->buf = NULL; + PyMem_Free(newtok); + return 0; + } + memcpy(tok->cur, newtok, size + 1); + PyMem_Free(newtok); + tok->inp += size; + tok->multi_line_start = tok->buf + cur_multi_line_start; + restore_fstring_buffers(tok); + } else { + remember_fstring_buffers(tok); + ADVANCE_LINENO(); + PyMem_Free(tok->buf); + tok->buf = newtok; + tok->cur = tok->buf; + tok->line_start = tok->buf; + tok->inp = strchr(tok->buf, '\0'); + tok->end = tok->inp + 1; + restore_fstring_buffers(tok); + } + if (tok->done != E_OK) { + if (tok->prompt != NULL) { + PySys_WriteStderr("\n"); } - free_fstring_expressions(tok); - PyMem_Free(tok); -} + return 0; + } -void -_PyToken_Free(struct token *token) { - Py_XDECREF(token->metadata); -} - -void -_PyToken_Init(struct token *token) { - token->metadata = NULL; -} - -static int -tok_readline_raw(struct tok_state *tok) -{ - do { - if (!tok_reserve_buf(tok, BUFSIZ)) { - return 0; - } - int n_chars = (int)(tok->end - tok->inp); - size_t line_size = 0; - char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size); - if (line == NULL) { - return 1; - } - if (tok->fp_interactive && - tok_concatenate_interactive_new_line(tok, line) == -1) { - return 0; - } - tok->inp += line_size; - if (tok->inp == tok->buf) { - return 0; - } - } while (tok->inp[-1] != 
'\n'); - return 1; + if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { + return 0; + } + return 1; } -static int -tok_readline_string(struct tok_state* tok) { - PyObject* line = NULL; - PyObject* raw_line = PyObject_CallNoArgs(tok->readline); - if (raw_line == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - return 1; - } - error_ret(tok); - goto error; - } - if(tok->encoding != NULL) { - if (!PyBytes_Check(raw_line)) { - PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object"); - error_ret(tok); - goto error; - } - line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line), - tok->encoding, "replace"); - Py_CLEAR(raw_line); - if (line == NULL) { - error_ret(tok); - goto error; - } - } else { - if(!PyUnicode_Check(raw_line)) { - PyErr_Format(PyExc_TypeError, "readline() returned a non-string object"); - error_ret(tok); - goto error; - } - line = raw_line; - raw_line = NULL; - } - Py_ssize_t buflen; - const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen); - if (buf == NULL) { - error_ret(tok); - goto error; - } - - // Make room for the null terminator *and* potentially - // an extra newline character that we may need to artificially - // add. - size_t buffer_size = buflen + 2; - if (!tok_reserve_buf(tok, buffer_size)) { - goto error; - } - memcpy(tok->inp, buf, buflen); - tok->inp += buflen; +static int tok_underflow_file(struct tok_state *tok) { + if (tok->start == NULL && !INSIDE_FSTRING(tok)) { + tok->cur = tok->inp = tok->buf; + } + if (tok->decoding_state == STATE_INIT) { + /* We have not yet determined the encoding. + If an encoding is found, use the file-pointer + reader functions from now on. */ + if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) { + error_ret(tok); + return 0; + } + assert(tok->decoding_state != STATE_INIT); + } + /* Read until '\n' or EOF */ + if (tok->decoding_readline != NULL) { + /* We already have a codec associated with this input. */ + if (!tok_readline_recode(tok)) { + return 0; + } + } else { + /* We want a 'raw' read. */ + if (!tok_readline_raw(tok)) { + return 0; + } + } + if (tok->inp == tok->cur) { + tok->done = E_EOF; + return 0; + } + tok->implicit_newline = 0; + if (tok->inp[-1] != '\n') { + assert(tok->inp + 1 < tok->end); + /* Last line does not end in \n, fake one */ + *tok->inp++ = '\n'; *tok->inp = '\0'; + tok->implicit_newline = 1; + } - tok->line_start = tok->cur; - Py_DECREF(line); - return 1; -error: - Py_XDECREF(raw_line); - Py_XDECREF(line); + if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { return 0; + } + + ADVANCE_LINENO(); + if (tok->decoding_state != STATE_NORMAL) { + if (tok->lineno > 2) { + tok->decoding_state = STATE_NORMAL; + } else if (!check_coding_spec(tok->cur, strlen(tok->cur), tok, + fp_setreadl)) { + return 0; + } + } + /* The default encoding is UTF-8, so make sure we don't have any + non-UTF-8 sequences in it. 
*/ + if (!tok->encoding && !ensure_utf8(tok->cur, tok)) { + error_ret(tok); + return 0; + } + assert(tok->done == E_OK); + return tok->done == E_OK; } -static int -tok_underflow_string(struct tok_state *tok) { - char *end = strchr(tok->inp, '\n'); - if (end != NULL) { - end++; - } - else { - end = strchr(tok->inp, '\0'); - if (end == tok->inp) { - tok->done = E_EOF; - return 0; - } - } - if (tok->start == NULL) { - tok->buf = tok->cur; - } - tok->line_start = tok->cur; - ADVANCE_LINENO(); - tok->inp = end; - return 1; -} - -static int -tok_underflow_interactive(struct tok_state *tok) { - if (tok->interactive_underflow == IUNDERFLOW_STOP) { - tok->done = E_INTERACT_STOP; - return 1; - } - char *newtok = PyOS_Readline(tok->fp ? tok->fp : stdin, stdout, tok->prompt); - if (newtok != NULL) { - char *translated = translate_newlines(newtok, 0, 0, tok); - PyMem_Free(newtok); - if (translated == NULL) { - return 0; - } - newtok = translated; - } - if (tok->encoding && newtok && *newtok) { - /* Recode to UTF-8 */ - Py_ssize_t buflen; - const char* buf; - PyObject *u = translate_into_utf8(newtok, tok->encoding); - PyMem_Free(newtok); - if (u == NULL) { - tok->done = E_DECODE; - return 0; - } - buflen = PyBytes_GET_SIZE(u); - buf = PyBytes_AS_STRING(u); - newtok = PyMem_Malloc(buflen+1); - if (newtok == NULL) { - Py_DECREF(u); - tok->done = E_NOMEM; - return 0; - } - strcpy(newtok, buf); - Py_DECREF(u); - } - if (tok->fp_interactive && - tok_concatenate_interactive_new_line(tok, newtok) == -1) { - PyMem_Free(newtok); - return 0; - } - if (tok->nextprompt != NULL) { - tok->prompt = tok->nextprompt; - } - if (newtok == NULL) { - tok->done = E_INTR; - } - else if (*newtok == '\0') { - PyMem_Free(newtok); - tok->done = E_EOF; - } - else if (tok->start != NULL) { - Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf; - remember_fstring_buffers(tok); - size_t size = strlen(newtok); - ADVANCE_LINENO(); - if (!tok_reserve_buf(tok, size + 1)) { - PyMem_Free(tok->buf); - tok->buf = NULL; - PyMem_Free(newtok); - return 0; - } - memcpy(tok->cur, newtok, size + 1); - PyMem_Free(newtok); - tok->inp += size; - tok->multi_line_start = tok->buf + cur_multi_line_start; - restore_fstring_buffers(tok); - } - else { - remember_fstring_buffers(tok); - ADVANCE_LINENO(); - PyMem_Free(tok->buf); - tok->buf = newtok; - tok->cur = tok->buf; - tok->line_start = tok->buf; - tok->inp = strchr(tok->buf, '\0'); - tok->end = tok->inp + 1; - restore_fstring_buffers(tok); - } - if (tok->done != E_OK) { - if (tok->prompt != NULL) { - PySys_WriteStderr("\n"); - } - return 0; - } - - if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { - return 0; - } - return 1; -} - -static int -tok_underflow_file(struct tok_state *tok) { - if (tok->start == NULL && !INSIDE_FSTRING(tok)) { - tok->cur = tok->inp = tok->buf; - } - if (tok->decoding_state == STATE_INIT) { - /* We have not yet determined the encoding. - If an encoding is found, use the file-pointer - reader functions from now on. */ - if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) { - error_ret(tok); - return 0; - } - assert(tok->decoding_state != STATE_INIT); - } - /* Read until '\n' or EOF */ - if (tok->decoding_readline != NULL) { - /* We already have a codec associated with this input. */ - if (!tok_readline_recode(tok)) { - return 0; - } - } - else { - /* We want a 'raw' read. 
*/ - if (!tok_readline_raw(tok)) { - return 0; - } - } - if (tok->inp == tok->cur) { - tok->done = E_EOF; - return 0; - } - tok->implicit_newline = 0; - if (tok->inp[-1] != '\n') { - assert(tok->inp + 1 < tok->end); - /* Last line does not end in \n, fake one */ - *tok->inp++ = '\n'; - *tok->inp = '\0'; - tok->implicit_newline = 1; - } - - if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { - return 0; - } - - ADVANCE_LINENO(); - if (tok->decoding_state != STATE_NORMAL) { - if (tok->lineno > 2) { - tok->decoding_state = STATE_NORMAL; - } - else if (!check_coding_spec(tok->cur, strlen(tok->cur), - tok, fp_setreadl)) - { - return 0; - } - } - /* The default encoding is UTF-8, so make sure we don't have any - non-UTF-8 sequences in it. */ - if (!tok->encoding && !ensure_utf8(tok->cur, tok)) { - error_ret(tok); - return 0; - } - assert(tok->done == E_OK); - return tok->done == E_OK; -} - -static int -tok_underflow_readline(struct tok_state* tok) { - assert(tok->decoding_state == STATE_NORMAL); - assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL); - if (tok->start == NULL && !INSIDE_FSTRING(tok)) { - tok->cur = tok->inp = tok->buf; - } - if (!tok_readline_string(tok)) { - return 0; - } - if (tok->inp == tok->cur) { - tok->done = E_EOF; - return 0; - } - tok->implicit_newline = 0; - if (tok->inp[-1] != '\n') { - assert(tok->inp + 1 < tok->end); - /* Last line does not end in \n, fake one */ - *tok->inp++ = '\n'; - *tok->inp = '\0'; - tok->implicit_newline = 1; - } +static int tok_underflow_readline(struct tok_state *tok) { + assert(tok->decoding_state == STATE_NORMAL); + assert(tok->fp == NULL && tok->input == NULL && + tok->decoding_readline == NULL); + if (tok->start == NULL && !INSIDE_FSTRING(tok)) { + tok->cur = tok->inp = tok->buf; + } + if (!tok_readline_string(tok)) { + return 0; + } + if (tok->inp == tok->cur) { + tok->done = E_EOF; + return 0; + } + tok->implicit_newline = 0; + if (tok->inp[-1] != '\n') { + assert(tok->inp + 1 < tok->end); + /* Last line does not end in \n, fake one */ + *tok->inp++ = '\n'; + *tok->inp = '\0'; + tok->implicit_newline = 1; + } - if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { - return 0; - } + if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { + return 0; + } - ADVANCE_LINENO(); - /* The default encoding is UTF-8, so make sure we don't have any - non-UTF-8 sequences in it. */ - if (!tok->encoding && !ensure_utf8(tok->cur, tok)) { - error_ret(tok); - return 0; - } - assert(tok->done == E_OK); - return tok->done == E_OK; + ADVANCE_LINENO(); + /* The default encoding is UTF-8, so make sure we don't have any + non-UTF-8 sequences in it. 
*/ + if (!tok->encoding && !ensure_utf8(tok->cur, tok)) { + error_ret(tok); + return 0; + } + assert(tok->done == E_OK); + return tok->done == E_OK; } #if defined(Py_DEBUG) -static void -print_escape(FILE *f, const char *s, Py_ssize_t size) -{ - if (s == NULL) { - fputs("NULL", f); - return; - } - putc('"', f); - while (size-- > 0) { - unsigned char c = *s++; - switch (c) { - case '\n': fputs("\\n", f); break; - case '\r': fputs("\\r", f); break; - case '\t': fputs("\\t", f); break; - case '\f': fputs("\\f", f); break; - case '\'': fputs("\\'", f); break; - case '"': fputs("\\\"", f); break; - default: - if (0x20 <= c && c <= 0x7f) - putc(c, f); - else - fprintf(f, "\\x%02x", c); - } +static void print_escape(FILE *f, const char *s, Py_ssize_t size) { + if (s == NULL) { + fputs("NULL", f); + return; + } + putc('"', f); + while (size-- > 0) { + unsigned char c = *s++; + switch (c) { + case '\n': + fputs("\\n", f); + break; + case '\r': + fputs("\\r", f); + break; + case '\t': + fputs("\\t", f); + break; + case '\f': + fputs("\\f", f); + break; + case '\'': + fputs("\\'", f); + break; + case '"': + fputs("\\\"", f); + break; + default: + if (0x20 <= c && c <= 0x7f) + putc(c, f); + else + fprintf(f, "\\x%02x", c); } - putc('"', f); + } + putc('"', f); } #endif /* Get next char, updating state; error code goes into tok->done */ -static int -tok_nextc(struct tok_state *tok) -{ - int rc; - for (;;) { - if (tok->cur != tok->inp) { - if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) { - tok->done = E_COLUMNOVERFLOW; - return EOF; - } - tok->col_offset++; - return Py_CHARMASK(*tok->cur++); /* Fast path */ - } - if (tok->done != E_OK) { - return EOF; - } - if (tok->readline) { - rc = tok_underflow_readline(tok); - } - else if (tok->fp == NULL) { - rc = tok_underflow_string(tok); - } - else if (tok->prompt != NULL) { - rc = tok_underflow_interactive(tok); - } - else { - rc = tok_underflow_file(tok); - } -#if defined(Py_DEBUG) - if (tok->debug) { - fprintf(stderr, "line[%d] = ", tok->lineno); - print_escape(stderr, tok->cur, tok->inp - tok->cur); - fprintf(stderr, " tok->done = %d\n", tok->done); - } -#endif - if (!rc) { - tok->cur = tok->inp; - return EOF; - } - tok->line_start = tok->cur; - - if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) { - syntaxerror(tok, "source code cannot contain null bytes"); - tok->cur = tok->inp; - return EOF; - } +static int tok_nextc(struct tok_state *tok) { + int rc; + for (;;) { + if (tok->cur != tok->inp) { + if ((unsigned int)tok->col_offset >= (unsigned int)INT_MAX) { + tok->done = E_COLUMNOVERFLOW; + return EOF; + } + tok->col_offset++; + return Py_CHARMASK(*tok->cur++); /* Fast path */ } - Py_UNREACHABLE(); -} - -/* Back-up one character */ - -static void -tok_backup(struct tok_state *tok, int c) -{ - if (c != EOF) { - if (--tok->cur < tok->buf) { - Py_FatalError("tokenizer beginning of buffer"); - } - if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) { - Py_FatalError("tok_backup: wrong character"); - } - tok->col_offset--; + if (tok->done != E_OK) { + return EOF; + } + if (tok->readline) { + rc = tok_underflow_readline(tok); + } else if (tok->fp == NULL) { + rc = tok_underflow_string(tok); + } else if (tok->prompt != NULL) { + rc = tok_underflow_interactive(tok); + } else { + rc = tok_underflow_file(tok); } -} - -static int -_syntaxerror_range(struct tok_state *tok, const char *format, - int col_offset, int end_col_offset, - va_list vargs) -{ - // In release builds, we don't want to overwrite a previous error, but in debug 
builds we - // want to fail if we are not doing it so we can fix it. - assert(tok->done != E_ERROR); - if (tok->done == E_ERROR) { - return ERRORTOKEN; +#if defined(Py_DEBUG) + if (tok->debug) { + fprintf(stderr, "line[%d] = ", tok->lineno); + print_escape(stderr, tok->cur, tok->inp - tok->cur); + fprintf(stderr, " tok->done = %d\n", tok->done); } - PyObject *errmsg, *errtext, *args; - errmsg = PyUnicode_FromFormatV(format, vargs); - if (!errmsg) { - goto error; +#endif + if (!rc) { + tok->cur = tok->inp; + return EOF; } + tok->line_start = tok->cur; - errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start, - "replace"); - if (!errtext) { - goto error; + if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) { + syntaxerror(tok, "source code cannot contain null bytes"); + tok->cur = tok->inp; + return EOF; } + } + Py_UNREACHABLE(); +} - if (col_offset == -1) { - col_offset = (int)PyUnicode_GET_LENGTH(errtext); - } - if (end_col_offset == -1) { - end_col_offset = col_offset; - } +/* Back-up one character */ - Py_ssize_t line_len = strcspn(tok->line_start, "\n"); - if (line_len != tok->cur - tok->line_start) { - Py_DECREF(errtext); - errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len, - "replace"); +static void tok_backup(struct tok_state *tok, int c) { + if (c != EOF) { + if (--tok->cur < tok->buf) { + Py_FatalError("tokenizer beginning of buffer"); } - if (!errtext) { - goto error; + if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) { + Py_FatalError("tok_backup: wrong character"); } + tok->col_offset--; + } +} - args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno, - col_offset, errtext, tok->lineno, end_col_offset); - if (args) { - PyErr_SetObject(PyExc_SyntaxError, args); - Py_DECREF(args); - } +static int _syntaxerror_range(struct tok_state *tok, const char *format, + int col_offset, int end_col_offset, + va_list vargs) { + // In release builds, we don't want to overwrite a previous error, but in + // debug builds we want to fail if we are not doing it so we can fix it. + assert(tok->done != E_ERROR); + if (tok->done == E_ERROR) { + return ERRORTOKEN; + } + PyObject *errmsg, *errtext, *args; + errmsg = PyUnicode_FromFormatV(format, vargs); + if (!errmsg) { + goto error; + } + + errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start, + "replace"); + if (!errtext) { + goto error; + } + + if (col_offset == -1) { + col_offset = (int)PyUnicode_GET_LENGTH(errtext); + } + if (end_col_offset == -1) { + end_col_offset = col_offset; + } + + Py_ssize_t line_len = strcspn(tok->line_start, "\n"); + if (line_len != tok->cur - tok->line_start) { + Py_DECREF(errtext); + errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len, "replace"); + } + if (!errtext) { + goto error; + } + + args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno, + col_offset, errtext, tok->lineno, end_col_offset); + if (args) { + PyErr_SetObject(PyExc_SyntaxError, args); + Py_DECREF(args); + } error: - Py_XDECREF(errmsg); - tok->done = E_ERROR; - return ERRORTOKEN; + Py_XDECREF(errmsg); + tok->done = E_ERROR; + return ERRORTOKEN; } -static int -syntaxerror(struct tok_state *tok, const char *format, ...) -{ - // This errors are cleaned on startup. Todo: Fix it. - va_list vargs; - va_start(vargs, format); - int ret = _syntaxerror_range(tok, format, -1, -1, vargs); - va_end(vargs); - return ret; +static int syntaxerror(struct tok_state *tok, const char *format, ...) { + // This errors are cleaned on startup. Todo: Fix it. 
+ va_list vargs; + va_start(vargs, format); + int ret = _syntaxerror_range(tok, format, -1, -1, vargs); + va_end(vargs); + return ret; } -static int -syntaxerror_known_range(struct tok_state *tok, - int col_offset, int end_col_offset, - const char *format, ...) -{ - va_list vargs; - va_start(vargs, format); - int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs); - va_end(vargs); - return ret; +static int syntaxerror_known_range(struct tok_state *tok, int col_offset, + int end_col_offset, const char *format, + ...) { + va_list vargs; + va_start(vargs, format); + int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs); + va_end(vargs); + return ret; } -static int -indenterror(struct tok_state *tok) -{ - tok->done = E_TABSPACE; - tok->cur = tok->inp; - return ERRORTOKEN; +static int indenterror(struct tok_state *tok) { + tok->done = E_TABSPACE; + tok->cur = tok->inp; + return ERRORTOKEN; } -static int -parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...) -{ - if (!tok->report_warnings) { - return 0; - } - - PyObject *errmsg; - va_list vargs; - va_start(vargs, format); - errmsg = PyUnicode_FromFormatV(format, vargs); - va_end(vargs); - if (!errmsg) { - goto error; - } - - if (PyErr_WarnExplicitObject(category, errmsg, tok->filename, - tok->lineno, NULL, NULL) < 0) { - if (PyErr_ExceptionMatches(category)) { - /* Replace the DeprecationWarning exception with a SyntaxError - to get a more accurate error report */ - PyErr_Clear(); - syntaxerror(tok, "%U", errmsg); - } - goto error; - } - Py_DECREF(errmsg); +static int parser_warn(struct tok_state *tok, PyObject *category, + const char *format, ...) { + if (!tok->report_warnings) { return 0; + } + + PyObject *errmsg; + va_list vargs; + va_start(vargs, format); + errmsg = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); + if (!errmsg) { + goto error; + } + + if (PyErr_WarnExplicitObject(category, errmsg, tok->filename, tok->lineno, + NULL, NULL) < 0) { + if (PyErr_ExceptionMatches(category)) { + /* Replace the DeprecationWarning exception with a SyntaxError + to get a more accurate error report */ + PyErr_Clear(); + syntaxerror(tok, "%U", errmsg); + } + goto error; + } + Py_DECREF(errmsg); + return 0; error: - Py_XDECREF(errmsg); - tok->done = E_ERROR; - return -1; + Py_XDECREF(errmsg); + tok->done = E_ERROR; + return -1; } -static int -warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_char) -{ - if (!tok->report_warnings) { - return 0; - } - - PyObject *msg = PyUnicode_FromFormat( - "invalid escape sequence '\\%c'", - (char) first_invalid_escape_char - ); +static int warn_invalid_escape_sequence(struct tok_state *tok, + int first_invalid_escape_char) { + if (!tok->report_warnings) { + return 0; + } - if (msg == NULL) { - return -1; - } + PyObject *msg = PyUnicode_FromFormat("invalid escape sequence '\\%c'", + (char)first_invalid_escape_char); - if (PyErr_WarnExplicitObject(PyExc_SyntaxWarning, msg, tok->filename, - tok->lineno, NULL, NULL) < 0) { - Py_DECREF(msg); + if (msg == NULL) { + return -1; + } - if (PyErr_ExceptionMatches(PyExc_SyntaxWarning)) { - /* Replace the SyntaxWarning exception with a SyntaxError - to get a more accurate error report */ - PyErr_Clear(); - return syntaxerror(tok, "invalid escape sequence '\\%c'", (char) first_invalid_escape_char); - } + if (PyErr_WarnExplicitObject(PyExc_SyntaxWarning, msg, tok->filename, + tok->lineno, NULL, NULL) < 0) { + Py_DECREF(msg); - return -1; + if 
(PyErr_ExceptionMatches(PyExc_SyntaxWarning)) { + /* Replace the SyntaxWarning exception with a SyntaxError + to get a more accurate error report */ + PyErr_Clear(); + return syntaxerror(tok, "invalid escape sequence '\\%c'", + (char)first_invalid_escape_char); } - Py_DECREF(msg); - return 0; + return -1; + } + + Py_DECREF(msg); + return 0; } -static int -lookahead(struct tok_state *tok, const char *test) -{ - const char *s = test; - int res = 0; - while (1) { - int c = tok_nextc(tok); - if (*s == 0) { - res = !is_potential_identifier_char(c); - } - else if (c == *s) { - s++; - continue; - } +static int lookahead(struct tok_state *tok, const char *test) { + const char *s = test; + int res = 0; + while (1) { + int c = tok_nextc(tok); + if (*s == 0) { + res = !is_potential_identifier_char(c); + } else if (c == *s) { + s++; + continue; + } - tok_backup(tok, c); - while (s != test) { - tok_backup(tok, *--s); - } - return res; + tok_backup(tok, c); + while (s != test) { + tok_backup(tok, *--s); } + return res; + } } -static int -verify_end_of_number(struct tok_state *tok, int c, const char *kind) { - if (tok->tok_extra_tokens) { - // When we are parsing extra tokens, we don't want to emit warnings - // about invalid literals, because we want to be a bit more liberal. - return 1; - } - /* Emit a deprecation warning only if the numeric literal is immediately - * followed by one of keywords which can occur after a numeric literal - * in valid code: "and", "else", "for", "if", "in", "is" and "or". - * It allows to gradually deprecate existing valid code without adding - * warning before error in most cases of invalid numeric literal (which - * would be confusing and break existing tests). - * Raise a syntax error with slightly better message than plain - * "invalid syntax" if the numeric literal is immediately followed by - * other keyword or identifier. - */ - int r = 0; - if (c == 'a') { - r = lookahead(tok, "nd"); - } - else if (c == 'e') { - r = lookahead(tok, "lse"); - } - else if (c == 'f') { - r = lookahead(tok, "or"); - } - else if (c == 'i') { - int c2 = tok_nextc(tok); - if (c2 == 'f' || c2 == 'n' || c2 == 's') { - r = 1; - } - tok_backup(tok, c2); - } - else if (c == 'o') { - r = lookahead(tok, "r"); - } - else if (c == 'n') { - r = lookahead(tok, "ot"); - } - if (r) { - tok_backup(tok, c); - if (parser_warn(tok, PyExc_SyntaxWarning, - "invalid %s literal", kind)) - { - return 0; - } - tok_nextc(tok); +static int verify_end_of_number(struct tok_state *tok, int c, + const char *kind) { + if (tok->tok_extra_tokens) { + // When we are parsing extra tokens, we don't want to emit warnings + // about invalid literals, because we want to be a bit more liberal. + return 1; + } + /* Emit a deprecation warning only if the numeric literal is immediately + * followed by one of keywords which can occur after a numeric literal + * in valid code: "and", "else", "for", "if", "in", "is" and "or". + * It allows to gradually deprecate existing valid code without adding + * warning before error in most cases of invalid numeric literal (which + * would be confusing and break existing tests). + * Raise a syntax error with slightly better message than plain + * "invalid syntax" if the numeric literal is immediately followed by + * other keyword or identifier. 
+ */ + int r = 0; + if (c == 'a') { + r = lookahead(tok, "nd"); + } else if (c == 'e') { + r = lookahead(tok, "lse"); + } else if (c == 'f') { + r = lookahead(tok, "or"); + } else if (c == 'i') { + int c2 = tok_nextc(tok); + if (c2 == 'f' || c2 == 'n' || c2 == 's') { + r = 1; + } + tok_backup(tok, c2); + } else if (c == 'o') { + r = lookahead(tok, "r"); + } else if (c == 'n') { + r = lookahead(tok, "ot"); + } + if (r) { + tok_backup(tok, c); + if (parser_warn(tok, PyExc_SyntaxWarning, "invalid %s literal", kind)) { + return 0; } - else /* In future releases, only error will remain. */ + tok_nextc(tok); + } else /* In future releases, only error will remain. */ if (c < 128 && is_potential_identifier_char(c)) { - tok_backup(tok, c); - syntaxerror(tok, "invalid %s literal", kind); - return 0; + tok_backup(tok, c); + syntaxerror(tok, "invalid %s literal", kind); + return 0; } - return 1; + return 1; } /* Verify that the identifier follows PEP 3131. All identifier strings are guaranteed to be "ready" unicode objects. */ -static int -verify_identifier(struct tok_state *tok) -{ - if (tok->tok_extra_tokens) { - return 1; - } - PyObject *s; - if (tok->decoding_erred) - return 0; - s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); - if (s == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - tok->done = E_DECODE; - } - else { - tok->done = E_ERROR; - } - return 0; +static int verify_identifier(struct tok_state *tok) { + if (tok->tok_extra_tokens) { + return 1; + } + PyObject *s; + if (tok->decoding_erred) + return 0; + s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); + if (s == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + tok->done = E_DECODE; + } else { + tok->done = E_ERROR; } - Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s); - if (invalid < 0) { - Py_DECREF(s); + return 0; + } + Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s); + if (invalid < 0) { + Py_DECREF(s); + tok->done = E_ERROR; + return 0; + } + assert(PyUnicode_GET_LENGTH(s) > 0); + if (invalid < PyUnicode_GET_LENGTH(s)) { + Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid); + if (invalid + 1 < PyUnicode_GET_LENGTH(s)) { + /* Determine the offset in UTF-8 encoded input */ + Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1)); + if (s != NULL) { + Py_SETREF(s, PyUnicode_AsUTF8String(s)); + } + if (s == NULL) { tok->done = E_ERROR; return 0; - } - assert(PyUnicode_GET_LENGTH(s) > 0); - if (invalid < PyUnicode_GET_LENGTH(s)) { - Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid); - if (invalid + 1 < PyUnicode_GET_LENGTH(s)) { - /* Determine the offset in UTF-8 encoded input */ - Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1)); - if (s != NULL) { - Py_SETREF(s, PyUnicode_AsUTF8String(s)); - } - if (s == NULL) { - tok->done = E_ERROR; - return 0; - } - tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s); - } - Py_DECREF(s); - if (Py_UNICODE_ISPRINTABLE(ch)) { - syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch); - } - else { - syntaxerror(tok, "invalid non-printable character U+%04X", ch); - } - return 0; + } + tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s); } Py_DECREF(s); - return 1; -} - -static int -tok_decimal_tail(struct tok_state *tok) -{ - int c; - - while (1) { - do { - c = tok_nextc(tok); - } while (isdigit(c)); - if (c != '_') { - break; - } - c = tok_nextc(tok); - if (!isdigit(c)) { - tok_backup(tok, c); - syntaxerror(tok, "invalid decimal literal"); - return 0; - } - } - return c; -} - - -static inline int -tok_continuation_line(struct 
tok_state *tok) { - int c = tok_nextc(tok); - if (c == '\r') { - c = tok_nextc(tok); - } - if (c != '\n') { - tok->done = E_LINECONT; - return -1; - } - c = tok_nextc(tok); - if (c == EOF) { - tok->done = E_EOF; - tok->cur = tok->inp; - return -1; + if (Py_UNICODE_ISPRINTABLE(ch)) { + syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch); } else { - tok_backup(tok, c); + syntaxerror(tok, "invalid non-printable character U+%04X", ch); } - return c; + return 0; + } + Py_DECREF(s); + return 1; } -static int -type_comment_token_setup(struct tok_state *tok, struct token *token, int type, int col_offset, - int end_col_offset, const char *start, const char *end) -{ - token->level = tok->level; - token->lineno = token->end_lineno = tok->lineno; - token->col_offset = col_offset; - token->end_col_offset = end_col_offset; - token->start = start; - token->end = end; - return type; -} +static int tok_decimal_tail(struct tok_state *tok) { + int c; -static int -token_setup(struct tok_state *tok, struct token *token, int type, const char *start, const char *end) -{ - assert((start == NULL && end == NULL) || (start != NULL && end != NULL)); - token->level = tok->level; - if (ISSTRINGLIT(type)) { - token->lineno = tok->first_lineno; - } - else { - token->lineno = tok->lineno; + while (1) { + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c != '_') { + break; } - token->end_lineno = tok->lineno; - token->col_offset = token->end_col_offset = -1; - token->start = start; - token->end = end; - - if (start != NULL && end != NULL) { - token->col_offset = tok->starting_col_offset; - token->end_col_offset = tok->col_offset; + c = tok_nextc(tok); + if (!isdigit(c)) { + tok_backup(tok, c); + syntaxerror(tok, "invalid decimal literal"); + return 0; } - return type; + } + return c; } - -static int -tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token) -{ - int c; - int blankline, nonascii; - - const char *p_start = NULL; - const char *p_end = NULL; - nextline: - tok->start = NULL; - tok->starting_col_offset = -1; - blankline = 0; - - - /* Get indentation level */ - if (tok->atbol) { - int col = 0; - int altcol = 0; - tok->atbol = 0; - int cont_line_col = 0; - for (;;) { - c = tok_nextc(tok); - if (c == ' ') { - col++, altcol++; - } - else if (c == '\t') { - col = (col / tok->tabsize + 1) * tok->tabsize; - altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE; - } - else if (c == '\014') {/* Control-L (formfeed) */ - col = altcol = 0; /* For Emacs users */ - } - else if (c == '\\') { - // Indentation cannot be split over multiple physical lines - // using backslashes. This means that if we found a backslash - // preceded by whitespace, **the first one we find** determines - // the level of indentation of whatever comes next. - cont_line_col = cont_line_col ? cont_line_col : col; - if ((c = tok_continuation_line(tok)) == -1) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - else { - break; - } - } - tok_backup(tok, c); - if (c == '#' || c == '\n' || c == '\r') { - /* Lines with only whitespace and/or comments - shouldn't affect the indentation and are - not passed to the parser as NEWLINE tokens, - except *totally* empty lines in interactive - mode, which signal the end of a command group. */ - if (col == 0 && c == '\n' && tok->prompt != NULL) { - blankline = 0; /* Let it through */ - } - else if (tok->prompt != NULL && tok->lineno == 1) { - /* In interactive mode, if the first line contains - only spaces and/or a comment, let it through. 
*/ - blankline = 0; - col = altcol = 0; - } - else { - blankline = 1; /* Ignore completely */ - } - /* We can't jump back right here since we still - may need to skip to the end of a comment */ - } - if (!blankline && tok->level == 0) { - col = cont_line_col ? cont_line_col : col; - altcol = cont_line_col ? cont_line_col : altcol; - if (col == tok->indstack[tok->indent]) { - /* No change */ - if (altcol != tok->altindstack[tok->indent]) { - return MAKE_TOKEN(indenterror(tok)); - } - } - else if (col > tok->indstack[tok->indent]) { - /* Indent -- always one */ - if (tok->indent+1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return MAKE_TOKEN(ERRORTOKEN); - } - if (altcol <= tok->altindstack[tok->indent]) { - return MAKE_TOKEN(indenterror(tok)); - } - tok->pendin++; - tok->indstack[++tok->indent] = col; - tok->altindstack[tok->indent] = altcol; - } - else /* col < tok->indstack[tok->indent] */ { - /* Dedent -- any number, must be consistent */ - while (tok->indent > 0 && - col < tok->indstack[tok->indent]) { - tok->pendin--; - tok->indent--; - } - if (col != tok->indstack[tok->indent]) { - tok->done = E_DEDENT; - tok->cur = tok->inp; - return MAKE_TOKEN(ERRORTOKEN); - } - if (altcol != tok->altindstack[tok->indent]) { - return MAKE_TOKEN(indenterror(tok)); - } - } - } - } - - tok->start = tok->cur; - tok->starting_col_offset = tok->col_offset; - - /* Return pending indents/dedents */ - if (tok->pendin != 0) { - if (tok->pendin < 0) { - if (tok->tok_extra_tokens) { - p_start = tok->cur; - p_end = tok->cur; - } - tok->pendin++; - return MAKE_TOKEN(DEDENT); - } - else { - if (tok->tok_extra_tokens) { - p_start = tok->buf; - p_end = tok->cur; - } - tok->pendin--; - return MAKE_TOKEN(INDENT); +static inline int tok_continuation_line(struct tok_state *tok) { + int c = tok_nextc(tok); + if (c == '\r') { + c = tok_nextc(tok); + } + if (c != '\n') { + tok->done = E_LINECONT; + return -1; + } + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOF; + tok->cur = tok->inp; + return -1; + } else { + tok_backup(tok, c); + } + return c; +} + +static int type_comment_token_setup(struct tok_state *tok, struct token *token, + int type, int col_offset, + int end_col_offset, const char *start, + const char *end) { + token->level = tok->level; + token->lineno = token->end_lineno = tok->lineno; + token->col_offset = col_offset; + token->end_col_offset = end_col_offset; + token->start = start; + token->end = end; + return type; +} + +static int token_setup(struct tok_state *tok, struct token *token, int type, + const char *start, const char *end) { + assert((start == NULL && end == NULL) || (start != NULL && end != NULL)); + token->level = tok->level; + if (ISSTRINGLIT(type)) { + token->lineno = tok->first_lineno; + } else { + token->lineno = tok->lineno; + } + token->end_lineno = tok->lineno; + token->col_offset = token->end_col_offset = -1; + token->start = start; + token->end = end; + + if (start != NULL && end != NULL) { + token->col_offset = tok->starting_col_offset; + token->end_col_offset = tok->col_offset; + } + return type; +} + +static int tok_get_normal_mode(struct tok_state *tok, + tokenizer_mode *current_tok, + struct token *token) { + int c; + int blankline, nonascii; + + const char *p_start = NULL; + const char *p_end = NULL; +nextline: + tok->start = NULL; + tok->starting_col_offset = -1; + blankline = 0; + + /* Get indentation level */ + if (tok->atbol) { + int col = 0; + int altcol = 0; + tok->atbol = 0; + int cont_line_col = 0; + for (;;) { + c = tok_nextc(tok); + if (c 
== ' ') { + col++, altcol++; + } else if (c == '\t') { + col = (col / tok->tabsize + 1) * tok->tabsize; + altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE; + } else if (c == '\014') { /* Control-L (formfeed) */ + col = altcol = 0; /* For Emacs users */ + } else if (c == '\\') { + // Indentation cannot be split over multiple physical lines + // using backslashes. This means that if we found a backslash + // preceded by whitespace, **the first one we find** determines + // the level of indentation of whatever comes next. + cont_line_col = cont_line_col ? cont_line_col : col; + if ((c = tok_continuation_line(tok)) == -1) { + return MAKE_TOKEN(ERRORTOKEN); } + } else { + break; + } } - - /* Peek ahead at the next character */ - c = tok_nextc(tok); tok_backup(tok, c); - /* Check if we are closing an async function */ - if (tok->async_def - && !blankline - /* Due to some implementation artifacts of type comments, - * a TYPE_COMMENT at the start of a function won't set an - * indentation level and it will produce a NEWLINE after it. - * To avoid spuriously ending an async function due to this, - * wait until we have some non-newline char in front of us. */ - && c != '\n' - && tok->level == 0 - /* There was a NEWLINE after ASYNC DEF, - so we're past the signature. */ - && tok->async_def_nl - /* Current indentation level is less than where - the async function was defined */ - && tok->async_def_indent >= tok->indent) - { - tok->async_def = 0; - tok->async_def_indent = 0; - tok->async_def_nl = 0; - } - - again: - tok->start = NULL; - /* Skip spaces */ - do { - c = tok_nextc(tok); - } while (c == ' ' || c == '\t' || c == '\014'); - - /* Set start of current token */ - tok->start = tok->cur == NULL ? NULL : tok->cur - 1; - tok->starting_col_offset = tok->col_offset - 1; - - /* Skip comment, unless it's a type comment */ - if (c == '#') { + if (c == '#' || c == '\n' || c == '\r') { + /* Lines with only whitespace and/or comments + shouldn't affect the indentation and are + not passed to the parser as NEWLINE tokens, + except *totally* empty lines in interactive + mode, which signal the end of a command group. */ + if (col == 0 && c == '\n' && tok->prompt != NULL) { + blankline = 0; /* Let it through */ + } else if (tok->prompt != NULL && tok->lineno == 1) { + /* In interactive mode, if the first line contains + only spaces and/or a comment, let it through. */ + blankline = 0; + col = altcol = 0; + } else { + blankline = 1; /* Ignore completely */ + } + /* We can't jump back right here since we still + may need to skip to the end of a comment */ + } + if (!blankline && tok->level == 0) { + col = cont_line_col ? cont_line_col : col; + altcol = cont_line_col ? 
cont_line_col : altcol; + if (col == tok->indstack[tok->indent]) { + /* No change */ + if (altcol != tok->altindstack[tok->indent]) { + return MAKE_TOKEN(indenterror(tok)); + } + } else if (col > tok->indstack[tok->indent]) { + /* Indent -- always one */ + if (tok->indent + 1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return MAKE_TOKEN(ERRORTOKEN); + } + if (altcol <= tok->altindstack[tok->indent]) { + return MAKE_TOKEN(indenterror(tok)); + } + tok->pendin++; + tok->indstack[++tok->indent] = col; + tok->altindstack[tok->indent] = altcol; + } else /* col < tok->indstack[tok->indent] */ { + /* Dedent -- any number, must be consistent */ + while (tok->indent > 0 && col < tok->indstack[tok->indent]) { + tok->pendin--; + tok->indent--; + } + if (col != tok->indstack[tok->indent]) { + tok->done = E_DEDENT; + tok->cur = tok->inp; + return MAKE_TOKEN(ERRORTOKEN); + } + if (altcol != tok->altindstack[tok->indent]) { + return MAKE_TOKEN(indenterror(tok)); + } + } + } + } + + tok->start = tok->cur; + tok->starting_col_offset = tok->col_offset; + + /* Return pending indents/dedents */ + if (tok->pendin != 0) { + if (tok->pendin < 0) { + if (tok->tok_extra_tokens) { + p_start = tok->cur; + p_end = tok->cur; + } + tok->pendin++; + return MAKE_TOKEN(DEDENT); + } else { + if (tok->tok_extra_tokens) { + p_start = tok->buf; + p_end = tok->cur; + } + tok->pendin--; + return MAKE_TOKEN(INDENT); + } + } + + /* Peek ahead at the next character */ + c = tok_nextc(tok); + tok_backup(tok, c); + /* Check if we are closing an async function */ + if (tok->async_def && + !blankline + /* Due to some implementation artifacts of type comments, + * a TYPE_COMMENT at the start of a function won't set an + * indentation level and it will produce a NEWLINE after it. + * To avoid spuriously ending an async function due to this, + * wait until we have some non-newline char in front of us. */ + && c != '\n' && + tok->level == 0 + /* There was a NEWLINE after ASYNC DEF, + so we're past the signature. */ + && tok->async_def_nl + /* Current indentation level is less than where + the async function was defined */ + && tok->async_def_indent >= tok->indent) { + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; + } - const char* p = NULL; - const char *prefix, *type_start; - int current_starting_col_offset; +again: + tok->start = NULL; + /* Skip spaces */ + do { + c = tok_nextc(tok); + } while (c == ' ' || c == '\t' || c == '\014'); - while (c != EOF && c != '\n' && c != '\r') { - c = tok_nextc(tok); - } + /* Set start of current token */ + tok->start = tok->cur == NULL ? NULL : tok->cur - 1; + tok->starting_col_offset = tok->col_offset - 1; - if (tok->tok_extra_tokens) { - p = tok->start; - } + /* Skip comment, unless it's a type comment */ + if (c == '#') { - if (tok->type_comments) { - p = tok->start; - current_starting_col_offset = tok->starting_col_offset; - prefix = type_comment_prefix; - while (*prefix && p < tok->cur) { - if (*prefix == ' ') { - while (*p == ' ' || *p == '\t') { - p++; - current_starting_col_offset++; - } - } else if (*prefix == *p) { - p++; - current_starting_col_offset++; - } else { - break; - } - - prefix++; - } + const char *p = NULL; + const char *prefix, *type_start; + int current_starting_col_offset; - /* This is a type comment if we matched all of type_comment_prefix. 
*/ - if (!*prefix) { - int is_type_ignore = 1; - // +6 in order to skip the word 'ignore' - const char *ignore_end = p + 6; - const int ignore_end_col_offset = current_starting_col_offset + 6; - tok_backup(tok, c); /* don't eat the newline or EOF */ - - type_start = p; - - /* A TYPE_IGNORE is "type: ignore" followed by the end of the token - * or anything ASCII and non-alphanumeric. */ - is_type_ignore = ( - tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0 - && !(tok->cur > ignore_end - && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0])))); - - if (is_type_ignore) { - p_start = ignore_end; - p_end = tok->cur; - - /* If this type ignore is the only thing on the line, consume the newline also. */ - if (blankline) { - tok_nextc(tok); - tok->atbol = 1; - } - return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset); - } else { - p_start = type_start; - p_end = tok->cur; - return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset); - } - } - } - if (tok->tok_extra_tokens) { - tok_backup(tok, c); /* don't eat the newline or EOF */ - p_start = p; - p_end = tok->cur; - tok->comment_newline = blankline; - return MAKE_TOKEN(COMMENT); - } + while (c != EOF && c != '\n' && c != '\r') { + c = tok_nextc(tok); } - if (tok->done == E_INTERACT_STOP) { - return MAKE_TOKEN(ENDMARKER); + if (tok->tok_extra_tokens) { + p = tok->start; + } + + if (tok->type_comments) { + p = tok->start; + current_starting_col_offset = tok->starting_col_offset; + prefix = type_comment_prefix; + while (*prefix && p < tok->cur) { + if (*prefix == ' ') { + while (*p == ' ' || *p == '\t') { + p++; + current_starting_col_offset++; + } + } else if (*prefix == *p) { + p++; + current_starting_col_offset++; + } else { + break; + } + + prefix++; + } + + /* This is a type comment if we matched all of type_comment_prefix. */ + if (!*prefix) { + int is_type_ignore = 1; + // +6 in order to skip the word 'ignore' + const char *ignore_end = p + 6; + const int ignore_end_col_offset = current_starting_col_offset + 6; + tok_backup(tok, c); /* don't eat the newline or EOF */ + + type_start = p; + + /* A TYPE_IGNORE is "type: ignore" followed by the end of the token + * or anything ASCII and non-alphanumeric. */ + is_type_ignore = + (tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0 && + !(tok->cur > ignore_end && ((unsigned char)ignore_end[0] >= 128 || + Py_ISALNUM(ignore_end[0])))); + + if (is_type_ignore) { + p_start = ignore_end; + p_end = tok->cur; + + /* If this type ignore is the only thing on the line, consume the + * newline also. */ + if (blankline) { + tok_nextc(tok); + tok->atbol = 1; + } + return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, + tok->col_offset); + } else { + p_start = type_start; + p_end = tok->cur; + return MAKE_TYPE_COMMENT_TOKEN( + TYPE_COMMENT, current_starting_col_offset, tok->col_offset); + } + } } - - /* Check for EOF and errors now */ - if (c == EOF) { - if (tok->level) { - return MAKE_TOKEN(ERRORTOKEN); + if (tok->tok_extra_tokens) { + tok_backup(tok, c); /* don't eat the newline or EOF */ + p_start = p; + p_end = tok->cur; + tok->comment_newline = blankline; + return MAKE_TOKEN(COMMENT); + } + } + + if (tok->done == E_INTERACT_STOP) { + return MAKE_TOKEN(ENDMARKER); + } + + /* Check for EOF and errors now */ + if (c == EOF) { + if (tok->level) { + return MAKE_TOKEN(ERRORTOKEN); + } + return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN); + } + + /* Identifier (most frequent token!) 
*/ + nonascii = 0; + if (is_potential_identifier_start(c)) { + /* Process the various legal combinations of b"", r"", u"", and f"". */ + int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0; + while (1) { + if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B')) + saw_b = 1; + /* Since this is a backwards compatibility support literal we don't + want to support it in arbitrary order like byte literals. */ + else if (!(saw_b || saw_u || saw_r || saw_f) && (c == 'u' || c == 'U')) { + saw_u = 1; + } + /* ur"" and ru"" are not supported */ + else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) { + saw_r = 1; + } else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) { + saw_f = 1; + } else { + break; + } + c = tok_nextc(tok); + if (c == '"' || c == '\'') { + if (saw_f) { + goto f_string_quote; } - return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN); + goto letter_quote; + } + } + while (is_potential_identifier_char(c)) { + if (c >= 128) { + nonascii = 1; + } + c = tok_nextc(tok); + } + tok_backup(tok, c); + if (nonascii && !verify_identifier(tok)) { + return MAKE_TOKEN(ERRORTOKEN); } - /* Identifier (most frequent token!) */ - nonascii = 0; - if (is_potential_identifier_start(c)) { - /* Process the various legal combinations of b"", r"", u"", and f"". */ - int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0; - while (1) { - if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B')) - saw_b = 1; - /* Since this is a backwards compatibility support literal we don't - want to support it in arbitrary order like byte literals. */ - else if (!(saw_b || saw_u || saw_r || saw_f) - && (c == 'u'|| c == 'U')) { - saw_u = 1; - } - /* ur"" and ru"" are not supported */ - else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) { - saw_r = 1; - } - else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) { - saw_f = 1; - } - else { - break; - } - c = tok_nextc(tok); - if (c == '"' || c == '\'') { - if (saw_f) { - goto f_string_quote; - } - goto letter_quote; - } - } - while (is_potential_identifier_char(c)) { - if (c >= 128) { - nonascii = 1; - } - c = tok_nextc(tok); - } - tok_backup(tok, c); - if (nonascii && !verify_identifier(tok)) { - return MAKE_TOKEN(ERRORTOKEN); - } + p_start = tok->start; + p_end = tok->cur; - p_start = tok->start; - p_end = tok->cur; + /* async/await parsing block. */ + if (tok->cur - tok->start == 5 && tok->start[0] == 'a') { + /* May be an 'async' or 'await' token. For Python 3.7 or + later we recognize them unconditionally. For Python + 3.5 or 3.6 we recognize 'async' in front of 'def', and + either one inside of 'async def'. (Technically we + shouldn't recognize these at all for 3.4 or earlier, + but there's no *valid* Python 3.4 code that would be + rejected, and async functions will be rejected in a + later phase.) */ + if (!tok->async_hacks || tok->async_def) { + /* Always recognize the keywords. */ + if (memcmp(tok->start, "async", 5) == 0) { + return MAKE_TOKEN(ASYNC); + } + if (memcmp(tok->start, "await", 5) == 0) { + return MAKE_TOKEN(AWAIT); + } + } else if (memcmp(tok->start, "async", 5) == 0) { + /* The current token is 'async'. + Look ahead one token to see if that is 'def'. 
*/ + + struct tok_state ahead_tok; + struct token ahead_token; + _PyToken_Init(&ahead_token); + int ahead_tok_kind; + + memcpy(&ahead_tok, tok, sizeof(ahead_tok)); + ahead_tok_kind = + tok_get_normal_mode(&ahead_tok, current_tok, &ahead_token); + + if (ahead_tok_kind == NAME && ahead_tok.cur - ahead_tok.start == 3 && + memcmp(ahead_tok.start, "def", 3) == 0) { + /* The next token is going to be 'def', so instead of + returning a plain NAME token, return ASYNC. */ + tok->async_def_indent = tok->indent; + tok->async_def = 1; + _PyToken_Free(&ahead_token); + return MAKE_TOKEN(ASYNC); + } + _PyToken_Free(&ahead_token); + } + } + + return MAKE_TOKEN(NAME); + } + + if (c == '\r') { + c = tok_nextc(tok); + } - /* async/await parsing block. */ - if (tok->cur - tok->start == 5 && tok->start[0] == 'a') { - /* May be an 'async' or 'await' token. For Python 3.7 or - later we recognize them unconditionally. For Python - 3.5 or 3.6 we recognize 'async' in front of 'def', and - either one inside of 'async def'. (Technically we - shouldn't recognize these at all for 3.4 or earlier, - but there's no *valid* Python 3.4 code that would be - rejected, and async functions will be rejected in a - later phase.) */ - if (!tok->async_hacks || tok->async_def) { - /* Always recognize the keywords. */ - if (memcmp(tok->start, "async", 5) == 0) { - return MAKE_TOKEN(ASYNC); - } - if (memcmp(tok->start, "await", 5) == 0) { - return MAKE_TOKEN(AWAIT); - } - } - else if (memcmp(tok->start, "async", 5) == 0) { - /* The current token is 'async'. - Look ahead one token to see if that is 'def'. */ - - struct tok_state ahead_tok; - struct token ahead_token; - _PyToken_Init(&ahead_token); - int ahead_tok_kind; - - memcpy(&ahead_tok, tok, sizeof(ahead_tok)); - ahead_tok_kind = tok_get_normal_mode(&ahead_tok, - current_tok, - &ahead_token); - - if (ahead_tok_kind == NAME - && ahead_tok.cur - ahead_tok.start == 3 - && memcmp(ahead_tok.start, "def", 3) == 0) - { - /* The next token is going to be 'def', so instead of - returning a plain NAME token, return ASYNC. */ - tok->async_def_indent = tok->indent; - tok->async_def = 1; - _PyToken_Free(&ahead_token); - return MAKE_TOKEN(ASYNC); - } - _PyToken_Free(&ahead_token); - } + /* Newline */ + if (c == '\n') { + tok->atbol = 1; + if (blankline || tok->level > 0) { + if (tok->tok_extra_tokens) { + if (tok->comment_newline) { + tok->comment_newline = 0; } - - return MAKE_TOKEN(NAME); + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(NL); + } + goto nextline; } - - if (c == '\r') { - c = tok_nextc(tok); + if (tok->comment_newline && tok->tok_extra_tokens) { + tok->comment_newline = 0; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(NL); + } + p_start = tok->start; + p_end = tok->cur - 1; /* Leave '\n' out of the string */ + tok->cont_line = 0; + if (tok->async_def) { + /* We're somewhere inside an 'async def' function, and + we've encountered a NEWLINE after its signature. */ + tok->async_def_nl = 1; } + return MAKE_TOKEN(NEWLINE); + } - /* Newline */ - if (c == '\n') { - tok->atbol = 1; - if (blankline || tok->level > 0) { - if (tok->tok_extra_tokens) { - if (tok->comment_newline) { - tok->comment_newline = 0; - } - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(NL); - } - goto nextline; - } - if (tok->comment_newline && tok->tok_extra_tokens) { - tok->comment_newline = 0; - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(NL); - } + /* Period or number starting with period? 
*/ + if (c == '.') { + c = tok_nextc(tok); + if (isdigit(c)) { + goto fraction; + } else if (c == '.') { + c = tok_nextc(tok); + if (c == '.') { p_start = tok->start; - p_end = tok->cur - 1; /* Leave '\n' out of the string */ - tok->cont_line = 0; - if (tok->async_def) { - /* We're somewhere inside an 'async def' function, and - we've encountered a NEWLINE after its signature. */ - tok->async_def_nl = 1; - } - return MAKE_TOKEN(NEWLINE); + p_end = tok->cur; + return MAKE_TOKEN(ELLIPSIS); + } else { + tok_backup(tok, c); + } + tok_backup(tok, '.'); + } else { + tok_backup(tok, c); } - - /* Period or number starting with period? */ - if (c == '.') { + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(DOT); + } + + /* Number */ + if (isdigit(c)) { + if (c == '0') { + /* Hex, octal or binary -- maybe. */ + c = tok_nextc(tok); + if (c == 'x' || c == 'X') { + /* Hex */ c = tok_nextc(tok); - if (isdigit(c)) { - goto fraction; - } else if (c == '.') { + do { + if (c == '_') { c = tok_nextc(tok); - if (c == '.') { - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(ELLIPSIS); - } - else { - tok_backup(tok, c); - } - tok_backup(tok, '.'); - } - else { + } + if (!isxdigit(c)) { tok_backup(tok, c); - } - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(DOT); - } - - /* Number */ - if (isdigit(c)) { - if (c == '0') { - /* Hex, octal or binary -- maybe. */ + return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal")); + } + do { c = tok_nextc(tok); - if (c == 'x' || c == 'X') { - /* Hex */ - c = tok_nextc(tok); - do { - if (c == '_') { - c = tok_nextc(tok); - } - if (!isxdigit(c)) { - tok_backup(tok, c); - return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal")); - } - do { - c = tok_nextc(tok); - } while (isxdigit(c)); - } while (c == '_'); - if (!verify_end_of_number(tok, c, "hexadecimal")) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - else if (c == 'o' || c == 'O') { - /* Octal */ - c = tok_nextc(tok); - do { - if (c == '_') { - c = tok_nextc(tok); - } - if (c < '0' || c >= '8') { - if (isdigit(c)) { - return MAKE_TOKEN(syntaxerror(tok, - "invalid digit '%c' in octal literal", c)); - } - else { - tok_backup(tok, c); - return MAKE_TOKEN(syntaxerror(tok, "invalid octal literal")); - } - } - do { - c = tok_nextc(tok); - } while ('0' <= c && c < '8'); - } while (c == '_'); - if (isdigit(c)) { - return MAKE_TOKEN(syntaxerror(tok, - "invalid digit '%c' in octal literal", c)); - } - if (!verify_end_of_number(tok, c, "octal")) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - else if (c == 'b' || c == 'B') { - /* Binary */ - c = tok_nextc(tok); - do { - if (c == '_') { - c = tok_nextc(tok); - } - if (c != '0' && c != '1') { - if (isdigit(c)) { - return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c)); - } - else { - tok_backup(tok, c); - return MAKE_TOKEN(syntaxerror(tok, "invalid binary literal")); - } - } - do { - c = tok_nextc(tok); - } while (c == '0' || c == '1'); - } while (c == '_'); - if (isdigit(c)) { - return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c)); - } - if (!verify_end_of_number(tok, c, "binary")) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - else { - int nonzero = 0; - /* maybe old-style octal; c is first char of it */ - /* in any case, allow '0' as a literal */ - while (1) { - if (c == '_') { - c = tok_nextc(tok); - if (!isdigit(c)) { - tok_backup(tok, c); - return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); - } - } - if (c != '0') { - break; - } - c = tok_nextc(tok); - } - char* 
zeros_end = tok->cur; - if (isdigit(c)) { - nonzero = 1; - c = tok_decimal_tail(tok); - if (c == 0) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - if (c == '.') { - c = tok_nextc(tok); - goto fraction; - } - else if (c == 'e' || c == 'E') { - goto exponent; - } - else if (c == 'j' || c == 'J') { - goto imaginary; - } - else if (nonzero && !tok->tok_extra_tokens) { - /* Old-style octal: now disallowed. */ - tok_backup(tok, c); - return MAKE_TOKEN(syntaxerror_known_range( - tok, (int)(tok->start + 1 - tok->line_start), - (int)(zeros_end - tok->line_start), - "leading zeros in decimal integer " - "literals are not permitted; " - "use an 0o prefix for octal integers")); - } - if (!verify_end_of_number(tok, c, "decimal")) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - } - else { - /* Decimal */ - c = tok_decimal_tail(tok); - if (c == 0) { - return MAKE_TOKEN(ERRORTOKEN); - } - { - /* Accept floating point numbers. */ - if (c == '.') { - c = tok_nextc(tok); - fraction: - /* Fraction */ - if (isdigit(c)) { - c = tok_decimal_tail(tok); - if (c == 0) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - } - if (c == 'e' || c == 'E') { - int e; - exponent: - e = c; - /* Exponent part */ - c = tok_nextc(tok); - if (c == '+' || c == '-') { - c = tok_nextc(tok); - if (!isdigit(c)) { - tok_backup(tok, c); - return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); - } - } else if (!isdigit(c)) { - tok_backup(tok, c); - if (!verify_end_of_number(tok, e, "decimal")) { - return MAKE_TOKEN(ERRORTOKEN); - } - tok_backup(tok, e); - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(NUMBER); - } - c = tok_decimal_tail(tok); - if (c == 0) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - if (c == 'j' || c == 'J') { - /* Imaginary part */ - imaginary: - c = tok_nextc(tok); - if (!verify_end_of_number(tok, c, "imaginary")) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - else if (!verify_end_of_number(tok, c, "decimal")) { - return MAKE_TOKEN(ERRORTOKEN); - } - } + } while (isxdigit(c)); + } while (c == '_'); + if (!verify_end_of_number(tok, c, "hexadecimal")) { + return MAKE_TOKEN(ERRORTOKEN); } - tok_backup(tok, c); - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(NUMBER); - } - - f_string_quote: - if (((tolower(*tok->start) == 'f' || tolower(*tok->start) == 'r') && (c == '\'' || c == '"'))) { - int quote = c; - int quote_size = 1; /* 1 or 3 */ - - /* Nodes of type STRING, especially multi line strings - must be handled differently in order to get both - the starting line number and the column offset right. - (cf. 
issue 16806) */ - tok->first_lineno = tok->lineno; - tok->multi_line_start = tok->line_start; - - /* Find the quote size and start of string */ - int after_quote = tok_nextc(tok); - if (after_quote == quote) { - int after_after_quote = tok_nextc(tok); - if (after_after_quote == quote) { - quote_size = 3; - } - else { - // TODO: Check this - tok_backup(tok, after_after_quote); - tok_backup(tok, after_quote); + } else if (c == 'o' || c == 'O') { + /* Octal */ + c = tok_nextc(tok); + do { + if (c == '_') { + c = tok_nextc(tok); + } + if (c < '0' || c >= '8') { + if (isdigit(c)) { + return MAKE_TOKEN( + syntaxerror(tok, "invalid digit '%c' in octal literal", c)); + } else { + tok_backup(tok, c); + return MAKE_TOKEN(syntaxerror(tok, "invalid octal literal")); } + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } while (c == '_'); + if (isdigit(c)) { + return MAKE_TOKEN( + syntaxerror(tok, "invalid digit '%c' in octal literal", c)); } - if (after_quote != quote) { - tok_backup(tok, after_quote); - } - - - p_start = tok->start; - p_end = tok->cur; - if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) { - return MAKE_TOKEN(syntaxerror(tok, "too many nested f-strings")); - } - tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok); - the_current_tok->kind = TOK_FSTRING_MODE; - the_current_tok->f_string_quote = quote; - the_current_tok->f_string_quote_size = quote_size; - the_current_tok->f_string_start = tok->start; - the_current_tok->f_string_multi_line_start = tok->line_start; - the_current_tok->f_string_line_start = tok->lineno; - the_current_tok->f_string_start_offset = -1; - the_current_tok->f_string_multi_line_start_offset = -1; - the_current_tok->last_expr_buffer = NULL; - the_current_tok->last_expr_size = 0; - the_current_tok->last_expr_end = -1; - the_current_tok->f_string_debug = 0; - - switch (*tok->start) { - case 'F': - case 'f': - the_current_tok->f_string_raw = tolower(*(tok->start + 1)) == 'r'; - break; - case 'R': - case 'r': - the_current_tok->f_string_raw = 1; - break; - default: - Py_UNREACHABLE(); + if (!verify_end_of_number(tok, c, "octal")) { + return MAKE_TOKEN(ERRORTOKEN); } - - the_current_tok->curly_bracket_depth = 0; - the_current_tok->curly_bracket_expr_start_depth = -1; - return MAKE_TOKEN(FSTRING_START); - } - - letter_quote: - /* String */ - if (c == '\'' || c == '"') { - int quote = c; - int quote_size = 1; /* 1 or 3 */ - int end_quote_size = 0; - - /* Nodes of type STRING, especially multi line strings - must be handled differently in order to get both - the starting line number and the column offset right. - (cf. 
issue 16806) */ - tok->first_lineno = tok->lineno; - tok->multi_line_start = tok->line_start; - - /* Find the quote size and start of string */ + } else if (c == 'b' || c == 'B') { + /* Binary */ c = tok_nextc(tok); - if (c == quote) { + do { + if (c == '_') { c = tok_nextc(tok); - if (c == quote) { - quote_size = 3; - } - else { - end_quote_size = 1; /* empty string found */ + } + if (c != '0' && c != '1') { + if (isdigit(c)) { + return MAKE_TOKEN( + syntaxerror(tok, "invalid digit '%c' in binary literal", c)); + } else { + tok_backup(tok, c); + return MAKE_TOKEN(syntaxerror(tok, "invalid binary literal")); } + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } while (c == '_'); + if (isdigit(c)) { + return MAKE_TOKEN( + syntaxerror(tok, "invalid digit '%c' in binary literal", c)); } - if (c != quote) { - tok_backup(tok, c); + if (!verify_end_of_number(tok, c, "binary")) { + return MAKE_TOKEN(ERRORTOKEN); } - - /* Get rest of string */ - while (end_quote_size != quote_size) { + } else { + int nonzero = 0; + /* maybe old-style octal; c is first char of it */ + /* in any case, allow '0' as a literal */ + while (1) { + if (c == '_') { c = tok_nextc(tok); - if (tok->done == E_ERROR) { - return MAKE_TOKEN(ERRORTOKEN); - } - if (tok->done == E_DECODE) { - break; - } - if (c == EOF || (quote_size == 1 && c == '\n')) { - assert(tok->multi_line_start != NULL); - // shift the tok_state's location into - // the start of string, and report the error - // from the initial quote character - tok->cur = (char *)tok->start; - tok->cur++; - tok->line_start = tok->multi_line_start; - int start = tok->lineno; - tok->lineno = tok->first_lineno; - - if (INSIDE_FSTRING(tok)) { - /* When we are in an f-string, before raising the - * unterminated string literal error, check whether - * does the initial quote matches with f-strings quotes - * and if it is, then this must be a missing '}' token - * so raise the proper error */ - tokenizer_mode *the_current_tok = TOK_GET_MODE(tok); - if (the_current_tok->f_string_quote == quote && - the_current_tok->f_string_quote_size == quote_size) { - return MAKE_TOKEN(syntaxerror(tok, "f-string: expecting '}'", start)); - } - } - - if (quote_size == 3) { - syntaxerror(tok, "unterminated triple-quoted string literal" - " (detected at line %d)", start); - if (c != '\n') { - tok->done = E_EOFS; - } - return MAKE_TOKEN(ERRORTOKEN); - } - else { - syntaxerror(tok, "unterminated string literal (detected at" - " line %d)", start); - if (c != '\n') { - tok->done = E_EOLS; - } - return MAKE_TOKEN(ERRORTOKEN); - } + if (!isdigit(c)) { + tok_backup(tok, c); + return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); } - if (c == quote) { - end_quote_size += 1; - } - else { - end_quote_size = 0; - if (c == '\\') { - c = tok_nextc(tok); /* skip escaped char */ - if (c == '\r') { - c = tok_nextc(tok); - } - } - } - } - - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(STRING); - } - - /* Line continuation */ - if (c == '\\') { - if ((c = tok_continuation_line(tok)) == -1) { - return MAKE_TOKEN(ERRORTOKEN); - } - tok->cont_line = 1; - goto again; /* Read next line */ - } - - /* Punctuation character */ - int is_punctuation = (c == ':' || c == '}' || c == '!' 
|| c == '{'); - if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) { - /* This code block gets executed before the curly_bracket_depth is incremented - * by the `{` case, so for ensuring that we are on the 0th level, we need - * to adjust it manually */ - int cursor = current_tok->curly_bracket_depth - (c != '{'); - if (cursor == 0 && !update_fstring_expr(tok, c)) { - return MAKE_TOKEN(ENDMARKER); + } + if (c != '0') { + break; + } + c = tok_nextc(tok); } - if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) { + char *zeros_end = tok->cur; + if (isdigit(c)) { + nonzero = 1; + c = tok_decimal_tail(tok); + if (c == 0) { return MAKE_TOKEN(ERRORTOKEN); + } + } + if (c == '.') { + c = tok_nextc(tok); + goto fraction; + } else if (c == 'e' || c == 'E') { + goto exponent; + } else if (c == 'j' || c == 'J') { + goto imaginary; + } else if (nonzero && !tok->tok_extra_tokens) { + /* Old-style octal: now disallowed. */ + tok_backup(tok, c); + return MAKE_TOKEN(syntaxerror_known_range( + tok, (int)(tok->start + 1 - tok->line_start), + (int)(zeros_end - tok->line_start), + "leading zeros in decimal integer " + "literals are not permitted; " + "use an 0o prefix for octal integers")); + } + if (!verify_end_of_number(tok, c, "decimal")) { + return MAKE_TOKEN(ERRORTOKEN); + } + } + } else { + /* Decimal */ + c = tok_decimal_tail(tok); + if (c == 0) { + return MAKE_TOKEN(ERRORTOKEN); + } + { + /* Accept floating point numbers. */ + if (c == '.') { + c = tok_nextc(tok); + fraction: + /* Fraction */ + if (isdigit(c)) { + c = tok_decimal_tail(tok); + if (c == 0) { + return MAKE_TOKEN(ERRORTOKEN); + } + } } - - if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) { - current_tok->kind = TOK_FSTRING_MODE; - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(_PyToken_OneChar(c)); - } - } - - /* Check for two-character token */ - { - int c2 = tok_nextc(tok); - int current_token = _PyToken_TwoChars(c, c2); - if (current_token != OP) { - int c3 = tok_nextc(tok); - int current_token3 = _PyToken_ThreeChars(c, c2, c3); - if (current_token3 != OP) { - current_token = current_token3; + if (c == 'e' || c == 'E') { + int e; + exponent: + e = c; + /* Exponent part */ + c = tok_nextc(tok); + if (c == '+' || c == '-') { + c = tok_nextc(tok); + if (!isdigit(c)) { + tok_backup(tok, c); + return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); } - else { - tok_backup(tok, c3); + } else if (!isdigit(c)) { + tok_backup(tok, c); + if (!verify_end_of_number(tok, e, "decimal")) { + return MAKE_TOKEN(ERRORTOKEN); } + tok_backup(tok, e); p_start = tok->start; p_end = tok->cur; - return MAKE_TOKEN(current_token); - } - tok_backup(tok, c2); - } - - /* Keep track of parentheses nesting level */ - switch (c) { - case '(': - case '[': - case '{': - if (tok->level >= MAXLEVEL) { - return MAKE_TOKEN(syntaxerror(tok, "too many nested parentheses")); - } - tok->parenstack[tok->level] = c; - tok->parenlinenostack[tok->level] = tok->lineno; - tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start); - tok->level++; - if (INSIDE_FSTRING(tok)) { - current_tok->curly_bracket_depth++; - } - break; - case ')': - case ']': - case '}': - if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { - return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed")); - } - if (!tok->tok_extra_tokens && !tok->level) { - return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c)); - } - if (tok->level > 0) { - tok->level--; - int opening = 
tok->parenstack[tok->level]; - if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') || - (opening == '[' && c == ']') || - (opening == '{' && c == '}'))) { - /* If the opening bracket belongs to an f-string's expression - part (e.g. f"{)}") and the closing bracket is an arbitrary - nested expression, then instead of matching a different - syntactical construct with it; we'll throw an unmatched - parentheses error. */ - if (INSIDE_FSTRING(tok) && opening == '{') { - assert(current_tok->curly_bracket_depth >= 0); - int previous_bracket = current_tok->curly_bracket_depth - 1; - if (previous_bracket == current_tok->curly_bracket_expr_start_depth) { - return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c)); - } - } - if (tok->parenlinenostack[tok->level] != tok->lineno) { - return MAKE_TOKEN(syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c' on line %d", - c, opening, tok->parenlinenostack[tok->level])); - } - else { - return MAKE_TOKEN(syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c'", - c, opening)); - } - } + return MAKE_TOKEN(NUMBER); + } + c = tok_decimal_tail(tok); + if (c == 0) { + return MAKE_TOKEN(ERRORTOKEN); + } } - - if (INSIDE_FSTRING(tok)) { - current_tok->curly_bracket_depth--; - if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) { - current_tok->curly_bracket_expr_start_depth--; - current_tok->kind = TOK_FSTRING_MODE; - current_tok->f_string_debug = 0; - } + if (c == 'j' || c == 'J') { + /* Imaginary part */ + imaginary: + c = tok_nextc(tok); + if (!verify_end_of_number(tok, c, "imaginary")) { + return MAKE_TOKEN(ERRORTOKEN); + } + } else if (!verify_end_of_number(tok, c, "decimal")) { + return MAKE_TOKEN(ERRORTOKEN); } - break; - default: - break; - } - - if (!Py_UNICODE_ISPRINTABLE(c)) { - return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%04X", c)); + } } - - if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) { - current_tok->f_string_debug = 1; - } - - /* Punctuation character */ + tok_backup(tok, c); p_start = tok->start; p_end = tok->cur; - return MAKE_TOKEN(_PyToken_OneChar(c)); -} - -static int -tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token) -{ - const char *p_start = NULL; - const char *p_end = NULL; - int end_quote_size = 0; - int unicode_escape = 0; - - tok->start = tok->cur; + return MAKE_TOKEN(NUMBER); + } + +f_string_quote: + if (((tolower(*tok->start) == 'f' || tolower(*tok->start) == 'r') && + (c == '\'' || c == '"'))) { + int quote = c; + int quote_size = 1; /* 1 or 3 */ + + /* Nodes of type STRING, especially multi line strings + must be handled differently in order to get both + the starting line number and the column offset right. + (cf. issue 16806) */ tok->first_lineno = tok->lineno; - tok->starting_col_offset = tok->col_offset; - - // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize - // before it. 
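For orientation, a minimal sketch (assuming a CPython 3.12 interpreter built with this tokenizer) of how the f-string mode switching handled here surfaces through the tokenize module: the prefix and opening quote become FSTRING_START, literal text becomes FSTRING_MIDDLE, the braces hand control back to the regular mode for the expression, and the closing quote becomes FSTRING_END. The exact token stream may vary between releases.

import io
import tokenize

src = 'f"value: {x:>10}"\n'
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    # e.g. FSTRING_START 'f"', FSTRING_MIDDLE 'value: ', OP '{', NAME 'x', ...
    print(tokenize.tok_name[tok.exact_type], repr(tok.string))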
- int start_char = tok_nextc(tok); - if (start_char == '{') { - int peek1 = tok_nextc(tok); - tok_backup(tok, peek1); - tok_backup(tok, start_char); - if (peek1 != '{') { - current_tok->curly_bracket_expr_start_depth++; - if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) { - return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply")); - } - TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; - return tok_get_normal_mode(tok, current_tok, token); - } - } - else { - tok_backup(tok, start_char); - } + tok->multi_line_start = tok->line_start; - // Check if we are at the end of the string - for (int i = 0; i < current_tok->f_string_quote_size; i++) { - int quote = tok_nextc(tok); - if (quote != current_tok->f_string_quote) { - tok_backup(tok, quote); - goto f_string_middle; - } + /* Find the quote size and start of string */ + int after_quote = tok_nextc(tok); + if (after_quote == quote) { + int after_after_quote = tok_nextc(tok); + if (after_after_quote == quote) { + quote_size = 3; + } else { + // TODO: Check this + tok_backup(tok, after_after_quote); + tok_backup(tok, after_quote); + } } - - if (current_tok->last_expr_buffer != NULL) { - PyMem_Free(current_tok->last_expr_buffer); - current_tok->last_expr_buffer = NULL; - current_tok->last_expr_size = 0; - current_tok->last_expr_end = -1; + if (after_quote != quote) { + tok_backup(tok, after_quote); } p_start = tok->start; p_end = tok->cur; - tok->tok_mode_stack_index--; - return MAKE_TOKEN(FSTRING_END); + if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) { + return MAKE_TOKEN(syntaxerror(tok, "too many nested f-strings")); + } + tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok); + the_current_tok->kind = TOK_FSTRING_MODE; + the_current_tok->f_string_quote = quote; + the_current_tok->f_string_quote_size = quote_size; + the_current_tok->f_string_start = tok->start; + the_current_tok->f_string_multi_line_start = tok->line_start; + the_current_tok->f_string_line_start = tok->lineno; + the_current_tok->f_string_start_offset = -1; + the_current_tok->f_string_multi_line_start_offset = -1; + the_current_tok->last_expr_buffer = NULL; + the_current_tok->last_expr_size = 0; + the_current_tok->last_expr_end = -1; + the_current_tok->f_string_debug = 0; + + switch (*tok->start) { + case 'F': + case 'f': + the_current_tok->f_string_raw = tolower(*(tok->start + 1)) == 'r'; + break; + case 'R': + case 'r': + the_current_tok->f_string_raw = 1; + break; + default: + Py_UNREACHABLE(); + } -f_string_middle: + the_current_tok->curly_bracket_depth = 0; + the_current_tok->curly_bracket_expr_start_depth = -1; + return MAKE_TOKEN(FSTRING_START); + } - // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle - // this. - tok->multi_line_start = tok->line_start; - while (end_quote_size != current_tok->f_string_quote_size) { - int c = tok_nextc(tok); - if (tok->done == E_ERROR) { - return MAKE_TOKEN(ERRORTOKEN); - } - int in_format_spec = ( - current_tok->last_expr_end != -1 - && - INSIDE_FSTRING_EXPR(current_tok) - ); - - if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { - if (tok->decoding_erred) { - return MAKE_TOKEN(ERRORTOKEN); - } +letter_quote: + /* String */ + if (c == '\'' || c == '"') { + int quote = c; + int quote_size = 1; /* 1 or 3 */ + int end_quote_size = 0; - // If we are in a format spec and we found a newline, - // it means that the format spec ends here and we should - // return to the regular mode. 
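The constructs this change targets are debug expressions ('=') combined with format specifiers inside f-strings; a small Python sketch of the affected syntax (outputs indicative only):

x = 3.14159
width = 10
print(f"{x=:.3f}")        # debug expression with a plain format spec -> x=3.142
print(f"{x=:>{width}}")   # format spec containing a nested replacement field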
- if (in_format_spec && c == '\n') { - tok_backup(tok, c); - TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; - p_start = tok->start; - p_end = tok->cur; - return MAKE_TOKEN(FSTRING_MIDDLE); - } + /* Nodes of type STRING, especially multi line strings + must be handled differently in order to get both + the starting line number and the column offset right. + (cf. issue 16806) */ + tok->first_lineno = tok->lineno; + tok->multi_line_start = tok->line_start; - assert(tok->multi_line_start != NULL); - // shift the tok_state's location into - // the start of string, and report the error - // from the initial quote character - tok->cur = (char *)current_tok->f_string_start; - tok->cur++; - tok->line_start = current_tok->f_string_multi_line_start; - int start = tok->lineno; - - tokenizer_mode *the_current_tok = TOK_GET_MODE(tok); - tok->lineno = the_current_tok->f_string_line_start; - - if (current_tok->f_string_quote_size == 3) { - syntaxerror(tok, - "unterminated triple-quoted f-string literal" - " (detected at line %d)", start); - if (c != '\n') { - tok->done = E_EOFS; - } - return MAKE_TOKEN(ERRORTOKEN); - } - else { - return MAKE_TOKEN(syntaxerror(tok, - "unterminated f-string literal (detected at" - " line %d)", start)); - } - } + /* Find the quote size and start of string */ + c = tok_nextc(tok); + if (c == quote) { + c = tok_nextc(tok); + if (c == quote) { + quote_size = 3; + } else { + end_quote_size = 1; /* empty string found */ + } + } + if (c != quote) { + tok_backup(tok, c); + } + + /* Get rest of string */ + while (end_quote_size != quote_size) { + c = tok_nextc(tok); + if (tok->done == E_ERROR) { + return MAKE_TOKEN(ERRORTOKEN); + } + if (tok->done == E_DECODE) { + break; + } + if (c == EOF || (quote_size == 1 && c == '\n')) { + assert(tok->multi_line_start != NULL); + // shift the tok_state's location into + // the start of string, and report the error + // from the initial quote character + tok->cur = (char *)tok->start; + tok->cur++; + tok->line_start = tok->multi_line_start; + int start = tok->lineno; + tok->lineno = tok->first_lineno; - if (c == current_tok->f_string_quote) { - end_quote_size += 1; - continue; + if (INSIDE_FSTRING(tok)) { + /* When we are in an f-string, before raising the + * unterminated string literal error, check whether + * does the initial quote matches with f-strings quotes + * and if it is, then this must be a missing '}' token + * so raise the proper error */ + tokenizer_mode *the_current_tok = TOK_GET_MODE(tok); + if (the_current_tok->f_string_quote == quote && + the_current_tok->f_string_quote_size == quote_size) { + return MAKE_TOKEN( + syntaxerror(tok, "f-string: expecting '}'", start)); + } + } + + if (quote_size == 3) { + syntaxerror(tok, + "unterminated triple-quoted string literal" + " (detected at line %d)", + start); + if (c != '\n') { + tok->done = E_EOFS; + } + return MAKE_TOKEN(ERRORTOKEN); } else { - end_quote_size = 0; - } - - if (c == '{') { - int peek = tok_nextc(tok); - if (peek != '{' || in_format_spec) { - tok_backup(tok, peek); - tok_backup(tok, c); - current_tok->curly_bracket_expr_start_depth++; - if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) { - return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply")); - } - TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; - p_start = tok->start; - p_end = tok->cur; - } else { - p_start = tok->start; - p_end = tok->cur - 1; - } - return MAKE_TOKEN(FSTRING_MIDDLE); - } else if (c == '}') { - if (unicode_escape) { - p_start = tok->start; - p_end = tok->cur; - 
return MAKE_TOKEN(FSTRING_MIDDLE); - } - int peek = tok_nextc(tok); - - // The tokenizer can only be in the format spec if we have already completed the expression - // scanning (indicated by the end of the expression being set) and we are not at the top level - // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double - // brackets, we can bypass it here. - if (peek == '}' && !in_format_spec) { - p_start = tok->start; - p_end = tok->cur - 1; - } else { - tok_backup(tok, peek); - tok_backup(tok, c); - TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; - p_start = tok->start; - p_end = tok->cur; - } - return MAKE_TOKEN(FSTRING_MIDDLE); - } else if (c == '\\') { - int peek = tok_nextc(tok); - if (peek == '\r') { - peek = tok_nextc(tok); - } - // Special case when the backslash is right before a curly - // brace. We have to restore and return the control back - // to the loop for the next iteration. - if (peek == '{' || peek == '}') { - if (!current_tok->f_string_raw) { - if (warn_invalid_escape_sequence(tok, peek)) { - return MAKE_TOKEN(ERRORTOKEN); - } - } - tok_backup(tok, peek); - continue; - } - - if (!current_tok->f_string_raw) { - if (peek == 'N') { - /* Handle named unicode escapes (\N{BULLET}) */ - peek = tok_nextc(tok); - if (peek == '{') { - unicode_escape = 1; - } else { - tok_backup(tok, peek); - } - } - } /* else { - skip the escaped character - }*/ + syntaxerror(tok, + "unterminated string literal (detected at" + " line %d)", + start); + if (c != '\n') { + tok->done = E_EOLS; + } + return MAKE_TOKEN(ERRORTOKEN); + } + } + if (c == quote) { + end_quote_size += 1; + } else { + end_quote_size = 0; + if (c == '\\') { + c = tok_nextc(tok); /* skip escaped char */ + if (c == '\r') { + c = tok_nextc(tok); + } } + } } - // Backup the f-string quotes to emit a final FSTRING_MIDDLE and - // add the quotes to the FSTRING_END in the next tokenizer iteration. - for (int i = 0; i < current_tok->f_string_quote_size; i++) { - tok_backup(tok, current_tok->f_string_quote); - } p_start = tok->start; p_end = tok->cur; - return MAKE_TOKEN(FSTRING_MIDDLE); -} + return MAKE_TOKEN(STRING); + } + + /* Line continuation */ + if (c == '\\') { + if ((c = tok_continuation_line(tok)) == -1) { + return MAKE_TOKEN(ERRORTOKEN); + } + tok->cont_line = 1; + goto again; /* Read next line */ + } + + /* Punctuation character */ + int is_punctuation = (c == ':' || c == '}' || c == '!' 
|| c == '{'); + if (is_punctuation && INSIDE_FSTRING(tok) && + INSIDE_FSTRING_EXPR(current_tok)) { + /* This code block gets executed before the curly_bracket_depth is + * incremented by the `{` case, so for ensuring that we are on the 0th + * level, we need to adjust it manually */ + int cursor = current_tok->curly_bracket_depth - (c != '{'); + int in_format_spec = current_tok->in_format_spec; + int cursor_in_format_with_debug = + cursor == 1 && (current_tok->f_string_debug || in_format_spec); + int cursor_valid = cursor == 0 || cursor_in_format_with_debug; + if (cursor_valid && !update_fstring_expr(tok, c)) { + return MAKE_TOKEN(ENDMARKER); + } + if (cursor_valid && c != '{' && set_fstring_expr(tok, token, c)) { + return MAKE_TOKEN(ERRORTOKEN); + } + + if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) { + current_tok->kind = TOK_FSTRING_MODE; + current_tok->in_format_spec = 1; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(_PyToken_OneChar(c)); + } + } + + /* Check for two-character token */ + { + int c2 = tok_nextc(tok); + int current_token = _PyToken_TwoChars(c, c2); + if (current_token != OP) { + int c3 = tok_nextc(tok); + int current_token3 = _PyToken_ThreeChars(c, c2, c3); + if (current_token3 != OP) { + current_token = current_token3; + } else { + tok_backup(tok, c3); + } + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(current_token); + } + tok_backup(tok, c2); + } + + /* Keep track of parentheses nesting level */ + switch (c) { + case '(': + case '[': + case '{': + if (tok->level >= MAXLEVEL) { + return MAKE_TOKEN(syntaxerror(tok, "too many nested parentheses")); + } + tok->parenstack[tok->level] = c; + tok->parenlinenostack[tok->level] = tok->lineno; + tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start); + tok->level++; + if (INSIDE_FSTRING(tok)) { + current_tok->curly_bracket_depth++; + } + break; + case ')': + case ']': + case '}': + if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { + return MAKE_TOKEN( + syntaxerror(tok, "f-string: single '}' is not allowed")); + } + if (!tok->tok_extra_tokens && !tok->level) { + return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c)); + } + if (tok->level > 0) { + tok->level--; + int opening = tok->parenstack[tok->level]; + if (!tok->tok_extra_tokens && + !((opening == '(' && c == ')') || (opening == '[' && c == ']') || + (opening == '{' && c == '}'))) { + /* If the opening bracket belongs to an f-string's expression + part (e.g. f"{)}") and the closing bracket is an arbitrary + nested expression, then instead of matching a different + syntactical construct with it; we'll throw an unmatched + parentheses error. 
*/ + if (INSIDE_FSTRING(tok) && opening == '{') { + assert(current_tok->curly_bracket_depth >= 0); + int previous_bracket = current_tok->curly_bracket_depth - 1; + if (previous_bracket == current_tok->curly_bracket_expr_start_depth) { + return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c)); + } + } + if (tok->parenlinenostack[tok->level] != tok->lineno) { + return MAKE_TOKEN( + syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c' on line %d", + c, opening, tok->parenlinenostack[tok->level])); + } else { + return MAKE_TOKEN( + syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c'", + c, opening)); + } + } + } + + if (INSIDE_FSTRING(tok)) { + current_tok->curly_bracket_depth--; + if (c == '}' && current_tok->curly_bracket_depth == + current_tok->curly_bracket_expr_start_depth) { + current_tok->curly_bracket_expr_start_depth--; + current_tok->kind = TOK_FSTRING_MODE; + current_tok->in_format_spec = 0; + current_tok->f_string_debug = 0; + } + } + break; + default: + break; + } + + if (!Py_UNICODE_ISPRINTABLE(c)) { + return MAKE_TOKEN( + syntaxerror(tok, "invalid non-printable character U+%04X", c)); + } + + if (c == '=' && INSIDE_FSTRING_EXPR(current_tok)) { + current_tok->f_string_debug = 1; + } + + /* Punctuation character */ + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(_PyToken_OneChar(c)); +} + +static int tok_get_fstring_mode(struct tok_state *tok, + tokenizer_mode *current_tok, + struct token *token) { + const char *p_start = NULL; + const char *p_end = NULL; + int end_quote_size = 0; + int unicode_escape = 0; + + tok->start = tok->cur; + tok->first_lineno = tok->lineno; + tok->starting_col_offset = tok->col_offset; + + // If we start with a bracket, we defer to the normal mode as there is nothing + // for us to tokenize before it. + int start_char = tok_nextc(tok); + if (start_char == '{') { + int peek1 = tok_nextc(tok); + tok_backup(tok, peek1); + tok_backup(tok, start_char); + if (peek1 != '{') { + current_tok->curly_bracket_expr_start_depth++; + if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) { + return MAKE_TOKEN( + syntaxerror(tok, "f-string: expressions nested too deeply")); + } + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + return tok_get_normal_mode(tok, current_tok, token); + } + } else { + tok_backup(tok, start_char); + } + + // Check if we are at the end of the string + for (int i = 0; i < current_tok->f_string_quote_size; i++) { + int quote = tok_nextc(tok); + if (quote != current_tok->f_string_quote) { + tok_backup(tok, quote); + goto f_string_middle; + } + } + + if (current_tok->last_expr_buffer != NULL) { + PyMem_Free(current_tok->last_expr_buffer); + current_tok->last_expr_buffer = NULL; + current_tok->last_expr_size = 0; + current_tok->last_expr_end = -1; + } + + p_start = tok->start; + p_end = tok->cur; + tok->tok_mode_stack_index--; + return MAKE_TOKEN(FSTRING_END); +f_string_middle: -static int -tok_get(struct tok_state *tok, struct token *token) -{ - tokenizer_mode *current_tok = TOK_GET_MODE(tok); - if (current_tok->kind == TOK_REGULAR_MODE) { - return tok_get_normal_mode(tok, current_tok, token); - } else { - return tok_get_fstring_mode(tok, current_tok, token); + // TODO: This is a bit of a hack, but it works for now. We need to find a + // better way to handle this. 
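The bracket-mismatch diagnostics above can be observed directly from Python; a short sketch, with the message text taken from the syntaxerror calls in this hunk:

try:
    compile("(1, 2]", "<example>", "eval")
except SyntaxError as exc:
    print(exc.msg)  # closing parenthesis ']' does not match opening parenthesis '('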
+ tok->multi_line_start = tok->line_start; + while (end_quote_size != current_tok->f_string_quote_size) { + int c = tok_nextc(tok); + if (tok->done == E_ERROR || tok->done == E_DECODE) { + return MAKE_TOKEN(ERRORTOKEN); } -} + int in_format_spec = + (current_tok->in_format_spec && INSIDE_FSTRING_EXPR(current_tok)); -int -_PyTokenizer_Get(struct tok_state *tok, struct token *token) -{ - int result = tok_get(tok, token); - if (tok->decoding_erred) { - result = ERRORTOKEN; - tok->done = E_DECODE; + if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { + if (tok->decoding_erred) { + return MAKE_TOKEN(ERRORTOKEN); + } + + // If we are in a format spec and we found a newline, + // it means that the format spec ends here and we should + // return to the regular mode. + if (in_format_spec && c == '\n') { + tok_backup(tok, c); + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(FSTRING_MIDDLE); + } + + assert(tok->multi_line_start != NULL); + // shift the tok_state's location into + // the start of string, and report the error + // from the initial quote character + tok->cur = (char *)current_tok->f_string_start; + tok->cur++; + tok->line_start = current_tok->f_string_multi_line_start; + int start = tok->lineno; + + tokenizer_mode *the_current_tok = TOK_GET_MODE(tok); + tok->lineno = the_current_tok->f_string_line_start; + + if (current_tok->f_string_quote_size == 3) { + syntaxerror(tok, + "unterminated triple-quoted f-string literal" + " (detected at line %d)", + start); + if (c != '\n') { + tok->done = E_EOFS; + } + return MAKE_TOKEN(ERRORTOKEN); + } else { + return MAKE_TOKEN( + syntaxerror(tok, + "unterminated f-string literal (detected at" + " line %d)", + start)); + } + } + + if (c == current_tok->f_string_quote) { + end_quote_size += 1; + continue; + } else { + end_quote_size = 0; } - return result; + + if (c == '{') { + if (!update_fstring_expr(tok, c)) { + return MAKE_TOKEN(ENDMARKER); + } + int peek = tok_nextc(tok); + if (peek != '{' || in_format_spec) { + tok_backup(tok, peek); + tok_backup(tok, c); + current_tok->curly_bracket_expr_start_depth++; + if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) { + return MAKE_TOKEN( + syntaxerror(tok, "f-string: expressions nested too deeply")); + } + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; + p_start = tok->start; + p_end = tok->cur; + } else { + p_start = tok->start; + p_end = tok->cur - 1; + } + return MAKE_TOKEN(FSTRING_MIDDLE); + } else if (c == '}') { + if (unicode_escape) { + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(FSTRING_MIDDLE); + } + int peek = tok_nextc(tok); + + // The tokenizer can only be in the format spec if we have already + // completed the expression scanning (indicated by the end of the + // expression being set) and we are not at the top level of the bracket + // stack (-1 is the top level). Since format specifiers can't legally use + // double brackets, we can bypass it here. 
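For the unterminated-literal path above, a sketch of the expected behaviour when a single-quoted f-string is cut off by a newline (message text taken from this hunk):

try:
    compile('f"abc\n', "<example>", "exec")
except SyntaxError as exc:
    print(exc.msg)  # expected: unterminated f-string literal (detected at line 1)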
+ int cursor = current_tok->curly_bracket_depth; + if (peek == '}' && !in_format_spec && cursor == 0) { + p_start = tok->start; + p_end = tok->cur - 1; + } else { + tok_backup(tok, peek); + tok_backup(tok, c); + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + p_start = tok->start; + p_end = tok->cur; + } + return MAKE_TOKEN(FSTRING_MIDDLE); + } else if (c == '\\') { + int peek = tok_nextc(tok); + if (peek == '\r') { + peek = tok_nextc(tok); + } + // Special case when the backslash is right before a curly + // brace. We have to restore and return the control back + // to the loop for the next iteration. + if (peek == '{' || peek == '}') { + if (!current_tok->f_string_raw) { + if (warn_invalid_escape_sequence(tok, peek)) { + return MAKE_TOKEN(ERRORTOKEN); + } + } + tok_backup(tok, peek); + continue; + } + + if (!current_tok->f_string_raw) { + if (peek == 'N') { + /* Handle named unicode escapes (\N{BULLET}) */ + peek = tok_nextc(tok); + if (peek == '{') { + unicode_escape = 1; + } else { + tok_backup(tok, peek); + } + } + } /* else { + skip the escaped character + }*/ + } + } + + // Backup the f-string quotes to emit a final FSTRING_MIDDLE and + // add the quotes to the FSTRING_END in the next tokenizer iteration. + for (int i = 0; i < current_tok->f_string_quote_size; i++) { + tok_backup(tok, current_tok->f_string_quote); + } + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(FSTRING_MIDDLE); +} + +static int tok_get(struct tok_state *tok, struct token *token) { + tokenizer_mode *current_tok = TOK_GET_MODE(tok); + if (current_tok->kind == TOK_REGULAR_MODE) { + return tok_get_normal_mode(tok, current_tok, token); + } else { + return tok_get_fstring_mode(tok, current_tok, token); + } +} + +int _PyTokenizer_Get(struct tok_state *tok, struct token *token) { + int result = tok_get(tok, token); + if (tok->decoding_erred) { + result = ERRORTOKEN; + tok->done = E_DECODE; + } + return result; } -#if defined(__wasi__) || (defined(__EMSCRIPTEN__) && (__EMSCRIPTEN_major__ >= 3)) +#if defined(__wasi__) || \ + (defined(__EMSCRIPTEN__) && (__EMSCRIPTEN_major__ >= 3)) // fdopen() with borrowed fd. WASI does not provide dup() and Emscripten's // dup() emulation with open() is slow. typedef union { - void *cookie; - int fd; + void *cookie; + int fd; } borrowed; -static ssize_t -borrow_read(void *cookie, char *buf, size_t size) -{ - borrowed b = {.cookie = cookie}; - return read(b.fd, (void *)buf, size); +static ssize_t borrow_read(void *cookie, char *buf, size_t size) { + borrowed b = {.cookie = cookie}; + return read(b.fd, (void *)buf, size); } -static FILE * -fdopen_borrow(int fd) { - // supports only reading. seek fails. close and write are no-ops. - cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL}; - borrowed b = {.fd = fd}; - return fopencookie(b.cookie, "r", io_cb); +static FILE *fdopen_borrow(int fd) { + // supports only reading. seek fails. close and write are no-ops. + cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL}; + borrowed b = {.fd = fd}; + return fopencookie(b.cookie, "r", io_cb); } #else -static FILE * -fdopen_borrow(int fd) { - fd = _Py_dup(fd); - if (fd < 0) { - return NULL; - } - return fdopen(fd, "r"); +static FILE *fdopen_borrow(int fd) { + fd = _Py_dup(fd); + if (fd < 0) { + return NULL; + } + return fdopen(fd, "r"); } #endif @@ -3023,59 +2904,54 @@ fdopen_borrow(int fd) { The char* returned is malloc'ed via PyMem_Malloc() and thus must be freed by the caller. 
*/ -char * -_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) -{ - struct tok_state *tok; - FILE *fp; - char *encoding = NULL; - - fp = fdopen_borrow(fd); - if (fp == NULL) { - return NULL; - } - tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL); - if (tok == NULL) { - fclose(fp); - return NULL; - } - if (filename != NULL) { - tok->filename = Py_NewRef(filename); - } - else { - tok->filename = PyUnicode_FromString(""); - if (tok->filename == NULL) { - fclose(fp); - _PyTokenizer_Free(tok); - return encoding; - } - } - struct token token; - // We don't want to report warnings here because it could cause infinite recursion - // if fetching the encoding shows a warning. - tok->report_warnings = 0; - while (tok->lineno < 2 && tok->done == E_OK) { - _PyToken_Init(&token); - _PyTokenizer_Get(tok, &token); - _PyToken_Free(&token); - } +char *_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { + struct tok_state *tok; + FILE *fp; + char *encoding = NULL; + + fp = fdopen_borrow(fd); + if (fp == NULL) { + return NULL; + } + tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL); + if (tok == NULL) { fclose(fp); - if (tok->encoding) { - encoding = (char *)PyMem_Malloc(strlen(tok->encoding) + 1); - if (encoding) { - strcpy(encoding, tok->encoding); - } - } - _PyTokenizer_Free(tok); - return encoding; + return NULL; + } + if (filename != NULL) { + tok->filename = Py_NewRef(filename); + } else { + tok->filename = PyUnicode_FromString(""); + if (tok->filename == NULL) { + fclose(fp); + _PyTokenizer_Free(tok); + return encoding; + } + } + struct token token; + // We don't want to report warnings here because it could cause infinite + // recursion if fetching the encoding shows a warning. + tok->report_warnings = 0; + while (tok->lineno < 2 && tok->done == E_OK) { + _PyToken_Init(&token); + _PyTokenizer_Get(tok, &token); + _PyToken_Free(&token); + } + fclose(fp); + if (tok->encoding) { + encoding = (char *)PyMem_Malloc(strlen(tok->encoding) + 1); + if (encoding) { + strcpy(encoding, tok->encoding); + } + } + _PyTokenizer_Free(tok); + return encoding; } #ifdef Py_DEBUG -void -tok_dump(int type, char *start, char *end) -{ - fprintf(stderr, "%s", _PyParser_TokenNames[type]); - if (type == NAME || type == NUMBER || type == STRING || type == OP) - fprintf(stderr, "(%.*s)", (int)(end - start), start); +void tok_dump(int type, char *start, char *end) { + fprintf(stderr, "%s", _PyParser_TokenNames[type]); + if (type == NAME || type == NUMBER || type == STRING || type == OP) + fprintf(stderr, "(%.*s)", (int)(end - start), start); } -#endif // Py_DEBUG +#endif // Py_DEBUG diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 1e1daa3648f5d0..4eeeb1f234e07a 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -14,133 +14,136 @@ extern "C" { #define MAXLEVEL 200 /* Max parentheses level */ #define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */ -enum decoding_state { - STATE_INIT, - STATE_SEEK_CODING, - STATE_NORMAL -}; +enum decoding_state { STATE_INIT, STATE_SEEK_CODING, STATE_NORMAL }; enum interactive_underflow_t { - /* Normal mode of operation: return a new token when asked in interactive mode */ - IUNDERFLOW_NORMAL, - /* Forcefully return ENDMARKER when asked for a new token in interactive mode. 
This - * can be used to prevent the tokenizer to prompt the user for new tokens */ - IUNDERFLOW_STOP, + /* Normal mode of operation: return a new token when asked in interactive mode + */ + IUNDERFLOW_NORMAL, + /* Forcefully return ENDMARKER when asked for a new token in interactive mode. + * This can be used to prevent the tokenizer to prompt the user for new tokens + */ + IUNDERFLOW_STOP, }; struct token { - int level; - int lineno, col_offset, end_lineno, end_col_offset; - const char *start, *end; - PyObject *metadata; + int level; + int lineno, col_offset, end_lineno, end_col_offset; + const char *start, *end; + PyObject *metadata; }; enum tokenizer_mode_kind_t { - TOK_REGULAR_MODE, - TOK_FSTRING_MODE, + TOK_REGULAR_MODE, + TOK_FSTRING_MODE, }; #define MAX_EXPR_NESTING 3 typedef struct _tokenizer_mode { - enum tokenizer_mode_kind_t kind; - - int curly_bracket_depth; - int curly_bracket_expr_start_depth; - - char f_string_quote; - int f_string_quote_size; - int f_string_raw; - const char* f_string_start; - const char* f_string_multi_line_start; - int f_string_line_start; - - Py_ssize_t f_string_start_offset; - Py_ssize_t f_string_multi_line_start_offset; - - Py_ssize_t last_expr_size; - Py_ssize_t last_expr_end; - char* last_expr_buffer; - int f_string_debug; + enum tokenizer_mode_kind_t kind; + + int curly_bracket_depth; + int curly_bracket_expr_start_depth; + + char f_string_quote; + int f_string_quote_size; + int f_string_raw; + const char *f_string_start; + const char *f_string_multi_line_start; + int f_string_line_start; + + Py_ssize_t f_string_start_offset; + Py_ssize_t f_string_multi_line_start_offset; + + Py_ssize_t last_expr_size; + Py_ssize_t last_expr_end; + char *last_expr_buffer; + int f_string_debug; + int in_format_spec; } tokenizer_mode; /* Tokenizer state */ struct tok_state { - /* Input state; buf <= cur <= inp <= end */ - /* NB an entire line is held in the buffer */ - char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL or readline != NULL */ - char *cur; /* Next character in buffer */ - char *inp; /* End of data in buffer */ - int fp_interactive; /* If the file descriptor is interactive */ - char *interactive_src_start; /* The start of the source parsed so far in interactive mode */ - char *interactive_src_end; /* The end of the source parsed so far in interactive mode */ - const char *end; /* End of input buffer if buf != NULL */ - const char *start; /* Start of current token if not NULL */ - int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ - /* NB If done != E_OK, cur must be == inp!!! */ - FILE *fp; /* Rest of input; NULL if tokenizing a string */ - int tabsize; /* Tab spacing */ - int indent; /* Current indentation index */ - int indstack[MAXINDENT]; /* Stack of indents */ - int atbol; /* Nonzero if at begin of new line */ - int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ - const char *prompt, *nextprompt; /* For interactive prompting */ - int lineno; /* Current line number */ - int first_lineno; /* First line of a single line or multi line string - expression (cf. 
issue 16806) */ - int starting_col_offset; /* The column offset at the beginning of a token */ - int col_offset; /* Current col offset */ - int level; /* () [] {} Parentheses nesting level */ - /* Used to allow free continuations inside them */ - char parenstack[MAXLEVEL]; - int parenlinenostack[MAXLEVEL]; - int parencolstack[MAXLEVEL]; - PyObject *filename; - /* Stuff for checking on different tab sizes */ - int altindstack[MAXINDENT]; /* Stack of alternate indents */ - /* Stuff for PEP 0263 */ - enum decoding_state decoding_state; - int decoding_erred; /* whether erred in decoding */ - char *encoding; /* Source encoding. */ - int cont_line; /* whether we are in a continuation line. */ - const char* line_start; /* pointer to start of current line */ - const char* multi_line_start; /* pointer to start of first line of - a single line or multi line string - expression (cf. issue 16806) */ - PyObject *decoding_readline; /* open(...).readline */ - PyObject *decoding_buffer; - PyObject *readline; /* readline() function */ - const char* enc; /* Encoding for the current str. */ - char* str; /* Source string being tokenized (if tokenizing from a string)*/ - char* input; /* Tokenizer's newline translated copy of the string. */ - - int type_comments; /* Whether to look for type comments */ - - /* async/await related fields (still needed depending on feature_version) */ - int async_hacks; /* =1 if async/await aren't always keywords */ - int async_def; /* =1 if tokens are inside an 'async def' body. */ - int async_def_indent; /* Indentation level of the outermost 'async def'. */ - int async_def_nl; /* =1 if the outermost 'async def' had at least one - NEWLINE token after it. */ - /* How to proceed when asked for a new token in interactive mode */ - enum interactive_underflow_t interactive_underflow; - int report_warnings; - // TODO: Factor this into its own thing - tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL]; - int tok_mode_stack_index; - int tok_extra_tokens; - int comment_newline; - int implicit_newline; + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL or readline != + NULL */ + char *cur; /* Next character in buffer */ + char *inp; /* End of data in buffer */ + int fp_interactive; /* If the file descriptor is interactive */ + char *interactive_src_start; /* The start of the source parsed so far in + interactive mode */ + char *interactive_src_end; /* The end of the source parsed so far in + interactive mode */ + const char *end; /* End of input buffer if buf != NULL */ + const char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!! */ + FILE *fp; /* Rest of input; NULL if tokenizing a string */ + int tabsize; /* Tab spacing */ + int indent; /* Current indentation index */ + int indstack[MAXINDENT]; /* Stack of indents */ + int atbol; /* Nonzero if at begin of new line */ + int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ + const char *prompt, *nextprompt; /* For interactive prompting */ + int lineno; /* Current line number */ + int first_lineno; /* First line of a single line or multi line string + expression (cf. 
issue 16806) */ + int starting_col_offset; /* The column offset at the beginning of a token */ + int col_offset; /* Current col offset */ + int level; /* () [] {} Parentheses nesting level */ + /* Used to allow free continuations inside them */ + char parenstack[MAXLEVEL]; + int parenlinenostack[MAXLEVEL]; + int parencolstack[MAXLEVEL]; + PyObject *filename; + /* Stuff for checking on different tab sizes */ + int altindstack[MAXINDENT]; /* Stack of alternate indents */ + /* Stuff for PEP 0263 */ + enum decoding_state decoding_state; + int decoding_erred; /* whether erred in decoding */ + char *encoding; /* Source encoding. */ + int cont_line; /* whether we are in a continuation line. */ + const char *line_start; /* pointer to start of current line */ + const char *multi_line_start; /* pointer to start of first line of + a single line or multi line string + expression (cf. issue 16806) */ + PyObject *decoding_readline; /* open(...).readline */ + PyObject *decoding_buffer; + PyObject *readline; /* readline() function */ + const char *enc; /* Encoding for the current str. */ + char *str; /* Source string being tokenized (if tokenizing from a string)*/ + char *input; /* Tokenizer's newline translated copy of the string. */ + + int type_comments; /* Whether to look for type comments */ + + /* async/await related fields (still needed depending on feature_version) */ + int async_hacks; /* =1 if async/await aren't always keywords */ + int async_def; /* =1 if tokens are inside an 'async def' body. */ + int async_def_indent; /* Indentation level of the outermost 'async def'. */ + int async_def_nl; /* =1 if the outermost 'async def' had at least one + NEWLINE token after it. */ + /* How to proceed when asked for a new token in interactive mode */ + enum interactive_underflow_t interactive_underflow; + int report_warnings; + // TODO: Factor this into its own thing + tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL]; + int tok_mode_stack_index; + int tok_extra_tokens; + int comment_newline; + int implicit_newline; #ifdef Py_DEBUG - int debug; + int debug; #endif }; extern struct tok_state *_PyTokenizer_FromString(const char *, int, int); extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int, int); -extern struct tok_state *_PyTokenizer_FromReadline(PyObject*, const char*, int, int); -extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*, - const char *, const char *); +extern struct tok_state *_PyTokenizer_FromReadline(PyObject *, const char *, + int, int); +extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char *, + const char *, const char *); extern void _PyTokenizer_Free(struct tok_state *); extern void _PyToken_Free(struct token *); extern void _PyToken_Init(struct token *);
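
For context, the f-string construct this change deals with (a debug expression '=' combined with a format specifier) can also be observed from pure Python: on 3.12+ the tokenize module is built on the same C tokenizer and surfaces the FSTRING_START / FSTRING_MIDDLE / FSTRING_END token kinds handled above. A minimal sketch, assuming a CPython 3.12+ interpreter; the sample source string and variable name are illustrative only and not taken from the patch:

    import io
    import tokenize

    # An f-string that combines a debug expression ('=') with a format spec,
    # the kind of input this patch is concerned with.
    src = 'f"{value=:.2f}"\n'

    # Print each token's kind and text; the FSTRING_* tokens produced by the
    # C tokenizer show up here alongside the ordinary expression tokens.
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.exact_type], repr(tok.string))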