diff --git a/.gitattributes b/.gitattributes index 2a48df079e1aeb..07d877027b09f6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated +Python/tier2_redundancy_eliminator_bytecodes.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 0f50439b73848e..db9aaedec950e4 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -120,6 +120,9 @@ typedef struct _optimization_stats { uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE]; uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t optimizer_attempts; + uint64_t optimizer_successes; + uint64_t optimizer_failure_reason_no_memory; } OptimizationStats; typedef struct _rare_event_stats { diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 75d7f44025328e..6b60a6fbffdc5e 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, 0 }, + [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1156,10 +1156,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [POP_BLOCK] = { true, -1, 0 }, - [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, + [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, + [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, }; #endif diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e21412fc815540..eee71c700d4904 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,6 +8,13 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_uop_ids.h" + +// This is the length of the trace we project initially. +#define UOP_MAX_TRACE_LENGTH 512 + +#define TRACE_STACK_SIZE 5 + int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2b5b37e6b8d6a4..30dc5a881574e7 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = 0, + [_NOP] = HAS_PURE_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, @@ -202,10 +202,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_LOAD_CONST_INLINE] = 0, - [_LOAD_CONST_INLINE_BORROW] = 0, - [_LOAD_CONST_INLINE_WITH_NULL] = 0, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0, + [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_CHECK_GLOBALS] = HAS_DEOPT_FLAG, [_CHECK_BUILTINS] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e6b1b554c9af10..b64aed10d2d653 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3,6 +3,7 @@ import sys import textwrap import unittest +import gc import _testinternalcapi @@ -556,6 +557,214 @@ def testfunc(n): # too much already. self.assertEqual(count, 1) +class TestUopsOptimization(unittest.TestCase): + + def test_int_type_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = x + 1 + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 63) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_type_propagation_through_frame(self): + def double(x): + return x + x + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = double(x) + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 124) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_type_propagation_from_frame(self): + def double(x): + return x + x + def testfunc(loops): + num = 0 + while num < loops: + a = double(num) + x = a + a + num += 1 + return x + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 124) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 3) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_CHECK_PEP_523", uops) + + def test_int_type_propagate_through_range(self): + def testfunc(n): + + for i in range(n): + x = i + i + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 19 * 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_GUARD_BOTH_INT", uops) + + def test_int_value_numbering(self): + def testfunc(n): + + y = 1 + for i in range(n): + x = y + z = x + a = z + b = a + res = x + z + a + b + return res + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_GUARD_BOTH_INT", uops) + guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertEqual(len(guard_count), 1) + + def test_comprehension(self): + def testfunc(n): + for _ in range(n): + return [i for i in range(n)] + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_call_py_exact_args_disappearing(self): + def dummy(x): + return x+1 + + def testfunc(n): + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + # Trigger specialization + testfunc(8) + with temporary_optimizer(opt): + del dummy + gc.collect() + + def dummy(x): + return x + 2 + testfunc(10) + + ex = get_first_executor(testfunc) + # Honestly as long as it doesn't crash it's fine. + # Whether we get an executor or not is non-deterministic, + # because it's decided by when the function is freed. + # This test is a little implementation specific. + + def test_promote_globals_to_constants(self): + def testfunc(n): + for i in range(n): + x = range(i) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index ca1228ee7008a9..a7ad6c7320b4ee 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -33,6 +33,7 @@ def skip_if_different_mount_drives(): import parser from stack import Stack import tier1_generator + import tier2_abstract_generator def handle_stderr(): @@ -793,5 +794,157 @@ def test_annotated_op(self): self.run_cases_test(input, output) +class TestGeneratedAbstractCases(unittest.TestCase): + def setUp(self) -> None: + super().setUp() + self.maxDiff = None + + self.temp_dir = tempfile.gettempdir() + self.temp_input_filename = os.path.join(self.temp_dir, "input.txt") + self.temp_input2_filename = os.path.join(self.temp_dir, "input2.txt") + self.temp_output_filename = os.path.join(self.temp_dir, "output.txt") + + def tearDown(self) -> None: + for filename in [ + self.temp_input_filename, + self.temp_input2_filename, + self.temp_output_filename, + ]: + try: + os.remove(filename) + except: + pass + super().tearDown() + + def run_cases_test(self, input: str, input2: str, expected: str): + with open(self.temp_input_filename, "w+") as temp_input: + temp_input.write(parser.BEGIN_MARKER) + temp_input.write(input) + temp_input.write(parser.END_MARKER) + temp_input.flush() + + with open(self.temp_input2_filename, "w+") as temp_input: + temp_input.write(parser.BEGIN_MARKER) + temp_input.write(input2) + temp_input.write(parser.END_MARKER) + temp_input.flush() + + with handle_stderr(): + tier2_abstract_generator.generate_tier2_abstract_from_files( + [self.temp_input_filename, self.temp_input2_filename], + self.temp_output_filename + ) + + with open(self.temp_output_filename) as temp_output: + lines = temp_output.readlines() + while lines and lines[0].startswith(("// ", "#", " #", "\n")): + lines.pop(0) + while lines and lines[-1].startswith(("#", "\n")): + lines.pop(-1) + actual = "".join(lines) + self.assertEqual(actual.strip(), expected.strip()) + + def test_overridden_abstract(self): + input = """ + pure op(OP, (--)) { + spam(); + } + """ + input2 = """ + pure op(OP, (--)) { + eggs(); + } + """ + output = """ + case OP: { + eggs(); + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_overridden_abstract_args(self): + input = """ + pure op(OP, (arg1 -- out)) { + spam(); + } + op(OP2, (arg1 -- out)) { + eggs(); + } + """ + input2 = """ + op(OP, (arg1 -- out)) { + eggs(); + } + """ + output = """ + case OP: { + _Py_UOpsSymType *arg1; + _Py_UOpsSymType *out; + arg1 = stack_pointer[-1]; + eggs(); + stack_pointer[-1] = out; + break; + } + + case OP2: { + _Py_UOpsSymType *out; + out = sym_new_unknown(ctx); + if (out == NULL) goto out_of_space; + stack_pointer[-1] = out; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_no_overridden_case(self): + input = """ + pure op(OP, (arg1 -- out)) { + spam(); + } + + pure op(OP2, (arg1 -- out)) { + } + + """ + input2 = """ + pure op(OP2, (arg1 -- out)) { + } + """ + output = """ + case OP: { + _Py_UOpsSymType *out; + out = sym_new_unknown(ctx); + if (out == NULL) goto out_of_space; + stack_pointer[-1] = out; + break; + } + + case OP2: { + _Py_UOpsSymType *arg1; + _Py_UOpsSymType *out; + arg1 = stack_pointer[-1]; + stack_pointer[-1] = out; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_missing_override_failure(self): + input = """ + pure op(OP, (arg1 -- out)) { + spam(); + } + """ + input2 = """ + pure op(OTHER, (arg1 -- out)) { + } + """ + output = """ + """ + with self.assertRaisesRegex(AssertionError, "All abstract uops"): + self.run_cases_test(input, input2, output) + + if __name__ == "__main__": unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index cf182980c120ee..d3b18acad61ce5 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1863,6 +1863,10 @@ regen-cases: -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \ + -o $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new \ + $(srcdir)/Python/tier2_redundancy_eliminator_bytecodes.c \ + $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \ -o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/uop_metadata_generator.py -o \ @@ -1874,6 +1878,7 @@ regen-cases: $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h @@ -1895,7 +1900,8 @@ Python/optimizer.o: \ Python/optimizer_analysis.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ - $(srcdir)/Include/internal/pycore_optimizer.h + $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst new file mode 100644 index 00000000000000..beb82dbcd3cccd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst @@ -0,0 +1 @@ +Implement the foundations of the Tier 2 redundancy eliminator. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 197dff4b9888ce..f7c7e3669b7e6f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -133,7 +133,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - inst(NOP, (--)) { + pure inst(NOP, (--)) { } family(RESUME, 0) = { @@ -411,12 +411,12 @@ dummy_func( // BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode. }; - op(_GUARD_BOTH_INT, (left, right -- left: &PYLONG_TYPE, right: &PYLONG_TYPE)) { + op(_GUARD_BOTH_INT, (left, right -- left, right)) { DEOPT_IF(!PyLong_CheckExact(left)); DEOPT_IF(!PyLong_CheckExact(right)); } - pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res: &PYLONG_TYPE)) { + pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -424,7 +424,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - pure op(_BINARY_OP_ADD_INT, (left, right -- res: &PYLONG_TYPE)) { + pure op(_BINARY_OP_ADD_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -432,7 +432,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res: &PYLONG_TYPE)) { + pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -447,12 +447,12 @@ dummy_func( macro(BINARY_OP_SUBTRACT_INT) = _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT; - op(_GUARD_BOTH_FLOAT, (left, right -- left: &PYFLOAT_TYPE, right: &PYFLOAT_TYPE)) { + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { DEOPT_IF(!PyFloat_CheckExact(left)); DEOPT_IF(!PyFloat_CheckExact(right)); } - pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { + pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval * @@ -460,7 +460,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); } - pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { + pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval + @@ -468,7 +468,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); } - pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { + pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval - @@ -483,12 +483,12 @@ dummy_func( macro(BINARY_OP_SUBTRACT_FLOAT) = _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT; - op(_GUARD_BOTH_UNICODE, (left, right -- left: &PYUNICODE_TYPE, right: &PYUNICODE_TYPE)) { + op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) { DEOPT_IF(!PyUnicode_CheckExact(left)); DEOPT_IF(!PyUnicode_CheckExact(right)); } - pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res: &PYUNICODE_TYPE)) { + pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = PyUnicode_Concat(left, right); _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); @@ -1877,7 +1877,7 @@ dummy_func( something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ DECREF_INPUTS(); ERROR_IF(attr == NULL, error); @@ -1901,7 +1901,7 @@ dummy_func( LOAD_ATTR, }; - op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner: &(GUARD_TYPE_VERSION_TYPE + type_version))) { + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version); @@ -2082,7 +2082,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } - op(_GUARD_DORV_VALUES, (owner -- owner: &GUARD_DORV_VALUES_TYPE)) { + op(_GUARD_DORV_VALUES, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(dorv)); @@ -2711,7 +2711,7 @@ dummy_func( DEOPT_IF(r->len <= 0); } - op(_ITER_NEXT_RANGE, (iter -- iter, next: &PYLONG_TYPE)) { + op(_ITER_NEXT_RANGE, (iter -- iter, next)) { _PyRangeIterObject *r = (_PyRangeIterObject *)iter; assert(Py_TYPE(r) == &PyRangeIter_Type); assert(r->len > 0); @@ -2869,13 +2869,13 @@ dummy_func( exc_info->exc_value = Py_NewRef(new_exc); } - op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) { + op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)); } - op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner: &(GUARD_KEYS_VERSION_TYPE + keys_version))) { + op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner)) { PyTypeObject *owner_cls = Py_TYPE(owner); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version); @@ -3090,7 +3090,7 @@ dummy_func( macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL; - op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable: &PYMETHOD_TYPE, null: &NULL_TYPE, unused[oparg])) { + op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) { DEOPT_IF(null != NULL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type); } @@ -3108,7 +3108,7 @@ dummy_func( DEOPT_IF(tstate->interp->eval_frame); } - op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable: &(PYFUNCTION_TYPE_VERSION_TYPE + func_version), self_or_null, unused[oparg])) { + op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { DEOPT_IF(!PyFunction_Check(callable)); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version); @@ -4059,23 +4059,23 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } - op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { TIER_TWO_ONLY value = Py_NewRef(ptr); } - op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { TIER_TWO_ONLY value = ptr; } - op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = Py_NewRef(ptr); null = NULL; } - op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = ptr; null = NULL; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2d914b82dbf88f..7d48d6a05a17b0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1598,7 +1598,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error_tier_two; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e5244147d499af..afb6650e5920fb 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3420,7 +3420,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error; diff --git a/Python/optimizer.c b/Python/optimizer.c index ad9ac382d300ef..f31f83113d3f25 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -17,8 +17,6 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA -#define UOP_MAX_TRACE_LENGTH 512 - #define MAX_EXECUTORS_SIZE 256 @@ -308,8 +306,6 @@ BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, }; -#define TRACE_STACK_SIZE 5 - #define CONFIDENCE_RANGE 1000 #define CONFIDENCE_CUTOFF 333 @@ -323,10 +319,11 @@ BRANCH_TO_GUARD[4][2] = { #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ DPRINTF(2, \ - " ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \ + " ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \ _PyUOpName(OPCODE), \ (OPARG), \ - (uint64_t)(OPERAND)); \ + (uint64_t)(OPERAND), \ + TARGET); \ assert(trace_length < max_length); \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].oparg = (OPARG); \ @@ -825,11 +822,13 @@ uop_optimize( char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); if (uop_optimize == NULL || *uop_optimize > '0') { err = _Py_uop_analyze_and_optimize(frame, buffer, - UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies); + UOP_MAX_TRACE_LENGTH, + curr_stackentries, &dependencies); if (err <= 0) { return err; } } + assert(err == 1); _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b14e6950b4a06b..e02ca4d6acf6c1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1,3 +1,14 @@ +/* + * This file contains the support code for CPython's uops redundancy eliminator. + * It also performs some simple optimizations. + * It performs a traditional data-flow analysis[1] over the trace of uops. + * Using the information gained, it chooses to emit, or skip certain instructions + * if possible. + * + * [1] For information on data-flow analysis, please see + * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html + * + * */ #include "Python.h" #include "opcode.h" #include "pycore_dict.h" @@ -9,10 +20,355 @@ #include "pycore_dict.h" #include "pycore_long.h" #include "cpython/optimizer.h" +#include "pycore_optimizer.h" +#include "pycore_object.h" +#include "pycore_dict.h" +#include "pycore_function.h" +#include "pycore_uop_metadata.h" +#include "pycore_uop_ids.h" +#include "pycore_range.h" + +#include #include #include #include -#include "pycore_optimizer.h" + +// Holds locals, stack, locals, stack ... co_consts (in that order) +#define MAX_ABSTRACT_INTERP_SIZE 4096 + +#define OVERALLOCATE_FACTOR 5 + +#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * OVERALLOCATE_FACTOR) + +// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) +#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) + +#ifdef Py_DEBUG + static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; + static inline int get_lltrace(void) { + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + return lltrace; + } + #define DPRINTF(level, ...) \ + if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } +#else + #define DPRINTF(level, ...) +#endif + + +// Flags for below. +#define KNOWN 1 << 0 +#define TRUE_CONST 1 << 1 +#define IS_NULL 1 << 2 +#define NOT_NULL 1 << 3 + +typedef struct { + int flags; + PyTypeObject *typ; + // constant propagated value (might be NULL) + PyObject *const_val; +} _Py_UOpsSymType; + + +typedef struct _Py_UOpsAbstractFrame { + // Max stacklen + int stack_len; + int locals_len; + + _Py_UOpsSymType **stack_pointer; + _Py_UOpsSymType **stack; + _Py_UOpsSymType **locals; +} _Py_UOpsAbstractFrame; + + +typedef struct ty_arena { + int ty_curr_number; + int ty_max_number; + _Py_UOpsSymType arena[TY_ARENA_SIZE]; +} ty_arena; + +// Tier 2 types meta interpreter +typedef struct _Py_UOpsAbstractInterpContext { + PyObject_HEAD + // The current "executing" frame. + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; + int curr_frame_depth; + + // Arena for the symbolic types. + ty_arena t_arena; + + _Py_UOpsSymType **n_consumed; + _Py_UOpsSymType **limit; + _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; +} _Py_UOpsAbstractInterpContext; + +static inline _Py_UOpsSymType* sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx); + +// 0 on success, -1 on error. +static _Py_UOpsAbstractFrame * +ctx_frame_new( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + _Py_UOpsSymType **localsplus_start, + int n_locals_already_filled, + int curr_stackentries +) +{ + assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); + _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; + + frame->stack_len = co->co_stacksize; + frame->locals_len = co->co_nlocalsplus; + + frame->locals = localsplus_start; + frame->stack = frame->locals + co->co_nlocalsplus; + frame->stack_pointer = frame->stack + curr_stackentries; + ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); + if (ctx->n_consumed >= ctx->limit) { + return NULL; + } + + + // Initialize with the initial state of all local variables + for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { + _Py_UOpsSymType *local = sym_new_unknown(ctx); + if (local == NULL) { + return NULL; + } + frame->locals[i] = local; + } + + + // Initialize the stack as well + for (int i = 0; i < curr_stackentries; i++) { + _Py_UOpsSymType *stackvar = sym_new_unknown(ctx); + if (stackvar == NULL) { + return NULL; + } + frame->stack[i] = stackvar; + } + + return frame; +} + +static void +abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx) +{ + if (ctx == NULL) { + return; + } + ctx->curr_frame_depth = 0; + int tys = ctx->t_arena.ty_curr_number; + for (int i = 0; i < tys; i++) { + Py_CLEAR(ctx->t_arena.arena[i].const_val); + } +} + +static int +abstractcontext_init( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + int curr_stacklen, + int ir_entries +) +{ + ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; + ctx->n_consumed = ctx->locals_and_stack; +#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. + for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { + ctx->locals_and_stack[i] = NULL; + } +#endif + + // Setup the arena for sym expressions. + ctx->t_arena.ty_curr_number = 0; + ctx->t_arena.ty_max_number = TY_ARENA_SIZE; + + // Frame setup + ctx->curr_frame_depth = 0; + _Py_UOpsAbstractFrame *frame = ctx_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen); + if (frame == NULL) { + return -1; + } + ctx->curr_frame_depth++; + ctx->frame = frame; + return 0; +} + + +static int +ctx_frame_pop( + _Py_UOpsAbstractInterpContext *ctx +) +{ + _Py_UOpsAbstractFrame *frame = ctx->frame; + + ctx->n_consumed = frame->locals; + ctx->curr_frame_depth--; + assert(ctx->curr_frame_depth >= 1); + ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; + + return 0; +} + + +// Takes a borrowed reference to const_val, turns that into a strong reference. +static _Py_UOpsSymType* +sym_new(_Py_UOpsAbstractInterpContext *ctx, + PyObject *const_val) +{ + _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; + if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) { + OPT_STAT_INC(optimizer_failure_reason_no_memory); + DPRINTF(1, "out of space for symbolic expression type\n"); + return NULL; + } + ctx->t_arena.ty_curr_number++; + self->const_val = NULL; + self->typ = NULL; + self->flags = 0; + + if (const_val != NULL) { + self->const_val = Py_NewRef(const_val); + } + + return self; +} + +static inline void +sym_set_flag(_Py_UOpsSymType *sym, int flag) +{ + sym->flags |= flag; +} + +static inline void +sym_clear_flag(_Py_UOpsSymType *sym, int flag) +{ + sym->flags &= (~flag); +} + +static inline bool +sym_has_flag(_Py_UOpsSymType *sym, int flag) +{ + return (sym->flags & flag) != 0; +} + +static inline bool +sym_is_known(_Py_UOpsSymType *sym) +{ + return sym_has_flag(sym, KNOWN); +} + +static inline bool +sym_is_not_null(_Py_UOpsSymType *sym) +{ + return (sym->flags & (IS_NULL | NOT_NULL)) == NOT_NULL; +} + +static inline bool +sym_is_null(_Py_UOpsSymType *sym) +{ + return (sym->flags & (IS_NULL | NOT_NULL)) == IS_NULL; +} + +static inline void +sym_set_type(_Py_UOpsSymType *sym, PyTypeObject *tp) +{ + assert(PyType_Check(tp)); + sym->typ = tp; + sym_set_flag(sym, KNOWN); + sym_set_flag(sym, NOT_NULL); +} + +static inline void +sym_set_null(_Py_UOpsSymType *sym) +{ + sym_set_flag(sym, IS_NULL); + sym_set_flag(sym, KNOWN); +} + + +static inline _Py_UOpsSymType* +sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx) +{ + return sym_new(ctx,NULL); +} + +static inline _Py_UOpsSymType* +sym_new_known_notnull(_Py_UOpsAbstractInterpContext *ctx) +{ + _Py_UOpsSymType *res = sym_new_unknown(ctx); + if (res == NULL) { + return NULL; + } + sym_set_flag(res, NOT_NULL); + return res; +} + +static inline _Py_UOpsSymType* +sym_new_known_type(_Py_UOpsAbstractInterpContext *ctx, + PyTypeObject *typ) +{ + _Py_UOpsSymType *res = sym_new(ctx,NULL); + if (res == NULL) { + return NULL; + } + sym_set_type(res, typ); + return res; +} + +// Takes a borrowed reference to const_val. +static inline _Py_UOpsSymType* +sym_new_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + _Py_UOpsSymType *temp = sym_new( + ctx, + const_val + ); + if (temp == NULL) { + return NULL; + } + sym_set_type(temp, Py_TYPE(const_val)); + sym_set_flag(temp, TRUE_CONST); + sym_set_flag(temp, KNOWN); + sym_set_flag(temp, NOT_NULL); + return temp; +} + +static _Py_UOpsSymType* +sym_new_null(_Py_UOpsAbstractInterpContext *ctx) +{ + _Py_UOpsSymType *null_sym = sym_new_unknown(ctx); + if (null_sym == NULL) { + return NULL; + } + sym_set_null(null_sym); + return null_sym; +} + + +static inline bool +sym_matches_type(_Py_UOpsSymType *sym, PyTypeObject *typ) +{ + assert(typ == NULL || PyType_Check(typ)); + if (!sym_has_flag(sym, KNOWN)) { + return false; + } + return sym->typ == typ; +} + + +static inline bool +op_is_end(uint32_t opcode) +{ + return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; +} static int get_mutations(PyObject* dict) { @@ -199,14 +555,138 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins = func->func_builtins; break; } - case _JUMP_TO_TOP: - case _EXIT_TRACE: - return 1; + default: + if (op_is_end(opcode)) { + return 1; + } + break; + } + } + return 0; +} + + + +#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) + +#define GETLOCAL(idx) ((ctx->frame->locals[idx])) + +#define REPLACE_OP(INST, OP, ARG, OPERAND) \ + INST->opcode = OP; \ + INST->oparg = ARG; \ + INST->operand = OPERAND; + +#define _LOAD_ATTR_NOT_NULL \ + do { \ + attr = sym_new_known_notnull(ctx); \ + if (attr == NULL) { \ + goto error; \ + } \ + null = sym_new_null(ctx); \ + if (null == NULL) { \ + goto error; \ + } \ + } while (0); + + +/* 1 for success, 0 for not ready, cannot error at the moment. */ +static int +uop_redundancy_eliminator( + PyCodeObject *co, + _PyUOpInstruction *trace, + int trace_len, + int curr_stacklen +) +{ + + _Py_UOpsAbstractInterpContext context; + _Py_UOpsAbstractInterpContext *ctx = &context; + + if (abstractcontext_init( + ctx, + co, curr_stacklen, + trace_len) < 0) { + goto out_of_space; + } + + for (_PyUOpInstruction *this_instr = trace; + this_instr < trace + trace_len && !op_is_end(this_instr->opcode); + this_instr++) { + + int oparg = this_instr->oparg; + uint32_t opcode = this_instr->opcode; + + _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer; + + DPRINTF(3, "Abstract interpreting %s:%d ", + _PyOpcode_uop_name[opcode], + oparg); + switch (opcode) { +#include "tier2_redundancy_eliminator_cases.c.h" + + default: + DPRINTF(1, "Unknown opcode in abstract interpreter\n"); + Py_UNREACHABLE(); } + assert(ctx->frame != NULL); + DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); + ctx->frame->stack_pointer = stack_pointer; + assert(STACK_LEVEL() >= 0); } + + abstractcontext_fini(ctx); + return 1; + +out_of_space: + DPRINTF(1, "Out of space in abstract interpreter\n"); + abstractcontext_fini(ctx); + return 0; + +error: + DPRINTF(1, "Encountered error in abstract interpreter\n"); + abstractcontext_fini(ctx); return 0; } + +static void +remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) +{ + int last_set_ip = -1; + bool maybe_invalid = false; + for (int pc = 0; pc < buffer_size; pc++) { + int opcode = buffer[pc].opcode; + if (opcode == _SET_IP) { + buffer[pc].opcode = NOP; + last_set_ip = pc; + } + else if (opcode == _CHECK_VALIDITY) { + if (maybe_invalid) { + maybe_invalid = false; + } + else { + buffer[pc].opcode = NOP; + } + } + else if (op_is_end(opcode)) { + break; + } + else { + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { + maybe_invalid = true; + if (last_set_ip >= 0) { + buffer[last_set_ip].opcode = _SET_IP; + } + } + if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { + if (last_set_ip >= 0) { + buffer[last_set_ip].opcode = _SET_IP; + } + } + } + } +} + static void peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) { @@ -250,44 +730,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s } } -static void -remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) -{ - int last_set_ip = -1; - bool maybe_invalid = false; - for (int pc = 0; pc < buffer_size; pc++) { - int opcode = buffer[pc].opcode; - if (opcode == _SET_IP) { - buffer[pc].opcode = NOP; - last_set_ip = pc; - } - else if (opcode == _CHECK_VALIDITY) { - if (maybe_invalid) { - maybe_invalid = false; - } - else { - buffer[pc].opcode = NOP; - } - } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { - break; - } - else { - if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { - maybe_invalid = true; - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; - } - } - if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; - } - } - } - } -} - +// 0 - failure, no error raised, just fall back to Tier 1 +// -1 - failure, and raise error +// 1 - optimizer success int _Py_uop_analyze_and_optimize( _PyInterpreterFrame *frame, @@ -297,11 +742,33 @@ _Py_uop_analyze_and_optimize( _PyBloomFilter *dependencies ) { + OPT_STAT_INC(optimizer_attempts); + int err = remove_globals(frame, buffer, buffer_size, dependencies); - if (err <= 0) { - return err; + if (err == 0) { + goto not_ready; + } + if (err < 0) { + goto error; } + peephole_opt(frame, buffer, buffer_size); + + err = uop_redundancy_eliminator( + (PyCodeObject *)frame->f_executable, buffer, + buffer_size, curr_stacklen); + + if (err == 0) { + goto not_ready; + } + assert(err == 1); + remove_unneeded_uops(buffer, buffer_size); + + OPT_STAT_INC(optimizer_successes); return 1; +not_ready: + return 0; +error: + return -1; } diff --git a/Python/specialize.c b/Python/specialize.c index ea2638570f22d0..2256d79b387c56 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -240,6 +240,11 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) print_histogram(out, "Trace run length", stats->trace_run_length_hist); print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist); + fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts); + fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes); + fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", + stats->optimizer_failure_reason_no_memory); + const char* const* names; for (int i = 0; i < 512; i++) { if (i < 256) { diff --git a/Python/tier2_redundancy_eliminator_bytecodes.c b/Python/tier2_redundancy_eliminator_bytecodes.c new file mode 100644 index 00000000000000..3272b187f20d0e --- /dev/null +++ b/Python/tier2_redundancy_eliminator_bytecodes.c @@ -0,0 +1,272 @@ +#include "Python.h" +#include "pycore_uops.h" +#include "pycore_uop_ids.h" + +#define op(name, ...) /* NAME is ignored */ + +typedef struct _Py_UOpsSymType _Py_UOpsSymType; +typedef struct _Py_UOpsAbstractInterpContext _Py_UOpsAbstractInterpContext; +typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; + +static int +dummy_func(void) { + + PyCodeObject *code; + int oparg; + _Py_UOpsSymType *flag; + _Py_UOpsSymType *left; + _Py_UOpsSymType *right; + _Py_UOpsSymType *value; + _Py_UOpsSymType *res; + _Py_UOpsSymType *iter; + _Py_UOpsSymType *top; + _Py_UOpsSymType *bottom; + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractInterpContext *ctx; + _PyUOpInstruction *this_instr; + _PyBloomFilter *dependencies; + int modified; + +// BEGIN BYTECODES // + + op(_LOAD_FAST_CHECK, (-- value)) { + value = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_null(value)) { + goto out_of_space; + } + } + + op(_LOAD_FAST, (-- value)) { + value = GETLOCAL(oparg); + } + + op(_LOAD_FAST_AND_CLEAR, (-- value)) { + value = GETLOCAL(oparg); + _Py_UOpsSymType *temp = sym_new_null(ctx); + if (temp == NULL) { + goto out_of_space; + } + GETLOCAL(oparg) = temp; + } + + op(_STORE_FAST, (value --)) { + GETLOCAL(oparg) = value; + } + + op(_PUSH_NULL, (-- res)) { + res = sym_new_null(ctx); + if (res == NULL) { + goto out_of_space; + }; + } + + op(_GUARD_BOTH_INT, (left, right -- left, right)) { + if (sym_matches_type(left, &PyLong_Type) && + sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(left, &PyLong_Type); + sym_set_type(right, &PyLong_Type); + } + + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { + if (sym_matches_type(left, &PyFloat_Type) && + sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + sym_set_type(left, &PyFloat_Type); + sym_set_type(right, &PyFloat_Type); + } + + + op(_BINARY_OP_ADD_INT, (left, right -- res)) { + // TODO constant propagation + (void)left; + (void)right; + res = sym_new_known_type(ctx, &PyLong_Type); + if (res == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST, (-- value)) { + // There should be no LOAD_CONST. It should be all + // replaced by peephole_opt. + Py_UNREACHABLE(); + } + + op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + } + + + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + assert(oparg > 0); + top = bottom; + } + + op(_SWAP, (bottom, unused[oparg-2], top -- + top, unused[oparg-2], bottom)) { + } + + op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + } + + op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + } + + op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)hint; + (void)owner; + } + + op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + } + + op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)descr; + (void)owner; + } + + op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + sym_set_type(callable, &PyFunction_Type); + (void)self_or_null; + (void)func_version; + } + + op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) { + sym_set_null(null); + sym_set_type(callable, &PyMethod_Type); + } + + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + int argcount = oparg; + + (void)callable; + + PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + if (func == NULL) { + goto error; + } + PyCodeObject *co = (PyCodeObject *)func->func_code; + + assert(self_or_null != NULL); + assert(args != NULL); + if (sym_is_not_null(self_or_null)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM + args--; + argcount++; + } + + _Py_UOpsSymType **localsplus_start = ctx->n_consumed; + int n_locals_already_filled = 0; + // Can determine statically, so we interleave the new locals + // and make the current stack the new locals. + // This also sets up for true call inlining. + if (sym_is_known(self_or_null)) { + localsplus_start = args; + n_locals_already_filled = argcount; + } + new_frame = ctx_frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); + if (new_frame == NULL){ + goto out_of_space; + } + } + + op(_POP_FRAME, (retval -- res)) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + ctx_frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = retval; + } + + op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + ctx->frame = new_frame; + ctx->curr_frame_depth++; + stack_pointer = new_frame->stack_pointer; + } + + op(_UNPACK_SEQUENCE, (seq -- values[oparg])) { + /* This has to be done manually */ + (void)seq; + for (int i = 0; i < oparg; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + } + + op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) { + /* This has to be done manually */ + (void)seq; + int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; + for (int i = 0; i < totalargs; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + } + + op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + next = sym_new_known_type(ctx, &PyLong_Type); + if (next == NULL) { + goto out_of_space; + } + (void)iter; + } + + + + +// END BYTECODES // + +} \ No newline at end of file diff --git a/Python/tier2_redundancy_eliminator_cases.c.h b/Python/tier2_redundancy_eliminator_cases.c.h new file mode 100644 index 00000000000000..77a7f5b2360c3b --- /dev/null +++ b/Python/tier2_redundancy_eliminator_cases.c.h @@ -0,0 +1,1676 @@ +// This file is generated by Tools/cases_generator/tier2_abstract_generator.py +// from: +// Python/tier2_redundancy_eliminator_bytecodes.c +// Do not edit! + + case _NOP: { + break; + } + + case _RESUME_CHECK: { + break; + } + + /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ + + case _LOAD_FAST_CHECK: { + _Py_UOpsSymType *value; + value = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_null(value)) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_FAST: { + _Py_UOpsSymType *value; + value = GETLOCAL(oparg); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_FAST_AND_CLEAR: { + _Py_UOpsSymType *value; + value = GETLOCAL(oparg); + _Py_UOpsSymType *temp = sym_new_null(ctx); + if (temp == NULL) { + goto out_of_space; + } + GETLOCAL(oparg) = temp; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_CONST: { + _Py_UOpsSymType *value; + // There should be no LOAD_CONST. It should be all + // replaced by peephole_opt. + Py_UNREACHABLE(); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _STORE_FAST: { + _Py_UOpsSymType *value; + value = stack_pointer[-1]; + GETLOCAL(oparg) = value; + stack_pointer += -1; + break; + } + + case _POP_TOP: { + stack_pointer += -1; + break; + } + + case _PUSH_NULL: { + _Py_UOpsSymType *res; + res = sym_new_null(ctx); + if (res == NULL) { + goto out_of_space; + }; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _END_SEND: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + + case _UNARY_NEGATIVE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _UNARY_NOT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_BOOL: { + break; + } + + case _TO_BOOL_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_LIST: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_NONE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_STR: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_ALWAYS_TRUE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _UNARY_INVERT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _GUARD_BOTH_INT: { + _Py_UOpsSymType *right; + _Py_UOpsSymType *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyLong_Type) && + sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(left, &PyLong_Type); + sym_set_type(right, &PyLong_Type); + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_INT: { + _Py_UOpsSymType *right; + _Py_UOpsSymType *left; + _Py_UOpsSymType *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + // TODO constant propagation + (void)left; + (void)right; + res = sym_new_known_type(ctx, &PyLong_Type); + if (res == NULL) { + goto out_of_space; + } + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_FLOAT: { + _Py_UOpsSymType *right; + _Py_UOpsSymType *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyFloat_Type) && + sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + sym_set_type(left, &PyFloat_Type); + sym_set_type(right, &PyFloat_Type); + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_UNICODE: { + break; + } + + case _BINARY_OP_ADD_UNICODE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SLICE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-3] = res; + stack_pointer += -2; + break; + } + + case _STORE_SLICE: { + stack_pointer += -4; + break; + } + + case _BINARY_SUBSCR_LIST_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_STR_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_TUPLE_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_DICT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */ + + case _LIST_APPEND: { + stack_pointer += -1; + break; + } + + case _SET_ADD: { + stack_pointer += -1; + break; + } + + case _STORE_SUBSCR: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_LIST_INT: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_DICT: { + stack_pointer += -3; + break; + } + + case _DELETE_SUBSCR: { + stack_pointer += -2; + break; + } + + case _CALL_INTRINSIC_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _CALL_INTRINSIC_2: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _POP_FRAME: { + _Py_UOpsSymType *retval; + _Py_UOpsSymType *res; + retval = stack_pointer[-1]; + stack_pointer += -1; + ctx->frame->stack_pointer = stack_pointer; + ctx_frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = retval; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + /* _INSTRUMENTED_RETURN_VALUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_RETURN_CONST is not a viable micro-op for tier 2 */ + + case _GET_AITER: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + case _GET_ANEXT: { + _Py_UOpsSymType *awaitable; + awaitable = sym_new_unknown(ctx); + if (awaitable == NULL) goto out_of_space; + stack_pointer[0] = awaitable; + stack_pointer += 1; + break; + } + + case _GET_AWAITABLE: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + /* _SEND is not a viable micro-op for tier 2 */ + + /* _SEND_GEN is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_YIELD_VALUE is not a viable micro-op for tier 2 */ + + case _POP_EXCEPT: { + stack_pointer += -1; + break; + } + + case _LOAD_ASSERTION_ERROR: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_BUILD_CLASS: { + _Py_UOpsSymType *bc; + bc = sym_new_unknown(ctx); + if (bc == NULL) goto out_of_space; + stack_pointer[0] = bc; + stack_pointer += 1; + break; + } + + case _STORE_NAME: { + stack_pointer += -1; + break; + } + + case _DELETE_NAME: { + break; + } + + case _UNPACK_SEQUENCE: { + _Py_UOpsSymType *seq; + _Py_UOpsSymType **values; + seq = stack_pointer[-1]; + values = &stack_pointer[-1]; + /* This has to be done manually */ + (void)seq; + for (int i = 0; i < oparg; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TWO_TUPLE: { + _Py_UOpsSymType **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_unknown(ctx); + if (values[_i] == NULL) goto out_of_space; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TUPLE: { + _Py_UOpsSymType **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_unknown(ctx); + if (values[_i] == NULL) goto out_of_space; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_LIST: { + _Py_UOpsSymType **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_unknown(ctx); + if (values[_i] == NULL) goto out_of_space; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_EX: { + _Py_UOpsSymType *seq; + _Py_UOpsSymType **values; + seq = stack_pointer[-1]; + values = &stack_pointer[-1]; + /* This has to be done manually */ + (void)seq; + int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; + for (int i = 0; i < totalargs; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + stack_pointer += (oparg >> 8) + (oparg & 0xFF); + break; + } + + case _STORE_ATTR: { + stack_pointer += -2; + break; + } + + case _DELETE_ATTR: { + stack_pointer += -1; + break; + } + + case _STORE_GLOBAL: { + stack_pointer += -1; + break; + } + + case _DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + _Py_UOpsSymType *locals; + locals = sym_new_unknown(ctx); + if (locals == NULL) goto out_of_space; + stack_pointer[0] = locals; + stack_pointer += 1; + break; + } + + case _LOAD_FROM_DICT_OR_GLOBALS: { + _Py_UOpsSymType *v; + v = sym_new_unknown(ctx); + if (v == NULL) goto out_of_space; + stack_pointer[-1] = v; + break; + } + + case _LOAD_NAME: { + _Py_UOpsSymType *v; + v = sym_new_unknown(ctx); + if (v == NULL) goto out_of_space; + stack_pointer[0] = v; + stack_pointer += 1; + break; + } + + case _LOAD_GLOBAL: { + _Py_UOpsSymType *res; + _Py_UOpsSymType *null = NULL; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + null = sym_new_null(ctx); + if (null == NULL) goto out_of_space; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + _Py_UOpsSymType *res; + _Py_UOpsSymType *null = NULL; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + null = sym_new_null(ctx); + if (null == NULL) goto out_of_space; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UOpsSymType *res; + _Py_UOpsSymType *null = NULL; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + null = sym_new_null(ctx); + if (null == NULL) goto out_of_space; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _DELETE_FAST: { + break; + } + + case _MAKE_CELL: { + break; + } + + case _DELETE_DEREF: { + break; + } + + case _LOAD_FROM_DICT_OR_DEREF: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-1] = value; + break; + } + + case _LOAD_DEREF: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _STORE_DEREF: { + stack_pointer += -1; + break; + } + + case _COPY_FREE_VARS: { + break; + } + + case _BUILD_STRING: { + _Py_UOpsSymType *str; + str = sym_new_unknown(ctx); + if (str == NULL) goto out_of_space; + stack_pointer[-oparg] = str; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_TUPLE: { + _Py_UOpsSymType *tup; + tup = sym_new_unknown(ctx); + if (tup == NULL) goto out_of_space; + stack_pointer[-oparg] = tup; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_LIST: { + _Py_UOpsSymType *list; + list = sym_new_unknown(ctx); + if (list == NULL) goto out_of_space; + stack_pointer[-oparg] = list; + stack_pointer += 1 - oparg; + break; + } + + case _LIST_EXTEND: { + stack_pointer += -1; + break; + } + + case _SET_UPDATE: { + stack_pointer += -1; + break; + } + + case _BUILD_SET: { + _Py_UOpsSymType *set; + set = sym_new_unknown(ctx); + if (set == NULL) goto out_of_space; + stack_pointer[-oparg] = set; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_MAP: { + _Py_UOpsSymType *map; + map = sym_new_unknown(ctx); + if (map == NULL) goto out_of_space; + stack_pointer[-oparg*2] = map; + stack_pointer += 1 - oparg*2; + break; + } + + case _SETUP_ANNOTATIONS: { + break; + } + + case _BUILD_CONST_KEY_MAP: { + _Py_UOpsSymType *map; + map = sym_new_unknown(ctx); + if (map == NULL) goto out_of_space; + stack_pointer[-1 - oparg] = map; + stack_pointer += -oparg; + break; + } + + case _DICT_UPDATE: { + stack_pointer += -1; + break; + } + + case _DICT_MERGE: { + stack_pointer += -1; + break; + } + + case _MAP_ADD: { + stack_pointer += -2; + break; + } + + /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ + + case _LOAD_SUPER_ATTR_ATTR: { + _Py_UOpsSymType *attr; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + stack_pointer[-3] = attr; + stack_pointer += -2 + ((0) ? 1 : 0); + break; + } + + case _LOAD_SUPER_ATTR_METHOD: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self_or_null; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self_or_null = sym_new_unknown(ctx); + if (self_or_null == NULL) goto out_of_space; + stack_pointer[-3] = attr; + stack_pointer[-2] = self_or_null; + stack_pointer += -1; + break; + } + + case _LOAD_ATTR: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self_or_null = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self_or_null = sym_new_unknown(ctx); + if (self_or_null == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = self_or_null; + stack_pointer += (oparg & 1); + break; + } + + case _GUARD_TYPE_VERSION: { + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_MODULE: { + break; + } + + case _LOAD_ATTR_MODULE: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_WITH_HINT: { + break; + } + + case _LOAD_ATTR_WITH_HINT: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t hint = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)hint; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _LOAD_ATTR_SLOT: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_CLASS: { + break; + } + + case _LOAD_ATTR_CLASS: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)descr; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + /* _LOAD_ATTR_PROPERTY is not a viable micro-op for tier 2 */ + + /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */ + + case _GUARD_DORV_VALUES: { + break; + } + + case _STORE_ATTR_INSTANCE_VALUE: { + stack_pointer += -2; + break; + } + + /* _STORE_ATTR_WITH_HINT is not a viable micro-op for tier 2 */ + + case _STORE_ATTR_SLOT: { + stack_pointer += -2; + break; + } + + case _COMPARE_OP: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_STR: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _IS_OP: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-2] = b; + stack_pointer += -1; + break; + } + + case _CONTAINS_OP: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-2] = b; + stack_pointer += -1; + break; + } + + case _CHECK_EG_MATCH: { + _Py_UOpsSymType *rest; + _Py_UOpsSymType *match; + rest = sym_new_unknown(ctx); + if (rest == NULL) goto out_of_space; + match = sym_new_unknown(ctx); + if (match == NULL) goto out_of_space; + stack_pointer[-2] = rest; + stack_pointer[-1] = match; + break; + } + + case _CHECK_EXC_MATCH: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-1] = b; + break; + } + + /* _JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + case _IS_NONE: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-1] = b; + break; + } + + case _GET_LEN: { + _Py_UOpsSymType *len_o; + len_o = sym_new_unknown(ctx); + if (len_o == NULL) goto out_of_space; + stack_pointer[0] = len_o; + stack_pointer += 1; + break; + } + + case _MATCH_CLASS: { + _Py_UOpsSymType *attrs; + attrs = sym_new_unknown(ctx); + if (attrs == NULL) goto out_of_space; + stack_pointer[-3] = attrs; + stack_pointer += -2; + break; + } + + case _MATCH_MAPPING: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _MATCH_SEQUENCE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _MATCH_KEYS: { + _Py_UOpsSymType *values_or_none; + values_or_none = sym_new_unknown(ctx); + if (values_or_none == NULL) goto out_of_space; + stack_pointer[0] = values_or_none; + stack_pointer += 1; + break; + } + + case _GET_ITER: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + case _GET_YIELD_FROM_ITER: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + /* _FOR_ITER is not a viable micro-op for tier 2 */ + + case _FOR_ITER_TIER_TWO: { + _Py_UOpsSymType *next; + next = sym_new_unknown(ctx); + if (next == NULL) goto out_of_space; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ + + case _ITER_CHECK_LIST: { + break; + } + + /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_LIST: { + break; + } + + case _ITER_NEXT_LIST: { + _Py_UOpsSymType *next; + next = sym_new_unknown(ctx); + if (next == NULL) goto out_of_space; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_TUPLE: { + break; + } + + /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_TUPLE: { + break; + } + + case _ITER_NEXT_TUPLE: { + _Py_UOpsSymType *next; + next = sym_new_unknown(ctx); + if (next == NULL) goto out_of_space; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_RANGE: { + break; + } + + /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_RANGE: { + break; + } + + case _ITER_NEXT_RANGE: { + _Py_UOpsSymType *iter; + _Py_UOpsSymType *next; + iter = stack_pointer[-1]; + next = sym_new_known_type(ctx, &PyLong_Type); + if (next == NULL) { + goto out_of_space; + } + (void)iter; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ + + case _BEFORE_ASYNC_WITH: { + _Py_UOpsSymType *exit; + _Py_UOpsSymType *res; + exit = sym_new_unknown(ctx); + if (exit == NULL) goto out_of_space; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = exit; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _BEFORE_WITH: { + _Py_UOpsSymType *exit; + _Py_UOpsSymType *res; + exit = sym_new_unknown(ctx); + if (exit == NULL) goto out_of_space; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = exit; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _WITH_EXCEPT_START: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _PUSH_EXC_INFO: { + _Py_UOpsSymType *prev_exc; + _Py_UOpsSymType *new_exc; + prev_exc = sym_new_unknown(ctx); + if (prev_exc == NULL) goto out_of_space; + new_exc = sym_new_unknown(ctx); + if (new_exc == NULL) goto out_of_space; + stack_pointer[-1] = prev_exc; + stack_pointer[0] = new_exc; + stack_pointer += 1; + break; + } + + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { + break; + } + + case _GUARD_KEYS_VERSION: { + break; + } + + case _LOAD_ATTR_METHOD_WITH_VALUES: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (1) stack_pointer[0] = self; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_METHOD_NO_DICT: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (1) stack_pointer[0] = self; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + _Py_UOpsSymType *attr; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + stack_pointer[-1] = attr; + stack_pointer += ((0) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + _Py_UOpsSymType *attr; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + stack_pointer[-1] = attr; + stack_pointer += ((0) ? 1 : 0); + break; + } + + case _CHECK_ATTR_METHOD_LAZY_DICT: { + break; + } + + case _LOAD_ATTR_METHOD_LAZY_DICT: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (1) stack_pointer[0] = self; + stack_pointer += ((1) ? 1 : 0); + break; + } + + /* _INSTRUMENTED_CALL is not a viable micro-op for tier 2 */ + + /* _CALL is not a viable micro-op for tier 2 */ + + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymType *null; + _Py_UOpsSymType *callable; + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + sym_set_null(null); + sym_set_type(callable, &PyMethod_Type); + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymType *func; + _Py_UOpsSymType *self; + func = sym_new_unknown(ctx); + if (func == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + break; + } + + case _CHECK_PEP_523: { + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { + _Py_UOpsSymType *self_or_null; + _Py_UOpsSymType *callable; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)this_instr->operand; + sym_set_type(callable, &PyFunction_Type); + (void)self_or_null; + (void)func_version; + break; + } + + case _CHECK_STACK_SPACE: { + break; + } + + case _INIT_CALL_PY_EXACT_ARGS: { + _Py_UOpsSymType **args; + _Py_UOpsSymType *self_or_null; + _Py_UOpsSymType *callable; + _Py_UOpsAbstractFrame *new_frame; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + (void)callable; + PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + if (func == NULL) { + goto error; + } + PyCodeObject *co = (PyCodeObject *)func->func_code; + assert(self_or_null != NULL); + assert(args != NULL); + if (sym_is_not_null(self_or_null)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM + args--; + argcount++; + } + _Py_UOpsSymType **localsplus_start = ctx->n_consumed; + int n_locals_already_filled = 0; + // Can determine statically, so we interleave the new locals + // and make the current stack the new locals. + // This also sets up for true call inlining. + if (sym_is_known(self_or_null)) { + localsplus_start = args; + n_locals_already_filled = argcount; + } + new_frame = ctx_frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); + if (new_frame == NULL){ + goto out_of_space; + } + stack_pointer[-2 - oparg] = (_Py_UOpsSymType *)new_frame; + stack_pointer += -1 - oparg; + break; + } + + case _PUSH_FRAME: { + _Py_UOpsAbstractFrame *new_frame; + new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + stack_pointer += -1; + ctx->frame->stack_pointer = stack_pointer; + ctx->frame = new_frame; + ctx->curr_frame_depth++; + stack_pointer = new_frame->stack_pointer; + stack_pointer += ((0) ? 1 : 0); + break; + } + + /* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */ + + case _CALL_TYPE_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_STR_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_TUPLE_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + /* _CALL_ALLOC_AND_ENTER_INIT is not a viable micro-op for tier 2 */ + + case _EXIT_INIT_CHECK: { + stack_pointer += -1; + break; + } + + case _CALL_BUILTIN_CLASS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_O: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_LEN: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_ISINSTANCE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_O: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_NOARGS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + /* _INSTRUMENTED_CALL_KW is not a viable micro-op for tier 2 */ + + /* _CALL_KW is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + /* _CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + case _MAKE_FUNCTION: { + _Py_UOpsSymType *func; + func = sym_new_unknown(ctx); + if (func == NULL) goto out_of_space; + stack_pointer[-1] = func; + break; + } + + case _SET_FUNCTION_ATTRIBUTE: { + _Py_UOpsSymType *func; + func = sym_new_unknown(ctx); + if (func == NULL) goto out_of_space; + stack_pointer[-2] = func; + stack_pointer += -1; + break; + } + + case _BUILD_SLICE: { + _Py_UOpsSymType *slice; + slice = sym_new_unknown(ctx); + if (slice == NULL) goto out_of_space; + stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; + stack_pointer += -1 - ((oparg == 3) ? 1 : 0); + break; + } + + case _CONVERT_VALUE: { + _Py_UOpsSymType *result; + result = sym_new_unknown(ctx); + if (result == NULL) goto out_of_space; + stack_pointer[-1] = result; + break; + } + + case _FORMAT_SIMPLE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _FORMAT_WITH_SPEC: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COPY: { + _Py_UOpsSymType *bottom; + _Py_UOpsSymType *top; + bottom = stack_pointer[-1 - (oparg-1)]; + assert(oparg > 0); + top = bottom; + stack_pointer[0] = top; + stack_pointer += 1; + break; + } + + case _BINARY_OP: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _SWAP: { + _Py_UOpsSymType *top; + _Py_UOpsSymType *bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2 - (oparg-2)]; + stack_pointer[-2 - (oparg-2)] = top; + stack_pointer[-1] = bottom; + break; + } + + /* _INSTRUMENTED_INSTRUCTION is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_FORWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NONE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ + + case _GUARD_IS_TRUE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_FALSE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NONE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NOT_NONE_POP: { + stack_pointer += -1; + break; + } + + case _JUMP_TO_TOP: { + break; + } + + case _SET_IP: { + break; + } + + case _SAVE_RETURN_OFFSET: { + break; + } + + case _EXIT_TRACE: { + break; + } + + case _CHECK_VALIDITY: { + break; + } + + case _LOAD_CONST_INLINE: { + _Py_UOpsSymType *value; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_CONST_INLINE_BORROW: { + _Py_UOpsSymType *value; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_CONST_INLINE_WITH_NULL: { + _Py_UOpsSymType *value; + _Py_UOpsSymType *null; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer[1] = null; + stack_pointer += 2; + break; + } + + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: { + _Py_UOpsSymType *value; + _Py_UOpsSymType *null; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer[1] = null; + stack_pointer += 2; + break; + } + + case _CHECK_GLOBALS: { + break; + } + + case _CHECK_BUILTINS: { + break; + } + + case _INTERNAL_INCREMENT_OPT_COUNTER: { + stack_pointer += -1; + break; + } + diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 444063d2148934..be89a26058e8e8 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -83,9 +83,11 @@ def clean_lines(text): Python/frozen_modules/*.h Python/generated_cases.c.h Python/executor_cases.c.h +Python/tier2_redundancy_eliminator_cases.c.h # not actually source Python/bytecodes.c +Python/tier2_redundancy_eliminator_bytecodes.c # mimalloc Objects/mimalloc/*.c diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index c75aff8c1723c1..14bcd85b9eae59 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - -Python/optimizer_analysis.c - _Py_PartitionRootNode_Type - +Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type - Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type - Modules/clinic/md5module.c.h _md5_md5 _keywords - diff --git a/Tools/cases_generator/README.md b/Tools/cases_generator/README.md index 7fec8a882336cd..d35a868b42ea9e 100644 --- a/Tools/cases_generator/README.md +++ b/Tools/cases_generator/README.md @@ -13,6 +13,9 @@ What's currently here: - `parser.py` helper for interactions with `parsing.py` - `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and write `Python/generated_cases.c.h` (and several other files) +- `tier2_abstract_generator.py`: reads `Python/bytecodes.c` and + `Python/tier2_redundancy_eliminator_bytecodes.c` and writes + `Python/tier2_redundancy_eliminator_cases.c.h` - `stack.py`: code to handle generalized stack effects - `cwriter.py`: code which understands tokens and how to format C code; main class: `CWriter` diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b80fa66e2a159a..3497b7fcdf35d3 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -24,7 +24,6 @@ class Properties: pure: bool passthrough: bool - guard: bool def dump(self, indent: str) -> None: print(indent, end="") @@ -51,7 +50,6 @@ def from_list(properties: list["Properties"]) -> "Properties": has_free=any(p.has_free for p in properties), pure=all(p.pure for p in properties), passthrough=all(p.passthrough for p in properties), - guard=all(p.guard for p in properties), ) @@ -73,7 +71,6 @@ def from_list(properties: list["Properties"]) -> "Properties": has_free=False, pure=False, passthrough=False, - guard=False, ) @@ -273,7 +270,7 @@ def override_error( def convert_stack_item(item: parser.StackEffect) -> StackItem: return StackItem( - item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop + item.name, item.type, item.cond, (item.size or "1") ) @@ -473,7 +470,6 @@ def compute_properties(op: parser.InstDef) -> Properties: has_free=has_free, pure="pure" in op.annotations, passthrough=passthrough, - guard=passthrough and deopts, ) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index e87aff43762b11..9b5733562f77b4 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -109,10 +109,7 @@ and a piece of C code describing its semantics:: NAME [":" type] [ "if" "(" C-expression ")" ] type: - NAME ["*"] | type_prop - - type_prop: - "&" "(" NAME ["+" NAME] ")" + NAME ["*"] stream: NAME "/" size @@ -142,26 +139,7 @@ The following definitions may occur: The optional `type` in an `object` is the C type. It defaults to `PyObject *`. The objects before the "--" are the objects on top of the stack at the start of the instruction. Those after the "--" are the objects on top of the stack at the -end of the instruction. When prefixed by a `&`, the `type` production rule follows the -`type_prop` production rule. This indicates the type of the value is of that specific type -after the operation. In this case, the type may also contain 64-bit refinement information -that is fetched from a previously defined operand in the instruction header, such as -a type version tag. This follows the format `type + refinement`. The list of possible types -and their refinements are below. They obey the following predicates: - - -* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type` -* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type` -* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE` -* `NULL_TYPE`: `val == NULL` -* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary` -* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)` -* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`: - `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)` -* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary` -* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type` -* `PYFUNCTION_TYPE_VERSION_TYPE`: - `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)` +end of the instruction. An `inst` without `stack_effect` is a transitional form to allow the original C code diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 307919cb37ce1e..a8961f28babea1 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -75,11 +75,6 @@ class StackEffect(Node): size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond - # Optional `(type, refinement)` - type_prop: None | tuple[str, None | str] = field( - default_factory=lambda: None, init=True, compare=False, hash=False - ) - def __repr__(self) -> str: items = [self.name, self.type, self.cond, self.size] while items and items[-1] == "": @@ -260,25 +255,14 @@ def cache_effect(self) -> CacheEffect | None: @contextual def stack_effect(self) -> StackEffect | None: - # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')'] + # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): type_text = "" - type_prop = None if self.expect(lx.COLON): - if i := self.expect(lx.IDENTIFIER): - type_text = i.text.strip() - if self.expect(lx.TIMES): - type_text += " *" - if self.expect(lx.AND): - consumed_bracket = self.expect(lx.LPAREN) is not None - type_prop_text = self.require(lx.IDENTIFIER).text.strip() - refinement = None - if self.expect(lx.PLUS): - refinement = self.require(lx.IDENTIFIER).text.strip() - type_prop = (type_prop_text, refinement) - if consumed_bracket: - self.require(lx.RPAREN) + type_text = self.require(lx.IDENTIFIER).text.strip() + if self.expect(lx.TIMES): + type_text += " *" cond_text = "" if self.expect(lx.IF): self.require(lx.LPAREN) @@ -295,7 +279,7 @@ def stack_effect(self) -> StackEffect | None: self.require(lx.RBRACKET) type_text = "PyObject **" size_text = size.text.strip() - return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop) + return StackEffect(tkn.text, type_text, cond_text, size_text) return None @contextual diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index f62ece43c1be7f..97a301142d59c7 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -168,11 +168,11 @@ def push(self, var: StackItem) -> str: self.top_offset.push(var) return "" - def flush(self, out: CWriter) -> None: + def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None: out.start_line() for var in self.variables: if not var.peek: - cast = "(PyObject *)" if var.type else "" + cast = f"({cast_type})" if var.type else "" if var.name not in UNUSED and not var.is_array(): if var.condition: out.emit(f"if ({var.condition}) ") diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py new file mode 100644 index 00000000000000..cc29b1660d26ed --- /dev/null +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -0,0 +1,235 @@ +"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter. +Reads the instruction definitions from bytecodes.c. and tier2_redundancy_eliminator.bytecodes.c +Writes the cases to tier2_redundancy_eliminator_cases.c.h, which is #included in Python/optimizer_analysis.c. +""" + +import argparse +import os.path +import sys + +from analyzer import ( + Analysis, + Instruction, + Uop, + Part, + analyze_files, + Skip, + StackItem, + analysis_error, +) +from generators_common import ( + DEFAULT_INPUT, + ROOT, + write_header, + emit_tokens, + emit_to, + replace_sync_sp, +) +from cwriter import CWriter +from typing import TextIO, Iterator +from lexer import Token +from stack import StackOffset, Stack, SizeMismatch, UNUSED + +DEFAULT_OUTPUT = ROOT / "Python/tier2_redundancy_eliminator_cases.c.h" +DEFAULT_ABSTRACT_INPUT = ROOT / "Python/tier2_redundancy_eliminator_bytecodes.c" + + +def validate_uop(override: Uop, uop: Uop) -> None: + # To do + pass + + +def type_name(var: StackItem) -> str: + if var.is_array(): + return f"_Py_UOpsSymType **" + if var.type: + return var.type + return f"_Py_UOpsSymType *" + + +def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: + variables = {"unused"} + if not skip_inputs: + for var in reversed(uop.stack.inputs): + if var.name not in variables: + variables.add(var.name) + if var.condition: + out.emit(f"{type_name(var)}{var.name} = NULL;\n") + else: + out.emit(f"{type_name(var)}{var.name};\n") + for var in uop.stack.outputs: + if var.peek: + continue + if var.name not in variables: + variables.add(var.name) + if var.condition: + out.emit(f"{type_name(var)}{var.name} = NULL;\n") + else: + out.emit(f"{type_name(var)}{var.name};\n") + + +def decref_inputs( + out: CWriter, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, +) -> None: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + out.emit_at("", tkn) + + +def emit_default(out: CWriter, uop: Uop) -> None: + for i, var in enumerate(uop.stack.outputs): + if var.name != "unused" and not var.peek: + if var.is_array(): + out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n") + out.emit(f"{var.name}[_i] = sym_new_unknown(ctx);\n") + out.emit(f"if ({var.name}[_i] == NULL) goto out_of_space;\n") + out.emit("}\n") + elif var.name == "null": + out.emit(f"{var.name} = sym_new_null(ctx);\n") + out.emit(f"if ({var.name} == NULL) goto out_of_space;\n") + else: + out.emit(f"{var.name} = sym_new_unknown(ctx);\n") + out.emit(f"if ({var.name} == NULL) goto out_of_space;\n") + + +def write_uop( + override: Uop | None, + uop: Uop, + out: CWriter, + stack: Stack, + debug: bool, + skip_inputs: bool, +) -> None: + try: + prototype = override if override else uop + is_override = override is not None + out.start_line() + for var in reversed(prototype.stack.inputs): + res = stack.pop(var) + if not skip_inputs: + out.emit(res) + if not prototype.properties.stores_sp: + for i, var in enumerate(prototype.stack.outputs): + res = stack.push(var) + if not var.peek or is_override: + out.emit(res) + if debug: + args = [] + for var in prototype.stack.inputs: + if not var.peek or is_override: + args.append(var.name) + out.emit(f'DEBUG_PRINTF({", ".join(args)});\n') + if override: + for cache in uop.caches: + if cache.name != "unused": + if cache.size == 4: + type = cast = "PyObject *" + else: + type = f"uint{cache.size*16}_t " + cast = f"uint{cache.size*16}_t" + out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n") + if override: + replacement_funcs = { + "DECREF_INPUTS": decref_inputs, + "SYNC_SP": replace_sync_sp, + } + emit_tokens(out, override, stack, None, replacement_funcs) + else: + emit_default(out, uop) + + if prototype.properties.stores_sp: + for i, var in enumerate(prototype.stack.outputs): + if not var.peek or is_override: + out.emit(stack.push(var)) + out.start_line() + stack.flush(out, cast_type="_Py_UOpsSymType *") + except SizeMismatch as ex: + raise analysis_error(ex.args[0], uop.body[0]) + + +SKIPS = ("_EXTENDED_ARG",) + + +def generate_abstract_interpreter( + filenames: list[str], + abstract: Analysis, + base: Analysis, + outfile: TextIO, + debug: bool, +) -> None: + write_header(__file__, filenames, outfile) + out = CWriter(outfile, 2, False) + out.emit("\n") + base_uop_names = set([uop.name for uop in base.uops.values()]) + for abstract_uop_name in abstract.uops: + assert abstract_uop_name in base_uop_names,\ + f"All abstract uops should override base uops, but {abstract_uop_name} is not." + + for uop in base.uops.values(): + override: Uop | None = None + if uop.name in abstract.uops: + override = abstract.uops[uop.name] + validate_uop(override, uop) + if uop.properties.tier_one_only: + continue + if uop.is_super(): + continue + if not uop.is_viable(): + out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n") + continue + out.emit(f"case {uop.name}: {{\n") + if override: + declare_variables(override, out, skip_inputs=False) + else: + declare_variables(uop, out, skip_inputs=True) + stack = Stack() + write_uop(override, uop, out, stack, debug, skip_inputs=(override is None)) + out.start_line() + out.emit("break;\n") + out.emit("}") + out.emit("\n\n") + + +def generate_tier2_abstract_from_files( + filenames: list[str], outfilename: str, debug: bool=False +) -> None: + assert len(filenames) == 2, "Need a base file and an abstract cases file." + base = analyze_files([filenames[0]]) + abstract = analyze_files([filenames[1]]) + with open(outfilename, "w") as outfile: + generate_abstract_interpreter(filenames, abstract, base, outfile, debug) + + +arg_parser = argparse.ArgumentParser( + description="Generate the code for the tier 2 interpreter.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) + +arg_parser.add_argument( + "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT +) + + +arg_parser.add_argument("input", nargs=1, help="Abstract interpreter definition file") + +arg_parser.add_argument( + "base", nargs=argparse.REMAINDER, help="The base instruction definition file(s)" +) + +arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true") + +if __name__ == "__main__": + args = arg_parser.parse_args() + if len(args.base) == 0: + args.input.append(DEFAULT_INPUT) + args.input.append(DEFAULT_ABSTRACT_INPUT) + abstract = analyze_files(args.input) + base = analyze_files(args.base) + with open(args.output, "w") as outfile: + generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug)