Skip to content

Commit

Permalink
pythongh-114058: Foundations of the Tier2 redundancy eliminator (pyth…
Browse files Browse the repository at this point in the history
…onGH-115085)

---------

Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: Guido van Rossum <gvanrossum@users.noreply.github.com>
  • Loading branch information
4 people authored Feb 13, 2024
1 parent ccc76c3 commit 7cce857
Show file tree
Hide file tree
Showing 25 changed files with 3,137 additions and 140 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated
Python/Python-ast.c generated
Python/executor_cases.c.h generated
Python/generated_cases.c.h generated
Python/tier2_redundancy_eliminator_bytecodes.c.h generated
Python/opcode_targets.h generated
Python/stdlib_module_names.h generated
Tools/peg_generator/pegen/grammar_parser.py generated
Expand Down
3 changes: 3 additions & 0 deletions Include/cpython/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ typedef struct _optimization_stats {
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimizer_attempts;
uint64_t optimizer_successes;
uint64_t optimizer_failure_reason_no_memory;
} OptimizationStats;

typedef struct _rare_event_stats {
Expand Down
10 changes: 5 additions & 5 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_uop_ids.h"

// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 512

#define TRACE_STACK_SIZE 5

int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);
Expand Down
10 changes: 5 additions & 5 deletions Include/internal/pycore_uop_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];

#ifdef NEED_OPCODE_METADATA
const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_NOP] = 0,
[_NOP] = HAS_PURE_FLAG,
[_RESUME_CHECK] = HAS_DEOPT_FLAG,
[_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG,
[_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG,
Expand Down Expand Up @@ -202,10 +202,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
[_EXIT_TRACE] = HAS_DEOPT_FLAG,
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
[_LOAD_CONST_INLINE] = 0,
[_LOAD_CONST_INLINE_BORROW] = 0,
[_LOAD_CONST_INLINE_WITH_NULL] = 0,
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0,
[_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG,
[_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
[_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
Expand Down
209 changes: 209 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
import textwrap
import unittest
import gc

import _testinternalcapi

Expand Down Expand Up @@ -556,6 +557,214 @@ def testfunc(n):
# too much already.
self.assertEqual(count, 1)

class TestUopsOptimization(unittest.TestCase):
    """Inspect the uop traces recorded for small loop functions.

    Each test runs a tiny function while the optimizer returned by
    ``_testinternalcapi.get_uop_optimizer()`` is installed through
    ``temporary_optimizer``, fetches the first executor attached to that
    function, and checks which uop names appear in it.

    The nested ``testfunc``/``double``/``dummy`` bodies are the input under
    test: their exact statements decide which uops get recorded, so they
    must not be "tidied up".
    """

    def _run_optimized(self, func, arg):
        """Call ``func(arg)`` under the uop optimizer.

        Returns a ``(result, executor)`` pair.  The executor is looked up
        only after the ``temporary_optimizer`` context has been left.
        """
        optimizer = _testinternalcapi.get_uop_optimizer()
        with temporary_optimizer(optimizer):
            result = func(arg)
        return result, get_first_executor(func)

    @staticmethod
    def _occurrences(executor, uop_name):
        """Return how many entries of *executor* are named *uop_name*."""
        return sum(1 for opname, _, _ in executor if opname == uop_name)

    def test_int_type_propagation(self):
        def testfunc(loops):
            num = 0
            while num < loops:
                x = num + num
                a = x + 1
                num += 1
            return a

        result, executor = self._run_optimized(testfunc, 32)
        self.assertIsNotNone(executor)
        self.assertEqual(result, 63)
        additions = self._occurrences(executor, "_BINARY_OP_ADD_INT")
        int_guards = self._occurrences(executor, "_GUARD_BOTH_INT")
        self.assertGreaterEqual(additions, 3)
        self.assertLessEqual(int_guards, 1)

    def test_int_type_propagation_through_frame(self):
        def double(x):
            return x + x
        def testfunc(loops):
            num = 0
            while num < loops:
                x = num + num
                a = double(x)
                num += 1
            return a

        result, executor = self._run_optimized(testfunc, 32)
        self.assertIsNotNone(executor)
        self.assertEqual(result, 124)
        additions = self._occurrences(executor, "_BINARY_OP_ADD_INT")
        int_guards = self._occurrences(executor, "_GUARD_BOTH_INT")
        self.assertGreaterEqual(additions, 3)
        self.assertLessEqual(int_guards, 1)

    def test_int_type_propagation_from_frame(self):
        def double(x):
            return x + x
        def testfunc(loops):
            num = 0
            while num < loops:
                a = double(num)
                x = a + a
                num += 1
            return x

        result, executor = self._run_optimized(testfunc, 32)
        self.assertIsNotNone(executor)
        self.assertEqual(result, 124)
        additions = self._occurrences(executor, "_BINARY_OP_ADD_INT")
        int_guards = self._occurrences(executor, "_GUARD_BOTH_INT")
        self.assertGreaterEqual(additions, 3)
        self.assertLessEqual(int_guards, 1)

    def test_int_impure_region(self):
        def testfunc(loops):
            num = 0
            while num < loops:
                x = num + num
                y = 1
                x // 2
                a = x + y
                num += 1
            return a

        # The return value is not checked here, only the recorded trace.
        _, executor = self._run_optimized(testfunc, 64)
        self.assertIsNotNone(executor)
        additions = self._occurrences(executor, "_BINARY_OP_ADD_INT")
        self.assertGreaterEqual(additions, 3)

    def test_call_py_exact_args(self):
        def testfunc(n):
            def dummy(x):
                return x+1
            for i in range(n):
                dummy(i)

        _, executor = self._run_optimized(testfunc, 20)
        self.assertIsNotNone(executor)
        seen = {opname for opname, _, _ in executor}
        self.assertIn("_PUSH_FRAME", seen)
        self.assertIn("_BINARY_OP_ADD_INT", seen)
        self.assertNotIn("_CHECK_PEP_523", seen)

    def test_int_type_propagate_through_range(self):
        def testfunc(n):

            for i in range(n):
                x = i + i
            return x

        result, executor = self._run_optimized(testfunc, 20)
        self.assertEqual(result, 19 * 2)
        self.assertIsNotNone(executor)
        seen = {opname for opname, _, _ in executor}
        self.assertNotIn("_GUARD_BOTH_INT", seen)

    def test_int_value_numbering(self):
        def testfunc(n):

            y = 1
            for i in range(n):
                x = y
                z = x
                a = z
                b = a
                res = x + z + a + b
            return res

        result, executor = self._run_optimized(testfunc, 20)
        self.assertEqual(result, 4)
        self.assertIsNotNone(executor)
        seen = {opname for opname, _, _ in executor}
        self.assertIn("_GUARD_BOTH_INT", seen)
        # Exactly one guard is expected in the whole trace.
        int_guards = self._occurrences(executor, "_GUARD_BOTH_INT")
        self.assertEqual(int_guards, 1)

    def test_comprehension(self):
        def testfunc(n):
            for _ in range(n):
                return [i for i in range(n)]

        _, executor = self._run_optimized(testfunc, 20)
        self.assertIsNotNone(executor)
        seen = {opname for opname, _, _ in executor}
        self.assertNotIn("_BINARY_OP_ADD_INT", seen)

    def test_call_py_exact_args_disappearing(self):
        def dummy(x):
            return x+1

        def testfunc(n):
            for i in range(n):
                dummy(i)

        optimizer = _testinternalcapi.get_uop_optimizer()
        # Trigger specialization
        testfunc(8)
        with temporary_optimizer(optimizer):
            # Drop the callee that was used while warming up, then rebind
            # the same name to a fresh function before running again.
            del dummy
            gc.collect()

            def dummy(x):
                return x + 2
            testfunc(10)

        executor = get_first_executor(testfunc)
        # Honestly as long as it doesn't crash it's fine.
        # Whether we get an executor or not is non-deterministic,
        # because it's decided by when the function is freed.
        # This test is a little implementation specific.

    def test_promote_globals_to_constants(self):
        def testfunc(n):
            for i in range(n):
                x = range(i)
            return x

        _, executor = self._run_optimized(testfunc, 20)
        self.assertIsNotNone(executor)
        seen = {opname for opname, _, _ in executor}
        self.assertNotIn("_LOAD_GLOBAL_BUILTIN", seen)
        self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", seen)



# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit 7cce857

Please sign in to comment.