From 0eac77b7d571b63f223f5b8453b7a1c7ee6da342 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 25 Jul 2024 20:48:14 -0700 Subject: [PATCH 01/61] resolve conflict --- Include/cpython/optimizer.h | 137 ++++++++++++++++++++++++++++++++++++ Python/jit.c | 5 ++ Python/optimizer.c | 9 +++ 3 files changed, 151 insertions(+) create mode 100644 Include/cpython/optimizer.h diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h new file mode 100644 index 00000000000000..5367be1c631240 --- /dev/null +++ b/Include/cpython/optimizer.h @@ -0,0 +1,137 @@ + +#ifndef Py_LIMITED_API +#ifndef Py_OPTIMIZER_H +#define Py_OPTIMIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _PyExecutorLinkListNode { + struct _PyExecutorObject *next; + struct _PyExecutorObject *previous; +} _PyExecutorLinkListNode; + + +/* Bloom filter with m = 256 + * https://en.wikipedia.org/wiki/Bloom_filter */ +#define _Py_BLOOM_FILTER_WORDS 8 + +typedef struct { + uint32_t bits[_Py_BLOOM_FILTER_WORDS]; +} _PyBloomFilter; + +typedef struct { + uint8_t opcode; + uint8_t oparg; + uint8_t valid; + uint8_t linked; + int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). + _PyBloomFilter bloom; + _PyExecutorLinkListNode links; + PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). 
+} _PyVMData; + +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_EXIT + * uint16_t exit_index; + * uint16_t error_target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct { + uint16_t opcode:14; + uint16_t format:2; + uint16_t oparg; + union { + uint32_t target; + struct { + union { + uint16_t exit_index; + uint16_t jump_target; + }; + uint16_t error_target; + }; + }; + uint64_t operand; // A cache entry +} _PyUOpInstruction; + +typedef struct { + uint32_t target; + _Py_BackoffCounter temperature; + const struct _PyExecutorObject *executor; +} _PyExitData; + +typedef struct _PyExecutorObject { + PyObject_VAR_HEAD + const _PyUOpInstruction *trace; + _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + uint32_t exit_count; + uint32_t code_size; + size_t jit_size; + void *jit_code; + void *jit_side_entry; + _PyExitData exits[1]; +} _PyExecutorObject; + +typedef struct _PyOptimizerObject _PyOptimizerObject; + +/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
*/ +typedef int (*_Py_optimize_func)( + _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, + _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, + int curr_stackentries); + +struct _PyOptimizerObject { + PyObject_HEAD + _Py_optimize_func optimize; + /* Data needed by the optimizer goes here, but is opaque to the VM */ +}; + +/** Test support **/ +typedef struct { + _PyOptimizerObject base; + int64_t count; +} _PyCounterOptimizerObject; + +PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); + +_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); + +PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); + +PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); + +PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); + +void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); +void _Py_ExecutorDetach(_PyExecutorObject *); +void _Py_BloomFilter_Init(_PyBloomFilter *); +void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); +PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); +/* For testing */ +PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); +PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); + +#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 +#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 + +#ifdef _Py_TIER2 +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); +#else +# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) +# define _Py_Executors_InvalidateAll(A, B) ((void)0) +# define _Py_Executor_Invalidate(A) ((void)0) +#endif + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OPTIMIZER_H 
*/ +#endif /* Py_LIMITED_API */ diff --git a/Python/jit.c b/Python/jit.c index 33320761621c4c..8c10c9570e9930 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -461,6 +461,11 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; + + // static int compile_count = 0; + // if (++compile_count == 100) { + // _Py_Executor_Invalidate(executor); + // } return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 9198e410627dd4..8f7496f9e66c90 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1659,4 +1659,13 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) } } +void _Py_Executor_Invalidate(_PyExecutorObject *executor) +{ + if (executor->vm_data.valid) { + printf("Invalidating executor %p\n", executor); + unlink_executor(executor); + executor_clear(executor); + } +} + #endif /* _Py_TIER2 */ From d576296d97451ee80b1d6185c8b709eb1a94d5d0 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 24 Jun 2024 19:24:49 -0700 Subject: [PATCH 02/61] tests pass except ssl --- Python/jit.c | 8 ++++---- Python/optimizer.c | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8c10c9570e9930..939e6cfabc72b2 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -462,10 +462,10 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; - // static int compile_count = 0; - // if (++compile_count == 100) { - // _Py_Executor_Invalidate(executor); - // } + static int compile_count = 0; + if (++compile_count == 100) { + _Py_Executor_Invalidate(executor); + } return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 8f7496f9e66c90..9f6ea0f93a8397 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1662,7 +1662,6 @@ 
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) void _Py_Executor_Invalidate(_PyExecutorObject *executor) { if (executor->vm_data.valid) { - printf("Invalidating executor %p\n", executor); unlink_executor(executor); executor_clear(executor); } From 68e95d69754c51c29f63bf45a310eda9b98165f2 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 25 Jul 2024 21:04:24 -0700 Subject: [PATCH 03/61] remove file --- Include/cpython/optimizer.h | 137 ------------------------------------ 1 file changed, 137 deletions(-) delete mode 100644 Include/cpython/optimizer.h diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h deleted file mode 100644 index 5367be1c631240..00000000000000 --- a/Include/cpython/optimizer.h +++ /dev/null @@ -1,137 +0,0 @@ - -#ifndef Py_LIMITED_API -#ifndef Py_OPTIMIZER_H -#define Py_OPTIMIZER_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct _PyExecutorLinkListNode { - struct _PyExecutorObject *next; - struct _PyExecutorObject *previous; -} _PyExecutorLinkListNode; - - -/* Bloom filter with m = 256 - * https://en.wikipedia.org/wiki/Bloom_filter */ -#define _Py_BLOOM_FILTER_WORDS 8 - -typedef struct { - uint32_t bits[_Py_BLOOM_FILTER_WORDS]; -} _PyBloomFilter; - -typedef struct { - uint8_t opcode; - uint8_t oparg; - uint8_t valid; - uint8_t linked; - int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). - _PyBloomFilter bloom; - _PyExecutorLinkListNode links; - PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). 
-} _PyVMData; - -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_EXIT - * uint16_t exit_index; - * uint16_t error_target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct { - uint16_t opcode:14; - uint16_t format:2; - uint16_t oparg; - union { - uint32_t target; - struct { - union { - uint16_t exit_index; - uint16_t jump_target; - }; - uint16_t error_target; - }; - }; - uint64_t operand; // A cache entry -} _PyUOpInstruction; - -typedef struct { - uint32_t target; - _Py_BackoffCounter temperature; - const struct _PyExecutorObject *executor; -} _PyExitData; - -typedef struct _PyExecutorObject { - PyObject_VAR_HEAD - const _PyUOpInstruction *trace; - _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - uint32_t exit_count; - uint32_t code_size; - size_t jit_size; - void *jit_code; - void *jit_side_entry; - _PyExitData exits[1]; -} _PyExecutorObject; - -typedef struct _PyOptimizerObject _PyOptimizerObject; - -/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
*/ -typedef int (*_Py_optimize_func)( - _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, - _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, - int curr_stackentries); - -struct _PyOptimizerObject { - PyObject_HEAD - _Py_optimize_func optimize; - /* Data needed by the optimizer goes here, but is opaque to the VM */ -}; - -/** Test support **/ -typedef struct { - _PyOptimizerObject base; - int64_t count; -} _PyCounterOptimizerObject; - -PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); - -_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); - -PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); - -PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); - -PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); - -void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); -void _Py_ExecutorDetach(_PyExecutorObject *); -void _Py_BloomFilter_Init(_PyBloomFilter *); -void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); -PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -/* For testing */ -PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); -PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); - -#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 -#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 - -#ifdef _Py_TIER2 -PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); -PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); -PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); -#else -# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) -# define _Py_Executors_InvalidateAll(A, B) ((void)0) -# define _Py_Executor_Invalidate(A) ((void)0) -#endif - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPTIMIZER_H 
*/ -#endif /* Py_LIMITED_API */ From c903af4ad89e9e40f8438e0b4a671a06325c379c Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 14:14:45 -0700 Subject: [PATCH 04/61] this is broken --- Include/internal/pycore_optimizer.h | 3 +++ Python/jit.c | 11 +++++++---- Python/optimizer.c | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 19e54bf122a8bb..ebac174965502c 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -73,6 +73,7 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; + uint32_t jit_compile_count; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; @@ -123,9 +124,11 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); #ifdef _Py_TIER2 PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); #else # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) # define _Py_Executors_InvalidateAll(A, B) ((void)0) +# define _Py_Executor_Invalidate(A) ((void)0) #endif diff --git a/Python/jit.c b/Python/jit.c index 939e6cfabc72b2..3043a7e978dca7 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -401,6 +401,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz uintptr_t instruction_starts[UOP_MAX_TRACE_LENGTH]; size_t code_size = 0; size_t data_size = 0; + int jit_compile_count = executor->jit_compile_count; group = &trampoline; code_size += group->code_size; data_size += group->data_size; @@ -461,11 +462,12 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = 
total_size; + executor->jit_compile_count = jit_compile_count + 1; - static int compile_count = 0; - if (++compile_count == 100) { - _Py_Executor_Invalidate(executor); - } + // int compile_count = 0; + // if (++compile_count == 100) { + // _Py_Executor_Invalidate(executor); + // } return 0; } @@ -478,6 +480,7 @@ _PyJIT_Free(_PyExecutorObject *executor) executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; + // executor->jit_compile_count = 0; if (jit_free(memory, size)) { PyErr_WriteUnraisable(NULL); } diff --git a/Python/optimizer.c b/Python/optimizer.c index 9f6ea0f93a8397..c8ec2fb3586265 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,6 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; + // executor->jit_compile_count=0; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; From 5ca6b7fd1533ae5239b78d68fedaa27d0e229220 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 14:52:50 -0700 Subject: [PATCH 05/61] gc approach --- Include/internal/pycore_optimizer.h | 6 +++- Python/gc.c | 1 + Python/jit.c | 8 +---- Python/optimizer.c | 52 ++++++++++++++++++++++++++++- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ebac174965502c..a56823fcb770e3 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -73,7 +73,7 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; - uint32_t jit_compile_count; + bool has_run; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; @@ -125,10 +125,14 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); 
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); +PyAPI_FUNC(void) _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation); + #else # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) # define _Py_Executors_InvalidateAll(A, B) ((void)0) # define _Py_Executor_Invalidate(A) ((void)0) +# define _Py_Executors_InvalidateOld(A, B) ((void)0) + #endif diff --git a/Python/gc.c b/Python/gc.c index 3d36792ffb27fc..2c36196f7d99b6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1510,6 +1510,7 @@ gc_collect_region(PyThreadState *tstate, int untrack, struct gc_collection_stats *stats) { + _Py_Executors_InvalidateOld(tstate->interp, 0); PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ diff --git a/Python/jit.c b/Python/jit.c index 3043a7e978dca7..b74afdf349e522 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -144,6 +144,7 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, #define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) #define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) #define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) +int total_compile_count=0; // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! 
@@ -401,7 +402,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz uintptr_t instruction_starts[UOP_MAX_TRACE_LENGTH]; size_t code_size = 0; size_t data_size = 0; - int jit_compile_count = executor->jit_compile_count; group = &trampoline; code_size += group->code_size; data_size += group->data_size; @@ -462,12 +462,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; - executor->jit_compile_count = jit_compile_count + 1; - // int compile_count = 0; - // if (++compile_count == 100) { - // _Py_Executor_Invalidate(executor); - // } return 0; } @@ -480,7 +475,6 @@ _PyJIT_Free(_PyExecutorObject *executor) executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - // executor->jit_compile_count = 0; if (jit_free(memory, size)) { PyErr_WriteUnraisable(NULL); } diff --git a/Python/optimizer.c b/Python/optimizer.c index c8ec2fb3586265..371c32704a40d8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,7 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - // executor->jit_compile_count=0; + executor->has_run = true; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1668,4 +1668,54 @@ void _Py_Executor_Invalidate(_PyExecutorObject *executor) } } +void +_Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) +{ + /* Walk the list of executors */ + /* TO DO -- Use a tree to avoid traversing as many objects */ + bool no_memory = false; + PyObject *invalidate = PyList_New(0); + if (invalidate == NULL) { + PyErr_Clear(); + no_memory = true; + } + int total_executors = 0; + int invalidated_executors = 0; + /* Clearing an executor can deallocate others, so we need to 
make a list of + * executors to invalidate first */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + assert(exec->vm_data.valid); + _PyExecutorObject *next = exec->vm_data.links.next; + total_executors++; + if (!exec->has_run) { + invalidated_executors++; + unlink_executor(exec); + if (no_memory) { + exec->vm_data.valid = 0; + } else { + if (PyList_Append(invalidate, (PyObject *)exec) < 0) { + PyErr_Clear(); + no_memory = true; + exec->vm_data.valid = 0; + } + } + if (is_invalidation) { + OPT_STAT_INC(executors_invalidated); + } + } else { + exec->has_run = false; + } + exec = next; + } + if (invalidate != NULL) { + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { + _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); + executor_clear(exec); + } + Py_DECREF(invalidate); + } + printf("Invalidated %d out of %d executors\n", invalidated_executors, total_executors); + return; +} + #endif /* _Py_TIER2 */ From beb4f65bd5b672fa6a352bf4b894374549157aa5 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 15:20:39 -0700 Subject: [PATCH 06/61] rebase --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 01e88a34d10b6a..2e063b9b52fa7f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4842,8 +4842,8 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); + ((_PyExecutorObject *)executor)->has_run = true; } - tier2 op(_FATAL_ERROR, (--)) { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0de5c8a0408d8c..8963f4a966b36f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5459,6 +5459,7 @@ current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject 
*)executor)->vm_data.valid); + ((_PyExecutorObject *)executor)->has_run = true; break; } From 427dbf55d8790f807451e28033f50aa6ccfad725 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 19:05:20 -0700 Subject: [PATCH 07/61] Update has_run to run_count --- Include/internal/pycore_optimizer.h | 2 +- Python/bytecodes.c | 3 ++- Python/executor_cases.c.h | 2 +- Python/jit.c | 2 -- Python/optimizer.c | 7 +++---- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index a56823fcb770e3..547232e382b608 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -73,7 +73,7 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; - bool has_run; + uint32_t run_count; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2e063b9b52fa7f..d0fe7e09e29f03 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4842,8 +4842,9 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->has_run = true; + ((_PyExecutorObject *)executor)->run_count++; } + tier2 op(_FATAL_ERROR, (--)) { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8963f4a966b36f..9b49d92d196965 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5459,7 +5459,7 @@ current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->has_run = true; + ((_PyExecutorObject *)executor)->run_count++; break; } diff --git a/Python/jit.c b/Python/jit.c index b74afdf349e522..33320761621c4c 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -144,7 +144,6 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t 
value, uint8_t value_start, #define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) #define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) #define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) -int total_compile_count=0; // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! @@ -462,7 +461,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; - return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 371c32704a40d8..345ca47ffc53b6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,7 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->has_run = true; + executor->run_count++; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1687,7 +1687,7 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; total_executors++; - if (!exec->has_run) { + if (exec->run_count < 4) { invalidated_executors++; unlink_executor(exec); if (no_memory) { @@ -1703,7 +1703,7 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) OPT_STAT_INC(executors_invalidated); } } else { - exec->has_run = false; + exec->run_count = 0; } exec = next; } @@ -1714,7 +1714,6 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) } Py_DECREF(invalidate); } - printf("Invalidated %d out of %d executors\n", invalidated_executors, total_executors); return; } From 92d5590abf91ab15cfaa102cc32fce536b96b12a Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 13 Aug 2024 15:49:30 -0700 
Subject: [PATCH 08/61] update initialized run_count and move invalidate old --- Python/gc.c | 3 ++- Python/gc_free_threading.c | 2 ++ Python/optimizer.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 2c36196f7d99b6..2f83a8cbe28085 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1510,7 +1510,6 @@ gc_collect_region(PyThreadState *tstate, int untrack, struct gc_collection_stats *stats) { - _Py_Executors_InvalidateOld(tstate->interp, 0); PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ @@ -1863,6 +1862,8 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(gcstate, "stop", generation, &stats); + _Py_Executors_InvalidateOld(tstate->interp, 0); + } _PyErr_SetRaisedException(tstate, exc); GC_STAT_ADD(generation, objects_collected, stats.collected); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 54de0c2671ae68..89a1ae13152c0e 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1362,6 +1362,8 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(tstate, "stop", generation, m, n); + _Py_Executors_InvalidateOld(tstate->interp, 0); + } assert(!_PyErr_Occurred(tstate)); diff --git a/Python/optimizer.c b/Python/optimizer.c index 345ca47ffc53b6..f506ba3ece5b9a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,7 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->run_count++; + executor->run_count = __UINT32_MAX__; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; From 
0cdf638a5262535c555493076e496b1edda3c4bc Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 13 Aug 2024 16:14:09 -0700 Subject: [PATCH 09/61] set threshold to 1` --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index f506ba3ece5b9a..6ee8aa85b17b72 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1687,7 +1687,7 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; total_executors++; - if (exec->run_count < 4) { + if (exec->run_count < 1) { invalidated_executors++; unlink_executor(exec); if (no_memory) { From 58e744713e93613fc8395ec7bb356575e05ecb25 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 14 Aug 2024 15:35:31 -0700 Subject: [PATCH 10/61] move incrementing run count into a new op --- .../java/org/python/testbed/PythonSuite.kt | 35 ---- Include/internal/pycore_uop_ids.h | 177 +++++++++--------- Include/internal/pycore_uop_metadata.h | 4 + Python/bytecodes.c | 5 +- Python/executor_cases.c.h | 6 +- Python/optimizer.c | 1 + Python/optimizer_cases.c.h | 4 + 7 files changed, 107 insertions(+), 125 deletions(-) delete mode 100644 Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt deleted file mode 100644 index 0e888ab71d87da..00000000000000 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ /dev/null @@ -1,35 +0,0 @@ -package org.python.testbed - -import androidx.test.annotation.UiThreadTest -import androidx.test.platform.app.InstrumentationRegistry -import androidx.test.ext.junit.runners.AndroidJUnit4 - -import org.junit.Test -import org.junit.runner.RunWith - -import org.junit.Assert.* - - -@RunWith(AndroidJUnit4::class) -class 
PythonSuite { - @Test - @UiThreadTest - fun testPython() { - val start = System.currentTimeMillis() - try { - val context = - InstrumentationRegistry.getInstrumentation().targetContext - val args = - InstrumentationRegistry.getArguments().getString("pythonArgs", "") - val status = PythonTestRunner(context).run(args) - assertEquals(0, status) - } finally { - // Make sure the process lives long enough for the test script to - // detect it (see `find_pid` in android.py). - val delay = 2000 - (System.currentTimeMillis() - start) - if (delay > 0) { - Thread.sleep(delay) - } - } - } -} diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index b950f760d74ac7..a62cf527db373d 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -138,13 +138,14 @@ extern "C" { #define _GUARD_TYPE_VERSION 385 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 386 -#define _INIT_CALL_PY_EXACT_ARGS 387 -#define _INIT_CALL_PY_EXACT_ARGS_0 388 -#define _INIT_CALL_PY_EXACT_ARGS_1 389 -#define _INIT_CALL_PY_EXACT_ARGS_2 390 -#define _INIT_CALL_PY_EXACT_ARGS_3 391 -#define _INIT_CALL_PY_EXACT_ARGS_4 392 +#define _INCREMENT_RUN_COUNT 386 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 387 +#define _INIT_CALL_PY_EXACT_ARGS 388 +#define _INIT_CALL_PY_EXACT_ARGS_0 389 +#define _INIT_CALL_PY_EXACT_ARGS_1 390 +#define _INIT_CALL_PY_EXACT_ARGS_2 391 +#define _INIT_CALL_PY_EXACT_ARGS_3 392 +#define _INIT_CALL_PY_EXACT_ARGS_4 393 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -156,65 +157,65 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 
393 -#define _IS_NONE 394 +#define _INTERNAL_INCREMENT_OPT_COUNTER 394 +#define _IS_NONE 395 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 395 -#define _ITER_CHECK_RANGE 396 -#define _ITER_CHECK_TUPLE 397 -#define _ITER_JUMP_LIST 398 -#define _ITER_JUMP_RANGE 399 -#define _ITER_JUMP_TUPLE 400 -#define _ITER_NEXT_LIST 401 -#define _ITER_NEXT_RANGE 402 -#define _ITER_NEXT_TUPLE 403 -#define _JUMP_TO_TOP 404 +#define _ITER_CHECK_LIST 396 +#define _ITER_CHECK_RANGE 397 +#define _ITER_CHECK_TUPLE 398 +#define _ITER_JUMP_LIST 399 +#define _ITER_JUMP_RANGE 400 +#define _ITER_JUMP_TUPLE 401 +#define _ITER_NEXT_LIST 402 +#define _ITER_NEXT_RANGE 403 +#define _ITER_NEXT_TUPLE 404 +#define _JUMP_TO_TOP 405 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 405 -#define _LOAD_ATTR_CLASS 406 -#define _LOAD_ATTR_CLASS_0 407 -#define _LOAD_ATTR_CLASS_1 408 +#define _LOAD_ATTR 406 +#define _LOAD_ATTR_CLASS 407 +#define _LOAD_ATTR_CLASS_0 408 +#define _LOAD_ATTR_CLASS_1 409 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 409 -#define _LOAD_ATTR_INSTANCE_VALUE_0 410 -#define _LOAD_ATTR_INSTANCE_VALUE_1 411 -#define _LOAD_ATTR_METHOD_LAZY_DICT 412 -#define _LOAD_ATTR_METHOD_NO_DICT 413 -#define _LOAD_ATTR_METHOD_WITH_VALUES 414 -#define _LOAD_ATTR_MODULE 415 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 416 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 417 -#define _LOAD_ATTR_PROPERTY_FRAME 418 -#define _LOAD_ATTR_SLOT 419 -#define _LOAD_ATTR_SLOT_0 420 -#define _LOAD_ATTR_SLOT_1 421 -#define _LOAD_ATTR_WITH_HINT 422 +#define _LOAD_ATTR_INSTANCE_VALUE 410 +#define _LOAD_ATTR_INSTANCE_VALUE_0 411 +#define _LOAD_ATTR_INSTANCE_VALUE_1 412 +#define _LOAD_ATTR_METHOD_LAZY_DICT 413 +#define _LOAD_ATTR_METHOD_NO_DICT 414 +#define _LOAD_ATTR_METHOD_WITH_VALUES 415 +#define _LOAD_ATTR_MODULE 416 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 417 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 418 +#define 
_LOAD_ATTR_PROPERTY_FRAME 419 +#define _LOAD_ATTR_SLOT 420 +#define _LOAD_ATTR_SLOT_0 421 +#define _LOAD_ATTR_SLOT_1 422 +#define _LOAD_ATTR_WITH_HINT 423 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 423 -#define _LOAD_CONST_INLINE_BORROW 424 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 425 -#define _LOAD_CONST_INLINE_WITH_NULL 426 +#define _LOAD_CONST_INLINE 424 +#define _LOAD_CONST_INLINE_BORROW 425 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 426 +#define _LOAD_CONST_INLINE_WITH_NULL 427 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 427 -#define _LOAD_FAST_0 428 -#define _LOAD_FAST_1 429 -#define _LOAD_FAST_2 430 -#define _LOAD_FAST_3 431 -#define _LOAD_FAST_4 432 -#define _LOAD_FAST_5 433 -#define _LOAD_FAST_6 434 -#define _LOAD_FAST_7 435 +#define _LOAD_FAST 428 +#define _LOAD_FAST_0 429 +#define _LOAD_FAST_1 430 +#define _LOAD_FAST_2 431 +#define _LOAD_FAST_3 432 +#define _LOAD_FAST_4 433 +#define _LOAD_FAST_5 434 +#define _LOAD_FAST_6 435 +#define _LOAD_FAST_7 436 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 436 -#define _LOAD_GLOBAL_BUILTINS 437 -#define _LOAD_GLOBAL_MODULE 438 +#define _LOAD_GLOBAL 437 +#define _LOAD_GLOBAL_BUILTINS 438 +#define _LOAD_GLOBAL_MODULE 439 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -227,59 +228,59 @@ extern "C" { #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 439 -#define _MONITOR_CALL 440 -#define _MONITOR_JUMP_BACKWARD 441 -#define _MONITOR_RESUME 442 +#define _MAYBE_EXPAND_METHOD 440 +#define _MONITOR_CALL 441 +#define 
_MONITOR_JUMP_BACKWARD 442 +#define _MONITOR_RESUME 443 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 443 -#define _POP_JUMP_IF_TRUE 444 +#define _POP_JUMP_IF_FALSE 444 +#define _POP_JUMP_IF_TRUE 445 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 445 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 446 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 446 +#define _PUSH_FRAME 447 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 447 -#define _PY_FRAME_KW 448 -#define _QUICKEN_RESUME 449 -#define _REPLACE_WITH_TRUE 450 +#define _PY_FRAME_GENERAL 448 +#define _PY_FRAME_KW 449 +#define _QUICKEN_RESUME 450 +#define _REPLACE_WITH_TRUE 451 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 451 -#define _SEND 452 -#define _SEND_GEN_FRAME 453 +#define _SAVE_RETURN_OFFSET 452 +#define _SEND 453 +#define _SEND_GEN_FRAME 454 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 454 -#define _STORE_ATTR 455 -#define _STORE_ATTR_INSTANCE_VALUE 456 -#define _STORE_ATTR_SLOT 457 -#define _STORE_ATTR_WITH_HINT 458 +#define _START_EXECUTOR 455 +#define _STORE_ATTR 456 +#define _STORE_ATTR_INSTANCE_VALUE 457 +#define _STORE_ATTR_SLOT 458 +#define _STORE_ATTR_WITH_HINT 459 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 459 -#define _STORE_FAST_0 460 -#define _STORE_FAST_1 461 -#define _STORE_FAST_2 462 -#define _STORE_FAST_3 463 -#define _STORE_FAST_4 464 -#define _STORE_FAST_5 465 -#define _STORE_FAST_6 466 -#define _STORE_FAST_7 467 +#define _STORE_FAST 460 +#define _STORE_FAST_0 461 +#define _STORE_FAST_1 462 +#define _STORE_FAST_2 463 +#define _STORE_FAST_3 464 +#define _STORE_FAST_4 465 +#define _STORE_FAST_5 466 +#define _STORE_FAST_6 467 +#define _STORE_FAST_7 468 #define _STORE_FAST_LOAD_FAST 
STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 468 -#define _STORE_SUBSCR 469 +#define _STORE_SLICE 469 +#define _STORE_SUBSCR 470 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 470 -#define _TO_BOOL 471 +#define _TIER2_RESUME_CHECK 471 +#define _TO_BOOL 472 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -289,14 +290,14 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 472 +#define _UNPACK_SEQUENCE 473 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE #define __DO_CALL_FUNCTION_EX _DO_CALL_FUNCTION_EX -#define MAX_UOP_ID 472 +#define MAX_UOP_ID 473 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index e2cba4dc0dfc81..63a0446da7914f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,6 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, + [_INCREMENT_RUN_COUNT] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -413,6 +414,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_IMPORT_FROM] = "_IMPORT_FROM", [_IMPORT_NAME] = "_IMPORT_NAME", + [_INCREMENT_RUN_COUNT] = "_INCREMENT_RUN_COUNT", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = 
"_INIT_CALL_PY_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS_0] = "_INIT_CALL_PY_EXACT_ARGS_0", @@ -1062,6 +1064,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; + case _INCREMENT_RUN_COUNT: + return 0; case _FATAL_ERROR: return 0; case _CHECK_VALIDITY_AND_SET_IP: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d0fe7e09e29f03..1a8827a0c5ec8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4842,7 +4842,10 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->run_count++; + } + + tier2 op(_INCREMENT_RUN_COUNT, (--)) { + current_executor->run_count++; } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9b49d92d196965..578272a77f370a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5459,7 +5459,11 @@ current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->run_count++; + break; + } + + case _INCREMENT_RUN_COUNT: { + current_executor->run_count++; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 6ee8aa85b17b72..6b473bb484ab93 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -565,6 +565,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); + ADD_TO_TRACE(_INCREMENT_RUN_COUNT, 0, 0, 0); uint32_t target = 0; for (;;) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 672fec3946f2fb..92c5e4e4783239 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2375,6 +2375,10 @@ break; } + case _INCREMENT_RUN_COUNT: { + break; + } + case _FATAL_ERROR: { break; } From 6c047e4a2d7fb423aa094c3aa68001ec09c3096a Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: 
Tue, 20 Aug 2024 16:17:48 -0700 Subject: [PATCH 11/61] add invalidation threshold in gc of 10 --- Python/gc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 2f83a8cbe28085..e354f14af70898 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1825,6 +1825,8 @@ Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { GCState *gcstate = &tstate->interp->gc; + static int invalidation_counter = 0; + const int invalidation_threshold = 10; int expected = 0; if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { @@ -1862,8 +1864,11 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(gcstate, "stop", generation, &stats); - _Py_Executors_InvalidateOld(tstate->interp, 0); + if (++invalidation_counter >= invalidation_threshold) { + invalidation_counter = 0; + _Py_Executors_InvalidateOld(tstate->interp, 0); + } } _PyErr_SetRaisedException(tstate, exc); GC_STAT_ADD(generation, objects_collected, stats.collected); From 26450235373ab316bcd17025b279ba1bd57f5d2a Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 20 Aug 2024 16:22:18 -0700 Subject: [PATCH 12/61] move back to incremenet --- Python/gc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index e354f14af70898..1386c6143cae85 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1852,6 +1852,11 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) break; case 1: gc_collect_increment(tstate, &stats); + if (++invalidation_counter >= invalidation_threshold) { + invalidation_counter = 0; + printf("Invalidating old objects\n"); + _Py_Executors_InvalidateOld(tstate->interp, 0); + } break; case 2: gc_collect_full(tstate, &stats); @@ -1864,11 +1869,6 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { 
invoke_gc_callback(gcstate, "stop", generation, &stats); - - if (++invalidation_counter >= invalidation_threshold) { - invalidation_counter = 0; - _Py_Executors_InvalidateOld(tstate->interp, 0); - } } _PyErr_SetRaisedException(tstate, exc); GC_STAT_ADD(generation, objects_collected, stats.collected); From 7c7ae98f6b99256657fe8701bab178ea106215eb Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 20 Aug 2024 16:27:33 -0700 Subject: [PATCH 13/61] remove print --- Python/gc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 1386c6143cae85..2e6c2eb9260b4c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1854,7 +1854,6 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) gc_collect_increment(tstate, &stats); if (++invalidation_counter >= invalidation_threshold) { invalidation_counter = 0; - printf("Invalidating old objects\n"); _Py_Executors_InvalidateOld(tstate->interp, 0); } break; From 6d6d306685e0e4a8484c7a767284dfb5de86216c Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 11:03:00 -0700 Subject: [PATCH 14/61] move invalidation to executor creation --- Python/gc.c | 6 ------ Python/gc_free_threading.c | 2 -- Python/optimizer.c | 7 +++++++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 2e6c2eb9260b4c..3d36792ffb27fc 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1825,8 +1825,6 @@ Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { GCState *gcstate = &tstate->interp->gc; - static int invalidation_counter = 0; - const int invalidation_threshold = 10; int expected = 0; if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { @@ -1852,10 +1850,6 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) break; case 1: gc_collect_increment(tstate, &stats); - if (++invalidation_counter >= invalidation_threshold) { - invalidation_counter = 0; - 
_Py_Executors_InvalidateOld(tstate->interp, 0); - } break; case 2: gc_collect_full(tstate, &stats); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 89a1ae13152c0e..54de0c2671ae68 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1362,8 +1362,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(tstate, "stop", generation, m, n); - _Py_Executors_InvalidateOld(tstate->interp, 0); - } assert(!_PyErr_Occurred(tstate)); diff --git a/Python/optimizer.c b/Python/optimizer.c index 6b473bb484ab93..5f89fa8905ad54 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -157,6 +157,7 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer) return old == NULL ? -1 : 0; } +int executors_created = 0; /* Returns 1 if optimized, 0 if not optimized, and -1 for an error. * If optimized, *executor_ptr contains a new reference to the executor */ @@ -182,6 +183,12 @@ _PyOptimizer_Optimize( if (err <= 0) { return err; } + + if (++executors_created >= 5) { + executors_created = 0; + _Py_Executors_InvalidateOld(interp, 0); + } + assert(*executor_ptr != NULL); if (progress_needed) { int index = get_index_for_executor(code, start); From 4d086feae3c28b3b611b4569bef1ee0899e534f8 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 11:06:19 -0700 Subject: [PATCH 15/61] change threshold --- .../java/org/python/testbed/PythonSuite.kt | 35 +++++++++++++++++++ Python/optimizer.c | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt new file mode 100644 index 00000000000000..282ad35f4d6890 --- /dev/null +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt 
@@ -0,0 +1,35 @@ +package org.python.testbed + +import androidx.test.annotation.UiThreadTest +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + + +@RunWith(AndroidJUnit4::class) +class PythonSuite { + @Test + @UiThreadTest + fun testPython() { + val start = System.currentTimeMillis() + try { + val context = + InstrumentationRegistry.getInstrumentation().targetContext + val args = + InstrumentationRegistry.getArguments().getString("pythonArgs", "") + val status = PythonTestRunner(context).run(args) + assertEquals(0, status) + } finally { + // Make sure the process lives long enough for the test script to + // detect it (see `find_pid` in android.py). + val delay = 2000 - (System.currentTimeMillis() - start) + if (delay > 0) { + Thread.sleep(delay) + } + } + } +} \ No newline at end of file diff --git a/Python/optimizer.c b/Python/optimizer.c index 5f89fa8905ad54..54c00d4b4c0e28 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -184,7 +184,7 @@ _PyOptimizer_Optimize( return err; } - if (++executors_created >= 5) { + if (++executors_created >= 10) { executors_created = 0; _Py_Executors_InvalidateOld(interp, 0); } From d08e45aca081135a47a26dd07ace97d72591db6d Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 11:06:57 -0700 Subject: [PATCH 16/61] new line --- .../app/src/androidTest/java/org/python/testbed/PythonSuite.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt index 282ad35f4d6890..0e888ab71d87da 100644 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt @@ -32,4 +32,4 @@ class PythonSuite { } } } -} \ No 
newline at end of file +} From 63158771209063bfd9134a2dc867eec0819bb3e7 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 14:40:34 -0700 Subject: [PATCH 17/61] update constant --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 54c00d4b4c0e28..6cd5c5ab99ae29 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1202,7 +1202,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->run_count = __UINT32_MAX__; + executor->run_count = UINT32_MAX; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; From 622c2669e6f1d8c59cd498f780771e50a5e0bd89 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 21:44:17 +0000 Subject: [PATCH 18/61] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst new file mode 100644 index 00000000000000..d5dc84426b175a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst @@ -0,0 +1 @@ +Improved JIT memory consumption by invalidating cold executors From 7c6704c114ecc7c7be18ce04fdb6e2803b8f8ed6 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 25 Jul 2024 20:48:14 -0700 Subject: [PATCH 19/61] resolve conflict --- Include/cpython/optimizer.h | 137 
++++++++++++++++++++++++++++++++++++ Python/jit.c | 5 ++ Python/optimizer.c | 9 +++ 3 files changed, 151 insertions(+) create mode 100644 Include/cpython/optimizer.h diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h new file mode 100644 index 00000000000000..5367be1c631240 --- /dev/null +++ b/Include/cpython/optimizer.h @@ -0,0 +1,137 @@ + +#ifndef Py_LIMITED_API +#ifndef Py_OPTIMIZER_H +#define Py_OPTIMIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _PyExecutorLinkListNode { + struct _PyExecutorObject *next; + struct _PyExecutorObject *previous; +} _PyExecutorLinkListNode; + + +/* Bloom filter with m = 256 + * https://en.wikipedia.org/wiki/Bloom_filter */ +#define _Py_BLOOM_FILTER_WORDS 8 + +typedef struct { + uint32_t bits[_Py_BLOOM_FILTER_WORDS]; +} _PyBloomFilter; + +typedef struct { + uint8_t opcode; + uint8_t oparg; + uint8_t valid; + uint8_t linked; + int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). + _PyBloomFilter bloom; + _PyExecutorLinkListNode links; + PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). 
+} _PyVMData; + +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_EXIT + * uint16_t exit_index; + * uint16_t error_target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct { + uint16_t opcode:14; + uint16_t format:2; + uint16_t oparg; + union { + uint32_t target; + struct { + union { + uint16_t exit_index; + uint16_t jump_target; + }; + uint16_t error_target; + }; + }; + uint64_t operand; // A cache entry +} _PyUOpInstruction; + +typedef struct { + uint32_t target; + _Py_BackoffCounter temperature; + const struct _PyExecutorObject *executor; +} _PyExitData; + +typedef struct _PyExecutorObject { + PyObject_VAR_HEAD + const _PyUOpInstruction *trace; + _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + uint32_t exit_count; + uint32_t code_size; + size_t jit_size; + void *jit_code; + void *jit_side_entry; + _PyExitData exits[1]; +} _PyExecutorObject; + +typedef struct _PyOptimizerObject _PyOptimizerObject; + +/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
*/ +typedef int (*_Py_optimize_func)( + _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, + _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, + int curr_stackentries); + +struct _PyOptimizerObject { + PyObject_HEAD + _Py_optimize_func optimize; + /* Data needed by the optimizer goes here, but is opaque to the VM */ +}; + +/** Test support **/ +typedef struct { + _PyOptimizerObject base; + int64_t count; +} _PyCounterOptimizerObject; + +PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); + +_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); + +PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); + +PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); + +PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); + +void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); +void _Py_ExecutorDetach(_PyExecutorObject *); +void _Py_BloomFilter_Init(_PyBloomFilter *); +void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); +PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); +/* For testing */ +PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); +PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); + +#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 +#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 + +#ifdef _Py_TIER2 +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); +#else +# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) +# define _Py_Executors_InvalidateAll(A, B) ((void)0) +# define _Py_Executor_Invalidate(A) ((void)0) +#endif + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OPTIMIZER_H 
*/ +#endif /* Py_LIMITED_API */ diff --git a/Python/jit.c b/Python/jit.c index 33320761621c4c..8c10c9570e9930 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -461,6 +461,11 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; + + // static int compile_count = 0; + // if (++compile_count == 100) { + // _Py_Executor_Invalidate(executor); + // } return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 9198e410627dd4..8f7496f9e66c90 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1659,4 +1659,13 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) } } +void _Py_Executor_Invalidate(_PyExecutorObject *executor) +{ + if (executor->vm_data.valid) { + printf("Invalidating executor %p\n", executor); + unlink_executor(executor); + executor_clear(executor); + } +} + #endif /* _Py_TIER2 */ From 1d72fdda7f1e2299923c9f404dd8b6acf95e99f8 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 24 Jun 2024 19:24:49 -0700 Subject: [PATCH 20/61] tests pass except ssl --- Python/jit.c | 8 ++++---- Python/optimizer.c | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8c10c9570e9930..939e6cfabc72b2 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -462,10 +462,10 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; - // static int compile_count = 0; - // if (++compile_count == 100) { - // _Py_Executor_Invalidate(executor); - // } + static int compile_count = 0; + if (++compile_count == 100) { + _Py_Executor_Invalidate(executor); + } return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 8f7496f9e66c90..9f6ea0f93a8397 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1662,7 +1662,6 @@ 
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) void _Py_Executor_Invalidate(_PyExecutorObject *executor) { if (executor->vm_data.valid) { - printf("Invalidating executor %p\n", executor); unlink_executor(executor); executor_clear(executor); } From 1778185cf88217cceabc2731b7ca55c7c90b9c7d Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 25 Jul 2024 21:04:24 -0700 Subject: [PATCH 21/61] remove file --- Include/cpython/optimizer.h | 137 ------------------------------------ 1 file changed, 137 deletions(-) delete mode 100644 Include/cpython/optimizer.h diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h deleted file mode 100644 index 5367be1c631240..00000000000000 --- a/Include/cpython/optimizer.h +++ /dev/null @@ -1,137 +0,0 @@ - -#ifndef Py_LIMITED_API -#ifndef Py_OPTIMIZER_H -#define Py_OPTIMIZER_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct _PyExecutorLinkListNode { - struct _PyExecutorObject *next; - struct _PyExecutorObject *previous; -} _PyExecutorLinkListNode; - - -/* Bloom filter with m = 256 - * https://en.wikipedia.org/wiki/Bloom_filter */ -#define _Py_BLOOM_FILTER_WORDS 8 - -typedef struct { - uint32_t bits[_Py_BLOOM_FILTER_WORDS]; -} _PyBloomFilter; - -typedef struct { - uint8_t opcode; - uint8_t oparg; - uint8_t valid; - uint8_t linked; - int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). - _PyBloomFilter bloom; - _PyExecutorLinkListNode links; - PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). 
-} _PyVMData; - -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_EXIT - * uint16_t exit_index; - * uint16_t error_target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct { - uint16_t opcode:14; - uint16_t format:2; - uint16_t oparg; - union { - uint32_t target; - struct { - union { - uint16_t exit_index; - uint16_t jump_target; - }; - uint16_t error_target; - }; - }; - uint64_t operand; // A cache entry -} _PyUOpInstruction; - -typedef struct { - uint32_t target; - _Py_BackoffCounter temperature; - const struct _PyExecutorObject *executor; -} _PyExitData; - -typedef struct _PyExecutorObject { - PyObject_VAR_HEAD - const _PyUOpInstruction *trace; - _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - uint32_t exit_count; - uint32_t code_size; - size_t jit_size; - void *jit_code; - void *jit_side_entry; - _PyExitData exits[1]; -} _PyExecutorObject; - -typedef struct _PyOptimizerObject _PyOptimizerObject; - -/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
*/ -typedef int (*_Py_optimize_func)( - _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, - _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, - int curr_stackentries); - -struct _PyOptimizerObject { - PyObject_HEAD - _Py_optimize_func optimize; - /* Data needed by the optimizer goes here, but is opaque to the VM */ -}; - -/** Test support **/ -typedef struct { - _PyOptimizerObject base; - int64_t count; -} _PyCounterOptimizerObject; - -PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); - -_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); - -PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); - -PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); - -PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); - -void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); -void _Py_ExecutorDetach(_PyExecutorObject *); -void _Py_BloomFilter_Init(_PyBloomFilter *); -void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); -PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -/* For testing */ -PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); -PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); - -#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 -#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 - -#ifdef _Py_TIER2 -PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); -PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); -PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); -#else -# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) -# define _Py_Executors_InvalidateAll(A, B) ((void)0) -# define _Py_Executor_Invalidate(A) ((void)0) -#endif - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPTIMIZER_H 
*/ -#endif /* Py_LIMITED_API */ From 25068215bf7cb51df60f17c2d511846bccefdd2a Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 14:14:45 -0700 Subject: [PATCH 22/61] this is broken --- Include/internal/pycore_optimizer.h | 3 +++ Python/jit.c | 11 +++++++---- Python/optimizer.c | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 19e54bf122a8bb..ebac174965502c 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -73,6 +73,7 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; + uint32_t jit_compile_count; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; @@ -123,9 +124,11 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); #ifdef _Py_TIER2 PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); #else # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) # define _Py_Executors_InvalidateAll(A, B) ((void)0) +# define _Py_Executor_Invalidate(A) ((void)0) #endif diff --git a/Python/jit.c b/Python/jit.c index 939e6cfabc72b2..3043a7e978dca7 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -401,6 +401,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz uintptr_t instruction_starts[UOP_MAX_TRACE_LENGTH]; size_t code_size = 0; size_t data_size = 0; + int jit_compile_count = executor->jit_compile_count; group = &trampoline; code_size += group->code_size; data_size += group->data_size; @@ -461,11 +462,12 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = 
total_size; + executor->jit_compile_count = jit_compile_count + 1; - static int compile_count = 0; - if (++compile_count == 100) { - _Py_Executor_Invalidate(executor); - } + // int compile_count = 0; + // if (++compile_count == 100) { + // _Py_Executor_Invalidate(executor); + // } return 0; } @@ -478,6 +480,7 @@ _PyJIT_Free(_PyExecutorObject *executor) executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; + // executor->jit_compile_count = 0; if (jit_free(memory, size)) { PyErr_WriteUnraisable(NULL); } diff --git a/Python/optimizer.c b/Python/optimizer.c index 9f6ea0f93a8397..c8ec2fb3586265 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,6 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; + // executor->jit_compile_count=0; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; From fca6dec77ec5dbc928491510a944ba8a133aae66 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 14:52:50 -0700 Subject: [PATCH 23/61] gc approach --- Include/internal/pycore_optimizer.h | 6 +++- Python/gc.c | 1 + Python/jit.c | 8 +---- Python/optimizer.c | 52 ++++++++++++++++++++++++++++- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ebac174965502c..a56823fcb770e3 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -73,7 +73,7 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; - uint32_t jit_compile_count; + bool has_run; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; @@ -125,10 +125,14 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); 
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); +PyAPI_FUNC(void) _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation); + #else # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) # define _Py_Executors_InvalidateAll(A, B) ((void)0) # define _Py_Executor_Invalidate(A) ((void)0) +# define _Py_Executors_InvalidateOld(A, B) ((void)0) + #endif diff --git a/Python/gc.c b/Python/gc.c index 3d36792ffb27fc..2c36196f7d99b6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1510,6 +1510,7 @@ gc_collect_region(PyThreadState *tstate, int untrack, struct gc_collection_stats *stats) { + _Py_Executors_InvalidateOld(tstate->interp, 0); PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ diff --git a/Python/jit.c b/Python/jit.c index 3043a7e978dca7..b74afdf349e522 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -144,6 +144,7 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, #define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) #define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) #define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) +int total_compile_count=0; // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! 
@@ -401,7 +402,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz uintptr_t instruction_starts[UOP_MAX_TRACE_LENGTH]; size_t code_size = 0; size_t data_size = 0; - int jit_compile_count = executor->jit_compile_count; group = &trampoline; code_size += group->code_size; data_size += group->data_size; @@ -462,12 +462,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; - executor->jit_compile_count = jit_compile_count + 1; - // int compile_count = 0; - // if (++compile_count == 100) { - // _Py_Executor_Invalidate(executor); - // } return 0; } @@ -480,7 +475,6 @@ _PyJIT_Free(_PyExecutorObject *executor) executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - // executor->jit_compile_count = 0; if (jit_free(memory, size)) { PyErr_WriteUnraisable(NULL); } diff --git a/Python/optimizer.c b/Python/optimizer.c index c8ec2fb3586265..371c32704a40d8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,7 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - // executor->jit_compile_count=0; + executor->has_run = true; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1668,4 +1668,54 @@ void _Py_Executor_Invalidate(_PyExecutorObject *executor) } } +void +_Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) +{ + /* Walk the list of executors */ + /* TO DO -- Use a tree to avoid traversing as many objects */ + bool no_memory = false; + PyObject *invalidate = PyList_New(0); + if (invalidate == NULL) { + PyErr_Clear(); + no_memory = true; + } + int total_executors = 0; + int invalidated_executors = 0; + /* Clearing an executor can deallocate others, so we need to 
make a list of + * executors to invalidate first */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + assert(exec->vm_data.valid); + _PyExecutorObject *next = exec->vm_data.links.next; + total_executors++; + if (!exec->has_run) { + invalidated_executors++; + unlink_executor(exec); + if (no_memory) { + exec->vm_data.valid = 0; + } else { + if (PyList_Append(invalidate, (PyObject *)exec) < 0) { + PyErr_Clear(); + no_memory = true; + exec->vm_data.valid = 0; + } + } + if (is_invalidation) { + OPT_STAT_INC(executors_invalidated); + } + } else { + exec->has_run = false; + } + exec = next; + } + if (invalidate != NULL) { + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { + _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); + executor_clear(exec); + } + Py_DECREF(invalidate); + } + printf("Invalidated %d out of %d executors\n", invalidated_executors, total_executors); + return; +} + #endif /* _Py_TIER2 */ From 29436fd2b69f6974bec03870d0322231105aeaaf Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 15:20:39 -0700 Subject: [PATCH 24/61] rebase --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 01e88a34d10b6a..2e063b9b52fa7f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4842,8 +4842,8 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); + ((_PyExecutorObject *)executor)->has_run = true; } - tier2 op(_FATAL_ERROR, (--)) { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0de5c8a0408d8c..8963f4a966b36f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5459,6 +5459,7 @@ current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject 
*)executor)->vm_data.valid); + ((_PyExecutorObject *)executor)->has_run = true; break; } From b969b11c314bc395ed8c541b4b11a8a6df004f15 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 12 Aug 2024 19:05:20 -0700 Subject: [PATCH 25/61] Update has_run to run_count --- Include/internal/pycore_optimizer.h | 2 +- Python/bytecodes.c | 3 ++- Python/executor_cases.c.h | 2 +- Python/jit.c | 2 -- Python/optimizer.c | 7 +++---- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index a56823fcb770e3..547232e382b608 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -73,7 +73,7 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; - bool has_run; + uint32_t run_count; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2e063b9b52fa7f..d0fe7e09e29f03 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4842,8 +4842,9 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->has_run = true; + ((_PyExecutorObject *)executor)->run_count++; } + tier2 op(_FATAL_ERROR, (--)) { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8963f4a966b36f..9b49d92d196965 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5459,7 +5459,7 @@ current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->has_run = true; + ((_PyExecutorObject *)executor)->run_count++; break; } diff --git a/Python/jit.c b/Python/jit.c index b74afdf349e522..33320761621c4c 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -144,7 +144,6 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t 
value, uint8_t value_start, #define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) #define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) #define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) -int total_compile_count=0; // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! @@ -462,7 +461,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz executor->jit_code = memory; executor->jit_side_entry = memory + trampoline.code_size; executor->jit_size = total_size; - return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 371c32704a40d8..345ca47ffc53b6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,7 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->has_run = true; + executor->run_count++; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1687,7 +1687,7 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; total_executors++; - if (!exec->has_run) { + if (exec->run_count < 4) { invalidated_executors++; unlink_executor(exec); if (no_memory) { @@ -1703,7 +1703,7 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) OPT_STAT_INC(executors_invalidated); } } else { - exec->has_run = false; + exec->run_count = 0; } exec = next; } @@ -1714,7 +1714,6 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) } Py_DECREF(invalidate); } - printf("Invalidated %d out of %d executors\n", invalidated_executors, total_executors); return; } From a669e0fa474c9472f566d21e5aab11d53fc99cf2 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 13 Aug 2024 15:49:30 -0700 
Subject: [PATCH 26/61] update initialized run_count and move invalidate old --- Python/gc.c | 3 ++- Python/gc_free_threading.c | 2 ++ Python/optimizer.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 2c36196f7d99b6..2f83a8cbe28085 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1510,7 +1510,6 @@ gc_collect_region(PyThreadState *tstate, int untrack, struct gc_collection_stats *stats) { - _Py_Executors_InvalidateOld(tstate->interp, 0); PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ @@ -1863,6 +1862,8 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(gcstate, "stop", generation, &stats); + _Py_Executors_InvalidateOld(tstate->interp, 0); + } _PyErr_SetRaisedException(tstate, exc); GC_STAT_ADD(generation, objects_collected, stats.collected); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 54de0c2671ae68..89a1ae13152c0e 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1362,6 +1362,8 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(tstate, "stop", generation, m, n); + _Py_Executors_InvalidateOld(tstate->interp, 0); + } assert(!_PyErr_Occurred(tstate)); diff --git a/Python/optimizer.c b/Python/optimizer.c index 345ca47ffc53b6..f506ba3ece5b9a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1194,7 +1194,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->run_count++; + executor->run_count = __UINT32_MAX__; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; From 
deb73eccae467fec47bf0ef9c19b9d4a0679e999 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 13 Aug 2024 16:14:09 -0700 Subject: [PATCH 27/61] set threshold to 1 --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index f506ba3ece5b9a..6ee8aa85b17b72 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1687,7 +1687,7 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; total_executors++; - if (exec->run_count < 4) { + if (exec->run_count < 1) { invalidated_executors++; unlink_executor(exec); if (no_memory) { From 9fa55e84544ee9ae15ab306d8be8be880ddb1363 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 14 Aug 2024 15:35:31 -0700 Subject: [PATCH 28/61] move incrementing run count into a new op --- .../java/org/python/testbed/PythonSuite.kt | 35 ---- Include/internal/pycore_uop_ids.h | 177 +++++++++--------- Include/internal/pycore_uop_metadata.h | 4 + Python/bytecodes.c | 5 +- Python/executor_cases.c.h | 6 +- Python/optimizer.c | 1 + Python/optimizer_cases.c.h | 4 + 7 files changed, 107 insertions(+), 125 deletions(-) delete mode 100644 Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt deleted file mode 100644 index 0e888ab71d87da..00000000000000 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ /dev/null @@ -1,35 +0,0 @@ -package org.python.testbed - -import androidx.test.annotation.UiThreadTest -import androidx.test.platform.app.InstrumentationRegistry -import androidx.test.ext.junit.runners.AndroidJUnit4 - -import org.junit.Test -import org.junit.runner.RunWith - -import org.junit.Assert.* - - -@RunWith(AndroidJUnit4::class) -class
PythonSuite { - @Test - @UiThreadTest - fun testPython() { - val start = System.currentTimeMillis() - try { - val context = - InstrumentationRegistry.getInstrumentation().targetContext - val args = - InstrumentationRegistry.getArguments().getString("pythonArgs", "") - val status = PythonTestRunner(context).run(args) - assertEquals(0, status) - } finally { - // Make sure the process lives long enough for the test script to - // detect it (see `find_pid` in android.py). - val delay = 2000 - (System.currentTimeMillis() - start) - if (delay > 0) { - Thread.sleep(delay) - } - } - } -} diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index b950f760d74ac7..a62cf527db373d 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -138,13 +138,14 @@ extern "C" { #define _GUARD_TYPE_VERSION 385 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 386 -#define _INIT_CALL_PY_EXACT_ARGS 387 -#define _INIT_CALL_PY_EXACT_ARGS_0 388 -#define _INIT_CALL_PY_EXACT_ARGS_1 389 -#define _INIT_CALL_PY_EXACT_ARGS_2 390 -#define _INIT_CALL_PY_EXACT_ARGS_3 391 -#define _INIT_CALL_PY_EXACT_ARGS_4 392 +#define _INCREMENT_RUN_COUNT 386 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 387 +#define _INIT_CALL_PY_EXACT_ARGS 388 +#define _INIT_CALL_PY_EXACT_ARGS_0 389 +#define _INIT_CALL_PY_EXACT_ARGS_1 390 +#define _INIT_CALL_PY_EXACT_ARGS_2 391 +#define _INIT_CALL_PY_EXACT_ARGS_3 392 +#define _INIT_CALL_PY_EXACT_ARGS_4 393 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -156,65 +157,65 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 
393 -#define _IS_NONE 394 +#define _INTERNAL_INCREMENT_OPT_COUNTER 394 +#define _IS_NONE 395 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 395 -#define _ITER_CHECK_RANGE 396 -#define _ITER_CHECK_TUPLE 397 -#define _ITER_JUMP_LIST 398 -#define _ITER_JUMP_RANGE 399 -#define _ITER_JUMP_TUPLE 400 -#define _ITER_NEXT_LIST 401 -#define _ITER_NEXT_RANGE 402 -#define _ITER_NEXT_TUPLE 403 -#define _JUMP_TO_TOP 404 +#define _ITER_CHECK_LIST 396 +#define _ITER_CHECK_RANGE 397 +#define _ITER_CHECK_TUPLE 398 +#define _ITER_JUMP_LIST 399 +#define _ITER_JUMP_RANGE 400 +#define _ITER_JUMP_TUPLE 401 +#define _ITER_NEXT_LIST 402 +#define _ITER_NEXT_RANGE 403 +#define _ITER_NEXT_TUPLE 404 +#define _JUMP_TO_TOP 405 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 405 -#define _LOAD_ATTR_CLASS 406 -#define _LOAD_ATTR_CLASS_0 407 -#define _LOAD_ATTR_CLASS_1 408 +#define _LOAD_ATTR 406 +#define _LOAD_ATTR_CLASS 407 +#define _LOAD_ATTR_CLASS_0 408 +#define _LOAD_ATTR_CLASS_1 409 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 409 -#define _LOAD_ATTR_INSTANCE_VALUE_0 410 -#define _LOAD_ATTR_INSTANCE_VALUE_1 411 -#define _LOAD_ATTR_METHOD_LAZY_DICT 412 -#define _LOAD_ATTR_METHOD_NO_DICT 413 -#define _LOAD_ATTR_METHOD_WITH_VALUES 414 -#define _LOAD_ATTR_MODULE 415 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 416 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 417 -#define _LOAD_ATTR_PROPERTY_FRAME 418 -#define _LOAD_ATTR_SLOT 419 -#define _LOAD_ATTR_SLOT_0 420 -#define _LOAD_ATTR_SLOT_1 421 -#define _LOAD_ATTR_WITH_HINT 422 +#define _LOAD_ATTR_INSTANCE_VALUE 410 +#define _LOAD_ATTR_INSTANCE_VALUE_0 411 +#define _LOAD_ATTR_INSTANCE_VALUE_1 412 +#define _LOAD_ATTR_METHOD_LAZY_DICT 413 +#define _LOAD_ATTR_METHOD_NO_DICT 414 +#define _LOAD_ATTR_METHOD_WITH_VALUES 415 +#define _LOAD_ATTR_MODULE 416 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 417 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 418 +#define 
_LOAD_ATTR_PROPERTY_FRAME 419 +#define _LOAD_ATTR_SLOT 420 +#define _LOAD_ATTR_SLOT_0 421 +#define _LOAD_ATTR_SLOT_1 422 +#define _LOAD_ATTR_WITH_HINT 423 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 423 -#define _LOAD_CONST_INLINE_BORROW 424 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 425 -#define _LOAD_CONST_INLINE_WITH_NULL 426 +#define _LOAD_CONST_INLINE 424 +#define _LOAD_CONST_INLINE_BORROW 425 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 426 +#define _LOAD_CONST_INLINE_WITH_NULL 427 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 427 -#define _LOAD_FAST_0 428 -#define _LOAD_FAST_1 429 -#define _LOAD_FAST_2 430 -#define _LOAD_FAST_3 431 -#define _LOAD_FAST_4 432 -#define _LOAD_FAST_5 433 -#define _LOAD_FAST_6 434 -#define _LOAD_FAST_7 435 +#define _LOAD_FAST 428 +#define _LOAD_FAST_0 429 +#define _LOAD_FAST_1 430 +#define _LOAD_FAST_2 431 +#define _LOAD_FAST_3 432 +#define _LOAD_FAST_4 433 +#define _LOAD_FAST_5 434 +#define _LOAD_FAST_6 435 +#define _LOAD_FAST_7 436 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 436 -#define _LOAD_GLOBAL_BUILTINS 437 -#define _LOAD_GLOBAL_MODULE 438 +#define _LOAD_GLOBAL 437 +#define _LOAD_GLOBAL_BUILTINS 438 +#define _LOAD_GLOBAL_MODULE 439 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -227,59 +228,59 @@ extern "C" { #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 439 -#define _MONITOR_CALL 440 -#define _MONITOR_JUMP_BACKWARD 441 -#define _MONITOR_RESUME 442 +#define _MAYBE_EXPAND_METHOD 440 +#define _MONITOR_CALL 441 +#define 
_MONITOR_JUMP_BACKWARD 442 +#define _MONITOR_RESUME 443 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 443 -#define _POP_JUMP_IF_TRUE 444 +#define _POP_JUMP_IF_FALSE 444 +#define _POP_JUMP_IF_TRUE 445 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 445 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 446 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 446 +#define _PUSH_FRAME 447 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 447 -#define _PY_FRAME_KW 448 -#define _QUICKEN_RESUME 449 -#define _REPLACE_WITH_TRUE 450 +#define _PY_FRAME_GENERAL 448 +#define _PY_FRAME_KW 449 +#define _QUICKEN_RESUME 450 +#define _REPLACE_WITH_TRUE 451 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 451 -#define _SEND 452 -#define _SEND_GEN_FRAME 453 +#define _SAVE_RETURN_OFFSET 452 +#define _SEND 453 +#define _SEND_GEN_FRAME 454 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 454 -#define _STORE_ATTR 455 -#define _STORE_ATTR_INSTANCE_VALUE 456 -#define _STORE_ATTR_SLOT 457 -#define _STORE_ATTR_WITH_HINT 458 +#define _START_EXECUTOR 455 +#define _STORE_ATTR 456 +#define _STORE_ATTR_INSTANCE_VALUE 457 +#define _STORE_ATTR_SLOT 458 +#define _STORE_ATTR_WITH_HINT 459 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 459 -#define _STORE_FAST_0 460 -#define _STORE_FAST_1 461 -#define _STORE_FAST_2 462 -#define _STORE_FAST_3 463 -#define _STORE_FAST_4 464 -#define _STORE_FAST_5 465 -#define _STORE_FAST_6 466 -#define _STORE_FAST_7 467 +#define _STORE_FAST 460 +#define _STORE_FAST_0 461 +#define _STORE_FAST_1 462 +#define _STORE_FAST_2 463 +#define _STORE_FAST_3 464 +#define _STORE_FAST_4 465 +#define _STORE_FAST_5 466 +#define _STORE_FAST_6 467 +#define _STORE_FAST_7 468 #define _STORE_FAST_LOAD_FAST 
STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 468 -#define _STORE_SUBSCR 469 +#define _STORE_SLICE 469 +#define _STORE_SUBSCR 470 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 470 -#define _TO_BOOL 471 +#define _TIER2_RESUME_CHECK 471 +#define _TO_BOOL 472 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -289,14 +290,14 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 472 +#define _UNPACK_SEQUENCE 473 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE #define __DO_CALL_FUNCTION_EX _DO_CALL_FUNCTION_EX -#define MAX_UOP_ID 472 +#define MAX_UOP_ID 473 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index e2cba4dc0dfc81..63a0446da7914f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,6 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, + [_INCREMENT_RUN_COUNT] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -413,6 +414,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_IMPORT_FROM] = "_IMPORT_FROM", [_IMPORT_NAME] = "_IMPORT_NAME", + [_INCREMENT_RUN_COUNT] = "_INCREMENT_RUN_COUNT", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = 
"_INIT_CALL_PY_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS_0] = "_INIT_CALL_PY_EXACT_ARGS_0", @@ -1062,6 +1064,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; + case _INCREMENT_RUN_COUNT: + return 0; case _FATAL_ERROR: return 0; case _CHECK_VALIDITY_AND_SET_IP: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d0fe7e09e29f03..1a8827a0c5ec8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4842,7 +4842,10 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->run_count++; + } + + tier2 op(_INCREMENT_RUN_COUNT, (--)) { + current_executor->run_count++; } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9b49d92d196965..578272a77f370a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5459,7 +5459,11 @@ current_executor = (_PyExecutorObject*)executor; #endif assert(((_PyExecutorObject *)executor)->vm_data.valid); - ((_PyExecutorObject *)executor)->run_count++; + break; + } + + case _INCREMENT_RUN_COUNT: { + current_executor->run_count++; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 6ee8aa85b17b72..6b473bb484ab93 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -565,6 +565,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); + ADD_TO_TRACE(_INCREMENT_RUN_COUNT, 0, 0, 0); uint32_t target = 0; for (;;) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 672fec3946f2fb..92c5e4e4783239 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2375,6 +2375,10 @@ break; } + case _INCREMENT_RUN_COUNT: { + break; + } + case _FATAL_ERROR: { break; } From e4a461a68cbfde4bb750cc57b829fca750fa0d49 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: 
Tue, 20 Aug 2024 16:17:48 -0700 Subject: [PATCH 29/61] add invalidation threshold in gc of 10 --- Python/gc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 2f83a8cbe28085..e354f14af70898 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1825,6 +1825,8 @@ Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { GCState *gcstate = &tstate->interp->gc; + static int invalidation_counter = 0; + const int invalidation_threshold = 10; int expected = 0; if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { @@ -1862,8 +1864,11 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(gcstate, "stop", generation, &stats); - _Py_Executors_InvalidateOld(tstate->interp, 0); + if (++invalidation_counter >= invalidation_threshold) { + invalidation_counter = 0; + _Py_Executors_InvalidateOld(tstate->interp, 0); + } } _PyErr_SetRaisedException(tstate, exc); GC_STAT_ADD(generation, objects_collected, stats.collected); From d5a2bedbf7b65992546386703084c621838bd799 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 20 Aug 2024 16:22:18 -0700 Subject: [PATCH 30/61] move back to increment --- Python/gc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index e354f14af70898..1386c6143cae85 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1852,6 +1852,11 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) break; case 1: gc_collect_increment(tstate, &stats); + if (++invalidation_counter >= invalidation_threshold) { + invalidation_counter = 0; + printf("Invalidating old objects\n"); + _Py_Executors_InvalidateOld(tstate->interp, 0); + } break; case 2: gc_collect_full(tstate, &stats); @@ -1864,11 +1869,6 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) {
invoke_gc_callback(gcstate, "stop", generation, &stats); - - if (++invalidation_counter >= invalidation_threshold) { - invalidation_counter = 0; - _Py_Executors_InvalidateOld(tstate->interp, 0); - } } _PyErr_SetRaisedException(tstate, exc); GC_STAT_ADD(generation, objects_collected, stats.collected); From d232e63089ba469ec8dc4a23cc074e36d77568d6 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 20 Aug 2024 16:27:33 -0700 Subject: [PATCH 31/61] remove print --- Python/gc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 1386c6143cae85..2e6c2eb9260b4c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1854,7 +1854,6 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) gc_collect_increment(tstate, &stats); if (++invalidation_counter >= invalidation_threshold) { invalidation_counter = 0; - printf("Invalidating old objects\n"); _Py_Executors_InvalidateOld(tstate->interp, 0); } break; From e4a456ae0a61476d0c75cac255c4c03c41f9eb9c Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 11:03:00 -0700 Subject: [PATCH 32/61] move invalidation to executor creation --- Python/gc.c | 6 ------ Python/gc_free_threading.c | 2 -- Python/optimizer.c | 7 +++++++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 2e6c2eb9260b4c..3d36792ffb27fc 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1825,8 +1825,6 @@ Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { GCState *gcstate = &tstate->interp->gc; - static int invalidation_counter = 0; - const int invalidation_threshold = 10; int expected = 0; if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { @@ -1852,10 +1850,6 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) break; case 1: gc_collect_increment(tstate, &stats); - if (++invalidation_counter >= invalidation_threshold) { - invalidation_counter = 0; - 
_Py_Executors_InvalidateOld(tstate->interp, 0); - } break; case 2: gc_collect_full(tstate, &stats); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 89a1ae13152c0e..54de0c2671ae68 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1362,8 +1362,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(tstate, "stop", generation, m, n); - _Py_Executors_InvalidateOld(tstate->interp, 0); - } assert(!_PyErr_Occurred(tstate)); diff --git a/Python/optimizer.c b/Python/optimizer.c index 6b473bb484ab93..5f89fa8905ad54 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -157,6 +157,7 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer) return old == NULL ? -1 : 0; } +int executors_created = 0; /* Returns 1 if optimized, 0 if not optimized, and -1 for an error. * If optimized, *executor_ptr contains a new reference to the executor */ @@ -182,6 +183,12 @@ _PyOptimizer_Optimize( if (err <= 0) { return err; } + + if (++executors_created >= 5) { + executors_created = 0; + _Py_Executors_InvalidateOld(interp, 0); + } + assert(*executor_ptr != NULL); if (progress_needed) { int index = get_index_for_executor(code, start); From b7d2d5a6f9a72b05aad9ea65f9fd6c67feebca0d Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 11:06:19 -0700 Subject: [PATCH 33/61] change threshold --- .../java/org/python/testbed/PythonSuite.kt | 35 +++++++++++++++++++ Python/optimizer.c | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt new file mode 100644 index 00000000000000..282ad35f4d6890 --- /dev/null +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt 
@@ -0,0 +1,35 @@ +package org.python.testbed + +import androidx.test.annotation.UiThreadTest +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + + +@RunWith(AndroidJUnit4::class) +class PythonSuite { + @Test + @UiThreadTest + fun testPython() { + val start = System.currentTimeMillis() + try { + val context = + InstrumentationRegistry.getInstrumentation().targetContext + val args = + InstrumentationRegistry.getArguments().getString("pythonArgs", "") + val status = PythonTestRunner(context).run(args) + assertEquals(0, status) + } finally { + // Make sure the process lives long enough for the test script to + // detect it (see `find_pid` in android.py). + val delay = 2000 - (System.currentTimeMillis() - start) + if (delay > 0) { + Thread.sleep(delay) + } + } + } +} \ No newline at end of file diff --git a/Python/optimizer.c b/Python/optimizer.c index 5f89fa8905ad54..54c00d4b4c0e28 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -184,7 +184,7 @@ _PyOptimizer_Optimize( return err; } - if (++executors_created >= 5) { + if (++executors_created >= 10) { executors_created = 0; _Py_Executors_InvalidateOld(interp, 0); } From 6dcd2dc7cf33e553dbab2d0b5c402a1b231e74d2 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 11:06:57 -0700 Subject: [PATCH 34/61] new line --- .../app/src/androidTest/java/org/python/testbed/PythonSuite.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt index 282ad35f4d6890..0e888ab71d87da 100644 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt @@ -32,4 +32,4 @@ class PythonSuite { } } } -} \ No 
newline at end of file +} From 219f890a05326d1411becb1f6d9680270c7258fe Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 27 Aug 2024 14:40:34 -0700 Subject: [PATCH 35/61] update constant --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 54c00d4b4c0e28..6cd5c5ab99ae29 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1202,7 +1202,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->run_count = __UINT32_MAX__; + executor->run_count = UINT32_MAX; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; From fb7b04d7953cfd7f136d0d28dee36c54e97b32fc Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 21:44:17 +0000 Subject: [PATCH 36/61] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst new file mode 100644 index 00000000000000..d5dc84426b175a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst @@ -0,0 +1 @@ +Improved JIT memory consumption by invalidating cold executors From 310d20c1f1a85d6025a79392a325429a66f707c4 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 29 Aug 2024 17:59:49 -0700 Subject: [PATCH 37/61] address pr comments --- Include/internal/pycore_interp.h | 2 +- 
Include/internal/pycore_optimizer.h | 3 +-- Python/optimizer.c | 14 ++------------ 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index a1c1dd0c957230..833c51f03635a9 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -259,7 +259,7 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - + size_t executors_created; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 547232e382b608..e0ba67ab9b1a56 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -124,17 +124,16 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); #ifdef _Py_TIER2 PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); -PyAPI_FUNC(void) _Py_Executor_Invalidate(_PyExecutorObject *executor); PyAPI_FUNC(void) _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation); #else # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) # define _Py_Executors_InvalidateAll(A, B) ((void)0) -# define _Py_Executor_Invalidate(A) ((void)0) # define _Py_Executors_InvalidateOld(A, B) ((void)0) #endif +#define JIT_CLEANUP_THRESHOLD 10 // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 800 diff --git a/Python/optimizer.c b/Python/optimizer.c index 6cd5c5ab99ae29..0c2e333d28c6dd 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -157,7 +157,6 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer) return old == NULL ? -1 : 0; } -int executors_created = 0; /* Returns 1 if optimized, 0 if not optimized, and -1 for an error. 
* If optimized, *executor_ptr contains a new reference to the executor */ @@ -184,8 +183,8 @@ _PyOptimizer_Optimize( return err; } - if (++executors_created >= 10) { - executors_created = 0; + if (++interp->executors_created >= JIT_CLEANUP_THRESHOLD) { + interp->executors_created = 0; _Py_Executors_InvalidateOld(interp, 0); } @@ -1668,14 +1667,6 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) } } -void _Py_Executor_Invalidate(_PyExecutorObject *executor) -{ - if (executor->vm_data.valid) { - unlink_executor(executor); - executor_clear(executor); - } -} - void _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) { @@ -1722,7 +1713,6 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) } Py_DECREF(invalidate); } - return; } #endif /* _Py_TIER2 */ From d2f9dc41326b1f3991bc7028dc7a22c4087bc8ad Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 29 Aug 2024 18:40:00 -0700 Subject: [PATCH 38/61] refactor invalidatecold --- Include/internal/pycore_optimizer.h | 4 +-- Python/optimizer.c | 42 ++++++++++++++--------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e0ba67ab9b1a56..324a509513caf0 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -124,12 +124,12 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); #ifdef _Py_TIER2 PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); -PyAPI_FUNC(void) _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); #else # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) # define _Py_Executors_InvalidateAll(A, B) ((void)0) -# define 
_Py_Executors_InvalidateOld(A, B) ((void)0) +# define _Py_Executors_InvalidateCold(A) ((void)0) #endif diff --git a/Python/optimizer.c b/Python/optimizer.c index 0c2e333d28c6dd..38f2eb172e4e26 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -185,7 +185,7 @@ _PyOptimizer_Optimize( if (++interp->executors_created >= JIT_CLEANUP_THRESHOLD) { interp->executors_created = 0; - _Py_Executors_InvalidateOld(interp, 0); + _Py_Executors_InvalidateCold(interp); } assert(*executor_ptr != NULL); @@ -1668,16 +1668,15 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) } void -_Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) +_Py_Executors_InvalidateCold(PyInterpreterState *interp) { /* Walk the list of executors */ /* TO DO -- Use a tree to avoid traversing as many objects */ - bool no_memory = false; PyObject *invalidate = PyList_New(0); if (invalidate == NULL) { - PyErr_Clear(); - no_memory = true; + goto error; } + int total_executors = 0; int invalidated_executors = 0; /* Clearing an executor can deallocate others, so we need to make a list of @@ -1685,34 +1684,33 @@ _Py_Executors_InvalidateOld(PyInterpreterState *interp, int is_invalidation) for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; + total_executors++; if (exec->run_count < 1) { invalidated_executors++; unlink_executor(exec); - if (no_memory) { - exec->vm_data.valid = 0; - } else { - if (PyList_Append(invalidate, (PyObject *)exec) < 0) { - PyErr_Clear(); - no_memory = true; - exec->vm_data.valid = 0; - } - } - if (is_invalidation) { - OPT_STAT_INC(executors_invalidated); + if (PyList_Append(invalidate, (PyObject *)exec) < 0) + { + goto error; } + } else { exec->run_count = 0; } + exec = next; } - if (invalidate != NULL) { - for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { - _PyExecutorObject *exec = (_PyExecutorObject 
*)PyList_GET_ITEM(invalidate, i); - executor_clear(exec); - } - Py_DECREF(invalidate); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { + _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); + executor_clear(exec); } + Py_DECREF(invalidate); + return; +error: + PyErr_Clear(); + Py_XDECREF(invalidate); + // If we're truly out of memory, wiping out everything is a fine fallback: + _Py_Executors_InvalidateAll(interp, 0); } #endif /* _Py_TIER2 */ From d755d5676147d4375596a79c531c9b2311cb5804 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 30 Aug 2024 10:01:35 -0700 Subject: [PATCH 39/61] refactor to was_run bool --- Include/internal/pycore_optimizer.h | 4 ++-- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/optimizer.c | 8 +++++--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 324a509513caf0..db29e8e351f36c 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -31,7 +31,8 @@ typedef struct { uint8_t oparg; uint16_t valid:1; uint16_t linked:1; - uint16_t chain_depth:14; // Must be big engough for MAX_CHAIN_DEPTH - 1. + bool was_run:1; + uint16_t chain_depth:13; // Must be big engough for MAX_CHAIN_DEPTH - 1. int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). 
_PyBloomFilter bloom; _PyExecutorLinkListNode links; @@ -73,7 +74,6 @@ typedef struct _PyExecutorObject { uint32_t exit_count; uint32_t code_size; size_t jit_size; - uint32_t run_count; void *jit_code; void *jit_side_entry; _PyExitData exits[1]; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1a8827a0c5ec8b..3c2f95b5ff7bbc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4845,7 +4845,7 @@ dummy_func( } tier2 op(_INCREMENT_RUN_COUNT, (--)) { - current_executor->run_count++; + current_executor->vm_data.was_run = true; } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 578272a77f370a..436a1070b0b994 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5463,7 +5463,7 @@ } case _INCREMENT_RUN_COUNT: { - current_executor->run_count++; + current_executor->vm_data.was_run = true; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 38f2eb172e4e26..6f01d2ee194ac1 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1201,7 +1201,9 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_code = NULL; executor->jit_side_entry = NULL; executor->jit_size = 0; - executor->run_count = UINT32_MAX; + // This is initialized to true so we can prevent the executor + // from being immediately detected as cold and invalidated. 
+ executor->vm_data.was_run = true; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1686,7 +1688,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) _PyExecutorObject *next = exec->vm_data.links.next; total_executors++; - if (exec->run_count < 1) { + if (!exec->vm_data.was_run) { invalidated_executors++; unlink_executor(exec); if (PyList_Append(invalidate, (PyObject *)exec) < 0) @@ -1695,7 +1697,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) } } else { - exec->run_count = 0; + exec->vm_data.was_run = false; } exec = next; From c9534c09c9feee32740fe6bab5b8ab9ec6ebb1e8 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 30 Aug 2024 10:10:51 -0700 Subject: [PATCH 40/61] update blurb --- .../2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst index d5dc84426b175a..de62875e16475d 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-27-21-44-14.gh-issue-116017.ZY3yBY.rst @@ -1 +1,2 @@ -Improved JIT memory consumption by invalidating cold executors +Improved JIT memory consumption by periodically freeing memory used by infrequently-executed code. +This change is especially likely to improve the memory footprint of long-running programs. 
From e5065adec10716da21d856879163b2217ace4977 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 30 Aug 2024 10:31:50 -0700 Subject: [PATCH 41/61] Update op name to be more reflective of was_run --- Include/internal/pycore_uop_ids.h | 138 ++++++++++++------------- Include/internal/pycore_uop_metadata.h | 6 +- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/optimizer.c | 6 +- Python/optimizer_cases.c.h | 2 +- 6 files changed, 76 insertions(+), 80 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index a62cf527db373d..3c1134cb52d193 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -138,14 +138,13 @@ extern "C" { #define _GUARD_TYPE_VERSION 385 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INCREMENT_RUN_COUNT 386 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 387 -#define _INIT_CALL_PY_EXACT_ARGS 388 -#define _INIT_CALL_PY_EXACT_ARGS_0 389 -#define _INIT_CALL_PY_EXACT_ARGS_1 390 -#define _INIT_CALL_PY_EXACT_ARGS_2 391 -#define _INIT_CALL_PY_EXACT_ARGS_3 392 -#define _INIT_CALL_PY_EXACT_ARGS_4 393 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 386 +#define _INIT_CALL_PY_EXACT_ARGS 387 +#define _INIT_CALL_PY_EXACT_ARGS_0 388 +#define _INIT_CALL_PY_EXACT_ARGS_1 389 +#define _INIT_CALL_PY_EXACT_ARGS_2 390 +#define _INIT_CALL_PY_EXACT_ARGS_3 391 +#define _INIT_CALL_PY_EXACT_ARGS_4 392 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -157,65 +156,65 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 394 -#define _IS_NONE 395 +#define _INTERNAL_INCREMENT_OPT_COUNTER 393 +#define _IS_NONE 394 
#define _IS_OP IS_OP -#define _ITER_CHECK_LIST 396 -#define _ITER_CHECK_RANGE 397 -#define _ITER_CHECK_TUPLE 398 -#define _ITER_JUMP_LIST 399 -#define _ITER_JUMP_RANGE 400 -#define _ITER_JUMP_TUPLE 401 -#define _ITER_NEXT_LIST 402 -#define _ITER_NEXT_RANGE 403 -#define _ITER_NEXT_TUPLE 404 -#define _JUMP_TO_TOP 405 +#define _ITER_CHECK_LIST 395 +#define _ITER_CHECK_RANGE 396 +#define _ITER_CHECK_TUPLE 397 +#define _ITER_JUMP_LIST 398 +#define _ITER_JUMP_RANGE 399 +#define _ITER_JUMP_TUPLE 400 +#define _ITER_NEXT_LIST 401 +#define _ITER_NEXT_RANGE 402 +#define _ITER_NEXT_TUPLE 403 +#define _JUMP_TO_TOP 404 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 406 -#define _LOAD_ATTR_CLASS 407 -#define _LOAD_ATTR_CLASS_0 408 -#define _LOAD_ATTR_CLASS_1 409 +#define _LOAD_ATTR 405 +#define _LOAD_ATTR_CLASS 406 +#define _LOAD_ATTR_CLASS_0 407 +#define _LOAD_ATTR_CLASS_1 408 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 410 -#define _LOAD_ATTR_INSTANCE_VALUE_0 411 -#define _LOAD_ATTR_INSTANCE_VALUE_1 412 -#define _LOAD_ATTR_METHOD_LAZY_DICT 413 -#define _LOAD_ATTR_METHOD_NO_DICT 414 -#define _LOAD_ATTR_METHOD_WITH_VALUES 415 -#define _LOAD_ATTR_MODULE 416 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 417 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 418 -#define _LOAD_ATTR_PROPERTY_FRAME 419 -#define _LOAD_ATTR_SLOT 420 -#define _LOAD_ATTR_SLOT_0 421 -#define _LOAD_ATTR_SLOT_1 422 -#define _LOAD_ATTR_WITH_HINT 423 +#define _LOAD_ATTR_INSTANCE_VALUE 409 +#define _LOAD_ATTR_INSTANCE_VALUE_0 410 +#define _LOAD_ATTR_INSTANCE_VALUE_1 411 +#define _LOAD_ATTR_METHOD_LAZY_DICT 412 +#define _LOAD_ATTR_METHOD_NO_DICT 413 +#define _LOAD_ATTR_METHOD_WITH_VALUES 414 +#define _LOAD_ATTR_MODULE 415 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 416 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 417 +#define _LOAD_ATTR_PROPERTY_FRAME 418 +#define _LOAD_ATTR_SLOT 419 +#define _LOAD_ATTR_SLOT_0 420 
+#define _LOAD_ATTR_SLOT_1 421 +#define _LOAD_ATTR_WITH_HINT 422 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 424 -#define _LOAD_CONST_INLINE_BORROW 425 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 426 -#define _LOAD_CONST_INLINE_WITH_NULL 427 +#define _LOAD_CONST_INLINE 423 +#define _LOAD_CONST_INLINE_BORROW 424 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 425 +#define _LOAD_CONST_INLINE_WITH_NULL 426 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 428 -#define _LOAD_FAST_0 429 -#define _LOAD_FAST_1 430 -#define _LOAD_FAST_2 431 -#define _LOAD_FAST_3 432 -#define _LOAD_FAST_4 433 -#define _LOAD_FAST_5 434 -#define _LOAD_FAST_6 435 -#define _LOAD_FAST_7 436 +#define _LOAD_FAST 427 +#define _LOAD_FAST_0 428 +#define _LOAD_FAST_1 429 +#define _LOAD_FAST_2 430 +#define _LOAD_FAST_3 431 +#define _LOAD_FAST_4 432 +#define _LOAD_FAST_5 433 +#define _LOAD_FAST_6 434 +#define _LOAD_FAST_7 435 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 437 -#define _LOAD_GLOBAL_BUILTINS 438 -#define _LOAD_GLOBAL_MODULE 439 +#define _LOAD_GLOBAL 436 +#define _LOAD_GLOBAL_BUILTINS 437 +#define _LOAD_GLOBAL_MODULE 438 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -228,32 +227,33 @@ extern "C" { #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 440 -#define _MONITOR_CALL 441 -#define _MONITOR_JUMP_BACKWARD 442 -#define _MONITOR_RESUME 443 +#define _MAYBE_EXPAND_METHOD 439 +#define _MONITOR_CALL 440 +#define _MONITOR_JUMP_BACKWARD 441 +#define _MONITOR_RESUME 442 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT 
-#define _POP_JUMP_IF_FALSE 444 -#define _POP_JUMP_IF_TRUE 445 +#define _POP_JUMP_IF_FALSE 443 +#define _POP_JUMP_IF_TRUE 444 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 446 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 445 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 447 +#define _PUSH_FRAME 446 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 448 -#define _PY_FRAME_KW 449 -#define _QUICKEN_RESUME 450 -#define _REPLACE_WITH_TRUE 451 +#define _PY_FRAME_GENERAL 447 +#define _PY_FRAME_KW 448 +#define _QUICKEN_RESUME 449 +#define _REPLACE_WITH_TRUE 450 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 452 -#define _SEND 453 -#define _SEND_GEN_FRAME 454 +#define _SAVE_RETURN_OFFSET 451 +#define _SEND 452 +#define _SEND_GEN_FRAME 453 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE +#define _SET_RUN_STATE 454 #define _SET_UPDATE SET_UPDATE #define _START_EXECUTOR 455 #define _STORE_ATTR 456 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 63a0446da7914f..6aeccd23cdbaa3 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_INCREMENT_RUN_COUNT] = 0, + [_SET_RUN_STATE] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -414,7 +414,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_IMPORT_FROM] = "_IMPORT_FROM", [_IMPORT_NAME] = "_IMPORT_NAME", - [_INCREMENT_RUN_COUNT] = "_INCREMENT_RUN_COUNT", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = 
"_INIT_CALL_PY_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS_0] = "_INIT_CALL_PY_EXACT_ARGS_0", @@ -508,6 +507,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SET_ADD] = "_SET_ADD", [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", [_SET_IP] = "_SET_IP", + [_SET_RUN_STATE] = "_SET_RUN_STATE", [_SET_UPDATE] = "_SET_UPDATE", [_START_EXECUTOR] = "_START_EXECUTOR", [_STORE_ATTR] = "_STORE_ATTR", @@ -1064,7 +1064,7 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; - case _INCREMENT_RUN_COUNT: + case _SET_RUN_STATE: return 0; case _FATAL_ERROR: return 0; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3c2f95b5ff7bbc..c636e4589d32b5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4844,7 +4844,7 @@ dummy_func( assert(((_PyExecutorObject *)executor)->vm_data.valid); } - tier2 op(_INCREMENT_RUN_COUNT, (--)) { + tier2 op(_SET_RUN_STATE, (--)) { current_executor->vm_data.was_run = true; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 436a1070b0b994..d3cda33634b03a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5462,7 +5462,7 @@ break; } - case _INCREMENT_RUN_COUNT: { + case _SET_RUN_STATE: { current_executor->vm_data.was_run = true; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 6f01d2ee194ac1..fb3e2e1107d0c2 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -571,7 +571,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); - ADD_TO_TRACE(_INCREMENT_RUN_COUNT, 0, 0, 0); + ADD_TO_TRACE(_SET_RUN_STATE, 0, 0, 0); uint32_t target = 0; for (;;) { @@ -1679,17 +1679,13 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) goto error; } - int total_executors = 0; - int invalidated_executors = 0; /* Clearing an executor can deallocate others, so we need to make a list of * executors to invalidate first */ for 
(_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; - total_executors++; if (!exec->vm_data.was_run) { - invalidated_executors++; unlink_executor(exec); if (PyList_Append(invalidate, (PyObject *)exec) < 0) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 92c5e4e4783239..5872573b21676a 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2375,7 +2375,7 @@ break; } - case _INCREMENT_RUN_COUNT: { + case _SET_RUN_STATE: { break; } From 2d092598872986d709a85b9a995086428270cb5c Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 30 Aug 2024 10:33:47 -0700 Subject: [PATCH 42/61] fix typo --- Include/internal/pycore_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index db29e8e351f36c..a03f639e76c6fc 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -32,7 +32,7 @@ typedef struct { uint16_t valid:1; uint16_t linked:1; bool was_run:1; - uint16_t chain_depth:13; // Must be big engough for MAX_CHAIN_DEPTH - 1. + uint16_t chain_depth:13; // Must be big enough for MAX_CHAIN_DEPTH - 1. int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). 
_PyBloomFilter bloom; _PyExecutorLinkListNode links; From d2e8e2934b36600f1c43ec8345e3e9e2d2bcf77b Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 30 Aug 2024 13:35:35 -0700 Subject: [PATCH 43/61] rename uop --- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 +++--- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/optimizer.c | 2 +- Python/optimizer_cases.c.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 3c1134cb52d193..6ba5c1f24440ac 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -252,8 +252,8 @@ extern "C" { #define _SEND_GEN_FRAME 453 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD +#define _SET_EXECUTOR_RUN_STATE 454 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE -#define _SET_RUN_STATE 454 #define _SET_UPDATE SET_UPDATE #define _START_EXECUTOR 455 #define _STORE_ATTR 456 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6aeccd23cdbaa3..e16ac959df8b4b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_SET_RUN_STATE] = 0, + [_SET_EXECUTOR_RUN_STATE] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -505,9 +505,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SEND_GEN_FRAME] = "_SEND_GEN_FRAME", [_SETUP_ANNOTATIONS] = "_SETUP_ANNOTATIONS", [_SET_ADD] = "_SET_ADD", + [_SET_EXECUTOR_RUN_STATE] = "_SET_EXECUTOR_RUN_STATE", [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", [_SET_IP] = "_SET_IP", - [_SET_RUN_STATE] = "_SET_RUN_STATE", [_SET_UPDATE] = "_SET_UPDATE", [_START_EXECUTOR] = "_START_EXECUTOR", [_STORE_ATTR] = 
"_STORE_ATTR", @@ -1064,7 +1064,7 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; - case _SET_RUN_STATE: + case _SET_EXECUTOR_RUN_STATE: return 0; case _FATAL_ERROR: return 0; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c636e4589d32b5..338b52872708ed 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4844,7 +4844,7 @@ dummy_func( assert(((_PyExecutorObject *)executor)->vm_data.valid); } - tier2 op(_SET_RUN_STATE, (--)) { + tier2 op(_SET_EXECUTOR_RUN_STATE, (--)) { current_executor->vm_data.was_run = true; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d3cda33634b03a..48b9611ea300dd 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5462,7 +5462,7 @@ break; } - case _SET_RUN_STATE: { + case _SET_EXECUTOR_RUN_STATE: { current_executor->vm_data.was_run = true; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index fb3e2e1107d0c2..bf753e8a9e5ba9 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -571,7 +571,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); - ADD_TO_TRACE(_SET_RUN_STATE, 0, 0, 0); + ADD_TO_TRACE(_SET_EXECUTOR_RUN_STATE, 0, 0, 0); uint32_t target = 0; for (;;) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5872573b21676a..72871d711b75fe 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2375,7 +2375,7 @@ break; } - case _SET_RUN_STATE: { + case _SET_EXECUTOR_RUN_STATE: { break; } From a894598209d99b3a0394fa96603ad78d4ad95382 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sat, 31 Aug 2024 16:33:34 -0700 Subject: [PATCH 44/61] dedent and initialize executors_created --- Python/optimizer.c | 5 +++-- Python/pystate.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 
bf753e8a9e5ba9..2c992362b550e2 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1667,6 +1667,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) OPT_STAT_INC(executors_invalidated); } } + interp->executors_created=0; } void @@ -1702,8 +1703,8 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); executor_clear(exec); } - Py_DECREF(invalidate); - return; + Py_DECREF(invalidate); + return; error: PyErr_Clear(); Py_XDECREF(invalidate); diff --git a/Python/pystate.c b/Python/pystate.c index 54caf373e91d6c..a39a4640a478d6 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -660,6 +660,7 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 (void)_Py_SetOptimizer(interp, NULL); interp->executor_list_head = NULL; + interp->executors_created = 0; #endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. 
*/ From 1927bfeef58a46d18dfbc31c6f9b10b3d8175bbd Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 1 Sep 2024 09:57:56 -0700 Subject: [PATCH 45/61] address some PR comments --- Include/internal/pycore_interp.h | 1 + Include/internal/pycore_optimizer.h | 2 +- Python/optimizer.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 833c51f03635a9..6bc4f33186b628 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -259,6 +259,7 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; + // executors_created is limited to JIT_CLEANUP_THRESHOLD size_t executors_created; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index a03f639e76c6fc..1e364f71fe260c 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -31,7 +31,7 @@ typedef struct { uint8_t oparg; uint16_t valid:1; uint16_t linked:1; - bool was_run:1; + uint16_t was_run:1; uint16_t chain_depth:13; // Must be big enough for MAX_CHAIN_DEPTH - 1. int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). 
_PyBloomFilter bloom; diff --git a/Python/optimizer.c b/Python/optimizer.c index 2c992362b550e2..f08741003e95f0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1667,7 +1667,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) OPT_STAT_INC(executors_invalidated); } } - interp->executors_created=0; + interp->executors_created = 0; } void From 8ee0d7fd98076cd37c1b1725c05be22f88510689 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 1 Sep 2024 09:59:32 -0700 Subject: [PATCH 46/61] Update Python/optimizer.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Python/optimizer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index f08741003e95f0..ecde1806bfd5a8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1692,8 +1692,8 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) { goto error; } - - } else { + } + else { exec->vm_data.was_run = false; } From cedd65d9dc5049377255bc348c9ee1ecf4b8ec58 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 1 Sep 2024 10:03:44 -0700 Subject: [PATCH 47/61] Update Python/optimizer.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Python/optimizer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index ecde1806bfd5a8..9966bd34d52cc5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1688,8 +1688,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) if (!exec->vm_data.was_run) { unlink_executor(exec); - if (PyList_Append(invalidate, (PyObject *)exec) < 0) - { + if (PyList_Append(invalidate, (PyObject *)exec) < 0) { goto error; } } From 0a9b5b693b4c1bb3ee3a1b52513dd84e2dcf0b4a Mon Sep 17 
00:00:00 2001 From: Savannah Ostrowski Date: Sun, 1 Sep 2024 10:09:07 -0700 Subject: [PATCH 48/61] make was_run uint --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/optimizer.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 338b52872708ed..558d72f521bd6e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4845,7 +4845,7 @@ dummy_func( } tier2 op(_SET_EXECUTOR_RUN_STATE, (--)) { - current_executor->vm_data.was_run = true; + current_executor->vm_data.was_run = 1; } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 48b9611ea300dd..9ca81fb711844d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5463,7 +5463,7 @@ } case _SET_EXECUTOR_RUN_STATE: { - current_executor->vm_data.was_run = true; + current_executor->vm_data.was_run = 1; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 9966bd34d52cc5..2be8bb1acff29c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1203,7 +1203,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_size = 0; // This is initialized to true so we can prevent the executor // from being immediately detected as cold and invalidated. 
- executor->vm_data.was_run = true; + executor->vm_data.was_run = 1; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1693,7 +1693,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) } } else { - exec->vm_data.was_run = false; + exec->vm_data.was_run = 0; } exec = next; From 180a68e6fd68db1f30b3a4d138072bf5be32a385 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 1 Sep 2024 10:13:56 -0700 Subject: [PATCH 49/61] add comment for JIT_CLEANUP_THRESHOLD --- Include/internal/pycore_optimizer.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 1e364f71fe260c..36b11a0707976f 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -133,6 +133,9 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); #endif +// When new executors are created, we check to see if the number of +// executors created meets or exceeds the JIT_CLEANUP_THRESHOLD. If it +// does, we invalidate cold executors. #define JIT_CLEANUP_THRESHOLD 10 // This is the length of the trace we project initially. 
From 7cb9cba9deed15c174ac0eb2c4cb3f8aa5b41ab5 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 1 Sep 2024 10:17:01 -0700 Subject: [PATCH 50/61] Remove extraneous reset of executors_created --- Python/optimizer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 2be8bb1acff29c..64786d5ef257e8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1667,7 +1667,6 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) OPT_STAT_INC(executors_invalidated); } } - interp->executors_created = 0; } void From 3c593166f48584ad35483ddfe8394eae805eee25 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 3 Sep 2024 14:35:29 -0700 Subject: [PATCH 51/61] condense conditional statements --- Python/optimizer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 64786d5ef257e8..fe41e88b8c6087 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1685,11 +1685,8 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; - if (!exec->vm_data.was_run) { - unlink_executor(exec); - if (PyList_Append(invalidate, (PyObject *)exec) < 0) { + if (!exec->vm_data.was_run && PyList_Append(invalidate, (PyObject *)exec) < 0) { goto error; - } } else { exec->vm_data.was_run = 0; From fe50615be0df9e0a5a71f412acc5c9ed2427d8c9 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 13 Sep 2024 12:56:12 -0700 Subject: [PATCH 52/61] Address PR comments from Brandt and Mark --- Include/internal/pycore_interp.h | 4 ++-- Include/internal/pycore_optimizer.h | 4 ++-- Include/internal/pycore_uop_ids.h | 32 +++++++++++++------------- Include/internal/pycore_uop_metadata.h | 6 ++--- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- Python/optimizer.c | 12 +++++----- Python/optimizer_cases.c.h | 2 +- Python/pystate.c | 2 +- 9 files changed, 35 
insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 6bc4f33186b628..f8a73352a3e3d6 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -259,8 +259,8 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - // executors_created is limited to JIT_CLEANUP_THRESHOLD - size_t executors_created; + // new_executors is limited to JIT_CLEANUP_THRESHOLD + size_t new_executors; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 36b11a0707976f..4e0099b201ab30 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -31,8 +31,8 @@ typedef struct { uint8_t oparg; uint16_t valid:1; uint16_t linked:1; - uint16_t was_run:1; - uint16_t chain_depth:13; // Must be big enough for MAX_CHAIN_DEPTH - 1. + uint16_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1. + bool warm; int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). 
_PyBloomFilter bloom; _PyExecutorLinkListNode links; diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 6ba5c1f24440ac..927dae88c1fa73 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -222,37 +222,37 @@ extern "C" { #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION +#define _MAKE_WARM 439 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 439 -#define _MONITOR_CALL 440 -#define _MONITOR_JUMP_BACKWARD 441 -#define _MONITOR_RESUME 442 +#define _MAYBE_EXPAND_METHOD 440 +#define _MONITOR_CALL 441 +#define _MONITOR_JUMP_BACKWARD 442 +#define _MONITOR_RESUME 443 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 443 -#define _POP_JUMP_IF_TRUE 444 +#define _POP_JUMP_IF_FALSE 444 +#define _POP_JUMP_IF_TRUE 445 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 445 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 446 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 446 +#define _PUSH_FRAME 447 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 447 -#define _PY_FRAME_KW 448 -#define _QUICKEN_RESUME 449 -#define _REPLACE_WITH_TRUE 450 +#define _PY_FRAME_GENERAL 448 +#define _PY_FRAME_KW 449 +#define _QUICKEN_RESUME 450 +#define _REPLACE_WITH_TRUE 451 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 451 -#define _SEND 452 -#define _SEND_GEN_FRAME 453 +#define _SAVE_RETURN_OFFSET 452 +#define _SEND 453 +#define _SEND_GEN_FRAME 454 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD -#define _SET_EXECUTOR_RUN_STATE 454 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE #define _START_EXECUTOR 455 
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2647aa618e88ef..07606135d7a356 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_SET_EXECUTOR_RUN_STATE] = 0, + [_MAKE_WARM] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -482,6 +482,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_SUPER_ATTR_METHOD] = "_LOAD_SUPER_ATTR_METHOD", [_MAKE_CELL] = "_MAKE_CELL", [_MAKE_FUNCTION] = "_MAKE_FUNCTION", + [_MAKE_WARM] = "_MAKE_WARM", [_MAP_ADD] = "_MAP_ADD", [_MATCH_CLASS] = "_MATCH_CLASS", [_MATCH_KEYS] = "_MATCH_KEYS", @@ -505,7 +506,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SEND_GEN_FRAME] = "_SEND_GEN_FRAME", [_SETUP_ANNOTATIONS] = "_SETUP_ANNOTATIONS", [_SET_ADD] = "_SET_ADD", - [_SET_EXECUTOR_RUN_STATE] = "_SET_EXECUTOR_RUN_STATE", [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", [_SET_IP] = "_SET_IP", [_SET_UPDATE] = "_SET_UPDATE", @@ -1064,7 +1064,7 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; - case _SET_EXECUTOR_RUN_STATE: + case _MAKE_WARM: return 0; case _FATAL_ERROR: return 0; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index dde4de3626f42c..71ac5fdf3b9712 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4840,8 +4840,8 @@ dummy_func( assert(((_PyExecutorObject *)executor)->vm_data.valid); } - tier2 op(_SET_EXECUTOR_RUN_STATE, (--)) { - current_executor->vm_data.was_run = 1; + tier2 op(_MAKE_WARM, (--)) { + current_executor->vm_data.warm = true; } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cfa6827df67aa0..e26c83214d985f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ 
-5458,8 +5458,8 @@ break; } - case _SET_EXECUTOR_RUN_STATE: { - current_executor->vm_data.was_run = 1; + case _MAKE_WARM: { + current_executor->vm_data.warm = true; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index fe41e88b8c6087..d90b2884a439ff 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -183,8 +183,8 @@ _PyOptimizer_Optimize( return err; } - if (++interp->executors_created >= JIT_CLEANUP_THRESHOLD) { - interp->executors_created = 0; + if (++interp->new_executors >= JIT_CLEANUP_THRESHOLD) { + interp->new_executors = 0; _Py_Executors_InvalidateCold(interp); } @@ -571,7 +571,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); - ADD_TO_TRACE(_SET_EXECUTOR_RUN_STATE, 0, 0, 0); + ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0); uint32_t target = 0; for (;;) { @@ -1203,7 +1203,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil executor->jit_size = 0; // This is initialized to true so we can prevent the executor // from being immediately detected as cold and invalidated. 
- executor->vm_data.was_run = 1; + executor->vm_data.warm = true; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); return NULL; @@ -1685,11 +1685,11 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; - if (!exec->vm_data.was_run && PyList_Append(invalidate, (PyObject *)exec) < 0) { + if (!exec->vm_data.warm && PyList_Append(invalidate, (PyObject *)exec) < 0) { goto error; } else { - exec->vm_data.was_run = 0; + exec->vm_data.warm = false; } exec = next; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 72871d711b75fe..9469340bb1bc5e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2375,7 +2375,7 @@ break; } - case _SET_EXECUTOR_RUN_STATE: { + case _MAKE_WARM: { break; } diff --git a/Python/pystate.c b/Python/pystate.c index a39a4640a478d6..0bccb75ec5d56a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -660,7 +660,7 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 (void)_Py_SetOptimizer(interp, NULL); interp->executor_list_head = NULL; - interp->executors_created = 0; + interp->new_executors = 0; #endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. 
*/ From 5961103a1fee40c07b9d1d6d64adbb61c8b45567 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 17 Sep 2024 16:16:30 -0700 Subject: [PATCH 53/61] Refactor to use eval breaker --- Include/cpython/pystate.h | 3 ++- Include/internal/pycore_ceval.h | 1 + Include/internal/pycore_interp.h | 2 -- Include/internal/pycore_optimizer.h | 5 ----- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 7 +++++++ Python/ceval_gil.c | 5 +++++ Python/executor_cases.c.h | 7 +++++++ Python/optimizer.c | 7 +------ Python/pystate.c | 1 - 10 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index f005729fff11b6..108d3538e85c07 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -90,9 +90,10 @@ struct _ts { unsigned int finalizing:1; unsigned int cleared:1; unsigned int finalized:1; + unsigned int run_counter:1; /* padding to align to 4 bytes */ - unsigned int :23; + unsigned int :22; } _status; #ifdef Py_BUILD_CORE # define _PyThreadState_WHENCE_NOTSET -1 diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index a97b53028c8f59..363845106e40dc 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -283,6 +283,7 @@ PyAPI_FUNC(PyObject *) _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFra #define _PY_GC_SCHEDULED_BIT (1U << 4) #define _PY_EVAL_PLEASE_STOP_BIT (1U << 5) #define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6) +#define _PY_EVAL_JIT_INVALIDATE_COLD_BIT (1U << 7) /* Reserve a few bits for future use */ #define _PY_EVAL_EVENTS_BITS 8 diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index f8a73352a3e3d6..c74c4ee46b86e1 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -259,8 +259,6 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - // new_executors is limited to 
JIT_CLEANUP_THRESHOLD - size_t new_executors; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 4e0099b201ab30..8beede8f722223 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -133,11 +133,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); #endif -// When new executors are created, we check to see if the number of -// executors created meets or exceeds the JIT_CLEANUP_THRESHOLD. If it -// does, we invalidate cold executors. -#define JIT_CLEANUP_THRESHOLD 10 - // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 800 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 07606135d7a356..3bd42dcb73c639 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_MAKE_WARM] = 0, + [_MAKE_WARM] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64aab0e5cee649..8061c6c8161e05 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4833,6 +4833,13 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; + if (++tstate->_status.run_counter > 100) { + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) { + int err = _Py_HandlePending(tstate); + ERROR_IF(err != 0, error); + } + } } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 6f4476d055b5ec..0c9d702db30f7a 100644 --- a/Python/ceval_gil.c +++ 
b/Python/ceval_gil.c @@ -1289,6 +1289,11 @@ _Py_HandlePending(PyThreadState *tstate) _Py_RunGC(tstate); } + if((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) { + _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); + _Py_Executors_InvalidateCold(tstate->interp); + } + /* GIL drop request */ if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) { /* Give another thread a chance */ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index def98f7239e9a4..ccf9ec5b07382f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5450,6 +5450,13 @@ case _MAKE_WARM: { current_executor->vm_data.warm = true; + if (++tstate->_status.run_counter > 100) { + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) { + int err = _Py_HandlePending(tstate); + if (err != 0) JUMP_TO_ERROR(); + } + } break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index d90b2884a439ff..a66d4baa32cf2d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -183,11 +183,6 @@ _PyOptimizer_Optimize( return err; } - if (++interp->new_executors >= JIT_CLEANUP_THRESHOLD) { - interp->new_executors = 0; - _Py_Executors_InvalidateCold(interp); - } - assert(*executor_ptr != NULL); if (progress_needed) { int index = get_index_for_executor(code, start); @@ -1703,7 +1698,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) error: PyErr_Clear(); Py_XDECREF(invalidate); - // If we're truly out of memory, wiping out everything is a fine fallback: + // If we're truly out of memory, wiping out everything is a fine fallback _Py_Executors_InvalidateAll(interp, 0); } diff --git a/Python/pystate.c b/Python/pystate.c index 0bccb75ec5d56a..54caf373e91d6c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -660,7 +660,6 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 (void)_Py_SetOptimizer(interp, NULL); 
interp->executor_list_head = NULL; - interp->new_executors = 0; #endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ From 99262b683b15534cf3e3f6a0f6743c2b3daf785e Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 19 Sep 2024 16:10:50 -0700 Subject: [PATCH 54/61] Address comments --- Include/cpython/pystate.h | 3 +-- Include/internal/pycore_interp.h | 1 + Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 9 +++------ Python/executor_cases.c.h | 9 +++------ Python/pystate.c | 1 + 6 files changed, 10 insertions(+), 15 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 108d3538e85c07..f005729fff11b6 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -90,10 +90,9 @@ struct _ts { unsigned int finalizing:1; unsigned int cleared:1; unsigned int finalized:1; - unsigned int run_counter:1; /* padding to align to 4 bytes */ - unsigned int :22; + unsigned int :23; } _status; #ifdef Py_BUILD_CORE # define _PyThreadState_WHENCE_NOTSET -1 diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index c74c4ee46b86e1..b857db5d062f9a 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -259,6 +259,7 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; + size_t run_counter; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3bd42dcb73c639..ac2f361f304b23 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_MAKE_WARM] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_MAKE_WARM] = HAS_ESCAPES_FLAG, 
[_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8061c6c8161e05..2f55a187d7e0db 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4833,12 +4833,9 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; - if (++tstate->_status.run_counter > 100) { - uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) { - int err = _Py_HandlePending(tstate); - ERROR_IF(err != 0, error); - } + if (++tstate->interp->run_counter > 100) { + _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); + tstate->interp->run_counter = 0; } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ccf9ec5b07382f..066da8dd2d1da3 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5450,12 +5450,9 @@ case _MAKE_WARM: { current_executor->vm_data.warm = true; - if (++tstate->_status.run_counter > 100) { - uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) { - int err = _Py_HandlePending(tstate); - if (err != 0) JUMP_TO_ERROR(); - } + if (++tstate->interp->run_counter > 100) { + _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); + tstate->interp->run_counter = 0; } break; } diff --git a/Python/pystate.c b/Python/pystate.c index 54caf373e91d6c..3a7a74b30fde4b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -660,6 +660,7 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 (void)_Py_SetOptimizer(interp, NULL); interp->executor_list_head = NULL; + interp->run_counter = 0; #endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. 
*/ From 230fe059122be0a4c05fce616ec6d5d339656ec0 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 19 Sep 2024 16:11:35 -0700 Subject: [PATCH 55/61] Update to 10k --- Python/bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2f55a187d7e0db..de41ec9d4ba5a7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4833,7 +4833,7 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 100) { + if (++tstate->interp->run_counter > 10000) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); tstate->interp->run_counter = 0; } From f3c01a1904eb817183446a558e94dc11d2c928cb Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 19 Sep 2024 16:12:02 -0700 Subject: [PATCH 56/61] Update to 1m --- Python/bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index de41ec9d4ba5a7..a3802e364f11f0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4833,7 +4833,7 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 10000) { + if (++tstate->interp->run_counter > 1000000) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); tstate->interp->run_counter = 0; } From 563a4d7bbe9b8f2f34438c8c114d89965633aa07 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 19 Sep 2024 16:15:48 -0700 Subject: [PATCH 57/61] update cases --- Python/executor_cases.c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 066da8dd2d1da3..d9655e3cfe16e4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5450,7 +5450,7 @@ case _MAKE_WARM: { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 100) { + if (++tstate->interp->run_counter > 1000000) { 
_Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); tstate->interp->run_counter = 0; } From 062c54fd62b6b5972dbd0c20b8fa815d06ea55eb Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 23 Sep 2024 10:22:03 -0700 Subject: [PATCH 58/61] add _Py_set_eval_breaker_bit to nonescaping --- Include/internal/pycore_uop_metadata.h | 2 +- Tools/cases_generator/analyzer.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ac2f361f304b23..07606135d7a356 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_MAKE_WARM] = HAS_ESCAPES_FLAG, + [_MAKE_WARM] = 0, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 3cc36b6b5841bd..b1ff2e0b318f21 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -527,6 +527,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyList_FromStackRefSteal", "_PyTuple_FromArraySteal", "_PyTuple_FromStackRefSteal", + "_Py_set_eval_breaker_bit" ) ESCAPING_FUNCTIONS = ( From 17ece50f5ce2ddcae8313f42a41de93be1276116 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 23 Sep 2024 15:55:04 -0700 Subject: [PATCH 59/61] create 100k branch --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a3802e364f11f0..b4c374bd1c41aa 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4833,7 +4833,7 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 1000000) { + if
(++tstate->interp->run_counter > 100000) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); tstate->interp->run_counter = 0; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d9655e3cfe16e4..f9f374a5582309 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5450,7 +5450,7 @@ case _MAKE_WARM: { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 1000000) { + if (++tstate->interp->run_counter > 100000) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); tstate->interp->run_counter = 0; } From 34363f2f59c3ab23ca5545ab4fcc031152b0aca0 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 24 Sep 2024 15:29:13 -0700 Subject: [PATCH 60/61] Address comments from Brandt --- Include/internal/pycore_interp.h | 2 +- Include/internal/pycore_optimizer.h | 10 +++++++--- Python/bytecodes.c | 4 ++-- Python/ceval_gil.c | 2 +- Python/executor_cases.c.h | 4 ++-- Python/optimizer.c | 1 - Python/pystate.c | 2 +- 7 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 21088297c2191e..a1898d926ac39f 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -261,7 +261,7 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - size_t run_counter; + size_t trace_run_counter; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 8beede8f722223..f92c0a0cddf906 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -29,9 +29,9 @@ typedef struct { typedef struct { uint8_t opcode; uint8_t oparg; - uint16_t valid:1; - uint16_t linked:1; - uint16_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1. 
+ uint8_t valid:1; + uint8_t linked:1; + uint8_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1. bool warm; int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). _PyBloomFilter bloom; @@ -133,6 +133,10 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); #endif +// Used as the threshold to trigger executor invalidation when +// trace_run_counter is greater than this value. +#define JIT_CLEANUP_THRESHOLD 100000 + // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 800 diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b4c374bd1c41aa..8c7708011b1a37 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4833,9 +4833,9 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 100000) { + if (++tstate->interp->trace_run_counter > JIT_CLEANUP_THRESHOLD) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); - tstate->interp->run_counter = 0; + tstate->interp->trace_run_counter = 0; } } diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 0c9d702db30f7a..b1f06c513ad98e 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1289,7 +1289,7 @@ _Py_HandlePending(PyThreadState *tstate) _Py_RunGC(tstate); } - if((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) { + if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) { _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); _Py_Executors_InvalidateCold(tstate->interp); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f9f374a5582309..b867f1a8de79fb 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5450,9 +5450,9 @@ case _MAKE_WARM: { current_executor->vm_data.warm = true; - if (++tstate->interp->run_counter > 100000) { + if (++tstate->interp->trace_run_counter > JIT_CLEANUP_THRESHOLD) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); - 
tstate->interp->run_counter = 0; + tstate->interp->trace_run_counter = 0; } break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index a66d4baa32cf2d..b1562ad48f352c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -182,7 +182,6 @@ _PyOptimizer_Optimize( if (err <= 0) { return err; } - assert(*executor_ptr != NULL); if (progress_needed) { int index = get_index_for_executor(code, start); diff --git a/Python/pystate.c b/Python/pystate.c index bfbdecaa2c17a0..6a617ed5a36c69 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -660,7 +660,7 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 (void)_Py_SetOptimizer(interp, NULL); interp->executor_list_head = NULL; - interp->run_counter = 0; + interp->trace_run_counter = 0; #endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ From 09e3300140c00f0403ea5b8a4f11b3b349bd524c Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 25 Sep 2024 08:54:52 -0700 Subject: [PATCH 61/61] Dedent goto error --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 73a33fc145949b..978649faa04d45 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1680,7 +1680,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) _PyExecutorObject *next = exec->vm_data.links.next; if (!exec->vm_data.warm && PyList_Append(invalidate, (PyObject *)exec) < 0) { - goto error; + goto error; } else { exec->vm_data.warm = false;