Skip to content

Commit

Permalink
backport perf trampoline fork support from upstream 3.13
Browse files Browse the repository at this point in the history
Summary: Straight up port of python/cpython#109666

Reviewed By: jbower-fb

Differential Revision: D66118319

fbshipit-source-id: bd7d4181c16d35526f57500cebb6e16fc2df58aa
  • Loading branch information
Aniket Panse authored and facebook-github-bot committed Dec 17, 2024
1 parent 55b50e1 commit 5712d9b
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 8 deletions.
2 changes: 2 additions & 0 deletions Include/internal/pycore_ceval_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct _ceval_runtime_state {
struct code_arena_st *code_arena;
struct trampoline_api_st trampoline_api;
FILE *map_file;
Py_ssize_t persist_after_fork;
#else
int _not_used;
#endif
Expand All @@ -76,6 +77,7 @@ struct _ceval_runtime_state {
{ \
.status = PERF_STATUS_NO_INIT, \
.extra_code_index = -1, \
.persist_after_fork = 0, \
}
#else
# define _PyEval_RUNTIME_PERF_INIT {0}
Expand Down
9 changes: 6 additions & 3 deletions Include/sysmodule.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@

/* System module interface */

#ifndef Py_SYSMODULE_H
#define Py_SYSMODULE_H
#ifdef __cplusplus
Expand Down Expand Up @@ -40,8 +37,14 @@ PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name);

PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);

PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable);

#endif


#ifndef Py_LIMITED_API
# define Py_CPYTHON_SYSMODULE_H
# include "cpython/sysmodule.h"
Expand Down
70 changes: 70 additions & 0 deletions Lib/test/test_perf_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,76 @@ def baz(n):
self.assertNotIn(f"py::bar:{script}", stdout)
self.assertNotIn(f"py::baz:{script}", stdout)

def test_pre_fork_compile(self):
code = """if 1:
import sys
import os
import sysconfig
from _testinternalcapi import (
compile_perf_trampoline_entry,
perf_trampoline_set_persist_after_fork,
)
def foo_fork():
pass
def bar_fork():
foo_fork()
def foo():
pass
def bar():
foo()
def compile_trampolines_for_all_functions():
perf_trampoline_set_persist_after_fork(1)
for _, obj in globals().items():
if callable(obj) and hasattr(obj, '__code__'):
compile_perf_trampoline_entry(obj.__code__)
if __name__ == "__main__":
compile_trampolines_for_all_functions()
pid = os.fork()
if pid == 0:
print(os.getpid())
bar_fork()
else:
bar()
"""

with temp_dir() as script_dir:
script = make_script(script_dir, "perftest", code)
with subprocess.Popen(
[sys.executable, "-Xperf", script],
universal_newlines=True,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
) as process:
stdout, stderr = process.communicate()

self.assertEqual(process.returncode, 0)
self.assertNotIn("Error:", stderr)
child_pid = int(stdout.strip())
perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map")
self.assertTrue(perf_file.exists())
self.assertTrue(perf_child_file.exists())

perf_file_contents = perf_file.read_text()
self.assertIn(f"py::foo:{script}", perf_file_contents)
self.assertIn(f"py::bar:{script}", perf_file_contents)
self.assertIn(f"py::foo_fork:{script}", perf_file_contents)
self.assertIn(f"py::bar_fork:{script}", perf_file_contents)

child_perf_file_contents = perf_child_file.read_text()
self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents)
self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents)

# Pre-compiled perf-map entries of a forked process must be
# identical in both the parent and child perf-map files.
perf_file_lines = perf_file_contents.split("\n")
for line in perf_file_lines:
if (
f"py::foo_fork:{script}" in line
or f"py::bar_fork:{script}" in line
):
self.assertIn(line, child_perf_file_contents)


if __name__ == "__main__":
unittest.main()
33 changes: 33 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,37 @@ gh_119213_getargs_impl(PyObject *module, PyObject *spam)
return Py_NewRef(spam);
}

static PyObject *
compile_perf_trampoline_entry(PyObject *self, PyObject *args)
{
PyObject *co;
if (!PyArg_ParseTuple(args, "O!", &PyCode_Type, &co)) {
return NULL;
}
int ret = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)co);
if (ret != 0) {
PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline");
return NULL;
}
return PyLong_FromLong(ret);
}

static PyObject *
perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args)
{
int enable;
if (!PyArg_ParseTuple(args, "i", &enable)) {
return NULL;
}
int ret = PyUnstable_PerfTrampoline_SetPersistAfterFork(enable);
if (ret == 0) {
PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork");
return NULL;
}
return PyLong_FromLong(ret);
}



static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
Expand Down Expand Up @@ -1106,6 +1137,8 @@ static PyMethodDef module_functions[] = {
METH_VARARGS | METH_KEYWORDS},
{"pending_identify", pending_identify, METH_VARARGS, NULL},
GH_119213_GETARGS_METHODDEF
{"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS},
{"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS},
{NULL, NULL} /* sentinel */
};

Expand Down
49 changes: 44 additions & 5 deletions Python/perf_trampoline.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ typedef struct trampoline_api_st trampoline_api_t;
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork


static void
Expand Down Expand Up @@ -374,6 +375,26 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
}
#endif // PY_HAVE_PERF_TRAMPOLINE

int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
py_trampoline f = NULL;
assert(extra_code_index != -1);
int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
if (ret != 0 || f == NULL) {
py_trampoline new_trampoline = compile_trampoline();
if (new_trampoline == NULL) {
return 0;
}
trampoline_api.write_state(trampoline_api.state, new_trampoline,
perf_code_arena->code_size, co);
return _PyCode_SetExtra((PyObject *)co, extra_code_index,
(void *)new_trampoline);
}
#endif // PY_HAVE_PERF_TRAMPOLINE
return 0;
}

int
_PyIsPerfTrampolineActive(void)
{
Expand Down Expand Up @@ -477,16 +498,34 @@ _PyPerfTrampoline_Fini(void)
return 0;
}

int
PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){
#ifdef PY_HAVE_PERF_TRAMPOLINE
persist_after_fork = enable;
return persist_after_fork;
#endif
return 0;
}

PyStatus
_PyPerfTrampoline_AfterFork_Child(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
// Restart trampoline in file in child.
int was_active = _PyIsPerfTrampolineActive();
_PyPerfTrampoline_Fini();
PyUnstable_PerfMapState_Fini();
if (was_active) {
_PyPerfTrampoline_Init(1);
if (persist_after_fork) {
char filename[256];
pid_t parent_pid = getppid();
snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
if (PyUnstable_CopyPerfMapFile(filename) != 0) {
return PyStatus_Error("Failed to copy perf map file.");
}
} else {
// Restart trampoline in file in child.
int was_active = _PyIsPerfTrampolineActive();
_PyPerfTrampoline_Fini();
if (was_active) {
_PyPerfTrampoline_Init(1);
}
}
#endif
return PyStatus_Ok();
Expand Down
35 changes: 35 additions & 0 deletions Python/sysmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2369,6 +2369,41 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
#endif
}

PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) {
#ifndef MS_WINDOWS
FILE* from = fopen(parent_filename, "r");
if (!from) {
return -1;
}
if (perf_map_state.perf_map == NULL) {
int ret = PyUnstable_PerfMapState_Init();
if (ret != 0) {
return ret;
}
}
char buf[4096];
PyThread_acquire_lock(perf_map_state.map_lock, 1);
int fflush_result = 0, result = 0;
while (1) {
size_t bytes_read = fread(buf, 1, sizeof(buf), from);
size_t bytes_written = fwrite(buf, 1, bytes_read, perf_map_state.perf_map);
fflush_result = fflush(perf_map_state.perf_map);
if (fflush_result != 0 || bytes_read == 0 || bytes_written < bytes_read) {
result = -1;
goto close_and_release;
}
if (bytes_read < sizeof(buf) && feof(from)) {
goto close_and_release;
}
}
close_and_release:
fclose(from);
PyThread_release_lock(perf_map_state.map_lock);
return result;
#endif
return 0;
}

#ifdef __cplusplus
}
#endif
Expand Down

0 comments on commit 5712d9b

Please sign in to comment.