diff --git a/Python/jit.c b/Python/jit.c
index 40b9bf5a7e080c..57558372882115 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -288,10 +288,8 @@ copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches)
static void
emit(const StencilGroup *group, uint64_t patches[])
{
- char *data = (char *)patches[HoleValue_DATA];
- copy_and_patch(data, &group->data, patches);
- char *text = (char *)patches[HoleValue_TEXT];
- copy_and_patch(text, &group->text, patches);
+ copy_and_patch((char *)patches[HoleValue_CODE], &group->code, patches); // code stencil goes to the address stored under HoleValue_CODE
+ copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); // data stencil goes to the address stored under HoleValue_DATA
}
// This becomes the executor's execute member, and handles some setup/teardown:
@@ -312,53 +310,53 @@ int
_PyJIT_Compile(_PyUOpExecutorObject *executor)
{
// Loop once to find the total compiled size:
- size_t text_size = 0;
+ size_t code_size = 0;
size_t data_size = 0;
for (Py_ssize_t i = 0; i < Py_SIZE(executor); i++) {
_PyUOpInstruction *instruction = &executor->trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
- text_size += group->text.body_size;
+ code_size += group->code.body_size;
data_size += group->data.body_size;
}
- // Round up to the nearest page (text and data need separate pages):
+ // Round up to the nearest page (code and data need separate pages):
size_t page_size = get_page_size();
assert((page_size & (page_size - 1)) == 0);
- text_size += page_size - (text_size & (page_size - 1));
+ code_size += page_size - (code_size & (page_size - 1));
data_size += page_size - (data_size & (page_size - 1));
- char *memory = jit_alloc(text_size + data_size);
+ char *memory = jit_alloc(code_size + data_size);
if (memory == NULL) {
goto fail;
}
// Loop again to emit the code:
- char *text = memory;
- char *data = memory + text_size;
+ char *code = memory;
+ char *data = memory + code_size;
for (Py_ssize_t i = 0; i < Py_SIZE(executor); i++) {
_PyUOpInstruction *instruction = &executor->trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
// Think of patches as a dictionary mapping HoleValue to uint64_t:
uint64_t patches[] = GET_PATCHES();
- patches[HoleValue_CONTINUE] = (uint64_t)text + group->text.body_size;
- patches[HoleValue_CURRENT_EXECUTOR] = (uint64_t)executor;
+ patches[HoleValue_CODE] = (uint64_t)code;
+ patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size;
+ patches[HoleValue_DATA] = (uint64_t)data;
+ patches[HoleValue_EXECUTOR] = (uint64_t)executor;
patches[HoleValue_OPARG] = instruction->oparg;
patches[HoleValue_OPERAND] = instruction->operand;
patches[HoleValue_TARGET] = instruction->target;
- patches[HoleValue_DATA] = (uint64_t)data;
- patches[HoleValue_TEXT] = (uint64_t)text;
patches[HoleValue_TOP] = (uint64_t)memory;
patches[HoleValue_ZERO] = 0;
emit(group, patches);
- text += group->text.body_size;
+ code += group->code.body_size;
data += group->data.body_size;
}
- if (mark_executable(memory, text_size) ||
- mark_readable(memory + text_size, data_size))
+ if (mark_executable(memory, code_size) ||
+ mark_readable(memory + code_size, data_size))
{
- jit_free(memory, text_size + data_size);
+ jit_free(memory, code_size + data_size);
goto fail;
}
executor->base.execute = execute;
executor->jit_code = memory;
- executor->jit_size = text_size + data_size;
+ executor->jit_size = code_size + data_size;
return 1;
fail:
return PyErr_Occurred() ? -1 : 0;
diff --git a/Tools/jit/README.md b/Tools/jit/README.md
index e2e34513e431c0..2d0613abbfd8c7 100644
--- a/Tools/jit/README.md
+++ b/Tools/jit/README.md
@@ -1,15 +1,11 @@
-
-
The JIT Compiler
================
-
-
This version of CPython can be built with an experimental just-in-time compiler. While most everything you already know about building and using CPython is unchanged, you will probably need to install a compatible version of LLVM first.
### Installing LLVM
-While the JIT compiler does not require end users to install any third-party dependencies, part of it must be *built* using LLVM. It is *not* required for you to build the rest of CPython using LLVM, or the even the same version of LLVM (in fact, this is uncommon).
+The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
LLVM version 16 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-16`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
@@ -47,4 +43,4 @@ For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.b
For all other builds, pass the new `--enable-experimental-jit` option to `configure`.
-Otherwise, just configure and build as you normally would. Even cross-compiling "just works", since the JIT is built for the host platform.
+Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform.
diff --git a/Tools/jit/build.py b/Tools/jit/build.py
index 0c2892da48687b..1e92a36129a583 100644
--- a/Tools/jit/build.py
+++ b/Tools/jit/build.py
@@ -34,13 +34,13 @@
@enum.unique
class HoleValue(enum.Enum):
+ CODE = enum.auto()
CONTINUE = enum.auto()
- CURRENT_EXECUTOR = enum.auto()
DATA = enum.auto()
+ EXECUTOR = enum.auto()
OPARG = enum.auto()
OPERAND = enum.auto()
TARGET = enum.auto()
- TEXT = enum.auto()
TOP = enum.auto()
ZERO = enum.auto()
@@ -55,19 +55,102 @@ class Hole:
replace = dataclasses.replace
+S = typing.TypeVar("S", schema.COFFSection, schema.ELFSection, schema.MachOSection)
+R = typing.TypeVar(
+ "R", schema.COFFRelocation, schema.ELFRelocation, schema.MachORelocation
+)
+
+
@dataclasses.dataclass
-class Stencil:
+class Stencil(typing.Generic[R]):
body: bytearray = dataclasses.field(default_factory=bytearray)
holes: list[Hole] = dataclasses.field(default_factory=list)
disassembly: list[str] = dataclasses.field(default_factory=list)
symbols: dict[str, int] = dataclasses.field(default_factory=dict, init=False)
offsets: dict[int, int] = dataclasses.field(default_factory=dict, init=False)
+ relocations: list[tuple[int, R]] = dataclasses.field(
+ default_factory=list, init=False
+ )
+
+ def pad(self, alignment: int) -> None:  # Zero-pad body so that its length becomes a multiple of alignment.
+ offset = len(self.body)
+ padding = -offset % alignment  # bytes missing to reach the next multiple of alignment (0 when already aligned)
+ self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")  # list the padding bytes at their starting offset
+ self.body.extend([0] * padding)
+
+ def emit_aarch64_trampoline(self, hole: Hole) -> typing.Generator[Hole, None, None]:
+ base = len(self.body)
+ where = slice(hole.offset, hole.offset + 4)
+ instruction = int.from_bytes(self.body[where], sys.byteorder)
+ instruction &= 0xFC000000
+ instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
+ self.body[where] = instruction.to_bytes(4, sys.byteorder)
+ self.disassembly += [
+ f"{base + 4 * 0: x}: d2800008 mov x8, #0x0",
+ f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}",
+ f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16",
+ f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}",
+ f"{base + 4 * 2:x}: f2c00008 movk x8, #0x0, lsl #32",
+ f"{base + 4 * 2:016x}: R_AARCH64_MOVW_UABS_G2_NC {hole.symbol}",
+ f"{base + 4 * 3:x}: f2e00008 movk x8, #0x0, lsl #48",
+ f"{base + 4 * 3:016x}: R_AARCH64_MOVW_UABS_G3 {hole.symbol}",
+ f"{base + 4 * 4:x}: d61f0100 br x8",
+ ]
+ self.body.extend(0xD2800008.to_bytes(4, sys.byteorder))
+ self.body.extend(0xF2A00008.to_bytes(4, sys.byteorder))
+ self.body.extend(0xF2C00008.to_bytes(4, sys.byteorder))
+ self.body.extend(0xF2E00008.to_bytes(4, sys.byteorder))
+ self.body.extend(0xD61F0100.to_bytes(4, sys.byteorder))
+ yield hole.replace(offset=base + 4 * 0, kind="R_AARCH64_MOVW_UABS_G0_NC")
+ yield hole.replace(offset=base + 4 * 1, kind="R_AARCH64_MOVW_UABS_G1_NC")
+ yield hole.replace(offset=base + 4 * 2, kind="R_AARCH64_MOVW_UABS_G2_NC")
+ yield hole.replace(offset=base + 4 * 3, kind="R_AARCH64_MOVW_UABS_G3")
@dataclasses.dataclass
-class StencilGroup:
- text: Stencil = dataclasses.field(default_factory=Stencil)
- data: Stencil = dataclasses.field(default_factory=Stencil)
+class StencilGroup(typing.Generic[R]):
+ code: Stencil[R] = dataclasses.field(default_factory=Stencil)
+ data: Stencil[R] = dataclasses.field(default_factory=Stencil)
+ global_offset_table: dict[str, int] = dataclasses.field(
+ default_factory=dict, init=False
+ )
+
+ def global_offset_table_lookup(self, symbol: str | None) -> int:  # Return an offset, relative to the data stencil, for symbol's table slot.
+ self.data.pad(8)  # bring the data body to an 8-byte boundary first
+ if symbol is None:
+ return len(self.data.body)  # no symbol: just the current (padded) end of the data body
+ default = 8 * len(self.global_offset_table)  # next unused 8-byte slot, taken only if symbol has no slot yet
+ return len(self.data.body) + self.global_offset_table.setdefault(
+ symbol, default
+ )
+
+ def emit_global_offset_table(self) -> None:  # Append one zeroed 8-byte slot plus one hole to data for every recorded symbol.
+ global_offset_table = len(self.data.body)  # offset in data where the table begins
+ for s, offset in self.global_offset_table.items():
+ if s in self.code.symbols:
+ value, symbol = HoleValue.CODE, None  # symbol lives in this group's code: CODE base + its offset
+ addend = self.code.symbols[s]
+ elif s in self.data.symbols:
+ value, symbol = HoleValue.DATA, None  # symbol lives in this group's data: DATA base + its offset
+ addend = self.data.symbols[s]
+ else:
+ value, symbol = _symbol_to_value(s)  # anything else: a _JIT_* hole value or an external symbol name
+ addend = 0
+ self.data.holes.append(
+ Hole(global_offset_table + offset, "R_X86_64_64", value, symbol, addend)
+ )
+ value_part = value.name if value is not HoleValue.ZERO else ""  # the lines below only build the human-readable listing entry
+ if value_part and not symbol and not addend:
+ addend_part = ""
+ else:
+ addend_part = f"&{symbol} + " if symbol else ""
+ addend_part += format_addend(addend)
+ if value_part:
+ value_part += " + "
+ self.data.disassembly.append(
+ f"{len(self.data.body):x}: {value_part}{addend_part}"
+ )
+ self.data.body.extend([0] * 8)  # zeroed slot bytes that the hole above refers to
_SEMAPHORE = asyncio.BoundedSemaphore(os.cpu_count() or 1)
@@ -85,38 +168,47 @@ async def run(
assert err is None, err
if process.returncode:
raise RuntimeError(f"{args[0]} exited with {process.returncode}")
- return out
+ return out or b""
-S = typing.TypeVar("S", schema.COFFSection, schema.ELFSection, schema.MachOSection)
-R = typing.TypeVar(
- "R", schema.COFFRelocation, schema.ELFRelocation, schema.MachORelocation
-)
+def _symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]:  # Map a symbol name to (HoleValue member, remaining symbol name or None).
+ try:
+ if symbol.startswith("_JIT_"):
+ return HoleValue[symbol.removeprefix("_JIT_")], None  # e.g. "_JIT_OPARG" -> (HoleValue.OPARG, None)
+ except KeyError:
+ pass  # "_JIT_"-prefixed but not a HoleValue member: handled like any other symbol
+ return HoleValue.ZERO, symbol
-class Parser(typing.Generic[S, R]):
- def __init__(self, options: "Options") -> None:
- self.group = StencilGroup()
- self.relocations_text: list[tuple[int, R]] = []
- self.relocations_data: list[tuple[int, R]] = []
- self.global_offset_table: dict[str, int] = {}
- assert options.target.parser is type(self)
- self.options = options
+@dataclasses.dataclass
+class Target(typing.Generic[S, R]):
+ triple: str
+ _: dataclasses.KW_ONLY
+ alignment: int = 1
+ prefix: str = ""
+ debug: bool = False
+ verbose: bool = False
+
+ def sha256(self) -> bytes:  # SHA-256 digest over triple, alignment and prefix (debug and verbose are not included).
+ hasher = hashlib.sha256()
+ hasher.update(self.triple.encode())
+ hasher.update(self.alignment.to_bytes())  # argument-less to_bytes() yields a single byte (defaults added in Python 3.11)
+ hasher.update(self.prefix.encode())
+ return hasher.digest()
- async def parse(self, path: pathlib.Path) -> StencilGroup:
- objdump = llvm.find_tool("llvm-objdump", echo=self.options.verbose)
+ async def parse(self, path: pathlib.Path) -> StencilGroup[R]:
+ group: StencilGroup[R] = StencilGroup()
+ objdump = llvm.find_tool("llvm-objdump", echo=self.verbose)
if objdump is not None:
flags = ["--disassemble", "--reloc"]
- output = await run(
- objdump, *flags, path, capture=True, echo=self.options.verbose
- )
+ output = await run(objdump, *flags, path, capture=True, echo=self.verbose)
assert output is not None
- self.group.text.disassembly.extend(
+ group.code.disassembly.extend(
line.expandtabs().strip()
for line in output.decode().splitlines()
if not line.isspace()
)
- readobj = llvm.require_tool("llvm-readobj", echo=self.options.verbose)
+ readobj = llvm.require_tool("llvm-readobj", echo=self.verbose)
flags = [
"--elf-output-style=JSON",
"--expand-relocs",
@@ -126,178 +218,163 @@ async def parse(self, path: pathlib.Path) -> StencilGroup:
"--section-symbols",
"--sections",
]
- output = await run(
- readobj, *flags, path, capture=True, echo=self.options.verbose
- )
+ output = await run(readobj, *flags, path, capture=True, echo=self.verbose)
assert output is not None
# --elf-output-style=JSON is only *slightly* broken on Mach-O...
output = output.replace(b"PrivateExtern\n", b"\n")
output = output.replace(b"Extern\n", b"\n")
# ...and also COFF:
- start = output.index(b"[", 1)
- end = output.rindex(b"]", start, -1) + 1
- sections: list[dict[typing.Literal["Section"], S]] = json.loads(
- output[start:end]
- )
+ output = output[output.index(b"[", 1, None):]
+ output = output[:output.rindex(b"]", None, -1) + 1]
+ sections: list[dict[typing.Literal["Section"], S]] = json.loads(output)
for wrapped_section in sections:
- self._handle_section(wrapped_section["Section"])
- assert self.group.text.symbols["_JIT_ENTRY"] == 0
- if self.group.data.body:
- self.group.data.disassembly.append(
- f"0: {str(bytes(self.group.data.body)).removeprefix('b')}"
- )
- self._pad(self.group.data, 8)
- self._process_relocations(self.relocations_text, self.group.text)
+ self._handle_section(wrapped_section["Section"], group)
+ assert group.code.symbols["_JIT_ENTRY"] == 0
+ if group.data.body:
+ bytes_without_b = str(bytes(group.data.body)).removeprefix("b")
+ group.data.disassembly.append(f"0: {bytes_without_b}")
+ group.data.pad(8)
+ self._process_relocations(group.code, group)
remaining: list[Hole] = []
- for hole in self.group.text.holes:
+ for hole in group.code.holes:
if (
hole.kind in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26"}
and hole.value is HoleValue.ZERO
):
- remaining.extend(self._emit_aarch64_trampoline(self.group.text, hole))
+ remaining.extend(group.code.emit_aarch64_trampoline(hole))
else:
remaining.append(hole)
- self.group.text.holes[:] = remaining
- self._pad(self.group.text, self.options.target.alignment)
- self._process_relocations(self.relocations_data, self.group.data)
- self._emit_global_offset_table()
- self.group.text.holes.sort(key=lambda hole: hole.offset)
- self.group.data.holes.sort(key=lambda hole: hole.offset)
- return self.group
-
- def _emit_global_offset_table(self) -> None:
- global_offset_table = len(self.group.data.body)
- for s, offset in self.global_offset_table.items():
- if s in self.group.text.symbols:
- value, symbol = HoleValue.TEXT, None
- addend = self.group.text.symbols[s]
- elif s in self.group.data.symbols:
+ group.code.holes[:] = remaining
+ group.code.pad(self.alignment)
+ self._process_relocations(group.data, group)
+ group.emit_global_offset_table()
+ group.code.holes.sort(key=lambda hole: hole.offset)
+ group.data.holes.sort(key=lambda hole: hole.offset)
+ return group
+
+ def _process_relocations(self, stencil: Stencil[R], group: StencilGroup[R]) -> None:
+ for base, relocation in stencil.relocations:
+ hole = self._handle_relocation(base, relocation, group, stencil.body)
+ if hole.symbol in group.data.symbols:
value, symbol = HoleValue.DATA, None
- addend = self.group.data.symbols[s]
- else:
- value, symbol = self._symbol_to_value(s)
- addend = 0
- self.group.data.holes.append(
- Hole(global_offset_table + offset, "R_X86_64_64", value, symbol, addend)
- )
- value_part = value.name if value is not HoleValue.ZERO else ""
- if value_part and not symbol and not addend:
- addend_part = ""
- else:
- addend_part = f"&{symbol} + " if symbol else ""
- addend_part += format_addend(addend)
- if value_part:
- value_part += " + "
- self.group.data.disassembly.append(
- f"{len(self.group.data.body):x}: {value_part}{addend_part}"
- )
- self.group.data.body.extend([0] * 8)
-
- @staticmethod
- def _emit_aarch64_trampoline(
- stencil: Stencil, hole: Hole
- ) -> typing.Generator[Hole, None, None]:
- base = len(stencil.body)
- instruction = int.from_bytes(
- stencil.body[hole.offset : hole.offset + 4], sys.byteorder
- )
- instruction = (instruction & 0xFC000000) | (
- ((base - hole.offset) >> 2) & 0x03FFFFFF
- )
- stencil.body[hole.offset : hole.offset + 4] = instruction.to_bytes(
- 4, sys.byteorder
- )
- stencil.disassembly += [
- f"{base + 4 * 0: x}: d2800008 mov x8, #0x0",
- f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}",
- f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16",
- f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}",
- f"{base + 4 * 2:x}: f2c00008 movk x8, #0x0, lsl #32",
- f"{base + 4 * 2:016x}: R_AARCH64_MOVW_UABS_G2_NC {hole.symbol}",
- f"{base + 4 * 3:x}: f2e00008 movk x8, #0x0, lsl #48",
- f"{base + 4 * 3:016x}: R_AARCH64_MOVW_UABS_G3 {hole.symbol}",
- f"{base + 4 * 4:x}: d61f0100 br x8",
- ]
- stencil.body.extend(0xD2800008.to_bytes(4, sys.byteorder))
- stencil.body.extend(0xF2A00008.to_bytes(4, sys.byteorder))
- stencil.body.extend(0xF2C00008.to_bytes(4, sys.byteorder))
- stencil.body.extend(0xF2E00008.to_bytes(4, sys.byteorder))
- stencil.body.extend(0xD61F0100.to_bytes(4, sys.byteorder))
- yield hole.replace(offset=base + 4 * 0, kind="R_AARCH64_MOVW_UABS_G0_NC")
- yield hole.replace(offset=base + 4 * 1, kind="R_AARCH64_MOVW_UABS_G1_NC")
- yield hole.replace(offset=base + 4 * 2, kind="R_AARCH64_MOVW_UABS_G2_NC")
- yield hole.replace(offset=base + 4 * 3, kind="R_AARCH64_MOVW_UABS_G3")
-
- def _process_relocations(
- self, relocations: list[tuple[int, R]], stencil: Stencil
- ) -> None:
- for base, relocation in relocations:
- hole = self._handle_relocation(base, relocation, stencil.body)
- if hole.symbol in self.group.data.symbols:
- value, symbol = HoleValue.DATA, None
- addend = hole.addend + self.group.data.symbols[hole.symbol]
+ addend = hole.addend + group.data.symbols[hole.symbol]
hole = hole.replace(value=value, symbol=symbol, addend=addend)
- elif hole.symbol in self.group.text.symbols:
- value, symbol = HoleValue.TEXT, None
- addend = hole.addend + self.group.text.symbols[hole.symbol]
+ elif hole.symbol in group.code.symbols:
+ value, symbol = HoleValue.CODE, None
+ addend = hole.addend + group.code.symbols[hole.symbol]
hole = hole.replace(value=value, symbol=symbol, addend=addend)
stencil.holes.append(hole)
- @staticmethod
- def _pad(stencil: Stencil, alignment: int) -> None:
- offset = len(stencil.body)
- padding = -offset % alignment
- stencil.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
- stencil.body.extend([0] * padding)
-
- def _global_offset_table_lookup(self, symbol: str | None) -> int:
- self._pad(self.group.data, 8)
- if symbol is None:
- return len(self.group.data.body)
- default = 8 * len(self.global_offset_table)
- return len(self.group.data.body) + self.global_offset_table.setdefault(
- symbol, default
- )
-
- def _symbol_to_value(self, symbol: str) -> tuple[HoleValue, str | None]:
- try:
- if symbol.startswith("_JIT_"):
- return HoleValue[symbol.removeprefix("_JIT_")], None
- except KeyError:
- pass
- return HoleValue.ZERO, symbol
-
- def _handle_section(self, section: S) -> None:
+ def _handle_section(self, section: S, group: StencilGroup[R]) -> None:
raise NotImplementedError()
- def _handle_relocation(self, base: int, relocation: R, raw: bytes) -> Hole:
+ def _handle_relocation(
+ self, base: int, relocation: R, group: StencilGroup[R], raw: bytes
+ ) -> Hole:
raise NotImplementedError()
-
-class ELF(Parser[schema.ELFSection, schema.ELFRelocation]):
- def _handle_section(self, section: schema.ELFSection) -> None:
+ async def _compile(
+ self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
+ ) -> StencilGroup[R]:  # Compile c with clang for one opname into tempdir, then parse the resulting object file.
+ o = tempdir / f"{opname}.o"  # object file path, one per opname
+ flags = [
+ f"--target={self.triple}",
+ "-DPy_BUILD_CORE",
+ "-D_DEBUG" if self.debug else "-DNDEBUG",
+ f"-D_JIT_OPCODE={opname}",  # passes the opname to the C source as a preprocessor define
+ "-D_PyJIT_ACTIVE",
+ "-D_Py_JIT",
+ "-I.",
+ f"-I{INCLUDE}",
+ f"-I{INCLUDE_INTERNAL}",
+ f"-I{PYTHON}",
+ "-O3",
+ "-c",
+ "-fno-asynchronous-unwind-tables",
+ # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds:
+ "-fno-jump-tables",
+ # Position-independent code adds indirection to every load and jump:
+ "-fno-pic",
+ # Don't make calls to weird stack-smashing canaries:
+ "-fno-stack-protector",
+ # We have three options for code model:
+ # - "small": the default, assumes that code and data reside in the lowest
+ # 2GB of memory (128MB on aarch64)
+ # - "medium": assumes that code resides in the lowest 2GB of memory, and
+ # makes no assumptions about data (not available on aarch64)
+ # - "large": makes no assumptions about either code or data
+ "-mcmodel=large",
+ ]
+ clang = llvm.require_tool("clang", echo=self.verbose)
+ await run(clang, *flags, "-o", o, c, echo=self.verbose)
+ return await self.parse(o)
+
+ async def build_stencils(self) -> dict[str, StencilGroup[R]]:  # Compile every opname found in the generated cases file; return results keyed by opname.
+ generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
+ opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))  # every "case NAME: {" label at 8-space indent
+ tasks = []
+ with tempfile.TemporaryDirectory() as tempdir:
+ work = pathlib.Path(tempdir).resolve()
+ async with asyncio.TaskGroup() as group:
+ for opname in opnames:
+ coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
+ tasks.append(group.create_task(coro, name=opname))  # task name carries the opname for the result mapping
+ return {task.get_name(): task.result() for task in tasks}
+
+ def build(self, out: pathlib.Path) -> None:
+ jit_stencils = out / "jit_stencils.h"
+ hasher = hashlib.sha256()
+ hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
+ hasher.update((out / "pyconfig.h").read_bytes())
+ for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
+ for filename in filenames:
+ hasher.update(pathlib.Path(dirpath, filename).read_bytes())
+ digest = hasher.hexdigest()
+ if jit_stencils.exists():
+ with jit_stencils.open() as file:
+ if file.readline().removeprefix("// ").removesuffix("\n") == digest:
+ return
+ stencil_groups = asyncio.run(self.build_stencils())
+ with jit_stencils.open("w") as file:
+ file.write(f"// {digest}\n")
+ for line in dump(stencil_groups):
+ file.write(f"{line}\n")
+
+
+class ELF(Target[schema.ELFSection, schema.ELFRelocation]):
+ def _handle_section(
+ self, section: schema.ELFSection, group: StencilGroup[schema.ELFRelocation]
+ ) -> None:
section_type = section["Type"]["Value"]
flags = {flag["Name"] for flag in section["Flags"]["Flags"]}
if section_type == "SHT_RELA":
assert "SHF_INFO_LINK" in flags, flags
assert not section["Symbols"]
- if section["Info"] in self.group.text.offsets:
- base = self.group.text.offsets[section["Info"]]
- for wrapped_relocation in section["Relocations"]:
- relocation = wrapped_relocation["Relocation"]
- self.relocations_text.append((base, relocation))
+ if section["Info"] in group.code.offsets:
+ stencil = group.code
else:
- base = self.group.data.offsets[section["Info"]]
- for wrapped_relocation in section["Relocations"]:
- relocation = wrapped_relocation["Relocation"]
- self.relocations_data.append((base, relocation))
+ stencil = group.data
+ base = stencil.offsets[section["Info"]]
+ for wrapped_relocation in section["Relocations"]:
+ relocation = wrapped_relocation["Relocation"]
+ stencil.relocations.append((base, relocation))
elif section_type == "SHT_PROGBITS":
if "SHF_ALLOC" not in flags:
return
if "SHF_EXECINSTR" in flags:
- self._handle_section_data(section, self.group.text)
+ stencil = group.code
else:
- self._handle_section_data(section, self.group.data)
+ stencil = group.data
+ stencil.offsets[section["Index"]] = len(stencil.body)
+ for wrapped_symbol in section["Symbols"]:
+ symbol = wrapped_symbol["Symbol"]
+ offset = len(stencil.body) + symbol["Value"]
+ name = symbol["Name"]["Value"]
+ name = name.removeprefix(self.prefix)
+ assert name not in stencil.symbols
+ stencil.symbols[name] = offset
+ section_data = section["SectionData"]
+ stencil.body.extend(section_data["Bytes"])
assert not section["Relocations"]
else:
assert section_type in {
@@ -308,22 +385,12 @@ def _handle_section(self, section: schema.ELFSection) -> None:
"SHT_SYMTAB",
}, section_type
- def _handle_section_data(
- self, section: schema.ELFSection, stencil: Stencil
- ) -> None:
- stencil.offsets[section["Index"]] = len(stencil.body)
- for wrapped_symbol in section["Symbols"]:
- symbol = wrapped_symbol["Symbol"]
- offset = len(stencil.body) + symbol["Value"]
- name = symbol["Name"]["Value"]
- name = name.removeprefix(self.options.target.prefix)
- assert name not in stencil.symbols
- stencil.symbols[name] = offset
- section_data = section["SectionData"]
- stencil.body.extend(section_data["Bytes"])
-
def _handle_relocation(
- self, base: int, relocation: schema.ELFRelocation, raw: bytes
+ self,
+ base: int,
+ relocation: schema.ELFRelocation,
+ group: StencilGroup[schema.ELFRelocation],
+ raw: bytes,
) -> Hole:
match relocation:
case {
@@ -333,53 +400,47 @@ def _handle_relocation(
"Addend": addend,
}:
offset += base
- s = s.removeprefix(self.options.target.prefix)
- value, symbol = self._symbol_to_value(s)
+ s = s.removeprefix(self.prefix)
+ value, symbol = _symbol_to_value(s)
case _:
raise NotImplementedError(relocation)
return Hole(offset, kind, value, symbol, addend)
-class COFF(Parser[schema.COFFSection, schema.COFFRelocation]):
- def _handle_section(self, section: schema.COFFSection) -> None:
+class COFF(Target[schema.COFFSection, schema.COFFRelocation]):
+ def _handle_section(
+ self, section: schema.COFFSection, group: StencilGroup[schema.COFFRelocation]
+ ) -> None:
flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]}
- if "SectionData" not in section:
- return
- section_data = section["SectionData"]
+ if "SectionData" in section:
+ section_data_bytes = section["SectionData"]["Bytes"]
+ else:
+ # Zeroed BSS data, seen with printf debugging calls:
+ section_data_bytes = [0] * section["RawDataSize"]
if "IMAGE_SCN_MEM_EXECUTE" in flags:
- assert not self.group.data.body, self.group.data.body
- base = self.group.text.offsets[section["Number"]] = len(
- self.group.text.body
- )
- self.group.text.body.extend(section_data["Bytes"])
- for wrapped_symbol in section["Symbols"]:
- symbol = wrapped_symbol["Symbol"]
- offset = base + symbol["Value"]
- name = symbol["Name"]
- name = name.removeprefix(self.options.target.prefix)
- self.group.text.symbols[name] = offset
- for wrapped_relocation in section["Relocations"]:
- relocation = wrapped_relocation["Relocation"]
- self.relocations_text.append((base, relocation))
+ stencil = group.code
elif "IMAGE_SCN_MEM_READ" in flags:
- base = self.group.data.offsets[section["Number"]] = len(
- self.group.data.body
- )
- self.group.data.body.extend(section_data["Bytes"])
- for wrapped_symbol in section["Symbols"]:
- symbol = wrapped_symbol["Symbol"]
- offset = base + symbol["Value"]
- name = symbol["Name"]
- name = name.removeprefix(self.options.target.prefix)
- self.group.data.symbols[name] = offset
- for wrapped_relocation in section["Relocations"]:
- relocation = wrapped_relocation["Relocation"]
- self.relocations_data.append((base, relocation))
+ stencil = group.data
else:
return
+ base = stencil.offsets[section["Number"]] = len(stencil.body)
+ stencil.body.extend(section_data_bytes)
+ for wrapped_symbol in section["Symbols"]:
+ symbol = wrapped_symbol["Symbol"]
+ offset = base + symbol["Value"]
+ name = symbol["Name"]
+ name = name.removeprefix(self.prefix)
+ stencil.symbols[name] = offset
+ for wrapped_relocation in section["Relocations"]:
+ relocation = wrapped_relocation["Relocation"]
+ stencil.relocations.append((base, relocation))
def _handle_relocation(
- self, base: int, relocation: schema.COFFRelocation, raw: bytes
+ self,
+ base: int,
+ relocation: schema.COFFRelocation,
+ group: StencilGroup[schema.COFFRelocation],
+ raw: bytes,
) -> Hole:
match relocation:
case {
@@ -388,8 +449,8 @@ def _handle_relocation(
"Offset": offset,
}:
offset += base
- s = s.removeprefix(self.options.target.prefix)
- value, symbol = self._symbol_to_value(s)
+ s = s.removeprefix(self.prefix)
+ value, symbol = _symbol_to_value(s)
addend = int.from_bytes(raw[offset : offset + 8], "little")
case {
"Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
@@ -397,69 +458,68 @@ def _handle_relocation(
"Offset": offset,
}:
offset += base
- s = s.removeprefix(self.options.target.prefix)
- value, symbol = self._symbol_to_value(s)
+ s = s.removeprefix(self.prefix)
+ value, symbol = _symbol_to_value(s)
addend = int.from_bytes(raw[offset : offset + 4], "little")
case _:
raise NotImplementedError(relocation)
return Hole(offset, kind, value, symbol, addend)
-class MachO(Parser[schema.MachOSection, schema.MachORelocation]):
- def _handle_section(self, section: schema.MachOSection) -> None:
- assert section["Address"] >= len(self.group.text.body)
+class MachO(Target[schema.MachOSection, schema.MachORelocation]):
+ def _handle_section(
+ self, section: schema.MachOSection, group: StencilGroup[schema.MachORelocation]
+ ) -> None:
+ assert section["Address"] >= len(group.code.body)
assert "SectionData" in section
section_data = section["SectionData"]
flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
name = section["Name"]["Value"]
- name = name.removeprefix(self.options.target.prefix)
+ name = name.removeprefix(self.prefix)
if "SomeInstructions" in flags:
- assert not self.group.data.body, self.group.data.body
- self.group.text.body.extend(
- [0] * (section["Address"] - len(self.group.text.body))
- )
- before = self.group.text.offsets[section["Index"]] = section["Address"]
- self.group.text.body.extend(section_data["Bytes"])
- self.group.text.symbols[name] = before
+ assert not group.data.body, group.data.body
+ group.code.body.extend([0] * (section["Address"] - len(group.code.body)))
+ before = group.code.offsets[section["Index"]] = section["Address"]
+ group.code.body.extend(section_data["Bytes"])
+ group.code.symbols[name] = before
assert "Symbols" in section
for wrapped_symbol in section["Symbols"]:
symbol = wrapped_symbol["Symbol"]
offset = symbol["Value"]
name = symbol["Name"]["Value"]
- name = name.removeprefix(self.options.target.prefix)
- self.group.text.symbols[name] = offset
+ name = name.removeprefix(self.prefix)
+ group.code.symbols[name] = offset
assert "Relocations" in section
for wrapped_relocation in section["Relocations"]:
relocation = wrapped_relocation["Relocation"]
- self.relocations_text.append((before, relocation))
+ group.code.relocations.append((before, relocation))
else:
- self.group.data.body.extend(
- [0]
- * (
- section["Address"]
- - len(self.group.data.body)
- - len(self.group.text.body)
- )
+ group.data.body.extend(
+ [0] * (section["Address"] - len(group.data.body) - len(group.code.body))
+ )
+ before = group.data.offsets[section["Index"]] = section["Address"] - len(
+ group.code.body
)
- before = self.group.data.offsets[section["Index"]] = section[
- "Address"
- ] - len(self.group.text.body)
- self.group.data.body.extend(section_data["Bytes"])
- self.group.data.symbols[name] = len(self.group.text.body)
+ group.data.body.extend(section_data["Bytes"])
+ group.data.symbols[name] = len(group.code.body)
assert "Symbols" in section
for wrapped_symbol in section["Symbols"]:
symbol = wrapped_symbol["Symbol"]
- offset = symbol["Value"] - len(self.group.text.body)
+ offset = symbol["Value"] - len(group.code.body)
name = symbol["Name"]["Value"]
- name = name.removeprefix(self.options.target.prefix)
- self.group.data.symbols[name] = offset
+ name = name.removeprefix(self.prefix)
+ group.data.symbols[name] = offset
assert "Relocations" in section
for wrapped_relocation in section["Relocations"]:
relocation = wrapped_relocation["Relocation"]
- self.relocations_data.append((before, relocation))
+ group.data.relocations.append((before, relocation))
def _handle_relocation(
- self, base: int, relocation: schema.MachORelocation, raw: bytes
+ self,
+ base: int,
+ relocation: schema.MachORelocation,
+ group: StencilGroup[schema.MachORelocation],
+ raw: bytes,
) -> Hole:
match relocation:
case {
@@ -471,9 +531,9 @@ def _handle_relocation(
"Offset": offset,
}:
offset += base
- s = s.removeprefix(self.options.target.prefix)
+ s = s.removeprefix(self.prefix)
value, symbol = HoleValue.DATA, None
- addend = self._global_offset_table_lookup(s)
+ addend = group.global_offset_table_lookup(s)
case {
"Type": {"Value": kind},
"Section": {"Value": s},
@@ -484,8 +544,8 @@ def _handle_relocation(
"Offset": offset,
}:
offset += base
- s = s.removeprefix(self.options.target.prefix)
- value, symbol = self._symbol_to_value(s)
+ s = s.removeprefix(self.prefix)
+ value, symbol = _symbol_to_value(s)
addend = 0
case _:
raise NotImplementedError(relocation)
@@ -495,138 +555,25 @@ def _handle_relocation(
return Hole(offset, kind, value, symbol, addend)
-@dataclasses.dataclass(frozen=True)
-class Target:
- triple: str
- pattern: str
- alignment: int
- prefix: str
- parser: type[MachO | COFF | ELF]
-
- def sha256(self) -> bytes:
- hasher = hashlib.sha256()
- hasher.update(self.triple.encode())
- hasher.update(bytes([self.alignment]))
- hasher.update(self.prefix.encode())
- return hasher.digest()
-
-
-TARGETS = [
- Target(
- triple="aarch64-apple-darwin",
- pattern=r"aarch64-apple-darwin.*",
- alignment=8,
- prefix="_",
- parser=MachO,
- ),
- Target(
- triple="aarch64-unknown-linux-gnu",
- pattern=r"aarch64-.*-linux-gnu",
- alignment=8,
- prefix="",
- parser=ELF,
- ),
- Target(
- triple="i686-pc-windows-msvc",
- pattern=r"i686-pc-windows-msvc",
- alignment=1,
- prefix="_",
- parser=COFF,
- ),
- Target(
- triple="x86_64-apple-darwin",
- pattern=r"x86_64-apple-darwin.*",
- alignment=1,
- prefix="_",
- parser=MachO,
- ),
- Target(
- triple="x86_64-pc-windows-msvc",
- pattern=r"x86_64-pc-windows-msvc",
- alignment=1,
- prefix="",
- parser=COFF,
- ),
- Target(
- triple="x86_64-unknown-linux-gnu",
- pattern=r"x86_64-.*-linux-gnu",
- alignment=1,
- prefix="",
- parser=ELF,
- ),
-]
-
-
-def get_target(host: str) -> Target:
- for target in TARGETS:
- if re.fullmatch(target.pattern, host):
- return target
- raise ValueError(host)
-
-
-@dataclasses.dataclass(frozen=True)
-class Options:
- target: Target
- debug: bool
- out: pathlib.Path
- verbose: bool
-
- def sha256(self) -> bytes:
- hasher = hashlib.sha256()
- hasher.update(self.target.sha256())
- hasher.update(bytes([self.debug]))
- hasher.update(bytes(self.out.resolve()))
- return hasher.digest()
-
-
-async def _compile(
- options: Options, opname: str, c: pathlib.Path, tempdir: pathlib.Path
-) -> StencilGroup:
- o = tempdir / f"{opname}.o"
- flags = [
- f"--target={options.target.triple}",
- "-DPy_BUILD_CORE",
- "-D_DEBUG" if options.debug else "-DNDEBUG",
- f"-D_JIT_OPCODE={opname}",
- "-D_PyJIT_ACTIVE",
- "-D_Py_JIT",
- "-I.",
- f"-I{INCLUDE}",
- f"-I{INCLUDE_INTERNAL}",
- f"-I{PYTHON}",
- "-O3",
- "-c",
- "-fno-asynchronous-unwind-tables",
- # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds:
- "-fno-jump-tables",
- # Position-independent code adds indirection to every load and jump:
- "-fno-pic",
- # Don't make calls to weird stack-smashing canaries:
- "-fno-stack-protector",
- # We have three options for code model:
- # - "small": the default, assumes that code and data reside in the lowest
- # 2GB of memory (128MB on aarch64)
- # - "medium": assumes that code resides in the lowest 2GB of memory, and
- # makes no assumptions about data (not available on aarch64)
- # - "large": makes no assumptions about either code or data
- "-mcmodel=large",
- ]
- clang = llvm.require_tool("clang", echo=options.verbose)
- await run(clang, *flags, "-o", o, c, echo=options.verbose)
- return await options.target.parser(options).parse(o)
-
-
-async def build_stencils(options: Options) -> dict[str, StencilGroup]:
- generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
- opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
- tasks = []
- with tempfile.TemporaryDirectory() as tempdir:
- work = pathlib.Path(tempdir).resolve()
- async with asyncio.TaskGroup() as group:
- for opname in opnames:
- coro = _compile(options, opname, TOOLS_JIT_TEMPLATE_C, work)
- tasks.append(group.create_task(coro, name=opname))
- return {task.get_name(): task.result() for task in tasks}
+def get_target(
+ host: str, *, debug: bool = False, verbose: bool = False
+) -> COFF | ELF | MachO:
+ target: COFF | ELF | MachO
+ if re.fullmatch(r"aarch64-apple-darwin.*", host):
+ target = MachO("aarch64-apple-darwin", alignment=8, prefix="_")
+ elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
+ target = ELF("aarch64-unknown-linux-gnu", alignment=8)
+ elif re.fullmatch(r"i686-pc-windows-msvc", host):
+ target = COFF("i686-pc-windows-msvc", prefix="_")
+ elif re.fullmatch(r"x86_64-apple-darwin.*", host):
+ target = MachO("x86_64-apple-darwin", prefix="_")
+ elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
+ target = COFF("x86_64-pc-windows-msvc")
+ elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
+ target = ELF("x86_64-unknown-linux-gnu")
+ else:
+ raise ValueError(host)
+ return dataclasses.replace(target, debug=debug, verbose=verbose)
def dump_header() -> typing.Generator[str, None, None]:
@@ -638,7 +585,7 @@ def dump_header() -> typing.Generator[str, None, None]:
yield "} HoleKind;"
yield ""
yield "typedef enum {"
- for value in HoleValue:
+ for value in sorted(HoleValue, key=lambda value: value.name):
yield f" HoleValue_{value.name},"
yield "} HoleValue;"
yield ""
@@ -658,7 +605,7 @@ def dump_header() -> typing.Generator[str, None, None]:
yield "} Stencil;"
yield ""
yield "typedef struct {"
- yield " const Stencil text;"
+ yield " const Stencil code;"
yield " const Stencil data;"
yield "} StencilGroup;"
yield ""
@@ -673,7 +620,7 @@ def dump_footer(opnames: list[str]) -> typing.Generator[str, None, None]:
yield "}"
yield ""
yield "#define INIT_STENCIL_GROUP(OP) { \\"
- yield " .text = INIT_STENCIL(OP##_text), \\"
+ yield " .code = INIT_STENCIL(OP##_code), \\"
yield " .data = INIT_STENCIL(OP##_data), \\"
yield "}"
yield ""
@@ -683,27 +630,29 @@ def dump_footer(opnames: list[str]) -> typing.Generator[str, None, None]:
yield "};"
yield ""
yield "#define GET_PATCHES() { \\"
- for value in HoleValue:
+ for value in sorted(HoleValue, key=lambda value: value.name):
yield f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\"
yield "}"
-def dump(stencil_groups: dict[str, StencilGroup]) -> typing.Generator[str, None, None]:
+def dump(
+ stencil_groups: dict[str, StencilGroup[R]]
+) -> typing.Generator[str, None, None]:
yield from dump_header()
opnames = []
for opname, stencil in sorted(stencil_groups.items()):
opnames.append(opname)
yield f"// {opname}"
- assert stencil.text
- for line in stencil.text.disassembly:
+ assert stencil.code
+ for line in stencil.code.disassembly:
yield f"// {line}"
- body = ", ".join(f"0x{byte:02x}" for byte in stencil.text.body)
- size = len(stencil.text.body) + 1
- yield f"static const unsigned char {opname}_text_body[{size}] = {{{body}}};"
- if stencil.text.holes:
- size = len(stencil.text.holes) + 1
- yield f"static const Hole {opname}_text_holes[{size}] = {{"
- for hole in sorted(stencil.text.holes, key=lambda hole: hole.offset):
+ body = ", ".join(f"0x{byte:02x}" for byte in stencil.code.body)
+ size = len(stencil.code.body) + 1
+ yield f"static const unsigned char {opname}_code_body[{size}] = {{{body}}};"
+ if stencil.code.holes:
+ size = len(stencil.code.holes) + 1
+ yield f"static const Hole {opname}_code_holes[{size}] = {{"
+ for hole in sorted(stencil.code.holes, key=lambda hole: hole.offset):
parts = [
hex(hole.offset),
f"HoleKind_{hole.kind}",
@@ -714,7 +663,7 @@ def dump(stencil_groups: dict[str, StencilGroup]) -> typing.Generator[str, None,
yield f" {{{', '.join(parts)}}},"
yield "};"
else:
- yield f"static const Hole {opname}_text_holes[1];"
+ yield f"static const Hole {opname}_code_holes[1];"
for line in stencil.data.disassembly:
yield f"// {line}"
body = ", ".join(f"0x{byte:02x}" for byte in stencil.data.body)
@@ -749,40 +698,18 @@ def format_addend(addend: int) -> str:
return hex(addend)
-def build(target: Target, *, debug: bool, out: pathlib.Path, verbose: bool) -> None:
- jit_stencils = out / "jit_stencils.h"
- options = Options(target, debug, out, verbose)
- hasher = hashlib.sha256()
- hasher.update(options.sha256())
- hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
- hasher.update((out / "pyconfig.h").read_bytes())
- for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
- for filename in filenames:
- hasher.update(pathlib.Path(dirpath, filename).read_bytes())
- digest = hasher.hexdigest()
- if jit_stencils.exists():
- with jit_stencils.open() as file:
- if file.readline().removeprefix("// ").removesuffix("\n") == digest:
- return
- stencil_groups = asyncio.run(build_stencils(options))
- with jit_stencils.open("w") as file:
- file.write(f"// {digest}\n")
- for line in dump(stencil_groups):
- file.write(f"{line}\n")
-
-
def main() -> None:
parser = argparse.ArgumentParser()
- parser.add_argument(
- "target", type=get_target, help="a PEP 11 target triple to compile for"
- )
+ parser.add_argument("target", help="a PEP 11 target triple to compile for")
parser.add_argument(
"-d", "--debug", action="store_true", help="compile for a debug build of Python"
)
parser.add_argument(
"-v", "--verbose", action="store_true", help="echo commands as they are run"
)
- build(out=pathlib.Path.cwd(), **vars(parser.parse_args()))
+ args = parser.parse_args()
+ target = get_target(args.target, debug=args.debug, verbose=args.verbose)
+ target.build(pathlib.Path.cwd())
if __name__ == "__main__":
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index e0af5f365fcd77..c915de91504932 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -56,7 +56,7 @@ _Py_CODEUNIT *
_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate)
{
// Locals that the instruction implementations expect to exist:
- PATCH_VALUE(_PyUOpExecutorObject *, current_executor, _JIT_CURRENT_EXECUTOR)
+ PATCH_VALUE(_PyUOpExecutorObject *, current_executor, _JIT_EXECUTOR)
int oparg;
int opcode = _JIT_OPCODE;
_PyUOpInstruction *next_uop;