From 4034318ba7d588f591f2b2bea28ed9ed9653bc6f Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 15 Nov 2022 11:13:51 -0800 Subject: [PATCH 1/6] Compress marshalled bytecode --- Lib/importlib/_bootstrap_external.py | 3 +- Programs/test_frozenmain.h | 70 +++++++++++------------- Python/marshal.c | 79 +++++++++++++++++++++++++--- Tools/build/umarshal.py | 22 +++++++- 4 files changed, 126 insertions(+), 48 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index f4dbbebcd224c8..6fbd60893461d0 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -426,6 +426,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack) # Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction) # Python 3.12a1 3512 (Remove all unused consts from code objects) +# Python 3.12a1 3513 (Compress marshalled bytecode) # Python 3.13 will start with 3550 @@ -438,7 +439,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3513).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 96be3ce3c25c3f..58af92c1933dd8 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,42 +1,36 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,243,184,0,0,0,151,0,100,0,100,1, - 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, - 100,2,171,1,0,0,0,0,0,0,0,0,1,0,2,0, - 101,2,100,3,101,0,106,6,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,171,2,0,0,0,0, - 0,0,0,0,1,0,2,0,101,1,106,8,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,171,0, - 0,0,0,0,0,0,0,0,100,4,25,0,0,0,0,0, - 0,0,0,0,90,5,100,5,68,0,93,23,0,0,90,6, - 2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6, - 25,0,0,0,0,0,0,0,0,0,155,0,157,4,171,1, - 0,0,0,0,0,0,0,0,1,0,140,25,4,0,100,1, - 83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, - 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, - 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, - 41,5,218,12,112,114,111,103,114,97,109,95,110,97,109,101, - 218,10,101,120,101,99,117,116,97,98,108,101,218,15,117,115, - 101,95,101,110,118,105,114,111,110,109,101,110,116,218,17,99, - 111,110,102,105,103,117,114,101,95,99,95,115,116,100,105,111, - 218,14,98,117,102,102,101,114,101,100,95,115,116,100,105,111, - 122,7,99,111,110,102,105,103,32,122,2,58,32,41,7,218, - 3,115,121,115,218,17,95,116,101,115,116,105,110,116,101,114, - 110,97,108,99,97,112,105,218,5,112,114,105,110,116,218,4, - 97,114,103,118,218,11,103,101,116,95,99,111,110,102,105,103, - 115,114,3,0,0,0,218,3,107,101,121,169,0,243,0,0, - 0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109, - 97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62, - 114,18,0,0,0,1,0,0,0,115,154,0,0,0,240,3, - 1,1,1,240,8,0,1,11,128,10,128,10,128,10,216,0, - 24,208,0,24,208,0,24,208,0,24,224,0,5,128,5,208, - 6,26,212,0,27,208,0,27,216,0,5,128,5,128,106,144, - 35,151,40,145,40,212,0,27,208,0,27,216,9,38,208,9, - 26,215,9,38,209,9,38,212,9,40,168,24,212,9,50,128, - 6,240,2,6,12,2,240,0,7,1,42,241,0,7,1,42, - 128,67,240,14,0,5,10,128,69,208,10,40,144,67,208,10, - 40,208,10,40,152,54,160,35,156,59,208,10,40,208,10,40, - 212,4,41,208,4,41,208,4,41,240,15,7,1,42,240,0, - 7,1,42,240,0,7,1,42,114,16,0,0,0, + 0,0,0,0,0,92,0,0,0,151,0,100,0,100,1,108, + 0,90,0,100,0,100,1,108,1,90,1,2,101,2,100,2, + 171,1,1,2,101,2,100,3,101,0,106,6,171,2,1,2, + 101,1,106,8,171,0,100,4,25,90,5,100,5,68,93,23, + 90,6,2,101,2,100,6,101,6,155,0,100,7,101,5,101, + 6,25,155,0,157,4,171,1,1,140,25,4,100,1,83,41, + 8,233,0,0,0,0,78,122,18,70,114,111,122,101,110,32, + 72,101,108,108,111,32,87,111,114,108,100,122,8,115,121,115, + 46,97,114,103,118,218,6,99,111,110,102,105,103,41,5,218, + 12,112,114,111,103,114,97,109,95,110,97,109,101,218,10,101, + 120,101,99,117,116,97,98,108,101,218,15,117,115,101,95,101, + 110,118,105,114,111,110,109,101,110,116,218,17,99,111,110,102, + 105,103,117,114,101,95,99,95,115,116,100,105,111,218,14,98, + 117,102,102,101,114,101,100,95,115,116,100,105,111,122,7,99, + 111,110,102,105,103,32,122,2,58,32,41,7,218,3,115,121, + 115,218,17,95,116,101,115,116,105,110,116,101,114,110,97,108, + 99,97,112,105,218,5,112,114,105,110,116,218,4,97,114,103, + 118,218,11,103,101,116,95,99,111,110,102,105,103,115,114,2, + 0,0,0,218,3,107,101,121,169,0,243,0,0,0,0,250, + 18,116,101,115,116,95,102,114,111,122,101,110,109,97,105,110, + 46,112,121,250,8,60,109,111,100,117,108,101,62,114,17,0, + 0,0,1,0,0,0,115,154,0,0,0,240,3,1,1,1, + 240,8,0,1,11,128,10,128,10,128,10,216,0,24,208,0, + 24,208,0,24,208,0,24,224,0,5,128,5,208,6,26,212, + 0,27,208,0,27,216,0,5,128,5,128,106,144,35,151,40, + 145,40,212,0,27,208,0,27,216,9,38,208,9,26,215,9, + 38,209,9,38,212,9,40,168,24,212,9,50,128,6,240,2, + 6,12,2,240,0,7,1,42,241,0,7,1,42,128,67,240, + 14,0,5,10,128,69,208,10,40,144,67,208,10,40,208,10, + 40,152,54,160,35,156,59,208,10,40,208,10,40,212,4,41, + 208,4,41,208,4,41,240,15,7,1,42,240,0,7,1,42, + 240,0,7,1,42,114,15,0,0,0, }; diff --git a/Python/marshal.c b/Python/marshal.c index d6cc04d6f2abd3..6500a734246d31 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -12,6 +12,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_code.h" // _PyCode_New() #include "pycore_hashtable.h" // _Py_hashtable_t +#include "pycore_opcode.h" #include "marshal.h" // Py_MARSHAL_VERSION /*[clinic input] @@ -291,6 +292,21 @@ w_float_str(double v, WFILE *p) PyMem_Free(buf); } +static void +w_bytecode(PyCodeObject *code, WFILE *p) +{ + W_SIZE(Py_SIZE(code), p); + for (Py_ssize_t i = 0; i < Py_SIZE(code); i++) { + _Py_CODEUNIT instruction = _PyCode_CODE(code)[i]; + int opcode = _PyOpcode_Deopt[_Py_OPCODE(instruction)]; + w_byte(opcode, p); + if (HAS_ARG(opcode)) { + w_byte(_Py_OPARG(instruction), p); + } + i += _PyOpcode_Caches[opcode]; + } +} + static int w_ref(PyObject *v, char *flag, WFILE *p) { @@ -550,18 +566,13 @@ w_complex_object(PyObject *v, char flag, WFILE *p) } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; - PyObject *co_code = _PyCode_GetCode(co); - if (co_code == NULL) { - p->error = WFERR_NOMEMORY; - return; - } W_TYPE(TYPE_CODE, p); w_long(co->co_argcount, p); w_long(co->co_posonlyargcount, p); w_long(co->co_kwonlyargcount, p); w_long(co->co_stacksize, p); w_long(co->co_flags, p); - w_object(co_code, p); + w_bytecode(co, p); w_object(co->co_consts, p); w_object(co->co_names, p); w_object(co->co_localsplusnames, p); @@ -572,7 +583,6 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_long(co->co_firstlineno, p); w_object(co->co_linetable, p); w_object(co->co_exceptiontable, p); - Py_DECREF(co_code); } else if (PyObject_CheckBuffer(v)) { /* Write unknown bytes-like objects as a bytes object */ @@ -921,6 +931,59 @@ r_float_str(RFILE *p) return PyOS_string_to_double(buf, NULL, NULL); } +static PyObject * +r_bytecode(RFILE *p) +{ + long size = r_long(p); + if (PyErr_Occurred()) { + return NULL; + } + Py_ssize_t nbytes = size * sizeof(_Py_CODEUNIT); + if (nbytes < 0 || SIZE32_MAX < nbytes) { + const char *e = "bad marshal data (bytecode size out of range)"; + PyErr_SetString(PyExc_ValueError, e); + return NULL; + } + PyObject *bytecode = PyBytes_FromStringAndSize(NULL, nbytes); + if (bytecode == NULL) { + return NULL; + } + _Py_CODEUNIT *buffer = (_Py_CODEUNIT *)PyBytes_AS_STRING(bytecode); + long i = 0; + while (i < size) { + int opcode = r_byte(p); + if (opcode == EOF) { + const char *e = "EOF read where opcode expected"; + PyErr_SetString(PyExc_EOFError, e); + return NULL; + } + int oparg; + if (HAS_ARG(opcode)) { + oparg = r_byte(p); + if (oparg == EOF) { + const char *e = "EOF read where oparg expected"; + PyErr_SetString(PyExc_EOFError, e); + return NULL; + } + } + else { + oparg = 0; + } + assert(0x00 <= opcode && opcode < 0x100); + assert(0x00 <= oparg && oparg < 0x100); + buffer[i++] = _Py_MAKECODEUNIT(opcode, oparg); + for (int j = 0; j < _PyOpcode_Caches[opcode]; j++) { + buffer[i++] = _Py_MAKECODEUNIT(CACHE, oparg); + } + } + if (i != size) { + const char *e = "bad marshal data (bytecode size incorrect)"; + PyErr_SetString(PyExc_ValueError, e); + return NULL; + } + return bytecode; +} + /* allocate the reflist index for a new object. Return -1 on failure */ static Py_ssize_t r_ref_reserve(int flag, RFILE *p) @@ -1382,7 +1445,7 @@ r_object(RFILE *p) flags = (int)r_long(p); if (PyErr_Occurred()) goto code_error; - code = r_object(p); + code = r_bytecode(p); if (code == NULL) goto code_error; consts = r_object(p); diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index f61570cbaff751..5acfc11b63e578 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -1,6 +1,7 @@ # Implementat marshal.loads() in pure Python import ast +import opcode from typing import Any, Tuple @@ -47,6 +48,8 @@ class Type: CO_FAST_CELL = 0x40 CO_FAST_FREE = 0x80 +CACHE = opcode.opmap["CACHE"] + class Code: def __init__(self, **kwds: Any): @@ -177,6 +180,23 @@ def r_object(self) -> Any: return self._r_object() finally: self.level = old_level + + def r_bytecode(self) -> bytes: + nbytes = self.r_long() * 2 + bytecode = bytearray() + while len(bytecode) < nbytes: + opcode_byte = self.r_byte() + if opcode.HAVE_ARGUMENT <= opcode_byte: + oparg_byte = self.r_byte() + else: + oparg_byte = 0 + assert 0x00 <= opcode_byte < 0x100 + assert 0x00 <= oparg_byte < 0x100 + bytecode.extend([opcode_byte, oparg_byte]) + for _ in range(opcode._inline_cache_entries[opcode_byte]): + bytecode.extend([CACHE, 0]) + assert len(bytecode) == nbytes + return bytes(bytecode) def _r_object(self) -> Any: code = self.r_byte() @@ -279,7 +299,7 @@ def R_REF(obj: Any) -> Any: retval.co_kwonlyargcount = self.r_long() retval.co_stacksize = self.r_long() retval.co_flags = self.r_long() - retval.co_code = self.r_object() + retval.co_code = self.r_bytecode() retval.co_consts = self.r_object() retval.co_names = self.r_object() retval.co_localsplusnames = self.r_object() From 8b6b393bace3768f8311ae8ffeaa494fe89492a1 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Nov 2022 05:37:45 -0800 Subject: [PATCH 2/6] blurb add --- .../next/Library/2022-11-16-05-37-32.gh-issue-99554.4sJH79.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-11-16-05-37-32.gh-issue-99554.4sJH79.rst diff --git a/Misc/NEWS.d/next/Library/2022-11-16-05-37-32.gh-issue-99554.4sJH79.rst b/Misc/NEWS.d/next/Library/2022-11-16-05-37-32.gh-issue-99554.4sJH79.rst new file mode 100644 index 00000000000000..e67d1c7504139d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-16-05-37-32.gh-issue-99554.4sJH79.rst @@ -0,0 +1 @@ +Modify the :mod:`marshal` format to serialize bytecode more efficiently. From 5f81b82cc2d00a567389c8e83ae1104906a80fc5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Nov 2022 12:12:55 -0800 Subject: [PATCH 3/6] Add opcode_for_build --- Lib/opcode.py | 2 +- Tools/build/deepfreeze.py | 4 ++-- Tools/build/opcode_for_build.py | 17 +++++++++++++++++ Tools/build/umarshal.py | 4 ++-- 4 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 Tools/build/opcode_for_build.py diff --git a/Lib/opcode.py b/Lib/opcode.py index 0ee75958508ac7..1f7e3973794333 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -205,7 +205,7 @@ def pseudo_op(name, op, real_ops): hasfree.append(148) def_op('COPY_FREE_VARS', 149) def_op('YIELD_VALUE', 150) -def_op('RESUME', 151) # This must be kept in sync with deepfreeze.py +def_op('RESUME', 151) def_op('MATCH_CLASS', 152) def_op('FORMAT_VALUE', 155) diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index 2eef649437a680..ea8ee4454c961e 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -17,13 +17,13 @@ from typing import Dict, FrozenSet, TextIO, Tuple import umarshal +import opcode_for_build as opcode from generate_global_objects import get_identifiers_and_strings verbose = False identifiers, strings = get_identifiers_and_strings() -# This must be kept in sync with opcode.py -RESUME = 151 +RESUME = opcode.opmap["RESUME"] def isprintable(b: bytes) -> bool: return all(0x20 <= c < 0x7f for c in b) diff --git a/Tools/build/opcode_for_build.py b/Tools/build/opcode_for_build.py new file mode 100644 index 00000000000000..eeb0f372bf8ead --- /dev/null +++ b/Tools/build/opcode_for_build.py @@ -0,0 +1,17 @@ +""" +Parts of our build process (looking at you, deepfreeze) need the opcode module +for the Python *being built*, not the Python *doing the building*. + +This basically just loads ../../Lib/opcode.py and re-exports everything: + +>>> import opcode_for_build as opcode +""" + +import os + +_opcode_path = os.path.join( + os.path.dirname(__file__), os.pardir, os.pardir, "Lib", "opcode.py" +) +with open(_opcode_path, encoding="utf-8") as _opcode_file: + # Don't try this at home, kids: + exec(_opcode_file.read()) diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index 5acfc11b63e578..a1b3102e2fdb67 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -1,7 +1,7 @@ # Implementat marshal.loads() in pure Python import ast -import opcode +import opcode_for_build as opcode from typing import Any, Tuple @@ -180,7 +180,7 @@ def r_object(self) -> Any: return self._r_object() finally: self.level = old_level - + def r_bytecode(self) -> bytes: nbytes = self.r_long() * 2 bytecode = bytearray() From 23ba802a35d64907f293b4fb0c98b8c20d673b5e Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 14 Dec 2022 17:04:40 -0800 Subject: [PATCH 4/6] fixup --- Python/marshal.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 130769df38bef9..2dd1d3f0a42c1f 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -971,9 +971,11 @@ r_bytecode(RFILE *p) } assert(0x00 <= opcode && opcode < 0x100); assert(0x00 <= oparg && oparg < 0x100); - buffer[i++] = _Py_MAKECODEUNIT(opcode, oparg); + buffer[i].opcode = opcode; + buffer[i++].oparg = oparg; for (int j = 0; j < _PyOpcode_Caches[opcode]; j++) { - buffer[i++] = _Py_MAKECODEUNIT(CACHE, oparg); + buffer[i].opcode = CACHE; + buffer[i++].oparg = 0; } } if (i != size) { From 5534112bb0d20ef4c1c6a773efef94245350cd74 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 17 Dec 2022 07:22:33 -0800 Subject: [PATCH 5/6] Address review comments --- Python/marshal.c | 7 +++++-- Tools/build/deepfreeze.py | 4 +++- Tools/build/opcode_for_build.py | 24 +++++++++++++++++------- Tools/build/umarshal.py | 6 ++++-- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 2dd1d3f0a42c1f..d3a2c258a37bbc 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -972,10 +972,13 @@ r_bytecode(RFILE *p) assert(0x00 <= opcode && opcode < 0x100); assert(0x00 <= oparg && oparg < 0x100); buffer[i].opcode = opcode; - buffer[i++].oparg = oparg; + buffer[i].oparg = oparg; + i++; for (int j = 0; j < _PyOpcode_Caches[opcode]; j++) { + assert(i < size); buffer[i].opcode = CACHE; - buffer[i++].oparg = 0; + buffer[i].oparg = 0; + i++; } } if (i != size) { diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index b674f7185a85b0..94f575f7d49037 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -17,9 +17,11 @@ from typing import Dict, FrozenSet, TextIO, Tuple import umarshal -import opcode_for_build as opcode +import opcode_for_build from generate_global_objects import get_identifiers_and_strings +opcode = opcode_for_build.import_opcode() + verbose = False identifiers, strings = get_identifiers_and_strings() diff --git a/Tools/build/opcode_for_build.py b/Tools/build/opcode_for_build.py index eeb0f372bf8ead..0c2d43301f43cd 100644 --- a/Tools/build/opcode_for_build.py +++ b/Tools/build/opcode_for_build.py @@ -2,16 +2,26 @@ Parts of our build process (looking at you, deepfreeze) need the opcode module for the Python *being built*, not the Python *doing the building*. -This basically just loads ../../Lib/opcode.py and re-exports everything: +This basically just loads ../../Lib/opcode.py: ->>> import opcode_for_build as opcode +>>> import opcode_for_build +>>> opcode = opcode_for_build.import_opcode() """ import os +import types -_opcode_path = os.path.join( - os.path.dirname(__file__), os.pardir, os.pardir, "Lib", "opcode.py" +_OPCODE_PATH = os.path.realpath( + os.path.join( + os.path.dirname(__file__), os.pardir, os.pardir, "Lib", "opcode.py" + ) ) -with open(_opcode_path, encoding="utf-8") as _opcode_file: - # Don't try this at home, kids: - exec(_opcode_file.read()) + +def import_opcode() -> types.ModuleType: + """Import the current version of the opcode module (from Lib).""" + opcode_module = types.ModuleType("opcode") + opcode_module.__file__ = os.path.realpath(_OPCODE_PATH) + with open(_OPCODE_PATH, encoding="utf-8") as opcode_file: + # Don't try this at home, kids: + exec(opcode_file.read(), opcode_module.__dict__) + return opcode_module diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index a1b3102e2fdb67..8013c5ae6a7a07 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -1,10 +1,12 @@ # Implementat marshal.loads() in pure Python import ast -import opcode_for_build as opcode +import opcode_for_build from typing import Any, Tuple +opcode = opcode_for_build.import_opcode() + class Type: # Adapted from marshal.c @@ -186,7 +188,7 @@ def r_bytecode(self) -> bytes: bytecode = bytearray() while len(bytecode) < nbytes: opcode_byte = self.r_byte() - if opcode.HAVE_ARGUMENT <= opcode_byte: + if opcode_byte >= opcode.HAVE_ARGUMENT: oparg_byte = self.r_byte() else: oparg_byte = 0 From bcd798058296083c29ff46ae52af3b569c7b44d4 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 17 Dec 2022 17:02:55 -0800 Subject: [PATCH 6/6] Terminate the compressed bytecode with two zeros --- Programs/test_frozenmain.h | 48 +++++++++++++++++++------------------- Python/marshal.c | 14 ++++++++--- Tools/build/umarshal.py | 7 +++--- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index eea81295ad3eb6..7885f4861df83b 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -6,28 +6,28 @@ unsigned char M_test_frozenmain[] = { 171,1,1,2,101,2,100,3,101,0,106,6,171,2,1,2, 101,1,106,8,171,0,100,4,25,90,5,100,5,68,93,23, 90,6,2,101,2,100,6,101,6,155,0,100,7,101,5,101, - 6,25,155,0,157,4,171,1,1,140,25,4,100,1,83,41, - 8,233,0,0,0,0,78,122,18,70,114,111,122,101,110,32, - 72,101,108,108,111,32,87,111,114,108,100,122,8,115,121,115, - 46,97,114,103,118,218,6,99,111,110,102,105,103,41,5,218, - 12,112,114,111,103,114,97,109,95,110,97,109,101,218,10,101, - 120,101,99,117,116,97,98,108,101,218,15,117,115,101,95,101, - 110,118,105,114,111,110,109,101,110,116,218,17,99,111,110,102, - 105,103,117,114,101,95,99,95,115,116,100,105,111,218,14,98, - 117,102,102,101,114,101,100,95,115,116,100,105,111,122,7,99, - 111,110,102,105,103,32,122,2,58,32,41,7,218,3,115,121, - 115,218,17,95,116,101,115,116,105,110,116,101,114,110,97,108, - 99,97,112,105,218,5,112,114,105,110,116,218,4,97,114,103, - 118,218,11,103,101,116,95,99,111,110,102,105,103,115,114,2, - 0,0,0,218,3,107,101,121,169,0,243,0,0,0,0,250, - 18,116,101,115,116,95,102,114,111,122,101,110,109,97,105,110, - 46,112,121,250,8,60,109,111,100,117,108,101,62,114,17,0, - 0,0,1,0,0,0,115,100,0,0,0,240,3,1,1,1, - 243,8,0,1,11,219,0,24,225,0,5,208,6,26,213,0, - 27,217,0,5,128,106,144,35,151,40,145,40,213,0,27,216, - 9,38,208,9,26,215,9,38,209,9,38,212,9,40,168,24, - 212,9,50,128,6,240,2,6,12,2,242,0,7,1,42,128, - 67,241,14,0,5,10,208,10,40,144,67,209,10,40,152,54, - 160,35,156,59,209,10,40,214,4,41,242,15,7,1,42,114, - 15,0,0,0, + 6,25,155,0,157,4,171,1,1,140,25,4,100,1,83,0, + 0,41,8,233,0,0,0,0,78,122,18,70,114,111,122,101, + 110,32,72,101,108,108,111,32,87,111,114,108,100,122,8,115, + 121,115,46,97,114,103,118,218,6,99,111,110,102,105,103,41, + 5,218,12,112,114,111,103,114,97,109,95,110,97,109,101,218, + 10,101,120,101,99,117,116,97,98,108,101,218,15,117,115,101, + 95,101,110,118,105,114,111,110,109,101,110,116,218,17,99,111, + 110,102,105,103,117,114,101,95,99,95,115,116,100,105,111,218, + 14,98,117,102,102,101,114,101,100,95,115,116,100,105,111,122, + 7,99,111,110,102,105,103,32,122,2,58,32,41,7,218,3, + 115,121,115,218,17,95,116,101,115,116,105,110,116,101,114,110, + 97,108,99,97,112,105,218,5,112,114,105,110,116,218,4,97, + 114,103,118,218,11,103,101,116,95,99,111,110,102,105,103,115, + 114,2,0,0,0,218,3,107,101,121,169,0,243,0,0,0, + 0,250,18,116,101,115,116,95,102,114,111,122,101,110,109,97, + 105,110,46,112,121,250,8,60,109,111,100,117,108,101,62,114, + 17,0,0,0,1,0,0,0,115,100,0,0,0,240,3,1, + 1,1,243,8,0,1,11,219,0,24,225,0,5,208,6,26, + 213,0,27,217,0,5,128,106,144,35,151,40,145,40,213,0, + 27,216,9,38,208,9,26,215,9,38,209,9,38,212,9,40, + 168,24,212,9,50,128,6,240,2,6,12,2,242,0,7,1, + 42,128,67,241,14,0,5,10,208,10,40,144,67,209,10,40, + 152,54,160,35,156,59,209,10,40,214,4,41,242,15,7,1, + 42,114,15,0,0,0, }; diff --git a/Python/marshal.c b/Python/marshal.c index d3a2c258a37bbc..5138fbce68a54a 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -305,6 +305,11 @@ w_bytecode(PyCodeObject *code, WFILE *p) } i += _PyOpcode_Caches[opcode]; } + // Terminate with two zero bytes, so that programs scanning .pyc files can + // skip over the bytecode (even if they don't know the compression scheme). + // This is simpler than writing the compressed size in the header, which + // requires two loops (one to count the bytes, then one to write them): + w_short(0, p); } static int @@ -969,8 +974,8 @@ r_bytecode(RFILE *p) else { oparg = 0; } - assert(0x00 <= opcode && opcode < 0x100); - assert(0x00 <= oparg && oparg < 0x100); + assert(0x01 <= opcode && opcode <= 0xFF); + assert(0x00 <= oparg && oparg <= 0xFF); buffer[i].opcode = opcode; buffer[i].oparg = oparg; i++; @@ -981,7 +986,10 @@ r_bytecode(RFILE *p) i++; } } - if (i != size) { + // The compressed bytecode is terminated with two zero bytes (see the + // comment at the bottom of w_bytecode): + int zero_zero = r_short(p); + if (zero_zero == EOF || zero_zero != 0 || i != size) { const char *e = "bad marshal data (bytecode size incorrect)"; PyErr_SetString(PyExc_ValueError, e); return NULL; diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index 8013c5ae6a7a07..dfda56669488c6 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -192,12 +192,13 @@ def r_bytecode(self) -> bytes: oparg_byte = self.r_byte() else: oparg_byte = 0 - assert 0x00 <= opcode_byte < 0x100 - assert 0x00 <= oparg_byte < 0x100 + assert 0x01 <= opcode_byte <= 0xFF + assert 0x00 <= oparg_byte <= 0xFF bytecode.extend([opcode_byte, oparg_byte]) for _ in range(opcode._inline_cache_entries[opcode_byte]): bytecode.extend([CACHE, 0]) - assert len(bytecode) == nbytes + zero_zero = self.r_short() + assert zero_zero == 0 and len(bytecode) == nbytes return bytes(bytecode) def _r_object(self) -> Any: