From 7bb123eb0a8f722f5a78b4f0c851da758772d1a3 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 24 Jan 2023 10:38:36 +0000 Subject: [PATCH 1/4] gh-98831: rewrite pattern matching opcodes in the instruction definition DSL --- Python/bytecodes.c | 43 +++++++++----------------------- Python/generated_cases.c.h | 50 +++++++++++++++++++++----------------- Python/opcode_metadata.h | 8 +++--- 3 files changed, 44 insertions(+), 57 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 47bbe1a99e3bf0..eaa4c11d7a86e5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2093,61 +2093,42 @@ dummy_func( PUSH(len_o); } - // stack effect: (__0, __1 -- ) - inst(MATCH_CLASS) { + inst(MATCH_CLASS, (subject, type, names -- attrs)) { // Pop TOS and TOS1. Set TOS to a tuple of attributes on success, or // None on failure. - PyObject *names = POP(); - PyObject *type = POP(); - PyObject *subject = TOP(); assert(PyTuple_CheckExact(names)); - PyObject *attrs = match_class(tstate, subject, type, oparg, names); - Py_DECREF(names); - Py_DECREF(type); + attrs = match_class(tstate, subject, type, oparg, names); + DECREF_INPUTS(); if (attrs) { // Success! assert(PyTuple_CheckExact(attrs)); - SET_TOP(attrs); } else if (_PyErr_Occurred(tstate)) { // Error! - goto error; + ERROR_IF(true, error); } else { // Failure! - SET_TOP(Py_NewRef(Py_None)); + attrs = Py_NewRef(Py_None); } - Py_DECREF(subject); } - // stack effect: ( -- __0) - inst(MATCH_MAPPING) { - PyObject *subject = TOP(); + inst(MATCH_MAPPING, (subject -- subject, res)) { int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_MAPPING; - PyObject *res = match ? Py_True : Py_False; - PUSH(Py_NewRef(res)); + res = Py_NewRef(match ? Py_True : Py_False); PREDICT(POP_JUMP_IF_FALSE); } - // stack effect: ( -- __0) - inst(MATCH_SEQUENCE) { - PyObject *subject = TOP(); + inst(MATCH_SEQUENCE, (subject -- subject, res)) { int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_SEQUENCE; - PyObject *res = match ? Py_True : Py_False; - PUSH(Py_NewRef(res)); + res = Py_NewRef(match ? Py_True : Py_False); PREDICT(POP_JUMP_IF_FALSE); } - // stack effect: ( -- __0) - inst(MATCH_KEYS) { + inst(MATCH_KEYS, (subject, keys -- subject, keys, values_or_none)) { // On successful match, PUSH(values). Otherwise, PUSH(None). - PyObject *keys = TOP(); - PyObject *subject = SECOND(); - PyObject *values_or_none = match_keys(tstate, subject, keys); - if (values_or_none == NULL) { - goto error; - } - PUSH(values_or_none); + values_or_none = match_keys(tstate, subject, keys); + ERROR_IF(values_or_none == NULL, error); } // stack effect: ( -- ) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c1eb4000883dc7..07e97e7b6653b7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2480,59 +2480,65 @@ } TARGET(MATCH_CLASS) { + PyObject *names = PEEK(1); + PyObject *type = PEEK(2); + PyObject *subject = PEEK(3); + PyObject *attrs; // Pop TOS and TOS1. Set TOS to a tuple of attributes on success, or // None on failure. - PyObject *names = POP(); - PyObject *type = POP(); - PyObject *subject = TOP(); assert(PyTuple_CheckExact(names)); - PyObject *attrs = match_class(tstate, subject, type, oparg, names); - Py_DECREF(names); + attrs = match_class(tstate, subject, type, oparg, names); + Py_DECREF(subject); Py_DECREF(type); + Py_DECREF(names); if (attrs) { // Success! assert(PyTuple_CheckExact(attrs)); - SET_TOP(attrs); } else if (_PyErr_Occurred(tstate)) { // Error! - goto error; + if (true) goto pop_3_error; } else { // Failure! - SET_TOP(Py_NewRef(Py_None)); + attrs = Py_NewRef(Py_None); } - Py_DECREF(subject); + STACK_SHRINK(2); + POKE(1, attrs); DISPATCH(); } TARGET(MATCH_MAPPING) { - PyObject *subject = TOP(); + PyObject *subject = PEEK(1); + PyObject *res; int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_MAPPING; - PyObject *res = match ? Py_True : Py_False; - PUSH(Py_NewRef(res)); + res = Py_NewRef(match ? Py_True : Py_False); + STACK_GROW(1); + POKE(1, res); PREDICT(POP_JUMP_IF_FALSE); DISPATCH(); } TARGET(MATCH_SEQUENCE) { - PyObject *subject = TOP(); + PyObject *subject = PEEK(1); + PyObject *res; int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_SEQUENCE; - PyObject *res = match ? Py_True : Py_False; - PUSH(Py_NewRef(res)); + res = Py_NewRef(match ? Py_True : Py_False); + STACK_GROW(1); + POKE(1, res); PREDICT(POP_JUMP_IF_FALSE); DISPATCH(); } TARGET(MATCH_KEYS) { + PyObject *keys = PEEK(1); + PyObject *subject = PEEK(2); + PyObject *values_or_none; // On successful match, PUSH(values). Otherwise, PUSH(None). - PyObject *keys = TOP(); - PyObject *subject = SECOND(); - PyObject *values_or_none = match_keys(tstate, subject, keys); - if (values_or_none == NULL) { - goto error; - } - PUSH(values_or_none); + values_or_none = match_keys(tstate, subject, keys); + if (values_or_none == NULL) goto error; + STACK_GROW(1); + POKE(1, values_or_none); DISPATCH(); } diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 3ceaca8c397f6b..4f25abc062f6d9 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -133,10 +133,10 @@ static const struct { [JUMP_IF_TRUE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, [JUMP_BACKWARD_NO_INTERRUPT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, [GET_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, - [MATCH_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, - [MATCH_MAPPING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, - [MATCH_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, - [MATCH_KEYS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, + [MATCH_CLASS] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, + [MATCH_MAPPING] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, + [MATCH_SEQUENCE] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, + [MATCH_KEYS] = { 2, 3, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [GET_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [GET_YIELD_FROM_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [FOR_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, From 46f09753d684c9a0ac95a252ae9640fdfa592d63 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 24 Jan 2023 18:36:36 +0000 Subject: [PATCH 2/4] Brandt's suggestion --- Python/bytecodes.c | 6 ++---- Python/generated_cases.c.h | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index eaa4c11d7a86e5..57e5d82574b4f8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2103,11 +2103,9 @@ dummy_func( // Success! assert(PyTuple_CheckExact(attrs)); } - else if (_PyErr_Occurred(tstate)) { - // Error! - ERROR_IF(true, error); - } else { + // Error! + ERROR_IF(_PyErr_Occurred(tstate), error); // Failure! attrs = Py_NewRef(Py_None); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 07e97e7b6653b7..87576a3fbb77cc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2495,11 +2495,9 @@ // Success! assert(PyTuple_CheckExact(attrs)); } - else if (_PyErr_Occurred(tstate)) { - // Error! - if (true) goto pop_3_error; - } else { + // Error! + if (_PyErr_Occurred(tstate)) goto pop_3_error; // Failure! attrs = Py_NewRef(Py_None); } From 2400e22e1a2643c6ac584b82ec118c2dd6423dd7 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 24 Jan 2023 18:56:18 +0000 Subject: [PATCH 3/4] fix generator's handling of trailing comment after ERROR_IF() --- Python/bytecodes.c | 9 +++------ Python/generated_cases.c.h | 7 ++----- Tools/cases_generator/generate_cases.py | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7986f89df1708f..d3e242b81e608d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2099,14 +2099,11 @@ dummy_func( attrs = match_class(tstate, subject, type, oparg, names); DECREF_INPUTS(); if (attrs) { - // Success! - assert(PyTuple_CheckExact(attrs)); + assert(PyTuple_CheckExact(attrs)); // Success! } else { - // Error! - ERROR_IF(_PyErr_Occurred(tstate), error); - // Failure! - attrs = Py_NewRef(Py_None); + ERROR_IF(_PyErr_Occurred(tstate), error); // Error! + attrs = Py_NewRef(Py_None); // Failure! } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2ec1ad387d2ebe..7d3396ad6bdec3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2491,14 +2491,11 @@ Py_DECREF(type); Py_DECREF(names); if (attrs) { - // Success! - assert(PyTuple_CheckExact(attrs)); + assert(PyTuple_CheckExact(attrs)); // Success! } else { - // Error! if (_PyErr_Occurred(tstate)) goto pop_3_error; - // Failure! - attrs = Py_NewRef(Py_None); + attrs = Py_NewRef(Py_None); // Failure! } STACK_SHRINK(2); POKE(1, attrs); diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 90f101adceeb47..1c9dc592756c49 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -354,7 +354,7 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None assert dedent <= 0 extra = " " * -dedent for line in self.block_text: - if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): + if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line): space, cond, label = m.groups() # ERROR_IF() must pop the inputs from the stack. # The code block is responsible for DECREF()ing them. From 5b4edb346a1a64ee33ec877fd7f4e448bd4f8cdd Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 24 Jan 2023 21:39:24 +0000 Subject: [PATCH 4/4] fix more regexes. Add test for comment after ERROR_IF --- Tools/cases_generator/generate_cases.py | 6 +++--- Tools/cases_generator/test_generator.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 1c9dc592756c49..429c9d34d60601 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -26,7 +26,7 @@ ) BEGIN_MARKER = "// BEGIN BYTECODES //" END_MARKER = "// END BYTECODES //" -RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$" +RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*(?://.*)?$" UNUSED = "unused" BITS_PER_CODE_UNIT = 16 @@ -378,7 +378,7 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None ) else: out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n") - elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*$", line): + elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line): if not self.register: space = m.group(1) for ieff in self.input_effects: @@ -964,7 +964,7 @@ def extract_block_text(block: parser.Block) -> tuple[list[str], list[str]]: # Separate PREDICT(...) macros from end predictions: list[str] = [] - while blocklines and (m := re.match(r"^\s*PREDICT\((\w+)\);\s*$", blocklines[-1])): + while blocklines and (m := re.match(r"^\s*PREDICT\((\w+)\);\s*(?://.*)?$", blocklines[-1])): predictions.insert(0, m.group(1)) blocklines.pop() diff --git a/Tools/cases_generator/test_generator.py b/Tools/cases_generator/test_generator.py index ae0c1e2f97c35e..c59aae878cfa70 100644 --- a/Tools/cases_generator/test_generator.py +++ b/Tools/cases_generator/test_generator.py @@ -215,6 +215,20 @@ def test_error_if_plain(): """ run_cases_test(input, output) +def test_error_if_plain_with_comment(): + input = """ + inst(OP, (--)) { + ERROR_IF(cond, label); // Comment is ok + } + """ + output = """ + TARGET(OP) { + if (cond) goto label; + DISPATCH(); + } + """ + run_cases_test(input, output) + def test_error_if_pop(): input = """ inst(OP, (left, right -- res)) {