diff --git a/pod/perldiag.pod b/pod/perldiag.pod index c1d776bb0716..ec1edb6e70c8 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -6285,6 +6285,15 @@ The message attempts to include the name of the called subroutine. If the subroutine has been aliased, the subroutine's original name will be shown, regardless of what name the caller used. +=item Too many nested open parens in regex; marked by <-- HERE in m/%s/ + +(F) You have exceeded the number of open C<"("> parentheses that haven't +been matched by corresponding closing ones. This limit prevents eating +up too much memory. It is initially set to 1000, but may be changed by +setting C<${^RE_COMPILE_RECURSION_LIMIT}> to some other value. This may +need to be done in a BEGIN block before the regular expression pattern +is compiled. + =item Too many )'s (A) You've accidentally run your script through B instead of Perl. diff --git a/pod/perlvar.pod b/pod/perlvar.pod index d67d4cd8b1b3..b42cbe22519f 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -1249,6 +1249,17 @@ regular expression assertion (see L). May be written to. This variable was added in Perl 5.005. +=item ${^RE_COMPILE_RECURSION_LIMIT} +X<${^RE_COMPILE_RECURSION_LIMIT}> + +The current value giving the maximum number of open but unclosed +parenthetical groups there may be at any point during a regular +expression compilation. The default is currently 1000 nested groups. +You may adjust it depending on your needs and the amount of memory +available. + +This variable was added in Perl v5.30.0. + =item ${^RE_DEBUG_FLAGS} X<${^RE_DEBUG_FLAGS}> diff --git a/regcomp.c b/regcomp.c index 98c5f42be638..816d735f5b39 100644 --- a/regcomp.c +++ b/regcomp.c @@ -11009,6 +11009,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) I32 freeze_paren = 0; I32 after_freeze = 0; I32 num; /* numeric backreferences */ + SV * max_open; /* Max number of unclosed parens */ char * parse_start = RExC_parse; /* MJD */ char * const oregcomp_parse = RExC_parse; @@ -11018,6 +11019,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) PERL_ARGS_ASSERT_REG; DEBUG_PARSE("reg "); + + max_open = get_sv(RE_COMPILE_RECURSION_LIMIT, GV_ADD); + assert(max_open); + if (!SvIOK(max_open)) { + sv_setiv(max_open, RE_COMPILE_RECURSION_INIT); + } + if (depth > 4 * SvIV(max_open)) { /* We increase depth by 4 for each open + paren */ + vFAIL("Too many nested open parens"); + } + *flagp = 0; /* Tentatively. */ /* Having this true makes it feasible to have a lot fewer tests for the diff --git a/regcomp.h b/regcomp.h index 7a4432e3d620..5002e2b38d35 100644 --- a/regcomp.h +++ b/regcomp.h @@ -946,6 +946,9 @@ typedef struct _reg_ac_data reg_ac_data; #define RE_TRIE_MAXBUF_NAME "\022E_TRIE_MAXBUF" #define RE_DEBUG_FLAGS "\022E_DEBUG_FLAGS" +#define RE_COMPILE_RECURSION_INIT 1000 +#define RE_COMPILE_RECURSION_LIMIT "\022E_COMPILE_RECURSION_LIMIT" + /* RE_DEBUG_FLAGS is used to control what debug output is emitted diff --git a/t/lib/croak/regcomp b/t/lib/croak/regcomp index c72e3d47bf1e..0ba705e9159d 100644 --- a/t/lib/croak/regcomp +++ b/t/lib/croak/regcomp @@ -63,3 +63,10 @@ my $p00="[\\x59\\N{U+.}]"; qr/$p00/ui; EXPECT Invalid hexadecimal number in \N{U+...} in regex; marked by <-- HERE in m/[\x59\N{U+. <-- HERE }]/ at - line 1. ######## +# NAME ${^RE_COMPILE_RECURSION_LIMIT} [perl #131551] +BEGIN { ${^RE_COMPILE_RECURSION_LIMIT} = ${^RE_COMPILE_RECURSION_LIMIT} = 2; } +qr/(a)/; +qr/((a))/; +EXPECT +Too many nested open parens in regex; marked by <-- HERE in m/(( <-- HERE a))/ at - line 3. +########