diff --git a/embed.fnc b/embed.fnc index 517bcb577c5c..c1236e15ba2f 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2424,7 +2424,7 @@ Es |void |reginsert |NN RExC_state_t *pRExC_state \ |const U8 op \ |const regnode_offset operand \ |const U32 depth -Es |void |regtail |NN RExC_state_t * pRExC_state \ +Es |bool |regtail |NN RExC_state_t * pRExC_state \ |NN const regnode_offset p \ |NN const regnode_offset val \ |const U32 depth @@ -2556,7 +2556,7 @@ Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\ Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\ |NULLOK HV* widecharmap|NN AV *revcharmap\ |U32 next_alloc|U32 depth -Es |U8 |regtail_study |NN RExC_state_t *pRExC_state \ +Es |bool |regtail_study |NN RExC_state_t *pRExC_state \ |NN regnode_offset p|NN const regnode_offset val|U32 depth # endif #endif diff --git a/proto.h b/proto.h index 936d344c323a..49fcb89b7d16 100644 --- a/proto.h +++ b/proto.h @@ -4434,7 +4434,7 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...); assert(fmt) STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags); STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags); -STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth); +STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth); #define PERL_ARGS_ASSERT_REGTAIL_STUDY \ assert(pRExC_state); assert(p); assert(val) # endif @@ -5569,7 +5569,7 @@ STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 o STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth); #define PERL_ARGS_ASSERT_REGPIECE \ assert(pRExC_state); assert(flagp) -STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth); +STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth); #define PERL_ARGS_ASSERT_REGTAIL \ assert(pRExC_state); assert(p); assert(val) STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf); diff --git a/regcomp.c b/regcomp.c index 15d843e7c2ff..73e8a83b643f 100644 --- a/regcomp.c +++ b/regcomp.c @@ -364,7 +364,6 @@ struct RExC_state_t { } \ } STMT_END -#define BRANCH_MAX_OFFSET U16_MAX #define REQUIRE_BRANCHJ(flagp, restart_retval) \ STMT_START { \ RExC_use_BRANCHJ = 1; \ @@ -12070,7 +12069,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) RETURN_FAIL_ON_RESTART(flags, flagp); FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags); } - REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */ + if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */ + REQUIRE_BRANCHJ(flagp, 0); + } lastbr = br; *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED); } @@ -12141,7 +12142,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) (IV)(ender - lastbr) ); ); - REGTAIL(pRExC_state, lastbr, ender); + if (! REGTAIL(pRExC_state, lastbr, ender)) { + REQUIRE_BRANCHJ(flagp, 0); + } if (have_branch) { char is_nothing= 1; @@ -12152,9 +12155,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) for (br = REGNODE_p(ret); br; br = regnext(br)) { const U8 op = PL_regkind[OP(br)]; if (op == BRANCH) { - REGTAIL_STUDY(pRExC_state, - REGNODE_OFFSET(NEXTOPER(br)), - ender); + if (! REGTAIL_STUDY(pRExC_state, + REGNODE_OFFSET(NEXTOPER(br)), + ender)) + { + REQUIRE_BRANCHJ(flagp, 0); + } if ( OP(NEXTOPER(br)) != NOTHING || regnext(NEXTOPER(br)) != REGNODE_p(ender)) is_nothing= 0; @@ -12221,7 +12227,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) Set_Node_Cur_Length(REGNODE_p(ret), parse_start); Set_Node_Offset(REGNODE_p(ret), parse_start + 1); FLAGS(REGNODE_p(ret)) = flag; - REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)); + if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL))) + { + REQUIRE_BRANCHJ(flagp, 0); + } } } @@ -12315,16 +12324,14 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) /* FIXME adding one for every branch after the first is probably * excessive now we have TRIE support. (hv) */ MARK_NAUGHTY(1); - REGTAIL(pRExC_state, chain, latest); + if (! REGTAIL(pRExC_state, chain, latest)) { + /* XXX We could just redo this branch, but figuring out what + * bookkeeping needs to be reset is a pain, and it's likely + * that other branches that goto END will also be too large */ + REQUIRE_BRANCHJ(flagp, 0); + } } chain = latest; - if ( chain > (SSize_t) BRANCH_MAX_OFFSET - && ! RExC_use_BRANCHJ) - { - /* XXX We could just redo this branch, but figuring out what - * bookkeeping needs to be reset is a pain */ - REQUIRE_BRANCHJ(flagp, 0); - } c++; } if (chain == 0) { /* Loop ran zero times. */ @@ -19627,10 +19634,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, } /* -- regtail - set the next-pointer at the end of a node chain of p to val. +- regtail - set the next-pointer at the end of a node chain of p to val. If + that value won't fit in the space available, instead returns FALSE. + (Except asserts if we can't fit in the largest space the regex + engine is designed for.) - SEE ALSO: regtail_study */ -STATIC void +STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, @@ -19666,8 +19676,17 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state, ARG_SET(REGNODE_p(scan), val - scan); } else { + if (val - scan > U16_MAX) { + /* Since not all callers check the return value, populate this with + * something that won't loop and will likely lead to a crash if + * execution continues */ + NEXT_OFF(REGNODE_p(scan)) = U16_MAX; + return FALSE; + } NEXT_OFF(REGNODE_p(scan)) = val - scan; } + + return TRUE; } #ifdef DEBUGGING @@ -19684,10 +19703,14 @@ that it is purely analytical. Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used to control which is which. +This used to return a value that was ignored. It was a problem that it is +#ifdef'd to be another function that didn't return a value. khw has changed it +so both currently return a pass/fail return. + */ /* TODO: All four parms should be const */ -STATIC U8 +STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth) { @@ -19711,7 +19734,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, bool unfolded_multi_char; /* Unexamined in this routine */ if (join_exact(pRExC_state, scan, &min, &unfolded_multi_char, 1, REGNODE_p(val), depth+1)) - return EXACT; + return TRUE; /* Was return EXACT */ } #endif if ( exact ) { @@ -19764,10 +19787,14 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, ARG_SET(REGNODE_p(scan), val - scan); } else { + if (val - scan > U16_MAX) { + NEXT_OFF(REGNODE_p(scan)) = U16_MAX; + return FALSE; + } NEXT_OFF(REGNODE_p(scan)) = val - scan; } - return exact; + return TRUE; /* Was 'return exact' */ } #endif diff --git a/t/re/pat.t b/t/re/pat.t index 7bb215a1eac6..c2b9d447a0db 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -24,7 +24,7 @@ BEGIN { skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; skip_all_without_unicode_tables(); -plan tests => 857; # Update this when adding/deleting tests. +plan tests => 859; # Update this when adding/deleting tests. run_tests() unless caller; @@ -2008,6 +2008,53 @@ CODE { # [perl #133921], segfault fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\xÿÿÿþù\Q`\Qx`{0c!}e;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q‡ \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ Fn0t0c €d;t ù ç€!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \xÿÿÿÿù\Q`\Qx`{0d ;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q‡ \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 0000000Fm0t0c €d;t ù ç€!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); + +fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿù\Q`\Q + + + + + ç + + + + + + + + + + + + + + + +x{0c!}\;\;çÿ q0/i0/!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q‡\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ u00000F000t0p €d? ù ç€!00000000000000000000000m/0000000000000000000000000000000m/0\} )|i', "", {}, "[perl #133921]"); + + fresh_perl_is('a aúúv sWtrt\ó||ß+Wüefù\Qx`\Qx`\x{1c!gGnuc given1111111111111O1111each111\jx` \xòÿÿÿù\Qx`\Q + + + + + + ç + + + + + + + + + + + + + + + +x{1c!}\;\;îçÿp qr/elsif/!eF/;îçÿù\QxÿÿÿÿùHQx`Lx{1c!}e;ù\Qx`\Qx`\x{1c!}\;ÿÿÿÿc!}\;îçÿù\Qx‡\xÿÿÿÿ>=\Qx`\Qx`ù\òx`ÿ>=\Qx`\Qx`ù\òx`\Qx`\x{1c!}8;\;îçÿp unshifteFnormat0cmp €d?not ùp ç€!0000000000000000000000000m/00000000000000000000000000000000m/0R\} )|\aï||K??p€¿ÿÿfúd{\{gri{\x{1x/} ð¹NuntiÀh', "", {}, "[perl #133921]"); } } # End of sub run_tests