Skip to content

Commit

Permalink
Add more checking for regnode offset overflowing
Browse files Browse the repository at this point in the history
This is part of the ongoing failures in [perl #133921].

The bottom line cause is that there are generally 16 bits available for
the address of the next regnode.  On very large patterns, this may not
be enough.  When that happens, a long jump is used instead.

What previous commits have done is to insert tests in a loop to detect
that overflow isn't going to occur.  But it turns out that there are
other places where such overflow could occur.  The real solution should
be to detect overflow in the base level routine that would otherwise get
things wrong.  This entails making that routine be able to return
failure.  It turns out that another function is used under DEBUGGING, so
that one must be changed as well.  And the calls where it is possible
for this to overflow are changed to look for failure return and proceed
appropriately, which is to set a flag that we need to use long jumps,
and restart the parse.
  • Loading branch information
khwilliamson committed Mar 14, 2019
1 parent 912b808 commit bf848a1
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 25 deletions.
4 changes: 2 additions & 2 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -2424,7 +2424,7 @@ Es |void |reginsert |NN RExC_state_t *pRExC_state \
|const U8 op \
|const regnode_offset operand \
|const U32 depth
Es |void |regtail |NN RExC_state_t * pRExC_state \
Es |bool |regtail |NN RExC_state_t * pRExC_state \
|NN const regnode_offset p \
|NN const regnode_offset val \
|const U32 depth
Expand Down Expand Up @@ -2556,7 +2556,7 @@ Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
|NULLOK HV* widecharmap|NN AV *revcharmap\
|U32 next_alloc|U32 depth
Es |U8 |regtail_study |NN RExC_state_t *pRExC_state \
Es |bool |regtail_study |NN RExC_state_t *pRExC_state \
|NN regnode_offset p|NN const regnode_offset val|U32 depth
# endif
#endif
Expand Down
4 changes: 2 additions & 2 deletions proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -4434,7 +4434,7 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
assert(fmt)
STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
assert(pRExC_state); assert(p); assert(val)
# endif
Expand Down Expand Up @@ -5569,7 +5569,7 @@ STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 o
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL \
assert(pRExC_state); assert(p); assert(val)
STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
Expand Down
67 changes: 47 additions & 20 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,6 @@ struct RExC_state_t {
} \
} STMT_END

#define BRANCH_MAX_OFFSET U16_MAX
#define REQUIRE_BRANCHJ(flagp, restart_retval) \
STMT_START { \
RExC_use_BRANCHJ = 1; \
Expand Down Expand Up @@ -12070,7 +12069,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RETURN_FAIL_ON_RESTART(flags, flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
}
REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */
REQUIRE_BRANCHJ(flagp, 0);
}
lastbr = br;
*flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
}
Expand Down Expand Up @@ -12141,7 +12142,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
(IV)(ender - lastbr)
);
);
REGTAIL(pRExC_state, lastbr, ender);
if (! REGTAIL(pRExC_state, lastbr, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}

if (have_branch) {
char is_nothing= 1;
Expand All @@ -12152,9 +12155,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
for (br = REGNODE_p(ret); br; br = regnext(br)) {
const U8 op = PL_regkind[OP(br)];
if (op == BRANCH) {
REGTAIL_STUDY(pRExC_state,
REGNODE_OFFSET(NEXTOPER(br)),
ender);
if (! REGTAIL_STUDY(pRExC_state,
REGNODE_OFFSET(NEXTOPER(br)),
ender))
{
REQUIRE_BRANCHJ(flagp, 0);
}
if ( OP(NEXTOPER(br)) != NOTHING
|| regnext(NEXTOPER(br)) != REGNODE_p(ender))
is_nothing= 0;
Expand Down Expand Up @@ -12221,7 +12227,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
FLAGS(REGNODE_p(ret)) = flag;
REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
{
REQUIRE_BRANCHJ(flagp, 0);
}
}
}

Expand Down Expand Up @@ -12315,16 +12324,14 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
/* FIXME adding one for every branch after the first is probably
* excessive now we have TRIE support. (hv) */
MARK_NAUGHTY(1);
REGTAIL(pRExC_state, chain, latest);
if (! REGTAIL(pRExC_state, chain, latest)) {
/* XXX We could just redo this branch, but figuring out what
* bookkeeping needs to be reset is a pain, and it's likely
* that other branches that goto END will also be too large */
REQUIRE_BRANCHJ(flagp, 0);
}
}
chain = latest;
if ( chain > (SSize_t) BRANCH_MAX_OFFSET
&& ! RExC_use_BRANCHJ)
{
/* XXX We could just redo this branch, but figuring out what
* bookkeeping needs to be reset is a pain */
REQUIRE_BRANCHJ(flagp, 0);
}
c++;
}
if (chain == 0) { /* Loop ran zero times. */
Expand Down Expand Up @@ -19627,10 +19634,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
}

/*
- regtail - set the next-pointer at the end of a node chain of p to val.
- regtail - set the next-pointer at the end of a node chain of p to val. If
that value won't fit in the space available, instead returns FALSE.
(Except asserts if we can't fit in the largest space the regex
engine is designed for.)
- SEE ALSO: regtail_study
*/
STATIC void
STATIC bool
S_regtail(pTHX_ RExC_state_t * pRExC_state,
const regnode_offset p,
const regnode_offset val,
Expand Down Expand Up @@ -19666,8 +19676,17 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
if (val - scan > U16_MAX) {
/* Since not all callers check the return value, populate this with
* something that won't loop and will likely lead to a crash if
* execution continues */
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
}
NEXT_OFF(REGNODE_p(scan)) = val - scan;
}

return TRUE;
}

#ifdef DEBUGGING
Expand All @@ -19684,10 +19703,14 @@ that it is purely analytical.
Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
to control which is which.

This used to return a value that was ignored. It was a problem that it is
#ifdef'd to be another function that didn't return a value. khw has changed it
so both currently return a pass/fail return.

*/
/* TODO: All four parms should be const */

STATIC U8
STATIC bool
S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
const regnode_offset val, U32 depth)
{
Expand All @@ -19711,7 +19734,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
bool unfolded_multi_char; /* Unexamined in this routine */
if (join_exact(pRExC_state, scan, &min,
&unfolded_multi_char, 1, REGNODE_p(val), depth+1))
return EXACT;
return TRUE; /* Was return EXACT */
}
#endif
if ( exact ) {
Expand Down Expand Up @@ -19764,10 +19787,14 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
if (val - scan > U16_MAX) {
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
}
NEXT_OFF(REGNODE_p(scan)) = val - scan;
}

return exact;
return TRUE; /* Was 'return exact' */
}
#endif

Expand Down
49 changes: 48 additions & 1 deletion t/re/pat.t
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ BEGIN {
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();

plan tests => 857; # Update this when adding/deleting tests.
plan tests => 859; # Update this when adding/deleting tests.

run_tests() unless caller;

Expand Down Expand Up @@ -2008,6 +2008,53 @@ CODE
{ # [perl #133921], segfault
fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\xÿÿÿþù\Q`\Qx`{0c!}e;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q‡ \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ Fn0t0c €d;t ù ç€!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]");
fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \xÿÿÿÿù\Q`\Qx`{0d ;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q‡ \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 0000000Fm0t0c €d;t ù ç€!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]");

fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿù\Q`\Q

ç

x{0c!}\;\;çÿ q0/i0/!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q‡\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ u00000F000t0p €d? ù ç€!00000000000000000000000m/0000000000000000000000000000000m/0\} )|i', "", {}, "[perl #133921]");

fresh_perl_is('a aúúv sWtrt\ó||ß+Wüefù\Qx`\Qx`\x{1c!gGnuc given1111111111111O1111each111\jx` \xòÿÿÿù\Qx`\Q
ç

x{1c!}\;\;îçÿp qr/elsif/!eF/;îçÿù\QxÿÿÿÿùHQx`Lx{1c!}e;ù\Qx`\Qx`\x{1c!}\;ÿÿÿÿc!}\;îçÿù\Qx‡\xÿÿÿÿ>=\Qx`\Qx`ù\òx`ÿ>=\Qx`\Qx`ù\òx`\Qx`\x{1c!}8;\;îçÿp unshifteFnormat0cmp €d?not ùp ç€!0000000000000000000000000m/00000000000000000000000000000000m/0R\} )|\aï||K??p€¿ÿÿfúd{\{gri{\x{1x/} ð¹NuntiÀh', "", {}, "[perl #133921]");
}

} # End of sub run_tests
Expand Down

0 comments on commit bf848a1

Please sign in to comment.