Skip to content

Commit

Permalink
PATCH: [perl #134329] Use after free in regcomp.c
Browse files Browse the repository at this point in the history
A compiled regex is composed of nodes, forming a linked list, with
normally a maximum of 16 bits used to specify the offset of the next
link.  For patterns that require more space than this, the nodes that
jump around are replaced with ones that have wider offsets.  Most nodes
are unaffected, as they just contain the offset of the next node, and
that number is always small.  The jump nodes are the ones affected.

When compiling a pattern, the 16 bit mechanism is used, until it
overflows, at which point the pattern is recompiled with the long jumps
instead.

When I rewrote the compiler last year to make it generally one pass, I
noticed that many of the places where a node was added didn't check
whether the result overflowed (regtail(), the function that sets the
link, returns FALSE in that case).  I presumed the prior authors knew
better, and did not change things, except to put a bogus value in the
link (offset) field that should cause a crash if it were used.  That's
what's happening in this ticket.

But seeing this example, it's clear that the return value should be
checked every time, because you can reach the limit at any time.  This
commit changes the code to do that, and requires that the function's
return value not be ignored, to guard against future changes.

My guess is that the reason it generally worked when there were multiple
passes is that the first pass didn't do anything except count space, and
that at some point before the end of the pass the return value did get
checked, so by the time the nodes were allocated for real, it knew
enough to use the long jumps.

(cherry picked from commit 3b2e562)
  • Loading branch information
khwilliamson authored and steve-m-hay committed Feb 12, 2020
1 parent 284d721 commit 9067ea0
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 34 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -5870,6 +5870,7 @@ t/porting/test_bootstrap.t Test that the instructions for test bootstrapping are
t/porting/utils.t Check that utility scripts still compile
t/re/alpha_assertions.t See if things like '(*positive_lookahead:...)' work properly
t/re/anyof.t See if bracketed char classes [...] compile properly
t/re/bigfuzzy_not_utf8.t Big and ugly tests not storable as UTF-8
t/re/charset.t See if regex modifiers like /d, /u work properly
t/re/fold_grind.pl Core file to see if regex case folding works properly
t/re/fold_grind_8.t Wrapper for fold_grind.pl for /l testing with a UTF-8 locale
Expand Down
4 changes: 2 additions & 2 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -2425,7 +2425,7 @@ Es |void |reginsert |NN RExC_state_t *pRExC_state \
|const U8 op \
|const regnode_offset operand \
|const U32 depth
Es |bool |regtail |NN RExC_state_t * pRExC_state \
EsR |bool |regtail |NN RExC_state_t * pRExC_state \
|NN const regnode_offset p \
|NN const regnode_offset val \
|const U32 depth
Expand Down Expand Up @@ -2559,7 +2559,7 @@ Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
|NULLOK HV* widecharmap|NN AV *revcharmap\
|U32 next_alloc|U32 depth
Es |bool |regtail_study |NN RExC_state_t *pRExC_state \
EsR |bool |regtail_study |NN RExC_state_t *pRExC_state \
|NN regnode_offset p|NN const regnode_offset val|U32 depth
# endif
#endif
Expand Down
8 changes: 6 additions & 2 deletions proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -4452,9 +4452,11 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
assert(fmt)
STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
assert(pRExC_state); assert(p); assert(val)

# endif
# if defined(PERL_IN_REGEXEC_C)
STATIC void S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8, const char *start, const char *end, const char *blurb);
Expand Down Expand Up @@ -5587,9 +5589,11 @@ STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 o
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_REGTAIL \
assert(pRExC_state); assert(p); assert(val)

STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
#define PERL_ARGS_ASSERT_SCAN_COMMIT \
assert(pRExC_state); assert(data); assert(minlenp)
Expand Down
109 changes: 79 additions & 30 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -11307,10 +11307,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
return 0;
}

REGTAIL(pRExC_state, ret, atomic);
if (! REGTAIL(pRExC_state, ret, atomic)) {
REQUIRE_BRANCHJ(flagp, 0);
}

REGTAIL(pRExC_state, atomic,
reg_node(pRExC_state, SRCLOSE));
if (! REGTAIL(pRExC_state, atomic, reg_node(pRExC_state,
SRCLOSE)))
{
REQUIRE_BRANCHJ(flagp, 0);
}

RExC_in_script_run = 0;
return ret;
Expand Down Expand Up @@ -11769,7 +11774,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RExC_flags & RXf_PMf_COMPILETIME
);
FLAGS(REGNODE_p(ret)) = 2;
REGTAIL(pRExC_state, ret, eval);
if (! REGTAIL(pRExC_state, ret, eval)) {
REQUIRE_BRANCHJ(flagp, 0);
}
/* deal with the length of this later - MJD */
return ret;
}
Expand Down Expand Up @@ -11822,7 +11829,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)

tail = reg(pRExC_state, 1, &flag, depth+1);
RETURN_FAIL_ON_RESTART(flag, flagp);
REGTAIL(pRExC_state, ret, tail);
if (! REGTAIL(pRExC_state, ret, tail)) {
REQUIRE_BRANCHJ(flagp, 0);
}
goto insert_if;
}
else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
Expand Down Expand Up @@ -11914,15 +11923,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
nextchar(pRExC_state);
insert_if:
REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
if (! REGTAIL(pRExC_state, ret, reganode(pRExC_state,
IFTHEN, 0)))
{
REQUIRE_BRANCHJ(flagp, 0);
}
br = regbranch(pRExC_state, &flags, 1, depth+1);
if (br == 0) {
RETURN_FAIL_ON_RESTART(flags,flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
(UV) flags);
} else
REGTAIL(pRExC_state, br, reganode(pRExC_state,
LONGJMP, 0));
if (! REGTAIL(pRExC_state, br, reganode(pRExC_state,
LONGJMP, 0)))
{
REQUIRE_BRANCHJ(flagp, 0);
}
c = UCHARAT(RExC_parse);
nextchar(pRExC_state);
if (flags&HASWIDTH)
Expand All @@ -11939,7 +11955,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
(UV) flags);
}
REGTAIL(pRExC_state, ret, lastbr);
if (! REGTAIL(pRExC_state, ret, lastbr)) {
REQUIRE_BRANCHJ(flagp, 0);
}
if (flags&HASWIDTH)
*flagp |= HASWIDTH;
c = UCHARAT(RExC_parse);
Expand All @@ -11954,16 +11972,26 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("Switch (?(condition)... contains too many branches");
}
ender = reg_node(pRExC_state, TAIL);
REGTAIL(pRExC_state, br, ender);
if (! REGTAIL(pRExC_state, br, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
if (lastbr) {
REGTAIL(pRExC_state, lastbr, ender);
REGTAIL(pRExC_state, REGNODE_OFFSET(
NEXTOPER(
NEXTOPER(REGNODE_p(lastbr)))),
ender);
if (! REGTAIL(pRExC_state, lastbr, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
if (! REGTAIL(pRExC_state,
REGNODE_OFFSET(
NEXTOPER(
NEXTOPER(REGNODE_p(lastbr)))),
ender))
{
REQUIRE_BRANCHJ(flagp, 0);
}
}
else
REGTAIL(pRExC_state, ret, ender);
if (! REGTAIL(pRExC_state, ret, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
#if 0 /* Removing this doesn't cause failures in the test suite -- khw */
RExC_size++; /* XXX WHY do we need this?!!
For large programs it seems to be required
Expand Down Expand Up @@ -12113,20 +12141,25 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
*flagp |= flags&SIMPLE;
}
if (is_open) { /* Starts with OPEN. */
REGTAIL(pRExC_state, ret, br); /* OPEN -> first. */
if (! REGTAIL(pRExC_state, ret, br)) { /* OPEN -> first. */
REQUIRE_BRANCHJ(flagp, 0);
}
}
else if (paren != '?') /* Not Conditional */
ret = br;
*flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
lastbr = br;
while (*RExC_parse == '|') {
if (RExC_use_BRANCHJ) {
bool shut_gcc_up;

ender = reganode(pRExC_state, LONGJMP, 0);

/* Append to the previous. */
REGTAIL(pRExC_state,
REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
ender);
shut_gcc_up = REGTAIL(pRExC_state,
REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
ender);
PERL_UNUSED_VAR(shut_gcc_up);
}
nextchar(pRExC_state);
if (freeze_paren) {
Expand Down Expand Up @@ -12237,9 +12270,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
is_nothing= 0;
}
else if (op == BRANCHJ) {
REGTAIL_STUDY(pRExC_state,
REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
ender);
bool shut_gcc_up = REGTAIL_STUDY(pRExC_state,
REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
ender);
PERL_UNUSED_VAR(shut_gcc_up);
/* for now we always disable this optimisation * /
if ( OP(NEXTOPER(NEXTOPER(br))) != NOTHING
|| regnext(NEXTOPER(NEXTOPER(br))) != REGNODE_p(ender))
Expand Down Expand Up @@ -12551,7 +12585,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
const regnode_offset w = reg_node(pRExC_state, WHILEM);

FLAGS(REGNODE_p(w)) = 0;
REGTAIL(pRExC_state, ret, w);
if (! REGTAIL(pRExC_state, ret, w)) {
REQUIRE_BRANCHJ(flagp, 0);
}
if (RExC_use_BRANCHJ) {
reginsert(pRExC_state, LONGJMP, ret, depth+1);
reginsert(pRExC_state, NOTHING, ret, depth+1);
Expand All @@ -12566,7 +12602,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
if (RExC_use_BRANCHJ)
NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over NOTHING to
LONGJMP. */
REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
NOTHING)))
{
REQUIRE_BRANCHJ(flagp, 0);
}
RExC_whilem_seen++;
MARK_NAUGHTY_EXP(1, 4); /* compound interest */
}
Expand Down Expand Up @@ -12638,16 +12678,22 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
if (*RExC_parse == '?') {
nextchar(pRExC_state);
reginsert(pRExC_state, MINMOD, ret, depth+1);
REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
if (! REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE)) {
REQUIRE_BRANCHJ(flagp, 0);
}
}
else if (*RExC_parse == '+') {
regnode_offset ender;
nextchar(pRExC_state);
ender = reg_node(pRExC_state, SUCCEED);
REGTAIL(pRExC_state, ret, ender);
if (! REGTAIL(pRExC_state, ret, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
reginsert(pRExC_state, SUSPEND, ret, depth+1);
ender = reg_node(pRExC_state, TAIL);
REGTAIL(pRExC_state, ret, ender);
if (! REGTAIL(pRExC_state, ret, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
}

if (ISMULT2(RExC_parse)) {
Expand Down Expand Up @@ -19822,8 +19868,8 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
}
else {
if (val - scan > U16_MAX) {
/* Since not all callers check the return value, populate this with
* something that won't loop and will likely lead to a crash if
/* Populate this with something that won't loop and will likely
* lead to a crash if the caller ignores the failure return, and
* execution continues */
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
Expand Down Expand Up @@ -19934,6 +19980,9 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
}
else {
if (val - scan > U16_MAX) {
/* Populate this with something that won't loop and will likely
* lead to a crash if the caller ignores the failure return, and
* execution continues */
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
}
Expand Down
Binary file added t/re/bigfuzzy_not_utf8.t
Binary file not shown.

0 comments on commit 9067ea0

Please sign in to comment.