Skip to content

Commit

Permalink
Fix and add tests for (*PRUNE)/(*THEN) following a leading non-greedy + quantifier
Browse files (browse the repository at this point in the history)
"aaabc" should match /a+?(*THEN)bc/, and the matched text ($&) should be "abc".
  • Loading branch information
demerphq committed Jun 22, 2013
1 parent 337ff30 commit b8f6efd
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 12 deletions.
2 changes: 1 addition & 1 deletion embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@
#define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
#define put_byte(a,b) S_put_byte(aTHX_ a,b)
#define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b)
#define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b)
#define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b)
#define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
# endif
# if defined(PERL_IN_REGEXEC_C)
Expand Down
2 changes: 1 addition & 1 deletion proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -5188,7 +5188,7 @@ STATIC void S_put_byte(pTHX_ SV* sv, int c)
assert(sv)

STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *pRExC_state, regnode *p, const regnode *val, U32 depth)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
Expand Down
13 changes: 9 additions & 4 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -5521,6 +5521,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
I32 sawlookahead = 0;
I32 sawplus = 0;
I32 sawopen = 0;
I32 sawminmod = 0;

regex_charset initial_charset = get_regex_charset(orig_rx_flags);
bool recompile = 0;
bool runtime_code = 0;
Expand Down Expand Up @@ -6029,7 +6031,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}

reStudy:
r->minlen = minlen = sawlookahead = sawplus = sawopen = 0;
r->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0;
Zero(r->substrs, 1, struct reg_substr_data);

#ifdef TRIE_STUDY_OPT
Expand Down Expand Up @@ -6098,12 +6100,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
* the only op that could be a regnode is PLUS, all the rest
* will be regnode_1 or regnode_2.
*
* (yves doesn't think this is true)
*/
if (OP(first) == PLUS)
sawplus = 1;
else
else {
if (OP(first) == MINMOD)
sawminmod = 1;
first += regarglen[OP(first)];

}
first = NEXTOPER(first);
first_next= regnext(first);
}
Expand Down Expand Up @@ -6174,7 +6179,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
first = NEXTOPER(first);
goto again;
}
if (sawplus && !sawlookahead && (!sawopen || !RExC_sawback)
if (sawplus && !sawminmod && !sawlookahead && (!sawopen || !RExC_sawback)
&& !pRExC_state->num_code_blocks) /* May examine pos and $& */
/* x+ must match at the 1st pos of run of x's */
r->intflags |= PREGf_SKIP;
Expand Down
12 changes: 6 additions & 6 deletions regnodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -692,12 +692,12 @@ EXTCONST char * const PL_reg_extflags_name[] = {
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
"SKIP", /* 0x00000001 - PREGf_SKIP */
"IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
"NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
"VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
"CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
"USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
"SKIP", /* 0x00000001 - PREGf_SKIP */
"IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
"NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
"VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
"CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
"USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
};
#endif /* DOINIT */

Expand Down
8 changes: 8 additions & 0 deletions t/re/re_tests
Original file line number Diff line number Diff line change
Expand Up @@ -1754,4 +1754,12 @@ m?^xy\?$? xy? y $& xy?
((A(*PRUNE)B|A(*PRUNE)D|A(*PRUNE)C)) AC n - -
((A(*PRUNE)B|A(*PRUNE)C|A(*PRUNE)D)) AC n - -

A+?(*THEN)BC AAABC y $& ABC
A+?(*PRUNE)BC AAABC y $& ABC
A+(*THEN)BC AAABC y $& AAABC
A+(*PRUNE)BC AAABC y $& AAABC
A+?(*THEN)BC(?{}) AAABC y $& ABC
A+?(*PRUNE)BC(?{}) AAABC y $& ABC
A+(*THEN)BC(?{}) AAABC y $& AAABC
A+(*PRUNE)BC(?{}) AAABC y $& AAABC
# vim: softtabstop=0 noexpandtab
3 changes: 3 additions & 0 deletions t/re/regexp.t
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ foreach (@tests) {
chomp;
s/\\n/\n/g unless $regex_sets;
my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6);
if (!defined $subject) {
die "Bad test definition on line $test: $_\n";
}
$reason = '' unless defined $reason;
my $input = join(':',$pat,$subject,$result,$repl,$expect);
# the double '' below keeps simple syntax highlighters from going crazy
Expand Down

0 comments on commit b8f6efd

Please sign in to comment.