diff --git a/regcomp.c b/regcomp.c index c843b1dead..e06b505f1a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3264,6 +3264,14 @@ setup_subexp_call(Node* node, ScanEnv* env) } #endif +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) +#define IN_CALL (1<<4) +#define IN_RECCALL (1<<5) +#define IN_LOOK_BEHIND (1<<6) + /* divide different length alternatives in look-behind. (?<=A|B) ==> (?<=A)|(?<=B) (? (?s; end = sn->end; if (start >= end) return 0; + is_in_look_behind = (state & IN_LOOK_BEHIND) != 0; + r = 0; top_root = root = prev_node = snode = NULL_NODE; alt_num = 1; @@ -3593,7 +3606,7 @@ expand_case_fold_string(Node* node, regex_t* reg) len = enclen(reg->enc, p, end); varlen = is_case_fold_variable_len(n, items, len); - if (n == 0 || varlen == 0) { + if (n == 0 || varlen == 0 || is_in_look_behind) { if (IS_NULL(snode)) { if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { onig_node_free(top_root); @@ -3854,13 +3867,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } #endif -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) -#define IN_CALL (1<<4) -#define IN_RECCALL (1<<5) - /* setup_tree does the following work. 1. check empty loop. (set qn->target_empty_info) 2. expand ignore-case in char class. @@ -3902,7 +3908,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case NT_STR: if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); + r = expand_case_fold_string(node, reg, state); } break; @@ -4145,7 +4151,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; if (NTYPE(node) != NT_ANCHOR) goto restart; - r = setup_tree(an->target, reg, state, env); + r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env); if (r != 0) return r; r = setup_look_behind(node, reg, env); } @@ -4158,7 +4164,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; if (NTYPE(node) != NT_ANCHOR) goto restart; - r = setup_tree(an->target, reg, (state | IN_NOT), env); + r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND), + env); if (r != 0) return r; r = setup_look_behind(node, reg, env); } diff --git a/testpy.py b/testpy.py index 8335404c37..a94e1da2d0 100755 --- a/testpy.py +++ b/testpy.py @@ -1172,6 +1172,28 @@ def main(): x2("(?i)(?<=\u0149)a", "\u02bcna", 2, 3) # with look-behind # Other Unicode tests x2("\\x{25771}", "\U00025771", 0, 1) + x2("(?i:ss)", "ss", 0, 2) + x2("(?i:ss)", "Ss", 0, 2) + x2("(?i:ss)", "SS", 0, 2) + if is_unicode_encoding(onig_encoding): + x2("(?i:ss)", "\u017fS", 0, 2) # LATIN SMALL LETTER LONG S + x2("(?i:ss)", "s\u017f", 0, 2) + x2("(?i:ss)", "\u00df", 0, 1) # LATIN SMALL LETTER SHARP S + x2("(?i:ss)", "\u1e9e", 0, 1) # LATIN CAPITAL LETTER SHARP S + x2("(?i:xssy)", "xssy", 0, 4) + x2("(?i:xssy)", "xSsy", 0, 4) + x2("(?i:xssy)", "xSSy", 0, 4) + if is_unicode_encoding(onig_encoding): + x2("(?i:xssy)", "x\u017fSy", 0, 4) + x2("(?i:xssy)", "xs\u017fy", 0, 4) + x2("(?i:xssy)", "x\u00dfy", 0, 3) + x2("(?i:xssy)", "x\u1e9ey", 0, 3) + x2("(?i:\u00df)", "ss", 0, 2) + x2("(?i:\u00df)", "SS", 0, 2) + x2("(?i:[\u00df])", "ss", 0, 2) + x2("(?i:[\u00df])", "SS", 0, 2) + x2("(?i)(?