Skip to content

Commit

Permalink
fix(regular_expression): Report more MayContainStrings error in (nest…
Browse files Browse the repository at this point in the history
…ed)class (#5661)

Fixes #5632
  • Loading branch information
leaysgur committed Sep 10, 2024
1 parent febb29f commit 0511d55
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 59 deletions.
2 changes: 2 additions & 0 deletions crates/oxc_ast/src/generated/assert_layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1507,6 +1507,7 @@ const _: () = {
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, body) == 16usize);

assert!(size_of::<CharacterClassContentsKind>() == 1usize);
Expand Down Expand Up @@ -3061,6 +3062,7 @@ const _: () = {
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, body) == 12usize);

assert!(size_of::<CharacterClassContentsKind>() == 1usize);
Expand Down
1 change: 1 addition & 0 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ pub struct CharacterClass<'a> {
pub span: Span,
pub negative: bool,
pub kind: CharacterClassContentsKind,
pub strings: bool,
pub body: Vec<'a, CharacterClassContents<'a>>,
}

Expand Down
7 changes: 7 additions & 0 deletions crates/oxc_regular_expression/src/body_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ mod test {
(r"\P{Basic_Emoji}", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[^\p{Basic_Emoji}]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[[^\p{Basic_Emoji}]]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[^\q{}]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[[^\q{}]]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[[^\q{ng}]]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[[^\q{a|}]]", ParserOptions::default().with_unicode_sets_mode(), true),
Expand All @@ -237,6 +238,12 @@ mod test {
(r"[[^\q{ng}--\q{o|k}]]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[[^\q{o|k}--\q{ng}]]", ParserOptions::default().with_unicode_sets_mode(), false),
(r"[[z-a]]", ParserOptions::default().with_unicode_sets_mode(), true),
(r"[[[[[^[[[[\q{ng}]]]]]]]]]", ParserOptions::default().with_unicode_sets_mode(), true),
(
r"[^[[[[[[[[[[[[[[[[\q{ng}]]]]]]]]]]]]]]]]]",
ParserOptions::default().with_unicode_sets_mode(),
true,
),
] {
assert_eq!(
PatternParser::new(&allocator, source_text, *options).parse().is_err(),
Expand Down
119 changes: 60 additions & 59 deletions crates/oxc_regular_expression/src/body_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,20 +727,11 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;

if self.reader.eat(']') {
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents);

// [SS:EE] CharacterClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
if negative
&& body.iter().any(|item| match item {
// MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression
// - and the UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - and it is binary property of strings(can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(
unicode_property_escape,
) => unicode_property_escape.strings,
_ => false,
})
{
if negative && strings {
return Err(diagnostics::invalid_character_class(
self.span_factory.create(span_start, self.reader.offset()),
));
Expand All @@ -750,6 +741,7 @@ impl<'a> PatternParser<'a> {
span: self.span_factory.create(span_start, self.reader.offset()),
negative,
kind,
strings,
body,
}));
}
Expand Down Expand Up @@ -1267,62 +1259,45 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;

if self.reader.eat(']') {
let strings = match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => {
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => {
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => body
.iter()
.next()
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
};

// [SS:EE] NestedClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
if negative {
let may_contain_strings = |item: &ast::CharacterClassContents| match item {
// MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - && it is binary property of strings(can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(
unicode_property_escape,
) => unicode_property_escape.strings,
// MayContainStrings is true
// - if ClassStringDisjunction is [empty]
// - || if ClassStringDisjunction contains ClassString
// - && ClassString is [empty]
// - || ClassString contains 2 more ClassSetCharacters
ast::CharacterClassContents::ClassStringDisjunction(
class_string_disjunction,
) => class_string_disjunction.strings,
_ => false,
};

if match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => {
body.iter().any(may_contain_strings)
}
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => {
body.iter().all(may_contain_strings)
}
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => {
body.iter().next().map_or(false, may_contain_strings)
}
} {
return Err(diagnostics::character_class_contents_invalid_operands(
self.span_factory.create(span_start, self.reader.offset()),
));
}
if negative && strings {
return Err(diagnostics::character_class_contents_invalid_operands(
self.span_factory.create(span_start, self.reader.offset()),
));
}

return Ok(Some(ast::CharacterClassContents::NestedCharacterClass(Box::new_in(
ast::CharacterClass {
span: self.span_factory.create(span_start, self.reader.offset()),
negative,
kind,
strings,
body,
},
self.allocator,
Expand Down Expand Up @@ -2185,4 +2160,30 @@ impl<'a> PatternParser<'a> {

Some(value)
}

// ---

fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool {
match item {
// MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - && it is binary property of strings(can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
unicode_property_escape.strings
}
// MayContainStrings is true
// - if ClassStringDisjunction is [empty]
// - || if ClassStringDisjunction contains ClassString
// - && ClassString is [empty]
// - || ClassString contains 2 more ClassSetCharacters
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
class_string_disjunction.strings
}
// MayContainStrings is true
// - if NestedClass has MayContainStrings: true
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings,
_ => false,
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
span: CloneIn::clone_in(&self.span, allocator),
negative: CloneIn::clone_in(&self.negative, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
strings: CloneIn::clone_in(&self.strings, allocator),
body: CloneIn::clone_in(&self.body, allocator),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ impl<'a> ContentEq for CharacterClass<'a> {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.negative, &other.negative)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.strings, &other.strings)
&& ContentEq::content_eq(&self.body, &other.body)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ impl<'a> ContentHash for CharacterClass<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.negative, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.strings, state);
ContentHash::content_hash(&self.body, state);
}
}
Expand Down
40 changes: 40 additions & 0 deletions tasks/coverage/parser_typescript.snap
Original file line number Diff line number Diff line change
Expand Up @@ -10957,6 +10957,46 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/private
43 │ /[^\p{Emoji}[[\p{RGI_Emoji}]]][^\p{Emoji}--[[\p{RGI_Emoji}]]][^\p{Emoji}&&[[\p{RGI_Emoji}]]]/v,
╰────

× Invalid regular expression: Invalid character class with strings unicode property
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:43:3]
42 │ /[^\p{RGI_Emoji}\q{foo}][^\p{RGI_Emoji}--\q{foo}][^\p{RGI_Emoji}&&\q{foo}]/v,
43 │ /[^\p{Emoji}[[\p{RGI_Emoji}]]][^\p{Emoji}--[[\p{RGI_Emoji}]]][^\p{Emoji}&&[[\p{RGI_Emoji}]]]/v,
· ─────────────────────────────
44 │ /[^[[\p{RGI_Emoji}]]\p{Emoji}][^[[\p{RGI_Emoji}]]--\p{Emoji}][^[[\p{RGI_Emoji}]]&&\p{Emoji}]/v,
╰────

× Invalid regular expression: Invalid character class with strings unicode property
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:44:3]
43 │ /[^\p{Emoji}[[\p{RGI_Emoji}]]][^\p{Emoji}--[[\p{RGI_Emoji}]]][^\p{Emoji}&&[[\p{RGI_Emoji}]]]/v,
44 │ /[^[[\p{RGI_Emoji}]]\p{Emoji}][^[[\p{RGI_Emoji}]]--\p{Emoji}][^[[\p{RGI_Emoji}]]&&\p{Emoji}]/v,
· ─────────────────────────────
45 │ /[^[[\p{RGI_Emoji}]]\q{foo}][^[[\p{RGI_Emoji}]]--\q{foo}][^[[\p{RGI_Emoji}]]&&\q{foo}]/v,
╰────

× Invalid regular expression: Invalid character class with strings unicode property
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:45:3]
44 │ /[^[[\p{RGI_Emoji}]]\p{Emoji}][^[[\p{RGI_Emoji}]]--\p{Emoji}][^[[\p{RGI_Emoji}]]&&\p{Emoji}]/v,
45 │ /[^[[\p{RGI_Emoji}]]\q{foo}][^[[\p{RGI_Emoji}]]--\q{foo}][^[[\p{RGI_Emoji}]]&&\q{foo}]/v,
· ───────────────────────────
46 │ /[^\q{foo|bar|baz}--\q{foo}--\q{bar}--\q{baz}][^\p{L}--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
╰────

× Invalid regular expression: Invalid character class with strings unicode property
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:46:3]
45 │ /[^[[\p{RGI_Emoji}]]\q{foo}][^[[\p{RGI_Emoji}]]--\q{foo}][^[[\p{RGI_Emoji}]]&&\q{foo}]/v,
46 │ /[^\q{foo|bar|baz}--\q{foo}--\q{bar}--\q{baz}][^\p{L}--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
· ─────────────────────────────────────────────
47 │ /[^[[\q{foo|bar|baz}]]--\q{foo}--\q{bar}--\q{baz}][^[^[^\p{L}]]--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
╰────

× Invalid regular expression: Invalid character class with strings unicode property
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:47:3]
46 │ /[^\q{foo|bar|baz}--\q{foo}--\q{bar}--\q{baz}][^\p{L}--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
47 │ /[^[[\q{foo|bar|baz}]]--\q{foo}--\q{bar}--\q{baz}][^[^[^\p{L}]]--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
· ─────────────────────────────────────────────────
48 │ ];
╰────

× The 'u' and 'v' regular expression flags cannot be enabled at the same time
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:3:2]
2 │ // Flags
Expand Down

0 comments on commit 0511d55

Please sign in to comment.