From 603817bef9b1e375e7b6b22ae6b7cd8518c12907 Mon Sep 17 00:00:00 2001 From: Boshen <1430279+Boshen@users.noreply.github.com> Date: Sat, 7 Sep 2024 10:48:57 +0000 Subject: [PATCH] feat(oxc)!: add `SourceType::Unambiguous`; parse `.js` as unambiguous (#5557) See https://babel.dev/docs/options#misc-options for background on `unambiguous` Once `SourceType::Unambiguous` is parsed, it will correctly set the returned `Program::source_type` to either `module` or `script`. --- crates/oxc_parser/src/js/expression.rs | 5 ++- crates/oxc_parser/src/js/statement.rs | 5 +++ crates/oxc_parser/src/lib.rs | 32 +++++++++++++++++++ crates/oxc_semantic/src/checker/javascript.rs | 4 +++ crates/oxc_span/src/source_type/mod.rs | 23 ++++++++++--- crates/oxc_span/src/source_type/types.rs | 8 +++++ tasks/coverage/misc/fail/oxc.js | 2 ++ tasks/coverage/parser_misc.snap | 13 ++++---- 8 files changed, 80 insertions(+), 12 deletions(-) diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index 18fb30e8fd85a..63a0c360de6b6 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -548,7 +548,10 @@ impl<'a> ParserImpl<'a> { ) -> Result> { self.bump_any(); // bump `.` let property = match self.cur_kind() { - Kind::Meta => self.parse_keyword_identifier(Kind::Meta), + Kind::Meta => { + self.set_source_type_to_module_if_unambiguous(); + self.parse_keyword_identifier(Kind::Meta) + } Kind::Target => self.parse_keyword_identifier(Kind::Target), _ => self.parse_identifier_name()?, }; diff --git a/crates/oxc_parser/src/js/statement.rs b/crates/oxc_parser/src/js/statement.rs index 408c3ae930b32..0b482fe8120a2 100644 --- a/crates/oxc_parser/src/js/statement.rs +++ b/crates/oxc_parser/src/js/statement.rs @@ -41,6 +41,11 @@ impl<'a> ParserImpl<'a> { break; } let stmt = self.parse_statement_list_item(StatementContext::StatementList)?; + + if is_top_level && stmt.is_module_declaration() { + 
self.set_source_type_to_module_if_unambiguous(); + } + // Section 11.2.1 Directive Prologue // The only way to get a correct directive is to parse the statement first and check if it is a string literal. // All other method are flawed, see test cases in [babel](https://github.com/babel/babel/blob/main/packages/babel-parser/test/fixtures/core/categorized/not-directive/input.js) diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index 9641d0caf1a69..055ed30a37298 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -350,6 +350,8 @@ impl<'a> ParserImpl<'a> { let (directives, statements) = self.parse_directives_and_statements(/* is_top_level */ true)?; + self.set_source_type_to_script_if_unambiguous(); + let span = Span::new(0, self.source_text.len() as u32); Ok(self.ast.program(span, self.source_type, hashbang, directives, statements)) } @@ -416,6 +418,18 @@ impl<'a> ParserImpl<'a> { fn ts_enabled(&self) -> bool { self.source_type.is_typescript() } + + fn set_source_type_to_module_if_unambiguous(&mut self) { + if self.source_type.is_unambiguous() { + self.source_type = self.source_type.with_module(true); + } + } + + fn set_source_type_to_script_if_unambiguous(&mut self) { + if self.source_type.is_unambiguous() { + self.source_type = self.source_type.with_script(true); + } + } } #[cfg(test)] @@ -511,6 +525,24 @@ mod test { } } + #[test] + fn unambiguous() { + let allocator = Allocator::default(); + let source_type = SourceType::default().with_unambiguous(true); + assert!(source_type.is_unambiguous()); + let sources = ["import x from 'foo';", "export {x} from 'foo';", "import.meta"]; + for source in sources { + let ret = Parser::new(&allocator, source, source_type).parse(); + assert!(ret.program.source_type.is_module()); + } + + let sources = ["", "import('foo')"]; + for source in sources { + let ret = Parser::new(&allocator, source, source_type).parse(); + assert!(ret.program.source_type.is_script()); + } + } + #[test] 
fn memory_leak() { let allocator = Allocator::default(); diff --git a/crates/oxc_semantic/src/checker/javascript.rs b/crates/oxc_semantic/src/checker/javascript.rs index e3b091e0c658e..ac92606ad3d10 100644 --- a/crates/oxc_semantic/src/checker/javascript.rs +++ b/crates/oxc_semantic/src/checker/javascript.rs @@ -404,6 +404,10 @@ pub fn check_module_declaration<'a>( let start = decl.span().start; let span = Span::new(start, start + 6); match ctx.source_type.module_kind() { + ModuleKind::Unambiguous => { + #[cfg(debug_assertions)] + panic!("Technically unreachable, omit to avoid panic."); + } ModuleKind::Script => { ctx.error(module_code(text, span)); } diff --git a/crates/oxc_span/src/source_type/mod.rs b/crates/oxc_span/src/source_type/mod.rs index e058fa3f51497..160a7fdb6af7a 100644 --- a/crates/oxc_span/src/source_type/mod.rs +++ b/crates/oxc_span/src/source_type/mod.rs @@ -64,7 +64,7 @@ impl SourceType { pub const fn js() -> Self { Self { language: Language::JavaScript, - module_kind: ModuleKind::Script, + module_kind: ModuleKind::Unambiguous, variant: LanguageVariant::Standard, } } @@ -159,6 +159,10 @@ impl SourceType { self.module_kind == ModuleKind::Module } + pub fn is_unambiguous(self) -> bool { + self.module_kind == ModuleKind::Unambiguous + } + pub fn module_kind(self) -> ModuleKind { self.module_kind } @@ -204,6 +208,14 @@ impl SourceType { self } + #[must_use] + pub const fn with_unambiguous(mut self, yes: bool) -> Self { + if yes { + self.module_kind = ModuleKind::Unambiguous; + } + self + } + #[must_use] pub const fn with_typescript(mut self, yes: bool) -> Self { if yes { @@ -290,7 +302,8 @@ impl SourceType { })?; let (language, module_kind) = match extension { - "js" | "mjs" | "jsx" => (Language::JavaScript, ModuleKind::Module), + "js" => (Language::JavaScript, ModuleKind::Unambiguous), + "mjs" | "jsx" => (Language::JavaScript, ModuleKind::Module), "cjs" => (Language::JavaScript, ModuleKind::Script), "ts" if file_name.ends_with(".d.ts") => { 
(Language::TypeScriptDefinition, ModuleKind::Module) @@ -417,15 +430,15 @@ mod tests { assert!(!ty.is_typescript(), "{ty:?}"); } - assert_eq!(SourceType::js().with_jsx(true).with_module(true), js); + assert_eq!(SourceType::js().with_jsx(true).with_unambiguous(true), js); assert_eq!(SourceType::jsx().with_module(true), jsx); - assert!(js.is_module()); + assert!(js.is_unambiguous()); assert!(mjs.is_module()); assert!(cjs.is_script()); assert!(jsx.is_module()); - assert!(js.is_strict()); + assert!(!js.is_strict()); assert!(mjs.is_strict()); assert!(!cjs.is_strict()); assert!(jsx.is_strict()); diff --git a/crates/oxc_span/src/source_type/types.rs b/crates/oxc_span/src/source_type/types.rs index 1560c888d0044..3bfd8299df9d2 100644 --- a/crates/oxc_span/src/source_type/types.rs +++ b/crates/oxc_span/src/source_type/types.rs @@ -43,6 +43,14 @@ pub enum ModuleKind { Script = 0, /// ES6 Module Module = 1, + /// Consider the file a "module" if ESM syntax is present, or else consider it a "script". + /// + /// ESM syntax includes `import` statement, `export` statement and `import.meta`. + /// + /// Note: Dynamic import expression is not ESM syntax. 
+ /// + /// See <https://babel.dev/docs/options#misc-options> + Unambiguous = 2, } /// JSX for JavaScript and TypeScript diff --git a/tasks/coverage/misc/fail/oxc.js b/tasks/coverage/misc/fail/oxc.js index 510f490a096dd..ce5993196ebb6 100644 --- a/tasks/coverage/misc/fail/oxc.js +++ b/tasks/coverage/misc/fail/oxc.js @@ -1,2 +1,4 @@ +'use strict'; + let.a = 1; let()[a] = 1; diff --git a/tasks/coverage/parser_misc.snap b/tasks/coverage/parser_misc.snap index 5365b6458234c..90918b8f01e4e 100644 --- a/tasks/coverage/parser_misc.snap +++ b/tasks/coverage/parser_misc.snap @@ -245,15 +245,16 @@ Negative Passed: 17/17 (100.00%) ╰──── × The keyword 'let' is reserved - ╭─[misc/fail/oxc.js:1:1] - 1 │ let.a = 1; + ╭─[misc/fail/oxc.js:3:1] + 2 │ + 3 │ let.a = 1; · ─── - 2 │ let()[a] = 1; + 4 │ let()[a] = 1; ╰──── × The keyword 'let' is reserved - ╭─[misc/fail/oxc.js:2:1] - 1 │ let.a = 1; - 2 │ let()[a] = 1; + ╭─[misc/fail/oxc.js:4:1] + 3 │ let.a = 1; + 4 │ let()[a] = 1; · ─── ╰────