diff --git a/Cargo.lock b/Cargo.lock index 3aedb86..54f2642 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,6 +29,21 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.13" @@ -260,6 +275,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "1.0.0-alpha.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f" +dependencies = [ + "hashbrown 0.14.5", + "regex-automata", + "serde", + "spin", + "stacker", + "unicode-ident", +] + [[package]] name = "clap" version = "4.5.1" @@ -470,6 +499,16 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "heck" version = "0.4.1" @@ -511,6 +550,7 @@ name = "juice-core" version = "0.1.0" dependencies = [ "ariadne", + "chumsky", "in_definite", ] @@ -535,12 +575,14 @@ name = "juice-frontend" version = "0.1.0" dependencies = [ "ariadne", + "chumsky", "derive-where", "derive_more", "juice-core", "juice-macros", "lasso", "num-bigint", + "thousands", ] [[package]] @@ -562,7 +604,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" dependencies = [ - "hashbrown", + "hashbrown 0.13.2", ] [[package]] @@ -736,6 +778,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.35" @@ -754,6 +805,23 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -794,6 +862,26 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +[[package]] +name = "serde" +version = "1.0.200" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc6f9cc94d67c0e21aaf7eda3a010fd3af78ebf6e096aa6e2e13c79749cce4f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.200" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "856f046b9400cee3c8c94ed572ecdb752444c24528c035cd35882aad6f492bcb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -828,6 +916,25 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "strsim" version = "0.11.0" @@ -868,6 +975,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "thousands" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + [[package]] name = "tokio" version = "1.36.0" @@ -963,6 +1076,28 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 63cd571..49924a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ rust-version = "1.79.0" [workspace.dependencies] ariadne = "0.4.1" +chumsky = { version = "1.0.0-alpha.7", features = ["nightly", "sync"] } derive_more = "0.99.17" juice-core = { version = "0.1.0", path = "core" } juice-driver = { version = "0.1.0", path = "driver" } diff --git a/core/Cargo.toml b/core/Cargo.toml index d07c001..a2c481c 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -8,4 +8,5 @@ rust-version = { workspace = true } [dependencies] ariadne = { workspace = true } +chumsky = { workspace = true } in_definite = "1.0.0" diff --git a/core/src/diag.rs b/core/src/diag.rs index 9bb0db8..6fe4b15 100644 --- a/core/src/diag.rs +++ b/core/src/diag.rs @@ -2,7 +2,7 @@ use std::{ borrow::Cow, fmt::{Display, Formatter, Result as FmtResult}, marker::PhantomData, - sync::Arc, + sync::{Arc, Mutex, PoisonError}, }; use ariadne::{Color, Fmt as _, ReportKind}; @@ -11,6 +11,37 @@ mod private { pub trait Sealed {} } +#[derive(Clone, Default)] +pub struct ColorGenerator { + colors: Arc)>>, + current: usize, +} + +impl ColorGenerator { + pub fn new() -> Self { + Self::default() + } + + pub fn from_generator(generator: ariadne::ColorGenerator) -> Self { + Self { + colors: Arc::new(Mutex::new((generator, Vec::new()))), + current: 0, + } + } + + pub fn next(&mut self) -> Color { + let mut colors = self.colors.lock().unwrap_or_else(PoisonError::into_inner); + while self.current >= colors.1.len() { + let color = colors.0.next(); + colors.1.push(color); + } + + let color = colors.1[self.current]; + self.current += 1; + color + } +} + pub trait ColorExt { fn error_color() -> Self; fn warning_color() -> Self; diff --git a/core/src/lib.rs b/core/src/lib.rs index 23b27a8..d5eb336 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -3,6 +3,7 @@ mod char_ext; pub mod diag; mod option_ext; +pub mod parser_ext; mod peekable_chars; use std::ops::{ControlFlow, FromResidual, Try}; diff --git a/core/src/parser_ext.rs b/core/src/parser_ext.rs new file mode 100644 index 0000000..5e9e2bb --- /dev/null +++ b/core/src/parser_ext.rs @@ -0,0 +1,41 @@ +use chumsky::{ + combinator::{FoldlWith, FoldrWith, MapWith}, + extra::ParserExtra, + input::{Input, MapExtra}, + IterParser, Parser, +}; + +pub trait ParserExt<'a, I: Input<'a>, O, E: ParserExtra<'a, I>>: Parser<'a, I, O, E> + Sized { + fn foldl_with_span, OB>( + self, + other: B, + f: impl Fn(O, OB, I::Span) -> O + Clone, + ) -> FoldlWith) -> O + Clone, Self, B, OB, E> { + self.foldl_with(other, move |o, ob, extra| f(o, ob, extra.span())) + } + + fn map_with_span( + self, + f: impl Fn(O, I::Span) -> U + Clone, + ) -> MapWith) -> U + Clone> { + self.map_with(move |o, extra| f(o, extra.span())) + } + + fn with_span(self) -> MapWith) -> (O, I::Span) + Clone> { + self.map_with(move |o, extra| (o, extra.span())) + } +} + +impl<'a, I: Input<'a>, O, E: ParserExtra<'a, I>, P: Parser<'a, I, O, E>> ParserExt<'a, I, O, E> for P {} + +pub trait IterParserExt<'a, I: Input<'a>, O, E: ParserExtra<'a, I>>: IterParser<'a, I, O, E> + Sized { + fn foldr_with_span, OA>( + self, + other: B, + f: impl Fn(O, OA, I::Span) -> OA + Clone, + ) -> FoldrWith) -> OA + Clone, Self, B, O, E> { + self.foldr_with(other, move |o, oa, extra| f(o, oa, extra.span())) + } +} + +impl<'a, I: Input<'a>, O, E: ParserExtra<'a, I>, P: IterParser<'a, I, O, E>> IterParserExt<'a, I, O, E> for P {} diff --git a/frontend/Cargo.toml b/frontend/Cargo.toml index 95262d6..ae071b6 100644 --- a/frontend/Cargo.toml +++ b/frontend/Cargo.toml @@ -7,9 +7,11 @@ edition = "2021" [dependencies] ariadne = { workspace = true } +chumsky = { workspace = true } derive-where = "1.2.7" derive_more = { workspace = true } juice-core = { workspace = true } juice-macros = { workspace = true } lasso = "0.7.2" num-bigint = "0.4.4" +thousands = "0.2.0" diff --git a/frontend/src/ast/expr.rs b/frontend/src/ast/expr.rs new file mode 100644 index 0000000..08b1578 --- /dev/null +++ b/frontend/src/ast/expr.rs @@ -0,0 +1,371 @@ +use std::{ + fmt::{Display, Formatter, Result as FmtResult}, + sync::Arc, +}; + +use ariadne::{Color, Fmt as _}; +use derive_where::derive_where; +use juice_core::diag::{ColorExt as _, ColorGenerator}; +use thousands::{digits::ASCII_HEXADECIMAL, Separable as _, SeparatorPolicy}; + +use crate::{source_loc::SourceRange, source_manager::SourceManager}; + +const UNDERSCORE_HEX_SEPARATOR: SeparatorPolicy = SeparatorPolicy { + separator: "_", + groups: &[4], + digits: ASCII_HEXADECIMAL, +}; + +#[derive_where(Debug, Clone)] +pub struct BinaryOperatorSequenceExpr<'src, M: 'src + SourceManager> { + pub first: Box>, + pub rest: Vec<(SourceRange<'src, M>, Expr<'src, M>)>, +} + +impl<'src, M: 'src + SourceManager> BinaryOperatorSequenceExpr<'src, M> { + pub fn new(first: Box>) -> Self { + Self { + first, + rest: Vec::new(), + } + } + + pub fn push(&mut self, op_range: SourceRange<'src, M>, expr: Expr<'src, M>) { + self.rest.push((op_range, expr)); + } + + fn display(&self, f: &mut Formatter<'_>, indentation: usize, mut colors: ColorGenerator) -> FmtResult { + let color = colors.next(); + let indent_str = " ".repeat(indentation * 4); + + let part_color = colors.next(); + + write!( + f, + "{0}\n{1} {2} [\n{1} {3}\n{1} {4} ", + "BinaryOperatorSequenceExpr(".fg(color), + indent_str, + "parts:".fg(color), + "ExprPart(".fg(part_color), + "expr:".fg(part_color), + )?; + + self.first.kind.display(f, indentation + 2, colors.clone())?; + + writeln!(f, "{} {},", indent_str, ")".fg(part_color))?; + + for (op_range, expr) in &self.rest { + writeln!( + f, + "{} {} {:?}{},", + indent_str, + "OperatorPart(operator:".fg(part_color), + op_range.get_str(), + ")".fg(part_color) + )?; + + write!( + f, + "{0} {1}\n{0} {2} ", + indent_str, + "ExprPart(".fg(part_color), + "expr:".fg(part_color) + )?; + + expr.kind.display(f, indentation + 2, colors.clone())?; + + writeln!(f, "{} {},", indent_str, ")".fg(part_color))?; + } + + writeln!(f, "{0} ]\n{0}{1}", indent_str, ")".fg(color)) + } +} + +#[derive_where(Debug, Clone)] +pub struct BinaryOperatorExpr<'src, M: 'src + SourceManager> { + pub lhs: Box>, + pub op_range: SourceRange<'src, M>, + pub rhs: Box>, +} + +impl<'src, M: 'src + SourceManager> BinaryOperatorExpr<'src, M> { + pub fn new(lhs: Expr<'src, M>, op_range: SourceRange<'src, M>, rhs: Expr<'src, M>) -> Self { + Self { + lhs: Box::new(lhs), + op_range, + rhs: Box::new(rhs), + } + } + + fn display(&self, f: &mut Formatter<'_>, indentation: usize, mut colors: ColorGenerator) -> FmtResult { + let color = colors.next(); + let indent_str = " ".repeat(indentation * 4); + + write!( + f, + "{0}\n{1} {2} {3:?}\n{1} {4} ", + "BinaryOperatorExpr(".fg(color), + indent_str, + "operator:".fg(color), + self.op_range.get_str(), + "lhs:".fg(color), + )?; + + self.lhs.kind.display(f, indentation + 1, colors.clone())?; + + write!(f, "{} {} ", indent_str, "rhs:".fg(color))?; + + self.rhs.kind.display(f, indentation + 1, colors)?; + + writeln!(f, "{}{}", indent_str, ")".fg(color)) + } +} + +#[derive_where(Debug, Clone)] +pub struct UnaryOperatorExpr<'src, M: 'src + SourceManager> { + pub operand: Box>, + pub op_range: SourceRange<'src, M>, + pub is_prefix: bool, +} + +impl<'src, M: 'src + SourceManager> UnaryOperatorExpr<'src, M> { + pub fn new(operand: Expr<'src, M>, op_range: SourceRange<'src, M>, is_prefix: bool) -> Self { + Self { + operand: Box::new(operand), + op_range, + is_prefix, + } + } + + fn display(&self, f: &mut Formatter<'_>, indentation: usize, mut colors: ColorGenerator) -> FmtResult { + let color = colors.next(); + let indent_str = " ".repeat(indentation * 4); + + let name = if self.is_prefix { "Prefix" } else { "Postfix" }; + + write!( + f, + "{0}{1}\n{2} {3} {4:?}\n{2} {5} ", + name.fg(color), + "OperatorExpr(".fg(color), + indent_str, + "operator:".fg(color), + self.op_range.get_str(), + "operand:".fg(color), + )?; + + self.operand.kind.display(f, indentation + 1, colors)?; + + writeln!(f, "{}{}", indent_str, ")".fg(color)) + } +} + +#[derive_where(Debug, Clone)] +pub struct BorrowExpr<'src, M: 'src + SourceManager> { + pub expr: Box>, + pub is_mutable: bool, +} + +impl<'src, M: 'src + SourceManager> BorrowExpr<'src, M> { + pub fn new(expr: Expr<'src, M>, is_mutable: bool) -> Self { + Self { + expr: Box::new(expr), + is_mutable, + } + } + + fn display(&self, f: &mut Formatter<'_>, indentation: usize, mut colors: ColorGenerator) -> FmtResult { + let color = colors.next(); + let indent_str = " ".repeat(indentation * 4); + + write!( + f, + "{0}\n{1} {2} {3}\n{1} {4} ", + "BorrowExpr(".fg(color), + indent_str, + "is_mutable:".fg(color), + self.is_mutable, + "expr:".fg(color), + )?; + + self.expr.kind.display(f, indentation + 1, colors)?; + + writeln!(f, "{}{}", indent_str, ")".fg(color)) + } +} + +#[derive(Debug, Clone)] +pub enum IntLiteralExpr { + Int(u64), + BigInt(Arc<[u64]>), +} + +#[derive_where(Debug, Clone)] +pub enum InterpolationExprPart<'src, M: 'src + SourceManager> { + String(Arc), + Interpolation(Expr<'src, M>), +} + +#[derive_where(Debug, Clone)] +pub enum LiteralExpr<'src, M: 'src + SourceManager> { + Bool(bool), + Int(IntLiteralExpr), + Float(f64), + Char(char), + String(Arc), + StringInterpolation(Arc<[InterpolationExprPart<'src, M>]>), +} + +impl LiteralExpr<'_, M> { + fn display(&self, f: &mut Formatter<'_>, indentation: usize, mut colors: ColorGenerator) -> FmtResult { + let color = colors.next(); + let indent_str = " ".repeat(indentation * 4); + + match self { + Self::Bool(value) => writeln!(f, "{} {}{}", "BoolExpr(value:".fg(color), value, ")".fg(color)), + Self::Int(IntLiteralExpr::Int(value)) => { + writeln!(f, "{} {}{}", "IntExpr(value:".fg(color), value, ")".fg(color)) + } + Self::Int(IntLiteralExpr::BigInt(value)) => { + let (last, rest) = value.split_last().unwrap(); + + write!( + f, + "{}\n{} {} 0x{}_", + "IntExpr(".fg(color), + indent_str, + "value:".fg(color), + last.separate_by_policy(UNDERSCORE_HEX_SEPARATOR) + )?; + + for part in rest.iter().rev() { + write!( + f, + "{}", + format!("{:016x}", part).separate_by_policy(UNDERSCORE_HEX_SEPARATOR) + )?; + } + + writeln!(f, "{}{}", indent_str, ")".fg(color)) + } + Self::Float(value) => writeln!(f, "{} {}{}", "FloatExpr(value:".fg(color), value, ")".fg(color)), + Self::Char(value) => writeln!(f, "{} '{}'{}", "CharExpr(value:".fg(color), value, ")".fg(color)), + Self::String(value) => writeln!(f, "{} {:?}{}", "StringExpr(value:".fg(color), value, ")".fg(color)), + Self::StringInterpolation(parts) => { + writeln!( + f, + "{}\n{} {} [", + "StringInterpolationExpr(".fg(color), + indent_str, + "parts:".fg(color) + )?; + + let part_color = colors.next(); + + for part in parts.as_ref() { + match part { + InterpolationExprPart::String(s) => { + writeln!( + f, + "{} {} {:?}{},", + indent_str, + "StringPart(value:".fg(part_color), + s, + ")".fg(part_color) + )?; + } + InterpolationExprPart::Interpolation(expr) => { + write!( + f, + "{0} {1}\n{0} {2} ", + indent_str, + "InterpolationPart(".fg(part_color), + "expr:".fg(part_color) + )?; + expr.kind.display(f, indentation + 2, colors.clone())?; + writeln!(f, "{} {},", indent_str, ")".fg(part_color))?; + } + } + } + + writeln!(f, "{0} ]\n{0}{1}", indent_str, ")".fg(color)) + } + } + } +} + +#[derive_where(Debug, Clone)] +pub enum ExprKind<'src, M: 'src + SourceManager> { + BinaryOperatorSequence(BinaryOperatorSequenceExpr<'src, M>), + BinaryOperator(BinaryOperatorExpr<'src, M>), + UnaryOperator(UnaryOperatorExpr<'src, M>), + Borrow(BorrowExpr<'src, M>), + Literal(LiteralExpr<'src, M>), + Identifier(SourceRange<'src, M>), + Grouping(Box>), + Error, +} + +impl ExprKind<'_, M> { + fn display(&self, f: &mut Formatter<'_>, indentation: usize, mut colors: ColorGenerator) -> FmtResult { + match self { + Self::BinaryOperatorSequence(expr) => expr.display(f, indentation, colors), + Self::BinaryOperator(expr) => expr.display(f, indentation, colors), + Self::UnaryOperator(expr) => expr.display(f, indentation, colors), + Self::Borrow(expr) => expr.display(f, indentation, colors), + Self::Literal(expr) => expr.display(f, indentation, colors), + Self::Identifier(range) => { + let color = colors.next(); + writeln!( + f, + "{} {:?}{}", + "IdentifierExpr(ident:".fg(color), + range.get_str(), + ")".fg(color) + ) + } + Self::Grouping(expr) => expr.kind.display(f, indentation, colors), + Self::Error => writeln!(f, "{}", "ErrorExpr()".fg(Color::error_color())), + } + } +} + +#[derive_where(Debug, Clone)] +pub struct Expr<'src, M: 'src + SourceManager> { + pub kind: ExprKind<'src, M>, + pub source_range: SourceRange<'src, M>, +} + +impl<'src, M: 'src + SourceManager> Expr<'src, M> { + pub fn new(kind: ExprKind<'src, M>, source_range: SourceRange<'src, M>) -> Self { + Self { kind, source_range } + } + + pub fn with_binary_operator( + self, + (op_range, rhs): (SourceRange<'src, M>, Expr<'src, M>), + source_range: SourceRange<'src, M>, + ) -> Self { + let kind = match self.kind { + ExprKind::BinaryOperatorSequence(mut seq) => { + seq.push(op_range, rhs); + ExprKind::BinaryOperatorSequence(seq) + } + ExprKind::BinaryOperator(lhs) => { + let mut seq = BinaryOperatorSequenceExpr::new(lhs.lhs); + seq.push(lhs.op_range, *lhs.rhs); + seq.push(op_range, rhs); + ExprKind::BinaryOperatorSequence(seq) + } + _ => ExprKind::BinaryOperator(BinaryOperatorExpr::new(self, op_range, rhs)), + }; + + Self::new(kind, source_range) + } +} + +impl Display for Expr<'_, M> { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + self.kind.display(f, 0, ColorGenerator::default()) + } +} diff --git a/frontend/src/ast/mod.rs b/frontend/src/ast/mod.rs new file mode 100644 index 0000000..016a1ff --- /dev/null +++ b/frontend/src/ast/mod.rs @@ -0,0 +1 @@ +pub mod expr; diff --git a/frontend/src/diag/consumer.rs b/frontend/src/diag/consumer.rs index db78d67..734b948 100644 --- a/frontend/src/diag/consumer.rs +++ b/frontend/src/diag/consumer.rs @@ -1,7 +1,7 @@ use std::ops::Try; -use ariadne::{Color, ColorGenerator, Config, IndexType, Label, Report}; -use juice_core::diag::ColorExt as _; +use ariadne::{Color, Config, IndexType, Label, Report}; +use juice_core::diag::{ColorExt as _, ColorGenerator}; use super::{DiagnosticEngine, DiagnosticReport}; use crate::{ @@ -10,7 +10,7 @@ use crate::{ Result, }; -pub trait Consumer<'src, M: SourceManager>: Sized { +pub trait Consumer<'src, M: 'src + SourceManager>: Sized { type Output: Try; fn consume<'diag>( @@ -24,7 +24,7 @@ pub trait Consumer<'src, M: SourceManager>: Sized { pub struct DefaultConsumer; impl DefaultConsumer { - fn build_ariadne_report<'src, M: AriadneSourceManager>( + fn build_ariadne_report<'src, M: 'src + AriadneSourceManager>( &self, report: DiagnosticReport<'src, '_, M, Self>, ) -> Report<'src, SourceRange<'src, M>> { @@ -63,7 +63,7 @@ impl DefaultConsumer { } } -impl<'src, M: AriadneSourceManager> Consumer<'src, M> for DefaultConsumer { +impl<'src, M: 'src + AriadneSourceManager> Consumer<'src, M> for DefaultConsumer { type Output = Result<()>; fn consume<'diag>( diff --git a/frontend/src/diag/engine.rs b/frontend/src/diag/engine.rs index 56956e3..67888e7 100644 --- a/frontend/src/diag/engine.rs +++ b/frontend/src/diag/engine.rs @@ -6,13 +6,13 @@ use crate::{ source_manager::{DefaultSourceManager, SourceManager}, }; -pub struct Engine<'src, M, C> { +pub struct Engine<'src, M: 'src, C> { source_manager: &'src M, consumer: C, had_error: AtomicBool, } -impl<'src, M: SourceManager, C: DiagnosticConsumer<'src, M>> Engine<'src, M, C> { +impl<'src, M: 'src + SourceManager, C: DiagnosticConsumer<'src, M>> Engine<'src, M, C> { pub fn new_with_consumer(source_manager: &'src M, consumer: C) -> Self { Self { source_manager, @@ -60,7 +60,7 @@ impl<'src> Engine<'src, DefaultSourceManager, DefaultDiagnosticConsumer> { } #[must_use = "report does nothing unless diagnosed"] -pub struct Report<'src, 'diag, M: SourceManager, C> { +pub struct Report<'src, 'diag, M: 'src + SourceManager, C> { pub source_loc: SourceLoc<'src, M>, pub diagnostic: Diagnostic<'src>, pub context_notes: Vec<(SourceRange<'src, M>, DiagnosticContextNote<'src>)>, @@ -68,7 +68,7 @@ pub struct Report<'src, 'diag, M: SourceManager, C> { engine: &'diag Engine<'src, M, C>, } -impl<'src, 'diag, M: SourceManager, C: DiagnosticConsumer<'src, M>> Report<'src, 'diag, M, C> +impl<'src, 'diag, M: 'src + SourceManager, C: DiagnosticConsumer<'src, M>> Report<'src, 'diag, M, C> where 'src: 'diag, { diff --git a/frontend/src/lib.rs b/frontend/src/lib.rs index f191dde..baf2ff8 100644 --- a/frontend/src/lib.rs +++ b/frontend/src/lib.rs @@ -1,5 +1,12 @@ -#![feature(assert_matches, iter_advance_by, macro_metavar_expr, try_trait_v2)] +#![feature( + assert_matches, + iter_advance_by, + macro_metavar_expr, + try_trait_v2, + type_alias_impl_trait +)] +mod ast; mod diag; mod error; mod parser; diff --git a/frontend/src/parser/lexer/literal.rs b/frontend/src/parser/lexer/literal.rs index e253288..8959ebd 100644 --- a/frontend/src/parser/lexer/literal.rs +++ b/frontend/src/parser/lexer/literal.rs @@ -67,26 +67,43 @@ impl Radix { } #[derive_where(Debug, Clone)] -pub enum InterpolationPart<'src, M: SourceManager> { +pub enum InterpolationPart<'src, M: 'src + SourceManager> { String(Arc), - Interpolation(Vec>), + Interpolation(Arc<[Token<'src, M>]>), } #[derive_where(Debug, Clone)] -pub enum LiteralKind<'src, M: SourceManager> { +pub enum LiteralKind<'src, M: 'src + SourceManager> { Int(u64, Radix), - BigInt(Vec, Radix), + BigInt(Arc<[u64]>, Radix), Float(f64), Char(char), String(Arc), - StringInterpolation(Vec>), + StringInterpolation(Arc<[InterpolationPart<'src, M>]>), InvalidInt, InvalidFloat, InvalidChar, InvalidString, } -impl<'src, M: SourceManager> LiteralKind<'src, M> { +impl PartialEq for LiteralKind<'_, M> { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Int(a, ra), Self::Int(b, rb)) => a == b && ra == rb, + (Self::BigInt(a, ra), Self::BigInt(b, rb)) => a == b && ra == rb, + (Self::Float(a), Self::Float(b)) => a == b, + (Self::Char(a), Self::Char(b)) => a == b, + (Self::String(a), Self::String(b)) => a == b, + (Self::InvalidInt, Self::InvalidInt) + | (Self::InvalidFloat, Self::InvalidFloat) + | (Self::InvalidChar, Self::InvalidChar) + | (Self::InvalidString, Self::InvalidString) => true, + _ => false, + } + } +} + +impl<'src, M: 'src + SourceManager> LiteralKind<'src, M> { pub fn lex_number(lexer: &mut Lexer<'src, M>, start: char) -> Self { let (radix, mut is_first) = (start == '0') .then(|| lexer.match_char_map(Radix::from_prefix)) @@ -239,7 +256,7 @@ impl<'src, M: SourceManager> LiteralKind<'src, M> { match words.len() { 0 => Self::Int(0, radix), 1 => Self::Int(words[0], radix), - _ => Self::BigInt(words, radix), + _ => Self::BigInt(words.into(), radix), } } @@ -436,12 +453,12 @@ impl<'src, M: SourceManager> LiteralKind<'src, M> { recovering_character: bool, literal_name: &'static str, ) -> Self { - enum Part<'src, M: SourceManager> { + enum Part<'src, M: 'src + SourceManager> { String { content: String, newline_locations: Vec<(usize, SourceLoc<'src, M>)>, }, - Interpolation(Vec>), + Interpolation(Arc<[Token<'src, M>]>), } if is_multiline && lexer.in_interpolation { @@ -916,12 +933,12 @@ impl<'src, M: SourceManager> LiteralKind<'src, M> { #[cfg(test)] mod tests { - use super::{InterpolationPart, LiteralKind::*, Radix::*}; + use super::{InterpolationPart, Radix::*}; use crate::{ diag::{Diagnostic, DiagnosticContextNote, DiagnosticNote}, parser::lexer::{ test::{assert_all_reports, assert_all_tokens, run_lexer}, - token_kind::{PunctuationKind::*, TokenKind::*}, + Tok, }, source_manager::test::SourceManager, }; @@ -943,31 +960,31 @@ mod tests { assert_all_tokens!( tokens; - Punctuation(Newline), 0; - Literal(Int(0, Decimal)), 13; - Literal(Int(0, Hexadecimal)), 15..18; - Literal(Int(0, Binary)), 19..22; - Literal(Int(0, Octal)), 23..26; - Punctuation(Newline), 26; - Literal(Int(10, Decimal)), 39..41; - Literal(Int(16, Hexadecimal)), 42..46; - Literal(Int(2, Binary)), 47..51; - Literal(Int(8, Octal)), 52..56; - Punctuation(Newline), 56; - Literal(Int(1_000_000, Decimal)), 69..78; - Punctuation(Newline), 78; - Literal(BigInt(v, Hexadecimal)) if v.len() == 2 && v[0] == 0 && v[1] == 1, 91..114; - Punctuation(Newline), 114; - Literal(Int(0, Decimal)), 127; - Punctuation(Dot), 128; - Identifier, 129, "foo"; - Punctuation(Newline), 132; - Identifier, 145, "foo"; - Punctuation(Dot), 148; - Literal(Int(0, Decimal)), 149; - Punctuation(Dot), 150; - Literal(Int(0, Decimal)), 151; - Punctuation(Newline), 152; + Tok![Newline], 0; + Tok![Int(0, Decimal)], 13; + Tok![Int(0, Hexadecimal)], 15..18; + Tok![Int(0, Binary)], 19..22; + Tok![Int(0, Octal)], 23..26; + Tok![Newline], 26; + Tok![Int(10, Decimal)], 39..41; + Tok![Int(16, Hexadecimal)], 42..46; + Tok![Int(2, Binary)], 47..51; + Tok![Int(8, Octal)], 52..56; + Tok![Newline], 56; + Tok![Int(1_000_000, Decimal)], 69..78; + Tok![Newline], 78; + Tok![BigInt(v, Hexadecimal)] if v.len() == 2 && v[0] == 0 && v[1] == 1, 91..114; + Tok![Newline], 114; + Tok![Int(0, Decimal)], 127; + Tok![.], 128; + Tok![Ident], 129, "foo"; + Tok![Newline], 132; + Tok![Ident], 145, "foo"; + Tok![.], 148; + Tok![Int(0, Decimal)], 149; + Tok![.], 150; + Tok![Int(0, Decimal)], 151; + Tok![Newline], 152; ); } @@ -1010,24 +1027,24 @@ mod tests { assert_all_tokens!( tokens; - Punctuation(Newline), 0; - Literal(Float(0.0)), 13..16; - Punctuation(Newline), 16; - Literal(Float(1.0)), 29..32; - Literal(Float(1.0)), 33..38; - Literal(Float(10.0)), 39..45; - Literal(Float(0.1)), 46..52; - Literal(Float(1.0)), 53..56; - Literal(Float(10.0)), 57..61; - Literal(Float(0.1)), 62..66; - Punctuation(Newline), 66; - Literal(Float(1_000.0)), 79..86; - Literal(Float(1.000_1)), 87..94; - Punctuation(Newline), 94; - Literal(Float(0.0)), 107..110; - Punctuation(Dot), 110; - Identifier, 111, "foo"; - Punctuation(Newline), 114; + Tok![Newline], 0; + Tok![Float(0.0)], 13..16; + Tok![Newline], 16; + Tok![Float(1.0)], 29..32; + Tok![Float(1.0)], 33..38; + Tok![Float(10.0)], 39..45; + Tok![Float(0.1)], 46..52; + Tok![Float(1.0)], 53..56; + Tok![Float(10.0)], 57..61; + Tok![Float(0.1)], 62..66; + Tok![Newline], 66; + Tok![Float(1_000.0)], 79..86; + Tok![Float(1.000_1)], 87..94; + Tok![Newline], 94; + Tok![Float(0.0)], 107..110; + Tok![.], 110; + Tok![Ident], 111, "foo"; + Tok![Newline], 114; ); } @@ -1059,20 +1076,20 @@ mod tests { assert_all_tokens!( tokens; - Literal(Char('a')), 0..3; - Literal(Char('0')), 4..7; - Literal(Char(' ')), 8..11; - Literal(Char('"')), 12..15; - Literal(Char('\0')), 16..20; - Literal(Char('\\')), 21..25; - Literal(Char('\t')), 26..30; - Literal(Char('\n')), 31..35; - Literal(Char('\r')), 36..40; - Literal(Char('"')), 41..45; - Literal(Char('\'')), 46..50; - Literal(Char(' ')), 51..59; - Literal(Char('\u{1F600}')), 60..71; - Literal(Char('\u{1F600}')), 72..78; + Tok![Char('a')], 0..3; + Tok![Char('0')], 4..7; + Tok![Char(' ')], 8..11; + Tok![Char('"')], 12..15; + Tok![Char('\0')], 16..20; + Tok![Char('\\')], 21..25; + Tok![Char('\t')], 26..30; + Tok![Char('\n')], 31..35; + Tok![Char('\r')], 36..40; + Tok![Char('"')], 41..45; + Tok![Char('\'')], 46..50; + Tok![Char(' ')], 51..59; + Tok![Char('\u{1F600}')], 60..71; + Tok![Char('\u{1F600}')], 72..78; ); } @@ -1162,28 +1179,28 @@ mod tests { assert_all_tokens!( tokens; - Punctuation(Newline), 0; - Literal(String(s)) if s.as_ref() == "", 13..15; - Literal(String(s)) if s.as_ref() == "hello", 16..23; - Literal(String(s)) if s.as_ref() == "'\0\\\t\n\r\"\'$ ", 24..49; - Punctuation(Newline), 49; - Literal(String(s)) if s.as_ref() == "", 62..66; - Literal(String(s)) if s.as_ref() == "hello", 67..76; - Literal(String(s)) if s.as_ref() == "\\n\n", 77..86; - Literal(String(s)) if s.as_ref() == "\\n\\#n\n", 87..102; - Punctuation(Newline), 102; - Literal(String(s)) if s.as_ref() == "", 115..121; - Literal(String(s)) if s.as_ref() == "hello", 122..133; - Literal(String(s)) if s.as_ref() == "\n", 134..142; - Punctuation(Newline), 142; - Literal(String(s)) if s.as_ref() == "hello", 155..192; - Punctuation(Newline), 192; - Literal(String(s)) if s.as_ref() == "hello, world", 205..263; - Punctuation(Newline), 263; - Literal(String(s)) if s.as_ref() == "hello", 276..317; - Punctuation(Newline), 317; - Literal(String(s)) if s.as_ref() == "hello\\\nworld!", 330..404; - Punctuation(Newline), 404; + Tok![Newline], 0; + Tok![String(s)] if s.as_ref() == "", 13..15; + Tok![String(s)] if s.as_ref() == "hello", 16..23; + Tok![String(s)] if s.as_ref() == "'\0\\\t\n\r\"\'$ ", 24..49; + Tok![Newline], 49; + Tok![String(s)] if s.as_ref() == "", 62..66; + Tok![String(s)] if s.as_ref() == "hello", 67..76; + Tok![String(s)] if s.as_ref() == "\\n\n", 77..86; + Tok![String(s)] if s.as_ref() == "\\n\\#n\n", 87..102; + Tok![Newline], 102; + Tok![String(s)] if s.as_ref() == "", 115..121; + Tok![String(s)] if s.as_ref() == "hello", 122..133; + Tok![String(s)] if s.as_ref() == "\n", 134..142; + Tok![Newline], 142; + Tok![String(s)] if s.as_ref() == "hello", 155..192; + Tok![Newline], 192; + Tok![String(s)] if s.as_ref() == "hello, world", 205..263; + Tok![Newline], 263; + Tok![String(s)] if s.as_ref() == "hello", 276..317; + Tok![Newline], 317; + Tok![String(s)] if s.as_ref() == "hello\\\nworld!", 330..404; + Tok![Newline], 404; ); } @@ -1258,51 +1275,51 @@ mod tests { assert_all_tokens!( tokens; - Punctuation(Newline), 0; - Literal(StringInterpolation(parts)) if matches!( - parts.as_slice(), + Tok![Newline], 0; + Tok![Interpolation(parts)] if matches!( + parts.as_ref(), [ InterpolationPart::String(s), InterpolationPart::Interpolation(inner_tokens), ] if { assert_all_tokens!( inner_tokens; - Identifier, 23, "world"; + Tok![Ident], 23, "world"; ); s.as_ref() == "hello, " } ), 13..30; - Literal(StringInterpolation(parts)) if matches!( - parts.as_slice(), + Tok![Interpolation(parts)] if matches!( + parts.as_ref(), [ InterpolationPart::Interpolation(inner_tokens_1), InterpolationPart::Interpolation(inner_tokens_2), ] if { assert_all_tokens!( inner_tokens_1; - Identifier, 34, "a"; - Operator, 36, "+"; - Identifier, 38, "b"; + Tok![Ident], 34, "a"; + Tok![BinOp], 36, "+"; + Tok![Ident], 38, "b"; ); assert_all_tokens!( inner_tokens_2; - Identifier, 42, "c"; + Tok![Ident], 42, "c"; ); true } ), 31..45; - Punctuation(Newline), 45; - Literal(StringInterpolation(parts)) if matches!( - parts.as_slice(), + Tok![Newline], 45; + Tok![Interpolation(parts)] if matches!( + parts.as_ref(), [InterpolationPart::Interpolation(inner_tokens)] if { assert_all_tokens!( inner_tokens; - Literal(StringInterpolation(parts)) if matches!( - parts.as_slice(), + Tok![Interpolation(parts)] if matches!( + parts.as_ref(), [InterpolationPart::Interpolation(inner_inner_tokens)] if { assert_all_tokens!( inner_inner_tokens; - Identifier, 64, "a"; + Tok![Ident], 64, "a"; ); true } @@ -1311,21 +1328,21 @@ mod tests { true } ), 58..69; - Punctuation(Newline), 69; - Literal(StringInterpolation(parts)) if matches!( - parts.as_slice(), + Tok![Newline], 69; + Tok![Interpolation(parts)] if matches!( + parts.as_ref(), [ InterpolationPart::String(s), InterpolationPart::Interpolation(inner_tokens), ] if { assert_all_tokens!( inner_tokens; - Identifier, 108, "world"; + Tok![Ident], 108, "world"; ); s.as_ref() == "hello$, " } ), 82..130; - Punctuation(Newline), 130; + Tok![Newline], 130; ); } diff --git a/frontend/src/parser/lexer/mod.rs b/frontend/src/parser/lexer/mod.rs index 0e8253e..11bd241 100644 --- a/frontend/src/parser/lexer/mod.rs +++ b/frontend/src/parser/lexer/mod.rs @@ -1,13 +1,14 @@ -mod literal; -mod token; -mod token_kind; +pub mod literal; +pub mod token; +pub mod token_kind; use std::{num::NonZero, ops::Try as _}; use juice_core::{CharExt, OptionExt as _, PeekableChars}; -use self::literal::LiteralKind; +pub(crate) use self::token_kind::Tok; pub use self::{ + literal::LiteralKind, token::Token, token_kind::{KeywordKind, PunctuationKind, TokenKind}, }; @@ -15,18 +16,18 @@ use crate::{ diag::{Diagnostic, DiagnosticConsumer, DiagnosticContextNote, DiagnosticEngine, DiagnosticNote}, source_loc::{SourceLoc, SourceRange}, source_manager::{Source, SourceManager}, - Result, Tok, + Result, }; #[derive(Debug, Clone)] -pub struct Error<'src, M: SourceManager> { +pub struct Error<'src, M: 'src + SourceManager> { source_loc: SourceLoc<'src, M>, diagnostic: Diagnostic<'src>, context_notes: Vec<(SourceRange<'src, M>, DiagnosticContextNote<'src>)>, note: Option>, } -impl<'src, M: SourceManager> Error<'src, M> { +impl<'src, M: 'src + SourceManager> Error<'src, M> { pub fn diagnose>(self, diagnostics: &DiagnosticEngine<'src, M, C>) -> C::Output { let mut report = diagnostics.report(self.source_loc, self.diagnostic); @@ -43,7 +44,7 @@ impl<'src, M: SourceManager> Error<'src, M> { } #[derive(Debug, Clone)] -struct PendingError<'src, M: SourceManager> { +struct PendingError<'src, M: 'src + SourceManager> { source_loc: SourceLoc<'src, M>, diagnostic: Diagnostic<'src>, initial_context_note: DiagnosticContextNote<'src>, @@ -52,7 +53,7 @@ struct PendingError<'src, M: SourceManager> { } #[must_use = "Errors must be recorded to be diagnosed"] -struct ErrorBuilder<'src, 'lex, M: SourceManager> +struct ErrorBuilder<'src, 'lex, M: 'src + SourceManager> where 'src: 'lex, { @@ -64,7 +65,7 @@ where lexer: &'lex mut Lexer<'src, M>, } -impl<'src, 'lex, M: SourceManager> ErrorBuilder<'src, 'lex, M> { +impl<'src, 'lex, M: 'src + SourceManager> ErrorBuilder<'src, 'lex, M> { fn new_with_range( source_range: SourceRange<'src, M>, at_end: bool, @@ -158,13 +159,14 @@ impl<'src, 'lex, M: SourceManager> ErrorBuilder<'src, 'lex, M> { } #[derive(Debug)] -pub struct Lexer<'src, M: SourceManager> { +pub struct Lexer<'src, M: 'src + SourceManager> { source: Source<'src, M>, chars: PeekableChars<'src>, start: usize, current: usize, leading_whitespace_start: usize, last_considered_leading_whitespace: bool, + last_was_borrow: bool, last_was_dot: bool, in_interpolation: bool, brace_depth: isize, @@ -182,6 +184,7 @@ impl<'src, M: SourceManager> Lexer<'src, M> { current: 0, leading_whitespace_start: 0, last_considered_leading_whitespace: true, + last_was_borrow: false, last_was_dot: false, in_interpolation: false, brace_depth: 0, @@ -380,7 +383,11 @@ impl<'src, M: SourceManager> Lexer<'src, M> { if self.peek() == Some('w') && self.peek2().is_none_or(|c| !c.is_identifier_char()) { self.advance(); Tok![&w] - } else if self.peek().is_some_and(CharExt::is_operator) { + } else if self + .chars + .peek_first_after(CharExt::is_operator) + .is_none_or(CharExt::is_trailing_whitespace) + { self.consume_operator(false) } else { Tok![&] @@ -463,6 +470,15 @@ impl<'src, M: SourceManager> Lexer<'src, M> { continue; } } + Some('&') => { + if self + .chars + .peek_first_after(CharExt::is_operator) + .is_some_and(|c| !c.is_trailing_whitespace()) + { + break; + } + } _ => {} } @@ -471,7 +487,28 @@ impl<'src, M: SourceManager> Lexer<'src, M> { } } - Tok![Op] + let has_leading_whitespace = self.last_considered_leading_whitespace + || !self.get_leading_whitespace_range().is_empty() + || self.last_was_borrow; + + let has_trailing_whitespace = self + .peek() + .is_none_or(|c| c.is_trailing_whitespace() || (c == '/' && matches!(self.peek2(), Some('/') | Some('*')))); + + let next_is_dot = self.peek() == Some('.'); + + match (has_leading_whitespace, has_trailing_whitespace) { + (true, false) => Tok![PrefixOp], + (false, true) => Tok![PostfixOp], + (true, true) => Tok![BinOp], + (false, false) => { + if next_is_dot { + Tok![PostfixOp] + } else { + Tok![BinOp] + } + } + } } fn consume_number_literal(&mut self, start: char) -> TokenKind<'src, M> { @@ -537,28 +574,17 @@ impl<'src, M: SourceManager> Lexer<'src, M> { self.errors.push(error); } - let has_leading_whitespace = - self.last_considered_leading_whitespace || !self.get_leading_whitespace_range().is_empty(); - - let has_trailing_whitespace = self - .peek() - .is_none_or(|c| c.is_trailing_whitespace() || (c == '/' && matches!(self.peek2(), Some('/') | Some('*')))); + self.last_considered_leading_whitespace = current_range + .get_str() + .chars() + .last() + .is_some_and(CharExt::is_leading_whitespace); - if let Some(c) = current_range.get_str().chars().last() { - self.last_considered_leading_whitespace = c.is_leading_whitespace(); - } + self.last_was_borrow = matches!(kind, Tok![&] | Tok![&w]); - if matches!(kind, TokenKind::Punctuation(PunctuationKind::Dot)) { - self.last_was_dot = true; - } + self.last_was_dot = matches!(kind, Tok![.]); - Token::new( - kind, - current_range, - self.get_leading_whitespace_range(), - has_leading_whitespace, - has_trailing_whitespace, - ) + Token::new(kind, current_range, self.get_leading_whitespace_range()) } fn error<'lex>( @@ -690,7 +716,7 @@ impl<'src, M: SourceManager> Lexer<'src, M> { } } -impl<'src, M: SourceManager> Iterator for Lexer<'src, M> { +impl<'src, M: 'src + SourceManager> Iterator for Lexer<'src, M> { type Item = Token<'src, M>; fn next(&mut self) -> Option { @@ -976,14 +1002,11 @@ mod tests { use std::assert_matches::assert_matches; use super::{ - test::{assert_all_tokens, run_lexer}, - KeywordKind::*, - PunctuationKind::*, - TokenKind::*, + test::{assert_all_reports, assert_all_tokens, run_lexer}, + Tok, }; use crate::{ diag::{Diagnostic, DiagnosticContextNote, DiagnosticNote}, - parser::lexer::{test::assert_all_reports, Token}, source_manager::test::SourceManager, }; @@ -995,44 +1018,45 @@ mod tests { assert_all_tokens!( tokens; - Keyword(Else), 0..4; - Keyword(If), 5..7; - Keyword(Let), 8..11; - Keyword(Var), 12..15; - Keyword(While), 16..21; - Identifier, 22, "foo"; - Identifier, 26, "letter"; + Tok![else], 0..4; + Tok![if], 5..7; + Tok![let], 8..11; + Tok![var], 12..15; + Tok![while], 16..21; + Tok![Ident], 22, "foo"; + Tok![Ident], 26, "letter"; ); } #[test] fn test_punctuation() { - static SOURCE_MANAGER: SourceManager = SourceManager::new("` ( ) [ ] { } , : ; @ ? . = => -> & &w # \n"); + static SOURCE_MANAGER: SourceManager = SourceManager::new("` ( ) [ ] { } , : ; @ ? . = => -> &x &w # \n"); let tokens = run_lexer(&SOURCE_MANAGER).unwrap(); assert_all_tokens!( tokens; - Punctuation(Backtick), 0; - Punctuation(LeftParen), 2; - Punctuation(RightParen), 4; - Punctuation(LeftBracket), 6; - Punctuation(RightBracket), 8; - Punctuation(LeftBrace), 10; - Punctuation(RightBrace), 12; - Punctuation(Comma), 14; - Punctuation(Colon), 16; - Punctuation(Semicolon), 18; - Punctuation(At), 20; - Punctuation(QuestionMark), 22; - Punctuation(Dot), 24; - Punctuation(Equals), 26; - Punctuation(FatArrow), 28..30; - Punctuation(Arrow), 31..33; - Punctuation(Ampersand), 34; - Punctuation(AmpersandW), 36..38; - Punctuation(NumberSign), 39; - Punctuation(Newline), 41; + Tok![Backtick], 0; + Tok![LeftParen], 2; + Tok![RightParen], 4; + Tok![LeftBracket], 6; + Tok![RightBracket], 8; + Tok![LeftBrace], 10; + Tok![RightBrace], 12; + Tok![,], 14; + Tok![:], 16; + Tok![;], 18; + Tok![@], 20; + Tok![?], 22; + Tok![.], 24; + Tok![=], 26; + Tok![=>], 28..30; + Tok![->], 31..33; + Tok![&], 34; + Tok![Ident], 35, "x"; + Tok![&w], 37..39; + Tok![#], 40; + Tok![Newline], 42; ); } @@ -1044,16 +1068,98 @@ mod tests { assert_all_tokens!( tokens; - Operator, 0, "+"; - Operator, 2, "-"; - Punctuation(Dot), 4; - Operator, 6, ".."; - Operator, 9, "./."; - Operator, 13, "+"; - Punctuation(Dot), 14; - Operator, 16, "^*^"; - Operator, 20, "-"; - Identifier, 21, "hello"; + Tok![BinOp], 0, "+"; + Tok![BinOp], 2, "-"; + Tok![.], 4; + Tok![BinOp], 6, ".."; + Tok![BinOp], 9, "./."; + Tok![PrefixOp], 13, "+"; + Tok![.], 14; + Tok![BinOp], 16, "^*^"; + Tok![PrefixOp], 20, "-"; + Tok![Ident], 21, "hello"; + ); + } + + #[test] + fn test_operator_kind() { + static SOURCE_MANAGER: SourceManager = SourceManager::new("+a-b * c++ +\n-d--.e++"); + + let tokens = run_lexer(&SOURCE_MANAGER).unwrap(); + + assert_all_tokens!( + tokens; + Tok![PrefixOp], 0, "+"; + Tok![Ident], 1, "a"; + Tok![BinOp], 2, "-"; + Tok![Ident], 3, "b"; + Tok![BinOp], 5, "*"; + Tok![Ident], 7, "c"; + Tok![PostfixOp], 8, "++"; + Tok![BinOp], 11, "+"; + Tok![Newline], 12; + Tok![PrefixOp], 13, "-"; + Tok![Ident], 14, "d"; + Tok![PostfixOp], 15, "--"; + Tok![.], 17; + Tok![Ident], 18, "e"; + Tok![PostfixOp], 19, "++"; + ); + } + + #[test] + fn test_borrow() { + static SOURCE_MANAGER: SourceManager = + SourceManager::new("&& &&x &&w &&& &&&x &&&w &&&() &&- &&-x &&w-x &&-& &&-&x &&-&w &-&-&x"); + + let tokens = run_lexer(&SOURCE_MANAGER).unwrap(); + + assert_all_tokens!( + tokens; + Tok![BinOp], 0, "&&"; + Tok![&], 3; + Tok![&], 4; + Tok![Ident], 5, "x"; + Tok![&], 7; + Tok![&w], 8..10; + Tok![BinOp], 11, "&&&"; + Tok![&], 15; + Tok![&], 16; + Tok![&], 17; + Tok![Ident], 18, "x"; + Tok![&], 20; + Tok![&], 21; + Tok![&w], 22..24; + Tok![&], 25; + Tok![&], 26; + Tok![&], 27; + Tok![LeftParen], 28; + Tok![RightParen], 29; + Tok![BinOp], 31, "&&-"; + Tok![&], 35; + Tok![&], 36; + Tok![PrefixOp], 37, "-"; + Tok![Ident], 38, "x"; + Tok![&], 40; + Tok![&w], 41..43; + Tok![PrefixOp], 43, "-"; + Tok![Ident], 44, "x"; + Tok![BinOp], 46, "&&-&"; + Tok![&], 51; + Tok![&], 52; + Tok![PrefixOp], 53, "-"; + Tok![&], 54; + Tok![Ident], 55, "x"; + Tok![&], 57; + Tok![&], 58; + Tok![PrefixOp], 59, "-"; + Tok![&w], 60..62; + Tok![&], 63; + Tok![PrefixOp], 64, "-"; + Tok![&], 65; + Tok![PrefixOp], 66, "-"; + Tok![&], 67; + Tok![Ident], 68, "x"; ); } @@ -1074,46 +1180,23 @@ mod tests { assert_all_tokens!( tokens; - Punctuation(Newline), 0; - Identifier, 13, "a"; - Operator, 15, "+"; - Identifier, 17, "b"; - Punctuation(Newline), 39; - Identifier, 52, "c"; - Identifier, 100, "d"; - Punctuation(Newline), 101; - Identifier, 114, "e"; - Identifier, 153, "f"; - Punctuation(Newline), 154; - Identifier, 167, "g"; - Operator, 169, "+"; - Punctuation(Newline), 217; - Identifier, 230, "h"; - Operator, 232, "-"; - Punctuation(Newline), 283; - ); - } - - #[test] - fn test_whitespace() { - static SOURCE_MANAGER: SourceManager = SourceManager::new("+a-b * c? +\n-d?"); - - let tokens = run_lexer(&SOURCE_MANAGER).unwrap(); - - assert_all_tokens!( - tokens; - Operator, 0, "+" => Token::has_only_leading_whitespace; - Identifier, 1, "a"; - Operator, 2, "-" => Token::has_no_whitespace; - Identifier, 3, "b"; - Operator, 5, "*" => Token::is_surrounded_by_whitespace; - Identifier, 7, "c"; - Punctuation(QuestionMark), 8 => Token::has_only_trailing_whitespace; - Operator, 10, "+" => Token::is_surrounded_by_whitespace; - Punctuation(Newline), 11; - Operator, 12, "-" => Token::has_only_leading_whitespace; - Identifier, 13, "d"; - Punctuation(QuestionMark), 14 => Token::has_only_trailing_whitespace; + Tok![Newline], 0; + Tok![Ident], 13, "a"; + Tok![BinOp], 15, "+"; + Tok![Ident], 17, "b"; + Tok![Newline], 39; + Tok![Ident], 52, "c"; + Tok![Ident], 100, "d"; + Tok![Newline], 101; + Tok![Ident], 114, "e"; + Tok![Ident], 153, "f"; + Tok![Newline], 154; + Tok![Ident], 167, "g"; + Tok![BinOp], 169, "+"; + Tok![Newline], 217; + Tok![Ident], 230, "h"; + Tok![BinOp], 232, "-"; + Tok![Newline], 283; ); } diff --git a/frontend/src/parser/lexer/token.rs b/frontend/src/parser/lexer/token.rs index c74fd96..fc3648d 100644 --- a/frontend/src/parser/lexer/token.rs +++ b/frontend/src/parser/lexer/token.rs @@ -6,46 +6,24 @@ use super::TokenKind; use crate::{source_loc::SourceRange, source_manager::SourceManager}; #[derive_where(Clone)] -pub struct Token<'src, M: SourceManager> { +pub struct Token<'src, M: 'src + SourceManager> { pub kind: TokenKind<'src, M>, pub source_range: SourceRange<'src, M>, pub leading_whitespace_range: SourceRange<'src, M>, - pub has_leading_whitespace: bool, - pub has_trailing_whitespace: bool, } -impl<'src, M: SourceManager> Token<'src, M> { +impl<'src, M: 'src + SourceManager> Token<'src, M> { pub fn new( kind: TokenKind<'src, M>, source_range: SourceRange<'src, M>, leading_whitespace_range: SourceRange<'src, M>, - has_leading_whitespace: bool, - has_trailing_whitespace: bool, ) -> Self { Self { kind, source_range, leading_whitespace_range, - has_leading_whitespace, - has_trailing_whitespace, } } - - pub fn is_surrounded_by_whitespace(&self) -> bool { - self.has_leading_whitespace || self.has_trailing_whitespace - } - - pub fn has_no_whitespace(&self) -> bool { - !self.has_leading_whitespace && !self.has_trailing_whitespace - } - - pub fn has_only_leading_whitespace(&self) -> bool { - self.has_leading_whitespace && !self.has_trailing_whitespace - } - - pub fn has_only_trailing_whitespace(&self) -> bool { - !self.has_leading_whitespace && self.has_trailing_whitespace - } } impl Debug for Token<'_, M> { @@ -53,8 +31,6 @@ impl Debug for Token<'_, M> { f.debug_struct("Token") .field("kind", &self.kind) .field("text", &self.source_range.get_str()) - .field("has_leading_whitespace", &self.has_leading_whitespace) - .field("has_trailing_whitespace", &self.has_trailing_whitespace) .finish() } } diff --git a/frontend/src/parser/lexer/token_kind.rs b/frontend/src/parser/lexer/token_kind.rs index 9e93069..a2bea50 100644 --- a/frontend/src/parser/lexer/token_kind.rs +++ b/frontend/src/parser/lexer/token_kind.rs @@ -8,8 +8,10 @@ string_enum! { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum KeywordKind { Else = "else", + False = "false", If = "if", Let = "let", + True = "true", Var = "var", While = "while", } @@ -41,106 +43,160 @@ string_enum! { } } -#[derive_where(Debug, Clone)] -pub enum TokenKind<'src, M: SourceManager> { +#[derive_where(Debug, Clone, PartialEq)] +pub enum TokenKind<'src, M: 'src + SourceManager> { Keyword(KeywordKind), Punctuation(PunctuationKind), Literal(LiteralKind<'src, M>), Identifier, - Operator, + PrefixOperator, + PostfixOperator, + BinaryOperator, Unknown, } -#[macro_export] macro_rules! keyword_kind { ($kind:ident) => { - $crate::parser::lexer::TokenKind::Keyword($crate::parser::lexer::token_kind::KeywordKind::$kind) + $crate::parser::lexer::TokenKind::Keyword($crate::parser::lexer::KeywordKind::$kind) }; } -#[macro_export] macro_rules! punctuation_kind { ($kind:ident) => { - $crate::parser::lexer::TokenKind::Punctuation($crate::parser::lexer::token_kind::PunctuationKind::$kind) + $crate::parser::lexer::TokenKind::Punctuation($crate::parser::lexer::PunctuationKind::$kind) }; } -#[macro_export] macro_rules! literal_kind { - ($kind:ident) => { - $crate::parser::lexer::TokenKind::Literal($crate::parser::lexer::token_kind::LiteralKind::$kind) + ($kind:pat_param) => { + $crate::parser::lexer::TokenKind::Literal($kind) }; } -#[macro_export] macro_rules! Tok { + (else) => { + $crate::parser::lexer::token_kind::keyword_kind!(Else) + }; + (false) => { + $crate::parser::lexer::token_kind::keyword_kind!(False) + }; + (if) => { + $crate::parser::lexer::token_kind::keyword_kind!(If) + }; + (let) => { + $crate::parser::lexer::token_kind::keyword_kind!(Let) + }; + (true) => { + $crate::parser::lexer::token_kind::keyword_kind!(True) + }; + (var) => { + $crate::parser::lexer::token_kind::keyword_kind!(Var) + }; + (while) => { + $crate::parser::lexer::token_kind::keyword_kind!(While) + }; (Newline) => { - $crate::punctuation_kind!(Newline) + $crate::parser::lexer::token_kind::punctuation_kind!(Newline) }; (Backtick) => { - $crate::punctuation_kind!(Backtick) + $crate::parser::lexer::token_kind::punctuation_kind!(Backtick) }; (LeftParen) => { - $crate::punctuation_kind!(LeftParen) + $crate::parser::lexer::token_kind::punctuation_kind!(LeftParen) }; (RightParen) => { - $crate::punctuation_kind!(RightParen) + $crate::parser::lexer::token_kind::punctuation_kind!(RightParen) }; (LeftBracket) => { - $crate::punctuation_kind!(LeftBracket) + $crate::parser::lexer::token_kind::punctuation_kind!(LeftBracket) }; (RightBracket) => { - $crate::punctuation_kind!(RightBracket) + $crate::parser::lexer::token_kind::punctuation_kind!(RightBracket) }; (LeftBrace) => { - $crate::punctuation_kind!(LeftBrace) + $crate::parser::lexer::token_kind::punctuation_kind!(LeftBrace) }; (RightBrace) => { - $crate::punctuation_kind!(RightBrace) + $crate::parser::lexer::token_kind::punctuation_kind!(RightBrace) }; (,) => { - $crate::punctuation_kind!(Comma) + $crate::parser::lexer::token_kind::punctuation_kind!(Comma) }; (:) => { - $crate::punctuation_kind!(Colon) + $crate::parser::lexer::token_kind::punctuation_kind!(Colon) }; (;) => { - $crate::punctuation_kind!(Semicolon) + $crate::parser::lexer::token_kind::punctuation_kind!(Semicolon) }; (@) => { - $crate::punctuation_kind!(At) + $crate::parser::lexer::token_kind::punctuation_kind!(At) }; (?) => { - $crate::punctuation_kind!(QuestionMark) + $crate::parser::lexer::token_kind::punctuation_kind!(QuestionMark) }; (.) => { - $crate::punctuation_kind!(Dot) + $crate::parser::lexer::token_kind::punctuation_kind!(Dot) }; (=) => { - $crate::punctuation_kind!(Equals) + $crate::parser::lexer::token_kind::punctuation_kind!(Equals) }; (=>) => { - $crate::punctuation_kind!(FatArrow) + $crate::parser::lexer::token_kind::punctuation_kind!(FatArrow) }; (->) => { - $crate::punctuation_kind!(Arrow) + $crate::parser::lexer::token_kind::punctuation_kind!(Arrow) }; (&) => { - $crate::punctuation_kind!(Ampersand) + $crate::parser::lexer::token_kind::punctuation_kind!(Ampersand) }; (&w) => { - $crate::punctuation_kind!(AmpersandW) + $crate::parser::lexer::token_kind::punctuation_kind!(AmpersandW) }; (#) => { - $crate::punctuation_kind!(NumberSign) + $crate::parser::lexer::token_kind::punctuation_kind!(NumberSign) + }; + (Int($pat:pat_param, $radix:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::Int($pat, $radix)) + }; + (Int($pat:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::Int($pat, _)) + }; + (BigInt($pat:pat_param, $radix:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::BigInt($pat, $radix)) + }; + (BigInt($pat:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::BigInt($pat, _)) + }; + (Float($pat:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::Float($pat)) + }; + (Char($pat:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::Char($pat)) + }; + (String($pat:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::String($pat)) + }; + (Interpolation($pat:pat_param)) => { + $crate::parser::lexer::token_kind::literal_kind!($crate::parser::lexer::LiteralKind::StringInterpolation($pat)) }; (Ident) => { $crate::parser::lexer::TokenKind::Identifier }; - (Op) => { - $crate::parser::lexer::TokenKind::Operator + (PrefixOp) => { + $crate::parser::lexer::TokenKind::PrefixOperator + }; + (PostfixOp) => { + $crate::parser::lexer::TokenKind::PostfixOperator + }; + (BinOp) => { + $crate::parser::lexer::TokenKind::BinaryOperator }; (Unknown) => { $crate::parser::lexer::TokenKind::Unknown }; } + +pub(crate) use keyword_kind; +pub(crate) use literal_kind; +pub(crate) use punctuation_kind; +pub(crate) use Tok; diff --git a/frontend/src/parser/mod.rs b/frontend/src/parser/mod.rs index b8ec4ef..94b56c8 100644 --- a/frontend/src/parser/mod.rs +++ b/frontend/src/parser/mod.rs @@ -1,3 +1,209 @@ pub mod lexer; +use chumsky::{ + error::Error as ChumskyError, + extra::Err as ExtraErr, + input::{BoxedStream, Input as _, SpannedInput, Stream}, + primitive::{choice, just}, + recovery::{nested_delimiters, via_parser}, + recursive::recursive, + select, + util::MaybeRef, + Parser as ChumskyParser, +}; +use derive_where::derive_where; +use juice_core::parser_ext::{IterParserExt as _, ParserExt as _}; + pub use self::lexer::Lexer; +use self::lexer::{literal::InterpolationPart, Tok, TokenKind}; +use crate::{ + ast::expr::{BorrowExpr, Expr, ExprKind, IntLiteralExpr, InterpolationExprPart, LiteralExpr, UnaryOperatorExpr}, + source_loc::SourceRange, + source_manager::SourceManager, +}; + +type LexerStream<'src, 'lex, M> = BoxedStream<'lex, (TokenKind<'src, M>, SourceRange<'src, M>)>; + +type ParserInput<'src, 'lex, M> = SpannedInput, SourceRange<'src, M>, LexerStream<'src, 'lex, M>>; + +#[derive_where(Debug, Clone)] +pub struct Error<'src, M: 'src + SourceManager> { + source_range: SourceRange<'src, M>, + expected: Vec>>, + found: Option>, +} + +impl<'src, 'lex, M: 'src + SourceManager> ChumskyError<'lex, ParserInput<'src, 'lex, M>> for Error<'src, M> +where + 'src: 'lex, +{ + fn expected_found>>>>( + expected: Iter, + found: Option>>, + span: SourceRange<'src, M>, + ) -> Self { + Self { + source_range: span, + expected: expected.into_iter().map(|e| e.as_deref().cloned()).collect(), + found: found.as_deref().cloned(), + } + } + + fn merge(mut self, mut other: Self) -> Self { + self.expected.append(&mut other.expected); + self + } +} + +pub trait Parser<'src, 'lex, M: 'src + SourceManager, O>: + ChumskyParser<'lex, ParserInput<'src, 'lex, M>, O, ExtraErr>> +where + 'src: 'lex, +{ +} + +impl<'src, 'lex, M: 'src + SourceManager, O, P> Parser<'src, 'lex, M, O> for P +where + P: ChumskyParser<'lex, ParserInput<'src, 'lex, M>, O, ExtraErr>>, + 'src: 'lex, +{ +} + +fn ignore_newlines<'src, 'lex, M: 'src + SourceManager>() -> impl Parser<'src, 'lex, M, ()> + Clone +where + 'src: 'lex, +{ + just(Tok![Newline]).repeated().ignored() +} + +pub fn expr_parser<'src, 'lex, M: 'src + SourceManager>() -> impl Parser<'src, 'lex, M, Expr<'src, M>> + Clone +where + 'src: 'lex, +{ + recursive(|expr| { + let cloned_expr = expr.clone(); + + let literal = select! { + Tok![false] => LiteralExpr::Bool(false), + Tok![true] => LiteralExpr::Bool(true), + Tok![Int(v)] => LiteralExpr::Int(IntLiteralExpr::Int(v)), + Tok![BigInt(v)] => LiteralExpr::Int(IntLiteralExpr::BigInt(v)), + Tok![Float(v)] => LiteralExpr::Float(v), + Tok![Char(c)] => LiteralExpr::Char(c), + Tok![String(s)] => LiteralExpr::String(s), + } + .or(select! { + Tok![Interpolation(parts)] => parts, + } + .validate(move |parts, e, emitter| { + let mut expr_parts = Vec::new(); + + let span: SourceRange = e.span(); + + for part in parts.iter().cloned() { + match part { + InterpolationPart::String(s) => { + expr_parts.push(InterpolationExprPart::String(s.clone())); + } + InterpolationPart::Interpolation(tokens) => { + let start_loc = tokens + .first() + .map(|t| t.source_range.start_loc()) + .unwrap_or(span.start_loc()); + + let eoi_range = if let Some(t) = tokens.last() { + let end = t.source_range.end; + t.source_range.source.get_range(end, end) + } else { + let end = span.end; + span.source.get_range(end, end) + }; + + let inner_span = start_loc.source.get_range(start_loc.offset, eoi_range.end); + + let input = tokens + .iter() + .cloned() + .map(|t| (t.kind, t.source_range)) + .collect::>(); + + let parser_input = Stream::from_iter(input).boxed().spanned(eoi_range); + + let (expr, errors) = cloned_expr.parse(parser_input).into_output_errors(); + + for error in errors { + emitter.emit(error); + } + + let expr = expr.unwrap_or_else(|| Expr::new(ExprKind::Error, inner_span)); + + expr_parts.push(InterpolationExprPart::Interpolation(expr)); + } + } + } + + LiteralExpr::StringInterpolation(expr_parts.into()) + })) + .map(ExprKind::Literal); + + let primary_expr = literal + .or(just(Tok![Ident]).to_span().map(ExprKind::Identifier)) + .or(expr + .delimited_by( + just(Tok![LeftParen]).then(ignore_newlines()), + ignore_newlines().then(just(Tok![RightParen])), + ) + .map(Box::new) + .map(ExprKind::Grouping)) + .map_with_span(Expr::new) + .recover_with(via_parser(nested_delimiters( + Tok![LeftParen], + Tok![RightParen], + [ + (Tok![LeftBracket], Tok![RightBracket]), + (Tok![LeftBrace], Tok![RightBrace]), + ], + |span| Expr::new(ExprKind::Error, span), + ))); + + let postfix_expr = primary_expr + .then(just(Tok![PostfixOp]).to_span().or_not()) + .map(|(expr, op_range)| { + if let Some(op_range) = op_range { + ExprKind::UnaryOperator(UnaryOperatorExpr::new(expr, op_range, false)) + } else { + expr.kind + } + }) + .map_with_span(Expr::new); + + let prefix_operator = choice(( + just(Tok![&]).to((|e, _| ExprKind::Borrow(BorrowExpr::new(e, false))) as fn(_, _) -> _), + just(Tok![&w]).to((|e, _| ExprKind::Borrow(BorrowExpr::new(e, true))) as fn(_, _) -> _), + just(Tok![PrefixOp]) + .to((|e, span| ExprKind::UnaryOperator(UnaryOperatorExpr::new(e, span, true))) as fn(_, _) -> _), + )); + + let prefix_expr = prefix_operator + .with_span() + .repeated() + .foldr_with_span(postfix_expr, |(op_f, op_span), e, span| { + Expr::new(op_f(e, op_span), span) + }); + + let binary_expr = prefix_expr + .clone() + .foldl_with_span( + just(Tok![BinOp]) + .to_span() + .padded_by(ignore_newlines()) + .then(prefix_expr) + .repeated(), + Expr::with_binary_operator, + ) + .boxed(); // needed if we want compilation to finish in a reasonable amount of time + + binary_expr + }) + .then_ignore(just(Tok![Newline]).repeated()) +} diff --git a/frontend/src/runner.rs b/frontend/src/runner.rs index fa40271..9dbc584 100644 --- a/frontend/src/runner.rs +++ b/frontend/src/runner.rs @@ -4,9 +4,14 @@ use std::{ path::PathBuf, }; +use chumsky::{ + input::{Input as _, Stream}, + Parser as _, +}; + use crate::{ diag::DiagnosticEngine, - parser::Lexer, + parser::{expr_parser, Lexer}, source_manager::{DefaultSourceManager, SourceManager}, Result, }; @@ -65,26 +70,30 @@ impl Runner { } } - fn run_impl(self) -> Result { + fn run_impl(mut self) -> Result { let source_manager = DefaultSourceManager::new(self.args.input_filepath)?; let diagnostics = DiagnosticEngine::new(&source_manager); - let mut lexer = Lexer::new(source_manager.get_main_source()); + let source = source_manager.get_main_source(); + + let mut lexer = Lexer::new(source); + + let parser_input = Stream::from_iter((&mut lexer).map(|t| (t.kind, t.source_range))) + .boxed() + .spanned(source.get_eof_range()); - let tokens = (&mut lexer).collect::>(); + let (ast, errors) = expr_parser().parse(parser_input).into_output_errors(); lexer.diagnose_errors(&diagnostics)?; - check_error!(diagnostics); - for token in tokens { - println!( - "{:?} {:?} {} {}", - token.kind, - token.source_range.get_str(), - token.has_leading_whitespace, - token.has_trailing_whitespace - ); + println!("{:?}", errors); + + if let Some(ast) = ast { + if self.args.action == Action::DumpParse { + writeln!(self.args.output_stream, "{}", ast)?; + return Ok(true); + } } check_error!(diagnostics); diff --git a/frontend/src/source_loc.rs b/frontend/src/source_loc.rs index d214155..67766f2 100644 --- a/frontend/src/source_loc.rs +++ b/frontend/src/source_loc.rs @@ -1,20 +1,20 @@ use std::{ cmp::Ordering, - fmt::{Display, Formatter, Result as FmtResult}, - ops::{Add, Sub}, + fmt::{Debug, Display, Formatter, Result as FmtResult}, + ops::{Add, Range, Sub}, }; use derive_where::derive_where; use crate::source_manager::{AriadneSourceManager, Source, SourceManager}; -#[derive_where(Debug, Clone, Copy, PartialEq, Eq)] -pub struct SourceLoc<'src, M: SourceManager> { +#[derive_where(Clone, Copy, PartialEq, Eq)] +pub struct SourceLoc<'src, M: 'src + SourceManager> { pub source: Source<'src, M>, pub offset: usize, } -impl<'src, M: SourceManager> SourceLoc<'src, M> { +impl<'src, M: 'src + SourceManager> SourceLoc<'src, M> { pub fn new(source: Source<'src, M>, offset: usize) -> Self { Self { source, offset } } @@ -56,6 +56,15 @@ impl Sub for SourceLoc<'_, M> { } } +impl Debug for SourceLoc<'_, M> { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + f.debug_struct("SourceLoc") + .field("source", &format!("{}", self.source)) + .field("offset", &self.offset) + .finish() + } +} + impl Display for SourceLoc<'_, M> { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { if let Some((line, column)) = self.get_line_and_column() { @@ -66,14 +75,14 @@ impl Display for SourceLoc<'_, M> { } } -#[derive_where(Debug, PartialEq, Eq, Clone, Copy)] -pub struct SourceRange<'src, M: SourceManager> { +#[derive_where(PartialEq, Eq, Clone, Copy)] +pub struct SourceRange<'src, M: 'src + SourceManager> { pub source: Source<'src, M>, pub start: usize, pub end: usize, } -impl<'src, M: SourceManager> SourceRange<'src, M> { +impl<'src, M: 'src + SourceManager> SourceRange<'src, M> { pub fn new(source: Source<'src, M>, start: usize, end: usize) -> Self { Self { source, start, end } } @@ -99,7 +108,7 @@ impl<'src, M: SourceManager> SourceRange<'src, M> { } } -impl<'src, M: SourceManager> ariadne::Span for SourceRange<'src, M> { +impl<'src, M: 'src + SourceManager> ariadne::Span for SourceRange<'src, M> { type SourceId = Source<'src, M>; fn source(&self) -> &Source<'src, M> { @@ -115,6 +124,37 @@ impl<'src, M: SourceManager> ariadne::Span for SourceRange<'src, M> { } } +impl<'src, M: 'src + SourceManager> chumsky::span::Span for SourceRange<'src, M> { + type Context = Source<'src, M>; + type Offset = usize; + + fn new(context: Source<'src, M>, range: Range) -> Self { + Self::new(context, range.start, range.end) + } + + fn context(&self) -> Source<'src, M> { + self.source + } + + fn start(&self) -> usize { + self.start + } + + fn end(&self) -> usize { + self.end + } +} + +impl Debug for SourceRange<'_, M> { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + f.debug_struct("SourceRange") + .field("source", &format!("{}", self.source)) + .field("start", &self.start) + .field("end", &self.end) + .finish() + } +} + impl Display for SourceRange<'_, M> { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { match ( diff --git a/frontend/src/source_manager.rs b/frontend/src/source_manager.rs index b39e662..915836c 100644 --- a/frontend/src/source_manager.rs +++ b/frontend/src/source_manager.rs @@ -14,7 +14,7 @@ use crate::{ Result, }; -pub trait SourceManager: private::SourceManager + Debug + Sized { +pub trait SourceManager: private::SourceManager + Debug + Send + Sync + Sized { type Index; type Output; @@ -29,7 +29,7 @@ mod private { }; pub trait SourceManager { - type Key: Debug + Clone + Copy + Eq + Hash; + type Key: Debug + Clone + Copy + Eq + Hash + Send + Sync; type Storage: AsRef; fn get_storage(&self, key: Self::Key) -> &Self::Storage; @@ -144,11 +144,11 @@ impl private::AriadneSourceManager for DefaultSourceManager { impl AriadneSourceManager for DefaultSourceManager {} -pub struct SourceCache<'src, M> { +pub struct SourceCache<'src, M: 'src> { source_manager: &'src M, } -impl<'src, M: AriadneSourceManager> ariadne::Cache> for SourceCache<'src, M> { +impl<'src, M: 'src + AriadneSourceManager> ariadne::Cache> for SourceCache<'src, M> { type Storage = M::Storage; fn fetch(&mut self, id: &Source) -> Result<&ariadne::Source, Box> { @@ -161,12 +161,12 @@ impl<'src, M: AriadneSourceManager> ariadne::Cache> for SourceCa } #[derive_where(Debug, Clone, Copy)] -pub struct Source<'src, M: SourceManager> { +pub struct Source<'src, M: 'src + SourceManager> { key: M::Key, source_manager: &'src M, } -impl<'src, M: SourceManager> Source<'src, M> { +impl<'src, M: 'src + SourceManager> Source<'src, M> { pub fn get_contents(&self) -> &'src str { self.source_manager.get_storage(self.key).as_ref() } @@ -178,15 +178,21 @@ impl<'src, M: SourceManager> Source<'src, M> { pub fn get_range(&self, start: usize, end: usize) -> SourceRange<'src, M> { SourceRange::new(*self, start, end) } + + pub fn get_eof_range(&self) -> SourceRange<'src, M> { + let contents = self.get_contents(); + let end = contents.len(); + SourceRange::new(*self, end, end) + } } -impl<'src, M: SourceManager>> Source<'src, M> { +impl<'src, M: 'src + SourceManager>> Source<'src, M> { pub fn get_contents_owned(&self) -> Arc { self.source_manager.get_storage(self.key).clone() } } -impl<'src, M: AriadneSourceManager> Source<'src, M> { +impl<'src, M: 'src + AriadneSourceManager> Source<'src, M> { pub fn get_line_and_column(&self, offset: usize) -> Option<(usize, usize)> { self.source_manager .get_ariadne_source(self.key)