Skip to content

Commit

Permalink
Add feature: non-ASCII identifiers
Browse files Browse the repository at this point in the history
The feature `non_ascii_idents` gives Rune feature parity with Rust in
supporting Unicode identifiers. Refer to:
rust-lang/rfcs#2457
  • Loading branch information
lotem committed Aug 21, 2024
1 parent dac879c commit 6a218c8
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
2 changes: 2 additions & 0 deletions crates/rune/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ disable-io = ["alloc"]
fmt = ["alloc", "syntree"]
std = ["alloc", "num/std", "serde/std", "rune-core/std", "rune-alloc/std", "musli/std", "musli/std", "once_cell/std", "anyhow/std"]
alloc = ["anyhow", "rune-alloc/alloc", "rune-core/alloc", "once_cell/alloc", "serde/alloc"]
non_ascii_idents = ["dep:unicode-ident"]

[dependencies]
rune-macros = { version = "=0.14.0", path = "../rune-macros" }
Expand Down Expand Up @@ -76,6 +77,7 @@ sha2 = { version = "0.10.6", optional = true }
base64 = { version = "0.21.0", optional = true }
rand = { version = "0.8.5", optional = true }
memchr = "2.7.4"
unicode-ident = { version = "1.0.12", optional = true }

[dev-dependencies]
tokio = { version = "1.28.1", features = ["full"] }
Expand Down
22 changes: 20 additions & 2 deletions crates/rune/src/parse/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ use crate::ast::Span;
use crate::compile::{self, ErrorKind};
use crate::SourceId;

#[cfg(feature = "non_ascii_idents")]
use unicode_ident::{is_xid_continue as is_ident_continue, is_xid_start};

/// Tests whether `c` may begin an identifier when the `non_ascii_idents`
/// feature is enabled.
///
/// The underscore is accepted explicitly; every other character is deferred
/// to `unicode_ident::is_xid_start`.
// NOTE(review): presumably `_` is special-cased because it is not part of
// XID_Start — confirm against the `unicode-ident` documentation.
#[cfg(feature = "non_ascii_idents")]
fn is_ident_start(c: char) -> bool {
    match c {
        '_' => true,
        other => is_xid_start(other),
    }
}

/// Tests whether `c` may begin an identifier when the `non_ascii_idents`
/// feature is disabled.
///
/// Only the underscore and the ASCII letters `a..=z` / `A..=Z` qualify;
/// digits and every non-ASCII character are rejected.
#[cfg(not(feature = "non_ascii_idents"))]
fn is_ident_start(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}

/// Tests whether `c` may appear after the first character of an identifier
/// when the `non_ascii_idents` feature is disabled.
///
/// Accepts the underscore, the ASCII letters `a..=z` / `A..=Z` and the ASCII
/// digits `0..=9`; every other character is rejected.
#[cfg(not(feature = "non_ascii_idents"))]
fn is_ident_continue(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}

/// Lexer for the rune language.
#[derive(Debug)]
pub struct Lexer<'a> {
Expand Down Expand Up @@ -159,7 +177,7 @@ impl<'a> Lexer<'a> {

fn next_ident(&mut self, start: usize) -> compile::Result<Option<ast::Token>> {
while let Some(c) = self.iter.peek() {
if !matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9') {
if !is_ident_continue(c) {
break;
}

Expand Down Expand Up @@ -842,7 +860,7 @@ impl<'a> Lexer<'a> {
'@' => ast::Kind::At,
'$' => ast::Kind::Dollar,
'~' => ast::Kind::Tilde,
'_' | 'a'..='z' | 'A'..='Z' => {
c if is_ident_start(c) => {
return self.next_ident(start);
}
'0'..='9' => {
Expand Down
27 changes: 27 additions & 0 deletions scripts/non_ascii_idents.rn
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Test non-ASCII identifiers (feature `non_ascii_idents`).
//
// Each section below places an identifier containing non-ASCII characters in
// a different syntactic position: binding, reference, template interpolation,
// format string, function name, parameter name and call site.
// NOTE(review): presumably this script is only accepted when rune is built
// with the `non_ascii_idents` feature enabled — confirm how it is run.

// Creating a variable.
let 另一個世界 = "三體世界";

// Reference to a variable.
let 世界 = 另一個世界;

// In template interpolation.
let 高論 = `你好,${世界}。`;

// In string formatting.
// The first placeholder names the variable inline; the second is positional.
println!("我對{另一個世界}說話:「{}」", 高論);

// Compatibility check for alphanumeric characters and underscore.
// Covers a lone `_`, mixed-case ASCII names with digits, and a name that
// combines leading/trailing underscores with non-ASCII characters.
let _ = ();
let aB_1 = ();
let Ab_2 = ();
let __甲_乙_丙_丁__ = ();

// Naming functions and function arguments.
// The body is a single template expression that interpolates both parameters.
fn 口號(蟲子, 主) {
`消除${蟲子}暴政,世界屬於${主}!`
}

// Function call.
// Calls the non-ASCII-named function with two string literals and prints the
// result through a positional placeholder.
println!("我們的口號是:「{}」", 口號("人類", "三體"));

0 comments on commit 6a218c8

Please sign in to comment.