Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added support for special chars (#309) #321

Merged
merged 3 commits into from
Oct 12, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ encoding_rs_io = "0.1"
codespan-reporting = "0.11.1"
mun_lld = "110.0.0"
generational-arena = "0.2.8"
regex = "1"

[lib]
name = "rusty"
44 changes: 44 additions & 0 deletions src/codegen/tests/code_gen_tests.rs
Original file line number Diff line number Diff line change
@@ -847,6 +847,50 @@ entry:
assert_eq!(result, expected);
}

// Checks code generation for string literals that contain `$` escape sequences.
//
// The program assigns four literals: one narrow (STRING) and one wide (WSTRING)
// literal whose escapes are expected to be resolved, and one of each whose
// escapes belong to the *other* literal kind:
// * in the narrow literal, `$0043` is expected to become a NUL byte followed by
//   the plain text `43`, and `$"` is expected to stay as written;
// * in the wide literal, `$43` and `$'` are expected to stay as written.
//
// NOTE(review): `codegen!` is defined outside this view; presumably it compiles the
// given source and returns the generated IR as text — confirm against the macro.
#[test]
fn program_with_special_chars_in_string() {
let result = codegen!(
r#"PROGRAM prg
VAR
should_replace_s : STRING;
should_not_replace_s : STRING;

should_replace_ws : WSTRING;
should_not_replace_ws : WSTRING;
END_VAR
should_replace_s := 'a$l$L b$n$N c$p$P d$r$R e$t$T $$ $'single$' $57💖$F0$9F$92$96';
should_not_replace_s := '$0043 $"no replace$"';

should_replace_ws := "a$l$L b$n$N c$p$P d$r$R e$t$T $$ $"double$" $0057💖$D83D$DC96";
should_not_replace_ws := "$43 $'no replace$'";
END_PROGRAM
"#
);

// Expected IR text: each assignment is a `store` of a constant byte array into the
// matching member of `%prg_interface`; the wide strings are stored as two bytes per
// character.
let expected = r#"; ModuleID = 'main'
source_filename = "main"

%prg_interface = type { [81 x i8], [81 x i8], [162 x i8], [162 x i8] }

@prg_instance = global %prg_interface zeroinitializer

define void @prg(%prg_interface* %0) {
entry:
%should_replace_s = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 0
%should_not_replace_s = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 1
%should_replace_ws = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 2
%should_not_replace_ws = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 3
store [41 x i8] c"a\0A\0A b\0A\0A c\0C\0C d\0D\0D e\09\09 $ 'single' W\F0\9F\92\96\F0\9F\92\96\00", [81 x i8]* %should_replace_s, align 1
store [19 x i8] c"\0043 $\22no replace$\22\00", [81 x i8]* %should_not_replace_s, align 1
store [74 x i8] c"a\00\0A\00\0A\00 \00b\00\0A\00\0A\00 \00c\00\0C\00\0C\00 \00d\00\0D\00\0D\00 \00e\00\09\00\09\00 \00$\00 \00\22\00d\00o\00u\00b\00l\00e\00\22\00 \00W\00=\D8\96\DC=\D8\96\DC\00\00", [162 x i8]* %should_replace_ws, align 1
store [38 x i8] c"$\004\003\00 \00$\00'\00n\00o\00 \00r\00e\00p\00l\00a\00c\00e\00$\00'\00\00\00", [162 x i8]* %should_not_replace_ws, align 1
ret void
}
"#;

// The generated text must match the expectation exactly.
assert_eq!(result, expected);
}

#[test]
fn different_case_references() {
let result = codegen!(
85 changes: 84 additions & 1 deletion src/parser/expressions_parser.rs
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ use crate::{
parser::parse_any_in_region,
Diagnostic,
};
use regex::{Captures, Regex};
use std::str::FromStr;

macro_rules! parse_left_associative_expression {
@@ -779,14 +780,65 @@ fn trim_quotes(quoted_string: &str) -> String {
quoted_string[1..quoted_string.len() - 1].to_string()
}

/// Resolves the `$` escape sequences inside the content of a string literal.
///
/// `string` is the literal's content without the surrounding quotes. `is_wide`
/// selects the rule set for wide (double-quoted) literals; otherwise the rules for
/// narrow (single-quoted) literals apply.
///
/// Recognised sequences:
/// * `$l`/`$L` and `$n`/`$N` become a line feed, `$p`/`$P` a form feed,
///   `$r`/`$R` a carriage return and `$t`/`$T` a tab;
/// * `$$` becomes a single `$`;
/// * `$'` (narrow only) and `$"` (wide only) become the respective quote;
/// * a run of consecutive `$XX` groups (narrow, two hex digits each) is decoded
///   together as UTF-8 bytes, and a run of `$XXXX` groups (wide, four hex digits
///   each) is decoded together as UTF-16 code units. Invalid sequences are replaced
///   by U+FFFD because the lossy conversions are used.
///
/// Anything else is copied through unchanged.
///
/// All sequences are resolved in a single left-to-right pass, so the output of one
/// replacement is never scanned again: `$$41` yields the text `$41`, not `A`.
fn handle_special_chars(string: &str, is_wide: bool) -> String {
    // Group 1 captures the character of a single-character escape; the second
    // alternative matches a whole run of hex escapes so that multi-unit code points
    // are decoded as one sequence. Hex digits never overlap with the characters of
    // the first alternative, so the alternation is unambiguous.
    let pattern = if is_wide {
        r#"\$([lLnNpPrRtT$"])|(?:\$[[:xdigit:]]{4})+"#
    } else {
        r"\$([lLnNpPrRtT$'])|(?:\$[[:xdigit:]]{2})+"
    };
    let re = Regex::new(pattern).expect("the escape pattern is a valid regex");

    re.replace_all(string, |caps: &Captures| match caps.get(1) {
        Some(escape) => {
            let replacement = match escape.as_str() {
                "l" | "L" | "n" | "N" => "\n",
                "p" | "P" => "\x0C",
                "r" | "R" => "\r",
                "t" | "T" => "\t",
                "$" => "$",
                "'" => "'",
                "\"" => "\"",
                // group 1 can only hold one of the characters listed in the pattern
                _ => unreachable!(),
            };
            replacement.to_string()
        }
        None => {
            // `$41$42` -> ["41", "42"]; the pattern guarantees that every piece
            // consists of hex digits and fits the target integer width.
            let codes = caps[0].split('$').filter(|it| !it.is_empty());
            if is_wide {
                let units: Vec<u16> = codes
                    .map(|it| u16::from_str_radix(it, 16).unwrap_or_default())
                    .collect();
                String::from_utf16_lossy(&units)
            } else {
                let bytes: Vec<u8> = codes
                    .map(|it| u8::from_str_radix(it, 16).unwrap_or_default())
                    .collect();
                String::from_utf8_lossy(&bytes).into_owned()
            }
        }
    })
    .into_owned()
}

fn parse_literal_string(
lexer: &mut ParseSession,
is_wide: bool,
) -> Result<AstStatement, Diagnostic> {
let result = lexer.slice();
let location = lexer.location();
let string_literal = Ok(AstStatement::LiteralString {
value: trim_quotes(result),
value: handle_special_chars(&trim_quotes(result), is_wide),
is_wide,
location,
id: lexer.next_id(),
@@ -825,3 +877,34 @@ fn parse_literal_real(
))
}
}

#[cfg(test)]
mod tests {
    use crate::parser::expressions_parser::handle_special_chars;

    /// Escape sequences that belong to the literal kind are resolved.
    #[test]
    fn replace_all_test() {
        let narrow_input = "a $l$L b $n$N test $p$P c $r$R d $t$T$$ $'quote$' $57 💖 $F0$9F$92$96";
        let narrow_output = "a \n\n b \n\n test \x0C\x0C c \r\r d \t\t$ 'quote' W 💖 💖";

        let wide_input =
            r#"a $l$L b $n$N test $p$P c $r$R d $t$T$$ $"double$" $0077 💖 $D83D$DC96"#;
        let wide_output = "a \n\n b \n\n test \x0C\x0C c \r\r d \t\t$ \"double\" w 💖 💖";

        // (input, is_wide, expected output); the wide case is checked first
        let cases = [
            (wide_input, true, wide_output),
            (narrow_input, false, narrow_output),
        ];
        for &(input, is_wide, wanted) in cases.iter() {
            assert_eq!(handle_special_chars(input, is_wide), wanted);
        }
    }

    /// Escape sequences that belong to the other literal kind are left alone.
    #[test]
    fn should_not_replace_test() {
        let narrow_input = r#"$0043 $"no replace$""#;
        let narrow_output = "\u{0}43 $\"no replace$\"";

        let wide_input = r#"$57 $'no replace$'"#;
        let wide_output = "$57 $'no replace$'";

        // (input, is_wide, expected output); the wide case is checked first
        let cases = [
            (wide_input, true, wide_output),
            (narrow_input, false, narrow_output),
        ];
        for &(input, is_wide, wanted) in cases.iter() {
            assert_eq!(handle_special_chars(input, is_wide), wanted);
        }
    }
}