PLC-lang · ghaith · Oct 12, 2021 · Oct 5, 2021 · Oct 5, 2021 · Oct 7, 2021
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -23,6 +23,7 @@ encoding_rs_io = "0.1"
 codespan-reporting = "0.11.1"
 mun_lld = "110.0.0"
 generational-arena = "0.2.8"
+regex = "1"
 
 [lib]
 name = "rusty"

diff --git a/src/codegen/tests/code_gen_tests.rs b/src/codegen/tests/code_gen_tests.rs
@@ -847,6 +847,50 @@ entry:
     assert_eq!(result, expected);
 }
 
+#[test]
+fn program_with_special_chars_in_string() {
+    // `$` escapes must already be resolved in the stored constants; escapes of
+    // the other literal kind (e.g. `$"` inside a STRING) have to stay untouched.
+    let expected_ir = r#"; ModuleID = 'main'
+source_filename = "main"
+
+%prg_interface = type { [81 x i8], [81 x i8], [162 x i8], [162 x i8] }
+
+@prg_instance = global %prg_interface zeroinitializer
+
+define void @prg(%prg_interface* %0) {
+entry:
+  %should_replace_s = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 0
+  %should_not_replace_s = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 1
+  %should_replace_ws = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 2
+  %should_not_replace_ws = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 3
+  store [41 x i8] c"a\0A\0A b\0A\0A c\0C\0C d\0D\0D e\09\09 $ 'single' W\F0\9F\92\96\F0\9F\92\96\00", [81 x i8]* %should_replace_s, align 1
+  store [19 x i8] c"\0043 $\22no replace$\22\00", [81 x i8]* %should_not_replace_s, align 1
+  store [74 x i8] c"a\00\0A\00\0A\00 \00b\00\0A\00\0A\00 \00c\00\0C\00\0C\00 \00d\00\0D\00\0D\00 \00e\00\09\00\09\00 \00$\00 \00\22\00d\00o\00u\00b\00l\00e\00\22\00 \00W\00=\D8\96\DC=\D8\96\DC\00\00", [162 x i8]* %should_replace_ws, align 1
+  store [38 x i8] c"$\004\003\00 \00$\00'\00n\00o\00 \00r\00e\00p\00l\00a\00c\00e\00$\00'\00\00\00", [162 x i8]* %should_not_replace_ws, align 1
+  ret void
+}
+"#;
+    let generated_ir = codegen!(
+        r#"PROGRAM prg
+VAR
+should_replace_s : STRING;
+should_not_replace_s : STRING;
+
+should_replace_ws : WSTRING;
+should_not_replace_ws : WSTRING;
+END_VAR
+should_replace_s := 'a$l$L b$n$N c$p$P d$r$R e$t$T $$ $'single$' $57💖$F0$9F$92$96';
+should_not_replace_s := '$0043 $"no replace$"';
+
+should_replace_ws := "a$l$L b$n$N c$p$P d$r$R e$t$T $$ $"double$" $0057💖$D83D$DC96";
+should_not_replace_ws := "$43 $'no replace$'";
+END_PROGRAM
+"#
+    );
+    assert_eq!(generated_ir, expected_ir);
+}
+
 #[test]
 fn different_case_references() {
     let result = codegen!(

diff --git a/src/parser/expressions_parser.rs b/src/parser/expressions_parser.rs
@@ -7,6 +7,7 @@ use crate::{
     parser::parse_any_in_region,
     Diagnostic,
 };
+use regex::{Captures, Regex};
 use std::str::FromStr;
 
 macro_rules! parse_left_associative_expression {
@@ -779,14 +780,65 @@ fn trim_quotes(quoted_string: &str) -> String {
     quoted_string[1..quoted_string.len() - 1].to_string()
 }
 
+/// Resolves the `$`-escape sequences inside the body of a string literal.
+///
+/// `string` is the literal text without its surrounding quotes; `is_wide` selects
+/// the rules for double-quoted (wide) literals instead of single-quoted ones.
+///
+/// * `$l`/`$L`/`$n`/`$N` become a line feed, `$p`/`$P` a form feed, `$r`/`$R` a
+///   carriage return, `$t`/`$T` a tab and `$$` a single `$`.
+/// * The literal's own quote escape is resolved: `$'`, or `$"` when `is_wide`.
+/// * A run of adjacent hex escapes is decoded as one unit: `$XX` bytes as UTF-8,
+///   or `$XXXX` code units as UTF-16 when `is_wide`; invalid data turns into U+FFFD.
+///
+/// Everything else is copied through unchanged.
+fn handle_special_chars(string: &str, is_wide: bool) -> String {
+    // One alternation, one pass: replaced text is never rescanned, so the `$` that
+    // `$$` produces cannot start a hex escape (`$$41` must yield `$41`, not `A`).
+    let pattern = if is_wide {
+        r#"\$[lLnNpPrRtT$"]|(?:\$[[:xdigit:]]{4})+"#
+    } else {
+        r"\$[lLnNpPrRtT$']|(?:\$[[:xdigit:]]{2})+"
+    };
+    let re = Regex::new(pattern).unwrap(); //Cannot fail
+
+    re.replace_all(string, |caps: &Captures| match &caps[0] {
+        "$l" | "$L" | "$n" | "$N" => "\n".to_string(),
+        "$p" | "$P" => "\x0C".to_string(),
+        "$r" | "$R" => "\r".to_string(),
+        "$t" | "$T" => "\t".to_string(),
+        "$$" => "$".to_string(),
+        "$'" => "'".to_string(),
+        "$\"" => "\"".to_string(),
+        // Only the hex alternative is left: one or more adjacent `$`-prefixed
+        // groups, decoded together so that multi-byte UTF-8 sequences and UTF-16
+        // surrogate pairs end up as a single character.
+        hex => {
+            let digits = hex.split('$').filter(|it| !it.is_empty());
+            if is_wide {
+                let units: Vec<u16> = digits
+                    .map(|it| u16::from_str_radix(it, 16).unwrap_or_default())
+                    .collect();
+                String::from_utf16_lossy(&units)
+            } else {
+                let bytes: Vec<u8> = digits
+                    .map(|it| u8::from_str_radix(it, 16).unwrap_or_default())
+                    .collect();
+                String::from_utf8_lossy(&bytes).into_owned()
+            }
+        }
+    })
+    .into_owned()
+}
+
 fn parse_literal_string(
     lexer: &mut ParseSession,
     is_wide: bool,
 ) -> Result<AstStatement, Diagnostic> {
     let result = lexer.slice();
     let location = lexer.location();
     let string_literal = Ok(AstStatement::LiteralString {
-        value: trim_quotes(result),
+        value: handle_special_chars(&trim_quotes(result), is_wide),
         is_wide,
         location,
         id: lexer.next_id(),
@@ -825,3 +877,34 @@ fn parse_literal_real(
         ))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::handle_special_chars;
+
+    #[test]
+    fn replace_all_test() {
+        // every escape used here is valid for its literal kind and must be resolved
+        let wide_input = r#"a $l$L b $n$N test $p$P c $r$R d $t$T$$ $"double$" $0077 💖 $D83D$DC96"#;
+        let wide_output = "a \n\n b \n\n test \x0C\x0C c \r\r d \t\t$ \"double\" w 💖 💖";
+
+        let narrow_input = "a $l$L b $n$N test $p$P c $r$R d $t$T$$ $'quote$' $57 💖 $F0$9F$92$96";
+        let narrow_output = "a \n\n b \n\n test \x0C\x0C c \r\r d \t\t$ 'quote' W 💖 💖";
+
+        assert_eq!(handle_special_chars(wide_input, true), wide_output);
+        assert_eq!(handle_special_chars(narrow_input, false), narrow_output);
+    }
+
+    #[test]
+    fn should_not_replace_test() {
+        // quote escapes and hex widths of the *other* literal kind must be kept
+        let wide_input = r#"$57 $'no replace$'"#;
+        let wide_output = "$57 $'no replace$'";
+
+        let narrow_input = r#"$0043 $"no replace$""#;
+        let narrow_output = "\u{0}43 $\"no replace$\"";
+
+        assert_eq!(handle_special_chars(wide_input, true), wide_output);
+        assert_eq!(handle_special_chars(narrow_input, false), narrow_output);
+    }
+}