From 16a7e28007c0e4bb15f03c72a87b99da9cfded11 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 7 Dec 2023 09:53:08 +1100 Subject: [PATCH] Detect `NulInCStr` error earlier. By making it an `EscapeError` instead of a `LitError`. This makes it like the other errors produced when checking string literals contents, e.g. for invalid escape sequences or bare CR chars. NOTE: this means these errors are issued earlier, before expansion, which changes behaviour. It will be possible to move the check back to the later point if desired. If that happens, it's likely that all the string literal contents checks will be delayed together. One nice thing about this: the old approach had some code in `report_lit_error` to calculate the span of the nul char from a range. This code used a hardwired `+2` to account for the `c"` at the start of a C string literal, but this should have changed to a `+3` for raw C string literals to account for the `cr"`, which meant that the caret in `cr"` nul error messages was one short of where it should have been. The new approach doesn't need any of this and avoids the off-by-one error. --- compiler/rustc_ast/src/util/literal.rs | 12 ++---------- compiler/rustc_lexer/src/unescape.rs | 17 +++++++++++++++-- compiler/rustc_parse/messages.ftl | 2 ++ compiler/rustc_parse/src/errors.rs | 5 +++++ .../src/lexer/unescape_error_reporting.rs | 3 +++ compiler/rustc_session/messages.ftl | 2 -- compiler/rustc_session/src/errors.rs | 15 +-------------- .../crates/parser/src/lexed_str.rs | 1 + .../crates/syntax/src/validation.rs | 3 +++ .../rfcs/rfc-3348-c-string-literals/no-nuls.rs | Bin 738 -> 1906 bytes .../rfc-3348-c-string-literals/no-nuls.stderr | Bin 674 -> 2028 bytes 11 files changed, 32 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 92b9adf1db751..fbae496458813 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -8,7 +8,6 @@ use rustc_lexer::unescape::{ }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; -use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -39,7 +38,6 @@ pub enum LitError { InvalidFloatSuffix, NonDecimalFloat(u32), IntTooLarge(u32), - NulInCStr(Range), } impl LitKind { @@ -156,10 +154,7 @@ impl LitKind { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) @@ -179,10 +174,7 @@ impl LitKind { // can convert the symbol directly to a `Lrc` on success. let s = symbol.as_str(); let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { Ok(_) => {} Err(err) => { if err.is_fatal() { diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index abec12f52a6e6..0a632c4d12ad5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -59,6 +59,9 @@ pub enum EscapeError { /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, + // `\0` in a C string literal. + NulInCStr, + /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. UnskippedWhitespaceWarning, @@ -122,10 +125,20 @@ where { match mode { CStr => { - unescape_non_raw_common(src, mode, callback); + unescape_non_raw_common(src, mode, &mut |r, mut result| { + if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result) + }); } RawCStr => { - check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char))); + check_raw_common(src, mode, &mut |r, mut result| { + if let Ok('\0') = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result.map(CStrUnit::Char)) + }); } Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(), } diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index c11a6fab7e5d2..3c2fb53b29404 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -613,6 +613,8 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||` +parse_nul_in_c_str = null characters in C string literals are not supported + parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings parse_out_of_range_hex_escape = out of range hex escape diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 0de252707bd32..ef8e9d0a24832 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2144,6 +2144,11 @@ pub enum UnescapeError { #[subdiagnostic] suggestion: MoreThanOneCharSugg, }, + #[diag(parse_nul_in_c_str)] + NulInCStr { + #[primary_span] + span: Span, + }, } #[derive(Subdiagnostic)] diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index fbc77f2878081..3238f8e23bb0a 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -262,6 +262,9 @@ pub(crate) fn emit_unescape_error( EscapeError::LoneSlash => { dcx.emit_err(UnescapeError::LoneSlash(err_span)); } + EscapeError::NulInCStr => { + dcx.emit_err(UnescapeError::NulInCStr { span: err_span }); + } EscapeError::UnskippedWhitespaceWarning => { let (c, char_span) = last_char(); dcx.emit_warn(UnescapeError::UnskippedWhitespace { diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index f2e646c70f577..4f824f9f62e24 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -72,8 +72,6 @@ session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be u session_not_supported = not supported -session_nul_in_c_str = null characters in C string literals are not supported - session_octal_float_literal_not_supported = octal float literal is not supported session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index b672e760feb31..b0a397de3eb18 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -7,7 +7,7 @@ use rustc_errors::{ error_code, DiagCtxt, DiagnosticBuilder, DiagnosticMessage, IntoDiagnostic, Level, MultiSpan, }; use rustc_macros::Diagnostic; -use rustc_span::{BytePos, Span, Symbol}; +use rustc_span::{Span, Symbol}; use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple}; pub struct FeatureGateError { @@ -327,13 +327,6 @@ pub(crate) struct BinaryFloatLiteralNotSupported { pub span: Span, } -#[derive(Diagnostic)] -#[diag(session_nul_in_c_str)] -pub(crate) struct NulInCStr { - #[primary_span] - pub span: Span, -} - pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { @@ -413,12 +406,6 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: }; dcx.emit_err(IntLiteralTooLarge { span, limit }); } - LitError::NulInCStr(range) => { - let lo = BytePos(span.lo().0 + range.start as u32 + 2); - let hi = BytePos(span.lo().0 + range.end as u32 + 2); - let span = span.with_lo(lo).with_hi(hi); - dcx.emit_err(NulInCStr { span }); - } } } diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index b9e7566fdf9bc..3753a1beb7a8e 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -371,6 +371,7 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str { "non-ASCII character in byte string literal" } EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal", + EscapeError::NulInCStr => "null character in C string literal", EscapeError::UnskippedWhitespaceWarning => "", EscapeError::MultipleSkippedLinesWarning => "", } diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index eabbda2c3983c..0504f67c9dc7e 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -106,6 +106,9 @@ fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, EE::NonAsciiCharInByte => { "Byte literals must not contain non-ASCII characters" } + EE::NulInCStr => { + "C strings literals must not contain null characters" + } EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped", EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape", diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs index e20ca50b88f905d5a0332a71f64d02c061f21711..2f9ca09f3a5defe1f650efb73f2e7b98dbeef929 100644 GIT binary patch literal 1906 zcmeHHO=}x55Y2gi#l+3QTg=+cF`&f-IEE|vE`+~r>#(-L&=Le~0sGNs82+?ah2csi- zGyzr^zsk9G~pG)eI18B=T zI`E<)V7B?3Jfo5K&WDlUp)FIfry8{tIgu=sx&-TD0iRmIW(5A0x~jNb*c`CJROgsE z2~&=R@?N?iA9j|&*+1_%g3z*PzG#0=oX~cOh8Za+Y$)tx&CnCvO_Oh-fCrZdI|Ie4 z+AZZS=#3^{B%ixYg%)Pi?F-lp*>ce=CLSCFjDTQJ7E6ADuE3I_5YTT%jG(X$Y}S%GC`f%+vqXGk!VPsay*^3%Deh6X^K2U>vUGv zwC7`iqCSyn`-<+MQb&=dI+>pEnSYl%@oZM(X{lWsxUWztYPhM2Jj*HzlKU`+mkju$ z20tc)Tlod!@h`ZaPQOhL-HVF7N?rA;d3V+A%Ve@;gp;Z-&Z1sVI_alS`$5lrWbro3 zYE<4m`PNXl1ge|3>Af6Y4XNGQmhNyCJeW0x`B zzYb=^!;#({UGGS9IHg)2itNoJ*AG8`{Q7X8|CAIXh4??k(^KqINnxImOP|D3>VthA s>EGor4Of2hbRAH-A!DQ?yH&<|k8&~i7gA*C63!V?WcWHQM}lm#-?uw#vH$=8 literal 738 zcmbu7F;BxV5QRJYS6oAfgaMn5tw2JkOh`z9jY1U;r?qe#WM3pu6n;C7(>5q#YCPG` zKI?n;j&qRjid1P9Fc43a&Xe_+RytJ|n!Dk@xqwRW$SXp<$&Ypc; ze0OvIxJq8yG)X%8FHIlp-(c|Q&|mcJ!p?b570jH#wI|sG|3{v1n#N(sl5Wa+slyw} PlW3>}+=19)d#Pc+{;k%# diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr index ff9006f6f97f1be7eed18d188e8c30ba2075743e..a1e5e6d3e0d233000e91163f03d931e9e0f440dd 100644 GIT binary patch literal 2028 zcmc(gF;BxV5QRJYS3DvH7y?e5(j-VMupo6}t|Fza1(8A=ogDi8l z-goDhcalg^iEe0HY+h=rH%5|1)8QW4P}4o5ZR}RvYSbjrDFZ$9y&@yG42a`9oIgB1 zU3V(BAJJE&%Vb_I>=AZsxx~&cfK9aG(e1c7z5`Xom<%C);}ah~IsbA2D*FV2Sa=|2 zj(#BN+#|hu=LhW%R(nzA?(I|L6puiSPYYl52epT3z2H9a6I)Lt9tfxVftoK;kMt?6 zC&WKmPvm8iImVZ-z4{8;h`P`Fgg_yp7}^7DlGRkLIQWwflH sCfLNtQad(}xM!c9UYuVkTCeRy-cRf8AYl}eIeLH-21zkfR{#J2 delta 76 zcmaFEzle202&2j5i%ivw=0H+p;`0JV10X|qayDZ+qcKQm@^i+ViI;>Yr!fgmu43ex ce1cs-*i@mQD77pzzqD8(wWuh+sF;fj03KW#o&W#<