Skip to content

Commit

Permalink
Rollup merge of rust-lang#119172 - nnethercote:earlier-NulInCStr, r=p…
Browse files Browse the repository at this point in the history
…etrochenkov

Detect `NulInCStr` error earlier.

By making it an `EscapeError` instead of a `LitError`. This makes it like the other errors produced when checking string literal contents, e.g. for invalid escape sequences or bare CR chars.

NOTE: this means these errors are issued earlier, before expansion, which changes behaviour. It will be possible to move the check back to the later point if desired. If that happens, it's likely that all the string literal contents checks will be delayed together.

One nice thing about this: the old approach had some code in `report_lit_error` to calculate the span of the nul char from a range. This code used a hardwired `+2` to account for the `c"` at the start of a C string literal, but it should have used `+3` for raw C string literals to account for the `cr"`. As a result, the caret in nul error messages for `cr"` literals was one position short of where it should have been. The new approach doesn't need any of this and avoids the off-by-one error.

r? ``@fee1-dead``
  • Loading branch information
matthiaskrgr authored Jan 18, 2024
2 parents 2457c02 + 9018d2c commit 53d99e5
Show file tree
Hide file tree
Showing 11 changed files with 32 additions and 28 deletions.
12 changes: 2 additions & 10 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use rustc_lexer::unescape::{
};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
use std::ops::Range;
use std::{ascii, fmt, str};

// Escapes a string, represented as a symbol. Reuses the original symbol,
Expand Down Expand Up @@ -39,7 +38,6 @@ pub enum LitError {
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge(u32),
NulInCStr(Range<usize>),
}

impl LitKind {
Expand Down Expand Up @@ -156,10 +154,7 @@ impl LitKind {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
Expand All @@ -179,10 +174,7 @@ impl LitKind {
// can convert the symbol directly to a `Lrc<u8>` on success.
let s = symbol.as_str();
let mut error = Ok(());
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
Ok(_) => {}
Err(err) => {
if err.is_fatal() {
Expand Down
17 changes: 15 additions & 2 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ pub enum EscapeError {
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
NonAsciiCharInByte,

/// Nul character (e.g. `\0`) in a C string literal or raw C string literal.
NulInCStr,

/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
UnskippedWhitespaceWarning,
Expand Down Expand Up @@ -122,10 +125,20 @@ where
{
match mode {
CStr => {
unescape_non_raw_common(src, mode, callback);
unescape_non_raw_common(src, mode, &mut |r, mut result| {
if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result {
result = Err(EscapeError::NulInCStr);
}
callback(r, result)
});
}
RawCStr => {
check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char)));
check_raw_common(src, mode, &mut |r, mut result| {
if let Ok('\0') = result {
result = Err(EscapeError::NulInCStr);
}
callback(r, result.map(CStrUnit::Char))
});
}
Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(),
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,8 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable
parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||`
parse_nul_in_c_str = null characters in C string literals are not supported
parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters
parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings
parse_out_of_range_hex_escape = out of range hex escape
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2163,6 +2163,11 @@ pub enum UnescapeError {
#[subdiagnostic]
suggestion: MoreThanOneCharSugg,
},
#[diag(parse_nul_in_c_str)]
NulInCStr {
#[primary_span]
span: Span,
},
}

#[derive(Subdiagnostic)]
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,9 @@ pub(crate) fn emit_unescape_error(
EscapeError::LoneSlash => {
dcx.emit_err(UnescapeError::LoneSlash(err_span));
}
EscapeError::NulInCStr => {
dcx.emit_err(UnescapeError::NulInCStr { span: err_span });
}
EscapeError::UnskippedWhitespaceWarning => {
let (c, char_span) = last_char();
dcx.emit_warn(UnescapeError::UnskippedWhitespace {
Expand Down
2 changes: 0 additions & 2 deletions compiler/rustc_session/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be u
session_not_supported = not supported
session_nul_in_c_str = null characters in C string literals are not supported
session_octal_float_literal_not_supported = octal float literal is not supported
session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg}
Expand Down
15 changes: 1 addition & 14 deletions compiler/rustc_session/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use rustc_errors::{
error_code, DiagCtxt, DiagnosticBuilder, DiagnosticMessage, IntoDiagnostic, Level, MultiSpan,
};
use rustc_macros::Diagnostic;
use rustc_span::{BytePos, Span, Symbol};
use rustc_span::{Span, Symbol};
use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple};

use crate::parse::ParseSess;
Expand Down Expand Up @@ -346,13 +346,6 @@ pub(crate) struct BinaryFloatLiteralNotSupported {
pub span: Span,
}

/// Diagnostic using the `session_nul_in_c_str` message ("null characters in
/// C string literals are not supported").
#[derive(Diagnostic)]
#[diag(session_nul_in_c_str)]
pub(crate) struct NulInCStr {
/// Primary span the diagnostic is reported at.
#[primary_span]
pub span: Span,
}

pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) {
// Checks if `s` looks like i32 or u1234 etc.
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
Expand Down Expand Up @@ -432,12 +425,6 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span:
};
dcx.emit_err(IntLiteralTooLarge { span, limit });
}
LitError::NulInCStr(range) => {
let lo = BytePos(span.lo().0 + range.start as u32 + 2);
let hi = BytePos(span.lo().0 + range.end as u32 + 2);
let span = span.with_lo(lo).with_hi(hi);
dcx.emit_err(NulInCStr { span });
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
"non-ASCII character in byte string literal"
}
EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
EscapeError::NulInCStr => "null character in C string literal",
EscapeError::UnskippedWhitespaceWarning => "",
EscapeError::MultipleSkippedLinesWarning => "",
}
Expand Down
3 changes: 3 additions & 0 deletions src/tools/rust-analyzer/crates/syntax/src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str,
EE::NonAsciiCharInByte => {
"Byte literals must not contain non-ASCII characters"
}
EE::NulInCStr => {
"C strings literals must not contain null characters"
}
EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped",
EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape",

Expand Down
Binary file modified tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
Binary file not shown.
Binary file modified tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
Binary file not shown.

0 comments on commit 53d99e5

Please sign in to comment.