Skip to content

Commit

Permalink
Auto merge of #128200 - estebank:normalize-whitespace, r=pnkfelix
Browse files Browse the repository at this point in the history
Change output normalization logic to be linear against size of output

Modify the rendered output normalization routine to scan each character *once* and construct a `String` to be printed out to the terminal *once*, instead of using `String::replace` in a loop multiple times. The output doesn't change, but the time spent to prepare a diagnostic is now faster (or rather, closer to what it was before #127528).
  • Loading branch information
bors committed Aug 6, 2024
2 parents c9687a9 + 51b5bb1 commit 8c7e0e1
Showing 1 changed file with 38 additions and 30 deletions.
68 changes: 38 additions & 30 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2564,22 +2564,13 @@ fn num_decimal_digits(num: usize) -> usize {

// We replace some characters so the CLI output is always consistent and underlines aligned.
// Keep the following list in sync with `rustc_span::char_width`.
// ATTENTION: keep lexicografically sorted so that the binary search will work
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
('\t', " "), // We do our own tab replacement
('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently
('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always.
('\u{202E}', "�"),
('\u{2066}', "�"),
('\u{2067}', "�"),
('\u{2068}', "�"),
('\u{202C}', "�"),
('\u{2069}', "�"),
// tidy-alphabetical-start
// In terminals without Unicode support the following will be garbled, but in *all* terminals
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
// support" gate.
('\u{0000}', "␀"),
('\0', "␀"),
('\u{0001}', "␁"),
('\u{0002}', "␂"),
('\u{0003}', "␃"),
Expand All @@ -2588,11 +2579,12 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
('\u{0006}', "␆"),
('\u{0007}', "␇"),
('\u{0008}', "␈"),
('\u{000B}', "␋"),
('\u{000C}', "␌"),
('\u{000D}', "␍"),
('\u{000E}', "␎"),
('\u{000F}', "␏"),
('\u{0009}', " "), // We do our own tab replacement
('\u{000b}', "␋"),
('\u{000c}', "␌"),
('\u{000d}', "␍"),
('\u{000e}', "␎"),
('\u{000f}', "␏"),
('\u{0010}', "␐"),
('\u{0011}', "␑"),
('\u{0012}', "␒"),
Expand All @@ -2603,21 +2595,37 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
('\u{0017}', "␗"),
('\u{0018}', "␘"),
('\u{0019}', "␙"),
('\u{001A}', "␚"),
('\u{001B}', "␛"),
('\u{001C}', "␜"),
('\u{001D}', "␝"),
('\u{001E}', "␞"),
('\u{001F}', "␟"),
('\u{007F}', "␡"),
('\u{001a}', "␚"),
('\u{001b}', "␛"),
('\u{001c}', "␜"),
('\u{001d}', "␝"),
('\u{001e}', "␞"),
('\u{001f}', "␟"),
('\u{007f}', "␡"),
('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
('\u{202d}', "�"),
('\u{202e}', "�"),
('\u{2066}', "�"),
('\u{2067}', "�"),
('\u{2068}', "�"),
('\u{2069}', "�"),
// tidy-alphabetical-end
];

fn normalize_whitespace(str: &str) -> String {
let mut s = str.to_string();
for (c, replacement) in OUTPUT_REPLACEMENTS {
s = s.replace(*c, replacement);
}
s
fn normalize_whitespace(s: &str) -> String {
// Scan the input string for a character in the ordered table above. If it's present, replace
// it with it's alternative string (it can be more than 1 char!). Otherwise, retain the input
// char. At the end, allocate all chars into a string in one operation.
s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
_ => s.push(c),
}
s
})
}

fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
Expand Down

0 comments on commit 8c7e0e1

Please sign in to comment.