Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[compiletest] Ignore known paths when abbreviating output #96551

Merged
merged 8 commits into from
Jun 6, 2022
157 changes: 101 additions & 56 deletions src/tools/compiletest/src/read2.rs
Original file line number Diff line number Diff line change
@@ -1,71 +1,24 @@
// FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs`
// Consider unify the read2() in libstd, cargo and this to prevent further code duplication.

#[cfg(test)]
mod tests;

pub use self::imp::read2;
use std::io;
use std::io::{self, Write};
use std::mem::replace;
use std::process::{Child, Output};

pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
use io::Write;
use std::mem::replace;

const HEAD_LEN: usize = 160 * 1024;
const TAIL_LEN: usize = 256 * 1024;

enum ProcOutput {
Full(Vec<u8>),
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
fn extend(&mut self, data: &[u8]) {
let new_self = match *self {
ProcOutput::Full(ref mut bytes) => {
bytes.extend_from_slice(data);
let new_len = bytes.len();
if new_len <= HEAD_LEN + TAIL_LEN {
return;
}
let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice();
let head = replace(bytes, Vec::new());
let skipped = new_len - HEAD_LEN - TAIL_LEN;
ProcOutput::Abbreviated { head, skipped, tail }
}
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
*skipped += data.len();
if data.len() <= TAIL_LEN {
tail[..data.len()].copy_from_slice(data);
tail.rotate_left(data.len());
} else {
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
}
return;
}
};
*self = new_self;
}

fn into_bytes(self) -> Vec<u8> {
match self {
ProcOutput::Full(bytes) => bytes,
ProcOutput::Abbreviated { mut head, skipped, tail } => {
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
head.extend_from_slice(&tail);
head
}
}
}
}

let mut stdout = ProcOutput::Full(Vec::new());
let mut stderr = ProcOutput::Full(Vec::new());
pub fn read2_abbreviated(mut child: Child, filter_paths_from_len: &[String]) -> io::Result<Output> {
let mut stdout = ProcOutput::new();
let mut stderr = ProcOutput::new();

drop(child.stdin.take());
read2(
child.stdout.take().unwrap(),
child.stderr.take().unwrap(),
&mut |is_stdout, data, _| {
if is_stdout { &mut stdout } else { &mut stderr }.extend(data);
if is_stdout { &mut stdout } else { &mut stderr }.extend(data, filter_paths_from_len);
data.clear();
},
)?;
Expand All @@ -74,6 +27,98 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
}

const HEAD_LEN: usize = 160 * 1024;
const TAIL_LEN: usize = 256 * 1024;

// Whenever a path is filtered when counting the length of the output, we need to add some
// placeholder length to ensure a compiler emitting only filtered paths doesn't cause a OOM.
//
// 32 was chosen semi-arbitrarily: it was the highest power of two that still allowed the test
// suite to pass at the moment of implementing path filtering.
const FILTERED_PATHS_PLACEHOLDER_LEN: usize = 32;

enum ProcOutput {
Full { bytes: Vec<u8>, filtered_len: usize },
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
fn new() -> Self {
ProcOutput::Full { bytes: Vec::new(), filtered_len: 0 }
}

fn extend(&mut self, data: &[u8], filter_paths_from_len: &[String]) {
let new_self = match *self {
ProcOutput::Full { ref mut bytes, ref mut filtered_len } => {
let old_len = bytes.len();
bytes.extend_from_slice(data);
*filtered_len += data.len();

// We had problems in the past with tests failing only in some environments,
// due to the length of the base path pushing the output size over the limit.
//
// To make those failures deterministic across all environments we ignore known
// paths when calculating the string length, while still including the full
// path in the output. This could result in some output being larger than the
// threshold, but it's better than having nondeterministic failures.
//
// The compiler emitting only excluded strings is addressed by adding a
// placeholder size for each excluded segment, which will eventually reach
// the configured threshold.
for path in filter_paths_from_len {
let path_bytes = path.as_bytes();
// We start matching `path_bytes - 1` into the previously loaded data,
// to account for the fact a path_bytes might be included across multiple
// `extend` calls. Starting from `- 1` avoids double-counting paths.
let matches = (&bytes[(old_len.saturating_sub(path_bytes.len() - 1))..])
.windows(path_bytes.len())
.filter(|window| window == &path_bytes)
.count();
*filtered_len -= matches * path_bytes.len();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is going to run into some problems if we have overlapping matches (e.g., one of the patterns is /home/mark and the other is /home/mark/rust, then we'll subtract out twice).

I think src_base and build_base won't do that in practice so we're OK for now, but ideally we'd either (a) assert that it doesn't happen or (b) somehow fix it. I'm not quite sure what the best approach to fixing it would be -- trying to detect this kind of overlap seems annoying.


// We can't just remove the length of the filtered path from the output lenght,
// otherwise a compiler emitting only filtered paths would OOM compiletest. Add
// a fixed placeholder length for each path to prevent that.
*filtered_len += matches * FILTERED_PATHS_PLACEHOLDER_LEN;
}

let new_len = bytes.len();
if *filtered_len <= HEAD_LEN + TAIL_LEN {
return;
}

let mut head = replace(bytes, Vec::new());
let mut middle = head.split_off(HEAD_LEN);
let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice();
let skipped = new_len - HEAD_LEN - TAIL_LEN;
ProcOutput::Abbreviated { head, skipped, tail }
}
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
*skipped += data.len();
if data.len() <= TAIL_LEN {
tail[..data.len()].copy_from_slice(data);
tail.rotate_left(data.len());
} else {
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
}
return;
}
};
*self = new_self;
}

fn into_bytes(self) -> Vec<u8> {
match self {
ProcOutput::Full { bytes, .. } => bytes,
ProcOutput::Abbreviated { mut head, skipped, tail } => {
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
head.extend_from_slice(&tail);
head
}
}
}
}

#[cfg(not(any(unix, windows)))]
mod imp {
use std::io::{self, Read};
Expand Down
123 changes: 123 additions & 0 deletions src/tools/compiletest/src/read2/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use crate::read2::{ProcOutput, EXCLUDED_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};

#[test]
fn test_abbreviate_short_string() {
let mut out = ProcOutput::new();
out.extend(b"Hello world!", &[]);
assert_eq!(b"Hello world!", &*out.into_bytes());
}

#[test]
fn test_abbreviate_short_string_multiple_steps() {
let mut out = ProcOutput::new();

out.extend(b"Hello ", &[]);
out.extend(b"world!", &[]);

assert_eq!(b"Hello world!", &*out.into_bytes());
}

#[test]
fn test_abbreviate_long_string() {
let mut out = ProcOutput::new();

let data = vec![b'.'; HEAD_LEN + TAIL_LEN + 16];
out.extend(&data, &[]);

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 16 BYTES >>>>>>\n\n");
Copy link
Member

@Mark-Simulacrum Mark-Simulacrum May 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I think we should make the read2_abbreviated return an error or panic instead of inserting this skipped message -- that typically leads to a JSON de-serialization failure and a pretty opaque one at that, which I think isn't too helpful -- we can probably say something more useful like "captured X bytes, which is above the limit in compiletest. See abbreviated output: {output}".

(Can be out of scope for this PR).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, let's do that in a separate PR though.

expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_long_string_multiple_steps() {
let mut out = ProcOutput::new();

out.extend(&vec![b'.'; HEAD_LEN], &[]);
out.extend(&vec![b'.'; TAIL_LEN], &[]);
// Also test whether the rotation works
out.extend(&vec![b'!'; 16], &[]);
out.extend(&vec![b'?'; 16], &[]);

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 32 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 32]);
expected.extend_from_slice(&vec![b'!'; 16]);
expected.extend_from_slice(&vec![b'?'; 16]);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_exclusions_are_detected() {
let mut out = ProcOutput::new();
let exclusions = &["foo".to_string(), "quux".to_string()];

out.extend(b"Hello foo", exclusions);
// Check items from a previous extension are not double-counted.
out.extend(b"! This is a qu", exclusions);
// Check items are detected across extensions.
out.extend(b"ux.", exclusions);

match out {
ProcOutput::Full { excluded_len, .. } => assert_eq!(
excluded_len,
EXCLUDED_PLACEHOLDER_LEN * exclusions.len() as isize
- exclusions.iter().map(|i| i.len() as isize).sum::<isize>()
),
ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"),
}

assert_eq!(b"Hello foo! This is a quux.", &*out.into_bytes());
}

#[test]
fn test_abbreviate_exclusions_avoid_abbreviations() {
let mut out = ProcOutput::new();
let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];

let mut expected = vec![b'.'; HEAD_LEN - EXCLUDED_PLACEHOLDER_LEN as usize];
expected.extend_from_slice(exclusions[0].as_bytes());
expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);

out.extend(&expected, exclusions);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_exclusions_can_still_cause_abbreviations() {
let mut out = ProcOutput::new();
let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];

let mut input = vec![b'.'; HEAD_LEN];
input.extend_from_slice(&vec![b'.'; TAIL_LEN]);
input.extend_from_slice(exclusions[0].as_bytes());

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]);
expected.extend_from_slice(&vec![b'a'; 64]);

out.extend(&input, exclusions);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}
29 changes: 25 additions & 4 deletions src/tools/compiletest/src/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::hash::{Hash, Hasher};
use std::io::prelude::*;
use std::io::{self, BufReader};
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Output, Stdio};
use std::process::{Child, Command, ExitStatus, Output, Stdio};
use std::str;

use glob::glob;
Expand Down Expand Up @@ -1735,6 +1735,28 @@ impl<'test> TestCx<'test> {
dylib
}

fn read2_abbreviated(&self, child: Child) -> Output {
let mut filter_paths_from_len = Vec::new();
let mut add_path = |path: &Path| {
let path = path.display().to_string();
let windows = path.replace("\\", "\\\\");
if windows != path {
filter_paths_from_len.push(windows);
}
filter_paths_from_len.push(path);
};

// List of paths that will not be measured when determining whether the output is larger
// than the output truncation threshold.
//
// Note: avoid adding a subdirectory of an already filtered directory here, otherwise the
// same slice of text will be double counted and the truncation might not happen.
add_path(&self.config.src_base);
add_path(&self.config.build_base);

read2_abbreviated(child, &filter_paths_from_len).expect("failed to read output")
}

fn compose_and_run(
&self,
mut command: Command,
Expand Down Expand Up @@ -1769,8 +1791,7 @@ impl<'test> TestCx<'test> {
child.stdin.as_mut().unwrap().write_all(input.as_bytes()).unwrap();
}

let Output { status, stdout, stderr } =
read2_abbreviated(child).expect("failed to read output");
let Output { status, stdout, stderr } = self.read2_abbreviated(child);

let result = ProcRes {
status,
Expand Down Expand Up @@ -2959,7 +2980,7 @@ impl<'test> TestCx<'test> {
}
}

let output = cmd.spawn().and_then(read2_abbreviated).expect("failed to spawn `make`");
let output = self.read2_abbreviated(cmd.spawn().expect("failed to spawn `make`"));
if !output.status.success() {
let res = ProcRes {
status: output.status,
Expand Down