From 73497b11bb4d0926dca58c9ca1f65746dff673e8 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Fri, 29 Apr 2022 14:24:28 +0200 Subject: [PATCH 1/8] ignore known paths when deciding whether to abbreviate the output --- src/tools/compiletest/src/read2.rs | 40 ++++++++++++++++++++-------- src/tools/compiletest/src/runtest.rs | 29 +++++++++++++++++--- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index 897b9dd400793..1de3556932361 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -5,7 +5,7 @@ pub use self::imp::read2; use std::io; use std::process::{Child, Output}; -pub fn read2_abbreviated(mut child: Child) -> io::Result { +pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result { use io::Write; use std::mem::replace; @@ -13,21 +13,39 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result { const TAIL_LEN: usize = 256 * 1024; enum ProcOutput { - Full(Vec), + Full { bytes: Vec, excluded_len: usize }, Abbreviated { head: Vec, skipped: usize, tail: Box<[u8]> }, } impl ProcOutput { - fn extend(&mut self, data: &[u8]) { + fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) { let new_self = match *self { - ProcOutput::Full(ref mut bytes) => { + ProcOutput::Full { ref mut bytes, ref mut excluded_len } => { bytes.extend_from_slice(data); + + // We had problems in the past with tests failing only in some environments, + // due to the length of the base path pushing the output size over the limit. + // + // To make those failures deterministic across all environments we ignore known + // paths when calculating the string length, while still including the full + // path in the output. This could result in some output being larger than the + // threshold, but it's better than having nondeterministic failures. + for pattern in exclude_from_len { + let pattern_bytes = pattern.as_bytes(); + let matches = data + .windows(pattern_bytes.len()) + .filter(|window| window == &pattern_bytes) + .count(); + *excluded_len += matches * pattern_bytes.len(); + } + let new_len = bytes.len(); - if new_len <= HEAD_LEN + TAIL_LEN { + if new_len.saturating_sub(*excluded_len) <= HEAD_LEN + TAIL_LEN { return; } - let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice(); - let head = replace(bytes, Vec::new()); + + let mut head = replace(bytes, Vec::new()); + let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice(); let skipped = new_len - HEAD_LEN - TAIL_LEN; ProcOutput::Abbreviated { head, skipped, tail } } @@ -47,7 +65,7 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result { fn into_bytes(self) -> Vec { match self { - ProcOutput::Full(bytes) => bytes, + ProcOutput::Full { bytes, .. } => bytes, ProcOutput::Abbreviated { mut head, skipped, tail } => { write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap(); head.extend_from_slice(&tail); @@ -57,15 +75,15 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result { } } - let mut stdout = ProcOutput::Full(Vec::new()); - let mut stderr = ProcOutput::Full(Vec::new()); + let mut stdout = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }; + let mut stderr = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }; drop(child.stdin.take()); read2( child.stdout.take().unwrap(), child.stderr.take().unwrap(), &mut |is_stdout, data, _| { - if is_stdout { &mut stdout } else { &mut stderr }.extend(data); + if is_stdout { &mut stdout } else { &mut stderr }.extend(data, exclude_from_len); data.clear(); }, )?; diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index 6d94fe3ebb9cb..fe8d451da4914 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -28,7 +28,7 @@ use std::hash::{Hash, Hasher}; use std::io::prelude::*; use std::io::{self, BufReader}; use std::path::{Path, PathBuf}; -use std::process::{Command, ExitStatus, Output, Stdio}; +use std::process::{Child, Command, ExitStatus, Output, Stdio}; use std::str; use glob::glob; @@ -1735,6 +1735,28 @@ impl<'test> TestCx<'test> { dylib } + fn read2_abbreviated(&self, child: Child) -> Output { + let mut exclude_from_len = Vec::new(); + let mut add_path = |path: &Path| { + let path = path.display().to_string(); + let windows = path.replace("\\", "\\\\"); + if windows != path { + exclude_from_len.push(windows); + } + exclude_from_len.push(path); + }; + + // List of strings that will not be measured when determining whether the output is larger + // than the output truncation threshold. + // + // Note: avoid adding a subdirectory of an already excluded directory here, otherwise the + // same slice of text will be double counted and the truncation might not happen. + add_path(&self.config.src_base); + add_path(&self.config.build_base); + + read2_abbreviated(child, &exclude_from_len).expect("failed to read output") + } + fn compose_and_run( &self, mut command: Command, @@ -1769,8 +1791,7 @@ impl<'test> TestCx<'test> { child.stdin.as_mut().unwrap().write_all(input.as_bytes()).unwrap(); } - let Output { status, stdout, stderr } = - read2_abbreviated(child).expect("failed to read output"); + let Output { status, stdout, stderr } = self.read2_abbreviated(child); let result = ProcRes { status, @@ -2959,7 +2980,7 @@ impl<'test> TestCx<'test> { } } - let output = cmd.spawn().and_then(read2_abbreviated).expect("failed to spawn `make`"); + let output = self.read2_abbreviated(cmd.spawn().expect("failed to spawn `make`")); if !output.status.success() { let res = ProcRes { status: output.status, From 47d8ba9095219fe018c9dd86453d2853a59555fe Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Fri, 29 Apr 2022 17:58:43 +0200 Subject: [PATCH 2/8] handle compiler emitting only excluded strings --- src/tools/compiletest/src/read2.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index 1de3556932361..4c48cd0ce7714 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -11,9 +11,10 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R const HEAD_LEN: usize = 160 * 1024; const TAIL_LEN: usize = 256 * 1024; + const EXCLUDED_PLACEHOLDER_LEN: isize = 32; enum ProcOutput { - Full { bytes: Vec, excluded_len: usize }, + Full { bytes: Vec, excluded_len: isize }, Abbreviated { head: Vec, skipped: usize, tail: Box<[u8]> }, } @@ -30,17 +31,22 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R // paths when calculating the string length, while still including the full // path in the output. This could result in some output being larger than the // threshold, but it's better than having nondeterministic failures. + // + // The compiler emitting only excluded strings is addressed by adding a + // placeholder size for each excluded segment, which will eventually reach + // the configured threshold. for pattern in exclude_from_len { let pattern_bytes = pattern.as_bytes(); let matches = data .windows(pattern_bytes.len()) .filter(|window| window == &pattern_bytes) .count(); - *excluded_len += matches * pattern_bytes.len(); + *excluded_len += matches as isize + * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize); } let new_len = bytes.len(); - if new_len.saturating_sub(*excluded_len) <= HEAD_LEN + TAIL_LEN { + if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN { return; } From b7473be6b1f0618127887d5b8a13830ea874f3c1 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 2 May 2022 09:25:55 +0200 Subject: [PATCH 3/8] handle excluded strings across boundaries --- src/tools/compiletest/src/read2.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index 4c48cd0ce7714..f13a3d7bfee6e 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -22,6 +22,7 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) { let new_self = match *self { ProcOutput::Full { ref mut bytes, ref mut excluded_len } => { + let old_len = bytes.len(); bytes.extend_from_slice(data); // We had problems in the past with tests failing only in some environments, @@ -37,7 +38,10 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R // the configured threshold. for pattern in exclude_from_len { let pattern_bytes = pattern.as_bytes(); - let matches = data + // We start matching `pattern_bytes - 1` into the previously loaded data, + // to account for the fact a pattern might be included across multiple + // `extend` calls. Starting from `- 1` avoids double-counting patterns. + let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..]) .windows(pattern_bytes.len()) .filter(|window| window == &pattern_bytes) .count(); From e52fb6c96b96fdaba14f31ca583baebdec14366b Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 2 May 2022 09:45:49 +0200 Subject: [PATCH 4/8] extract ProcOutput out of the function to make it testable --- src/tools/compiletest/src/read2.rs | 166 +++++++++++++++-------------- 1 file changed, 84 insertions(+), 82 deletions(-) diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index f13a3d7bfee6e..19c11a1e6b8f1 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -2,91 +2,13 @@ // Consider unify the read2() in libstd, cargo and this to prevent further code duplication. pub use self::imp::read2; -use std::io; +use std::io::{self, Write}; +use std::mem::replace; use std::process::{Child, Output}; pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result { - use io::Write; - use std::mem::replace; - - const HEAD_LEN: usize = 160 * 1024; - const TAIL_LEN: usize = 256 * 1024; - const EXCLUDED_PLACEHOLDER_LEN: isize = 32; - - enum ProcOutput { - Full { bytes: Vec, excluded_len: isize }, - Abbreviated { head: Vec, skipped: usize, tail: Box<[u8]> }, - } - - impl ProcOutput { - fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) { - let new_self = match *self { - ProcOutput::Full { ref mut bytes, ref mut excluded_len } => { - let old_len = bytes.len(); - bytes.extend_from_slice(data); - - // We had problems in the past with tests failing only in some environments, - // due to the length of the base path pushing the output size over the limit. - // - // To make those failures deterministic across all environments we ignore known - // paths when calculating the string length, while still including the full - // path in the output. This could result in some output being larger than the - // threshold, but it's better than having nondeterministic failures. - // - // The compiler emitting only excluded strings is addressed by adding a - // placeholder size for each excluded segment, which will eventually reach - // the configured threshold. - for pattern in exclude_from_len { - let pattern_bytes = pattern.as_bytes(); - // We start matching `pattern_bytes - 1` into the previously loaded data, - // to account for the fact a pattern might be included across multiple - // `extend` calls. Starting from `- 1` avoids double-counting patterns. - let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..]) - .windows(pattern_bytes.len()) - .filter(|window| window == &pattern_bytes) - .count(); - *excluded_len += matches as isize - * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize); - } - - let new_len = bytes.len(); - if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN { - return; - } - - let mut head = replace(bytes, Vec::new()); - let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice(); - let skipped = new_len - HEAD_LEN - TAIL_LEN; - ProcOutput::Abbreviated { head, skipped, tail } - } - ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => { - *skipped += data.len(); - if data.len() <= TAIL_LEN { - tail[..data.len()].copy_from_slice(data); - tail.rotate_left(data.len()); - } else { - tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]); - } - return; - } - }; - *self = new_self; - } - - fn into_bytes(self) -> Vec { - match self { - ProcOutput::Full { bytes, .. } => bytes, - ProcOutput::Abbreviated { mut head, skipped, tail } => { - write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap(); - head.extend_from_slice(&tail); - head - } - } - } - } - - let mut stdout = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }; - let mut stderr = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }; + let mut stdout = ProcOutput::new(); + let mut stderr = ProcOutput::new(); drop(child.stdin.take()); read2( @@ -102,6 +24,86 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() }) } +const HEAD_LEN: usize = 160 * 1024; +const TAIL_LEN: usize = 256 * 1024; +const EXCLUDED_PLACEHOLDER_LEN: isize = 32; + +enum ProcOutput { + Full { bytes: Vec, excluded_len: isize }, + Abbreviated { head: Vec, skipped: usize, tail: Box<[u8]> }, +} + +impl ProcOutput { + fn new() -> Self { + ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 } + } + + fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) { + let new_self = match *self { + ProcOutput::Full { ref mut bytes, ref mut excluded_len } => { + let old_len = bytes.len(); + bytes.extend_from_slice(data); + + // We had problems in the past with tests failing only in some environments, + // due to the length of the base path pushing the output size over the limit. + // + // To make those failures deterministic across all environments we ignore known + // paths when calculating the string length, while still including the full + // path in the output. This could result in some output being larger than the + // threshold, but it's better than having nondeterministic failures. + // + // The compiler emitting only excluded strings is addressed by adding a + // placeholder size for each excluded segment, which will eventually reach + // the configured threshold. + for pattern in exclude_from_len { + let pattern_bytes = pattern.as_bytes(); + // We start matching `pattern_bytes - 1` into the previously loaded data, + // to account for the fact a pattern might be included across multiple + // `extend` calls. Starting from `- 1` avoids double-counting patterns. + let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..]) + .windows(pattern_bytes.len()) + .filter(|window| window == &pattern_bytes) + .count(); + *excluded_len += matches as isize + * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize); + } + + let new_len = bytes.len(); + if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN { + return; + } + + let mut head = replace(bytes, Vec::new()); + let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice(); + let skipped = new_len - HEAD_LEN - TAIL_LEN; + ProcOutput::Abbreviated { head, skipped, tail } + } + ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => { + *skipped += data.len(); + if data.len() <= TAIL_LEN { + tail[..data.len()].copy_from_slice(data); + tail.rotate_left(data.len()); + } else { + tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]); + } + return; + } + }; + *self = new_self; + } + + fn into_bytes(self) -> Vec { + match self { + ProcOutput::Full { bytes, .. } => bytes, + ProcOutput::Abbreviated { mut head, skipped, tail } => { + write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap(); + head.extend_from_slice(&tail); + head + } + } + } +} + #[cfg(not(any(unix, windows)))] mod imp { use std::io::{self, Read}; From da139693f6375415567725b949c26ddb0367b73c Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 2 May 2022 09:51:38 +0200 Subject: [PATCH 5/8] fix existing bug when the first write was always included in full --- src/tools/compiletest/src/read2.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index 19c11a1e6b8f1..bd5f3593b6554 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -74,7 +74,8 @@ impl ProcOutput { } let mut head = replace(bytes, Vec::new()); - let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice(); + let mut middle = head.split_off(HEAD_LEN); + let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice(); let skipped = new_len - HEAD_LEN - TAIL_LEN; ProcOutput::Abbreviated { head, skipped, tail } } From 95e1bd0df83e7595dfc0ef9d4415b7e601d8fa4f Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 2 May 2022 10:09:08 +0200 Subject: [PATCH 6/8] add tests --- src/tools/compiletest/src/read2.rs | 3 + src/tools/compiletest/src/read2/tests.rs | 123 +++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 src/tools/compiletest/src/read2/tests.rs diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index bd5f3593b6554..313508a796678 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -1,6 +1,9 @@ // FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs` // Consider unify the read2() in libstd, cargo and this to prevent further code duplication. +#[cfg(test)] +mod tests; + pub use self::imp::read2; use std::io::{self, Write}; use std::mem::replace; diff --git a/src/tools/compiletest/src/read2/tests.rs b/src/tools/compiletest/src/read2/tests.rs new file mode 100644 index 0000000000000..d66b501fc2b9f --- /dev/null +++ b/src/tools/compiletest/src/read2/tests.rs @@ -0,0 +1,123 @@ +use crate::read2::{ProcOutput, EXCLUDED_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN}; + +#[test] +fn test_abbreviate_short_string() { + let mut out = ProcOutput::new(); + out.extend(b"Hello world!", &[]); + assert_eq!(b"Hello world!", &*out.into_bytes()); +} + +#[test] +fn test_abbreviate_short_string_multiple_steps() { + let mut out = ProcOutput::new(); + + out.extend(b"Hello ", &[]); + out.extend(b"world!", &[]); + + assert_eq!(b"Hello world!", &*out.into_bytes()); +} + +#[test] +fn test_abbreviate_long_string() { + let mut out = ProcOutput::new(); + + let data = vec![b'.'; HEAD_LEN + TAIL_LEN + 16]; + out.extend(&data, &[]); + + let mut expected = vec![b'.'; HEAD_LEN]; + expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 16 BYTES >>>>>>\n\n"); + expected.extend_from_slice(&vec![b'.'; TAIL_LEN]); + + // We first check the length to avoid endless terminal output if the length differs, since + // `out` is hundreds of KBs in size. + let out = out.into_bytes(); + assert_eq!(expected.len(), out.len()); + assert_eq!(expected, out); +} + +#[test] +fn test_abbreviate_long_string_multiple_steps() { + let mut out = ProcOutput::new(); + + out.extend(&vec![b'.'; HEAD_LEN], &[]); + out.extend(&vec![b'.'; TAIL_LEN], &[]); + // Also test whether the rotation works + out.extend(&vec![b'!'; 16], &[]); + out.extend(&vec![b'?'; 16], &[]); + + let mut expected = vec![b'.'; HEAD_LEN]; + expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 32 BYTES >>>>>>\n\n"); + expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 32]); + expected.extend_from_slice(&vec![b'!'; 16]); + expected.extend_from_slice(&vec![b'?'; 16]); + + // We first check the length to avoid endless terminal output if the length differs, since + // `out` is hundreds of KBs in size. + let out = out.into_bytes(); + assert_eq!(expected.len(), out.len()); + assert_eq!(expected, out); +} + +#[test] +fn test_abbreviate_exclusions_are_detected() { + let mut out = ProcOutput::new(); + let exclusions = &["foo".to_string(), "quux".to_string()]; + + out.extend(b"Hello foo", exclusions); + // Check items from a previous extension are not double-counted. + out.extend(b"! This is a qu", exclusions); + // Check items are detected across extensions. + out.extend(b"ux.", exclusions); + + match out { + ProcOutput::Full { excluded_len, .. } => assert_eq!( + excluded_len, + EXCLUDED_PLACEHOLDER_LEN * exclusions.len() as isize + - exclusions.iter().map(|i| i.len() as isize).sum::() + ), + ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"), + } + + assert_eq!(b"Hello foo! This is a quux.", &*out.into_bytes()); +} + +#[test] +fn test_abbreviate_exclusions_avoid_abbreviations() { + let mut out = ProcOutput::new(); + let exclusions = &[std::iter::repeat('a').take(64).collect::()]; + + let mut expected = vec![b'.'; HEAD_LEN - EXCLUDED_PLACEHOLDER_LEN as usize]; + expected.extend_from_slice(exclusions[0].as_bytes()); + expected.extend_from_slice(&vec![b'.'; TAIL_LEN]); + + out.extend(&expected, exclusions); + + // We first check the length to avoid endless terminal output if the length differs, since + // `out` is hundreds of KBs in size. + let out = out.into_bytes(); + assert_eq!(expected.len(), out.len()); + assert_eq!(expected, out); +} + +#[test] +fn test_abbreviate_exclusions_can_still_cause_abbreviations() { + let mut out = ProcOutput::new(); + let exclusions = &[std::iter::repeat('a').take(64).collect::()]; + + let mut input = vec![b'.'; HEAD_LEN]; + input.extend_from_slice(&vec![b'.'; TAIL_LEN]); + input.extend_from_slice(exclusions[0].as_bytes()); + + let mut expected = vec![b'.'; HEAD_LEN]; + expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n"); + expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]); + expected.extend_from_slice(&vec![b'a'; 64]); + + out.extend(&input, exclusions); + + // We first check the length to avoid endless terminal output if the length differs, since + // `out` is hundreds of KBs in size. + let out = out.into_bytes(); + assert_eq!(expected.len(), out.len()); + assert_eq!(expected, out); +} From e257f38160b361f738e9c42c3b76665fbbf13aa4 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Sat, 4 Jun 2022 19:24:41 +0200 Subject: [PATCH 7/8] address review comments --- src/tools/compiletest/src/read2.rs | 47 +++++++++++++++++----------- src/tools/compiletest/src/runtest.rs | 12 +++---- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs index 313508a796678..7640e65174428 100644 --- a/src/tools/compiletest/src/read2.rs +++ b/src/tools/compiletest/src/read2.rs @@ -9,7 +9,7 @@ use std::io::{self, Write}; use std::mem::replace; use std::process::{Child, Output}; -pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result { +pub fn read2_abbreviated(mut child: Child, filter_paths_from_len: &[String]) -> io::Result { let mut stdout = ProcOutput::new(); let mut stderr = ProcOutput::new(); @@ -18,7 +18,7 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R child.stdout.take().unwrap(), child.stderr.take().unwrap(), &mut |is_stdout, data, _| { - if is_stdout { &mut stdout } else { &mut stderr }.extend(data, exclude_from_len); + if is_stdout { &mut stdout } else { &mut stderr }.extend(data, filter_paths_from_len); data.clear(); }, )?; @@ -29,23 +29,30 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R const HEAD_LEN: usize = 160 * 1024; const TAIL_LEN: usize = 256 * 1024; -const EXCLUDED_PLACEHOLDER_LEN: isize = 32; + +// Whenever a path is filtered when counting the length of the output, we need to add some +// placeholder length to ensure a compiler emitting only filtered paths doesn't cause a OOM. +// +// 32 was chosen semi-arbitrarily: it was the highest power of two that still allowed the test +// suite to pass at the moment of implementing path filtering. +const FILTERED_PATHS_PLACEHOLDER_LEN: usize = 32; enum ProcOutput { - Full { bytes: Vec, excluded_len: isize }, + Full { bytes: Vec, filtered_len: usize }, Abbreviated { head: Vec, skipped: usize, tail: Box<[u8]> }, } impl ProcOutput { fn new() -> Self { - ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 } + ProcOutput::Full { bytes: Vec::new(), filtered_len: 0 } } - fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) { + fn extend(&mut self, data: &[u8], filter_paths_from_len: &[String]) { let new_self = match *self { - ProcOutput::Full { ref mut bytes, ref mut excluded_len } => { + ProcOutput::Full { ref mut bytes, ref mut filtered_len } => { let old_len = bytes.len(); bytes.extend_from_slice(data); + *filtered_len += data.len(); // We had problems in the past with tests failing only in some environments, // due to the length of the base path pushing the output size over the limit. @@ -58,21 +65,25 @@ impl ProcOutput { // The compiler emitting only excluded strings is addressed by adding a // placeholder size for each excluded segment, which will eventually reach // the configured threshold. - for pattern in exclude_from_len { - let pattern_bytes = pattern.as_bytes(); - // We start matching `pattern_bytes - 1` into the previously loaded data, - // to account for the fact a pattern might be included across multiple - // `extend` calls. Starting from `- 1` avoids double-counting patterns. - let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..]) - .windows(pattern_bytes.len()) - .filter(|window| window == &pattern_bytes) + for path in filter_paths_from_len { + let path_bytes = path.as_bytes(); + // We start matching `path_bytes - 1` into the previously loaded data, + // to account for the fact a path_bytes might be included across multiple + // `extend` calls. Starting from `- 1` avoids double-counting paths. + let matches = (&bytes[(old_len.saturating_sub(path_bytes.len() - 1))..]) + .windows(path_bytes.len()) + .filter(|window| window == &path_bytes) .count(); - *excluded_len += matches as isize - * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize); + *filtered_len -= matches * path_bytes.len(); + + // We can't just remove the length of the filtered path from the output lenght, + // otherwise a compiler emitting only filtered paths would OOM compiletest. Add + // a fixed placeholder length for each path to prevent that. + *filtered_len += matches * FILTERED_PATHS_PLACEHOLDER_LEN; } let new_len = bytes.len(); - if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN { + if *filtered_len <= HEAD_LEN + TAIL_LEN { return; } diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index fe8d451da4914..1d39d24524803 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -1736,25 +1736,25 @@ impl<'test> TestCx<'test> { } fn read2_abbreviated(&self, child: Child) -> Output { - let mut exclude_from_len = Vec::new(); + let mut filter_paths_from_len = Vec::new(); let mut add_path = |path: &Path| { let path = path.display().to_string(); let windows = path.replace("\\", "\\\\"); if windows != path { - exclude_from_len.push(windows); + filter_paths_from_len.push(windows); } - exclude_from_len.push(path); + filter_paths_from_len.push(path); }; - // List of strings that will not be measured when determining whether the output is larger + // List of paths that will not be measured when determining whether the output is larger // than the output truncation threshold. // - // Note: avoid adding a subdirectory of an already excluded directory here, otherwise the + // Note: avoid adding a subdirectory of an already filtered directory here, otherwise the // same slice of text will be double counted and the truncation might not happen. add_path(&self.config.src_base); add_path(&self.config.build_base); - read2_abbreviated(child, &exclude_from_len).expect("failed to read output") + read2_abbreviated(child, &filter_paths_from_len).expect("failed to read output") } fn compose_and_run( From 8ea95988df4d493ec3f556ab0de6b0b812ed27be Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Sat, 4 Jun 2022 19:38:51 +0200 Subject: [PATCH 8/8] update tests --- src/tools/compiletest/src/read2/tests.rs | 40 ++++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/tools/compiletest/src/read2/tests.rs b/src/tools/compiletest/src/read2/tests.rs index d66b501fc2b9f..1ca682a46aaaf 100644 --- a/src/tools/compiletest/src/read2/tests.rs +++ b/src/tools/compiletest/src/read2/tests.rs @@ -1,4 +1,4 @@ -use crate::read2::{ProcOutput, EXCLUDED_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN}; +use crate::read2::{ProcOutput, FILTERED_PATHS_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN}; #[test] fn test_abbreviate_short_string() { @@ -59,21 +59,21 @@ fn test_abbreviate_long_string_multiple_steps() { } #[test] -fn test_abbreviate_exclusions_are_detected() { +fn test_abbreviate_filterss_are_detected() { let mut out = ProcOutput::new(); - let exclusions = &["foo".to_string(), "quux".to_string()]; + let filters = &["foo".to_string(), "quux".to_string()]; - out.extend(b"Hello foo", exclusions); + out.extend(b"Hello foo", filters); // Check items from a previous extension are not double-counted. - out.extend(b"! This is a qu", exclusions); + out.extend(b"! This is a qu", filters); // Check items are detected across extensions. - out.extend(b"ux.", exclusions); + out.extend(b"ux.", filters); - match out { - ProcOutput::Full { excluded_len, .. } => assert_eq!( - excluded_len, - EXCLUDED_PLACEHOLDER_LEN * exclusions.len() as isize - - exclusions.iter().map(|i| i.len() as isize).sum::() + match &out { + ProcOutput::Full { bytes, filtered_len } => assert_eq!( + *filtered_len, + bytes.len() + FILTERED_PATHS_PLACEHOLDER_LEN * filters.len() + - filters.iter().map(|i| i.len()).sum::() ), ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"), } @@ -82,15 +82,15 @@ fn test_abbreviate_exclusions_are_detected() { } #[test] -fn test_abbreviate_exclusions_avoid_abbreviations() { +fn test_abbreviate_filters_avoid_abbreviations() { let mut out = ProcOutput::new(); - let exclusions = &[std::iter::repeat('a').take(64).collect::()]; + let filters = &[std::iter::repeat('a').take(64).collect::()]; - let mut expected = vec![b'.'; HEAD_LEN - EXCLUDED_PLACEHOLDER_LEN as usize]; - expected.extend_from_slice(exclusions[0].as_bytes()); + let mut expected = vec![b'.'; HEAD_LEN - FILTERED_PATHS_PLACEHOLDER_LEN as usize]; + expected.extend_from_slice(filters[0].as_bytes()); expected.extend_from_slice(&vec![b'.'; TAIL_LEN]); - out.extend(&expected, exclusions); + out.extend(&expected, filters); // We first check the length to avoid endless terminal output if the length differs, since // `out` is hundreds of KBs in size. @@ -100,20 +100,20 @@ fn test_abbreviate_exclusions_avoid_abbreviations() { } #[test] -fn test_abbreviate_exclusions_can_still_cause_abbreviations() { +fn test_abbreviate_filters_can_still_cause_abbreviations() { let mut out = ProcOutput::new(); - let exclusions = &[std::iter::repeat('a').take(64).collect::()]; + let filters = &[std::iter::repeat('a').take(64).collect::()]; let mut input = vec![b'.'; HEAD_LEN]; input.extend_from_slice(&vec![b'.'; TAIL_LEN]); - input.extend_from_slice(exclusions[0].as_bytes()); + input.extend_from_slice(filters[0].as_bytes()); let mut expected = vec![b'.'; HEAD_LEN]; expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n"); expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]); expected.extend_from_slice(&vec![b'a'; 64]); - out.extend(&input, exclusions); + out.extend(&input, filters); // We first check the length to avoid endless terminal output if the length differs, since // `out` is hundreds of KBs in size.