From 73497b11bb4d0926dca58c9ca1f65746dff673e8 Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Fri, 29 Apr 2022 14:24:28 +0200
Subject: [PATCH 1/8] ignore known paths when deciding whether to abbreviate
 the output

---
 src/tools/compiletest/src/read2.rs   | 40 ++++++++++++++++++++--------
 src/tools/compiletest/src/runtest.rs | 29 +++++++++++++++++---
 2 files changed, 54 insertions(+), 15 deletions(-)
diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index 897b9dd400793..1de3556932361 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -5,7 +5,7 @@ pub use self::imp::read2;
 use std::io;
 use std::process::{Child, Output};
 
-pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
+pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result<Output> {
     use io::Write;
     use std::mem::replace;
 
@@ -13,21 +13,39 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
     const TAIL_LEN: usize = 256 * 1024;
 
     enum ProcOutput {
-        Full(Vec<u8>),
+        Full { bytes: Vec<u8>, excluded_len: usize },
         Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
     }
 
     impl ProcOutput {
-        fn extend(&mut self, data: &[u8]) {
+        fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) {
             let new_self = match *self {
-                ProcOutput::Full(ref mut bytes) => {
+                ProcOutput::Full { ref mut bytes, ref mut excluded_len } => {
                     bytes.extend_from_slice(data);
+
+                    // We had problems in the past with tests failing only in some environments,
+                    // due to the length of the base path pushing the output size over the limit.
+                    //
+                    // To make those failures deterministic across all environments we ignore known
+                    // paths when calculating the string length, while still including the full
+                    // path in the output. This could result in some output being larger than the
+                    // threshold, but it's better than having nondeterministic failures.
+                    for pattern in exclude_from_len {
+                        let pattern_bytes = pattern.as_bytes();
+                        let matches = data
+                            .windows(pattern_bytes.len())
+                            .filter(|window| window == &pattern_bytes)
+                            .count();
+                        *excluded_len += matches * pattern_bytes.len();
+                    }
+
                     let new_len = bytes.len();
-                    if new_len <= HEAD_LEN + TAIL_LEN {
+                    if new_len.saturating_sub(*excluded_len) <= HEAD_LEN + TAIL_LEN {
                         return;
                     }
-                    let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice();
-                    let head = replace(bytes, Vec::new());
+
+                    let mut head = replace(bytes, Vec::new());
+                    let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice();
                     let skipped = new_len - HEAD_LEN - TAIL_LEN;
                     ProcOutput::Abbreviated { head, skipped, tail }
                 }
@@ -47,7 +65,7 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
 
         fn into_bytes(self) -> Vec<u8> {
             match self {
-                ProcOutput::Full(bytes) => bytes,
+                ProcOutput::Full { bytes, .. } => bytes,
                 ProcOutput::Abbreviated { mut head, skipped, tail } => {
                     write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
                     head.extend_from_slice(&tail);
@@ -57,15 +75,15 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
         }
     }
 
-    let mut stdout = ProcOutput::Full(Vec::new());
-    let mut stderr = ProcOutput::Full(Vec::new());
+    let mut stdout = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 };
+    let mut stderr = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 };
 
     drop(child.stdin.take());
     read2(
         child.stdout.take().unwrap(),
         child.stderr.take().unwrap(),
         &mut |is_stdout, data, _| {
-            if is_stdout { &mut stdout } else { &mut stderr }.extend(data);
+            if is_stdout { &mut stdout } else { &mut stderr }.extend(data, exclude_from_len);
             data.clear();
         },
     )?;
diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs
index 6d94fe3ebb9cb..fe8d451da4914 100644
--- a/src/tools/compiletest/src/runtest.rs
+++ b/src/tools/compiletest/src/runtest.rs
@@ -28,7 +28,7 @@ use std::hash::{Hash, Hasher};
 use std::io::prelude::*;
 use std::io::{self, BufReader};
 use std::path::{Path, PathBuf};
-use std::process::{Command, ExitStatus, Output, Stdio};
+use std::process::{Child, Command, ExitStatus, Output, Stdio};
 use std::str;
 
 use glob::glob;
@@ -1735,6 +1735,28 @@ impl<'test> TestCx<'test> {
         dylib
     }
 
+    fn read2_abbreviated(&self, child: Child) -> Output {
+        let mut exclude_from_len = Vec::new();
+        let mut add_path = |path: &Path| {
+            let path = path.display().to_string();
+            let windows = path.replace("\\", "\\\\");
+            if windows != path {
+                exclude_from_len.push(windows);
+            }
+            exclude_from_len.push(path);
+        };
+
+        // List of strings that will not be measured when determining whether the output is larger
+        // than the output truncation threshold.
+        //
+        // Note: avoid adding a subdirectory of an already excluded directory here, otherwise the
+        // same slice of text will be double counted and the truncation might not happen.
+        add_path(&self.config.src_base);
+        add_path(&self.config.build_base);
+
+        read2_abbreviated(child, &exclude_from_len).expect("failed to read output")
+    }
+
     fn compose_and_run(
         &self,
         mut command: Command,
@@ -1769,8 +1791,7 @@ impl<'test> TestCx<'test> {
             child.stdin.as_mut().unwrap().write_all(input.as_bytes()).unwrap();
         }
 
-        let Output { status, stdout, stderr } =
-            read2_abbreviated(child).expect("failed to read output");
+        let Output { status, stdout, stderr } = self.read2_abbreviated(child);
 
         let result = ProcRes {
             status,
@@ -2959,7 +2980,7 @@ impl<'test> TestCx<'test> {
             }
         }
 
-        let output = cmd.spawn().and_then(read2_abbreviated).expect("failed to spawn `make`");
+        let output = self.read2_abbreviated(cmd.spawn().expect("failed to spawn `make`"));
         if !output.status.success() {
             let res = ProcRes {
                 status: output.status,

From 47d8ba9095219fe018c9dd86453d2853a59555fe Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Fri, 29 Apr 2022 17:58:43 +0200
Subject: [PATCH 2/8] handle compiler emitting only excluded strings

---
 src/tools/compiletest/src/read2.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index 1de3556932361..4c48cd0ce7714 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -11,9 +11,10 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
 
     const HEAD_LEN: usize = 160 * 1024;
     const TAIL_LEN: usize = 256 * 1024;
+    const EXCLUDED_PLACEHOLDER_LEN: isize = 32;
 
     enum ProcOutput {
-        Full { bytes: Vec<u8>, excluded_len: usize },
+        Full { bytes: Vec<u8>, excluded_len: isize },
         Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
     }
 
@@ -30,17 +31,22 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
                     // paths when calculating the string length, while still including the full
                     // path in the output. This could result in some output being larger than the
                     // threshold, but it's better than having nondeterministic failures.
+                    //
+                    // The compiler emitting only excluded strings is addressed by adding a
+                    // placeholder size for each excluded segment, which will eventually reach
+                    // the configured threshold.
                     for pattern in exclude_from_len {
                         let pattern_bytes = pattern.as_bytes();
                         let matches = data
                             .windows(pattern_bytes.len())
                             .filter(|window| window == &pattern_bytes)
                             .count();
-                        *excluded_len += matches * pattern_bytes.len();
+                        *excluded_len += matches as isize
+                            * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize);
                     }
 
                     let new_len = bytes.len();
-                    if new_len.saturating_sub(*excluded_len) <= HEAD_LEN + TAIL_LEN {
+                    if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN {
                         return;
                     }
 

From b7473be6b1f0618127887d5b8a13830ea874f3c1 Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Mon, 2 May 2022 09:25:55 +0200
Subject: [PATCH 3/8] handle excluded strings across boundaries

---
 src/tools/compiletest/src/read2.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index 4c48cd0ce7714..f13a3d7bfee6e 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -22,6 +22,7 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
         fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) {
             let new_self = match *self {
                 ProcOutput::Full { ref mut bytes, ref mut excluded_len } => {
+                    let old_len = bytes.len();
                     bytes.extend_from_slice(data);
 
                     // We had problems in the past with tests failing only in some environments,
@@ -37,7 +38,10 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
                     // the configured threshold.
                     for pattern in exclude_from_len {
                         let pattern_bytes = pattern.as_bytes();
-                        let matches = data
+                        // We start matching `pattern_bytes - 1` into the previously loaded data,
+                        // to account for the fact a pattern might be included across multiple
+                        // `extend` calls. Starting from `- 1` avoids double-counting patterns.
+                        let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..])
                             .windows(pattern_bytes.len())
                             .filter(|window| window == &pattern_bytes)
                             .count();

From e52fb6c96b96fdaba14f31ca583baebdec14366b Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Mon, 2 May 2022 09:45:49 +0200
Subject: [PATCH 4/8] extract ProcOutput out of the function to make it
 testable

---
 src/tools/compiletest/src/read2.rs | 166 +++++++++++++++--------------
 1 file changed, 84 insertions(+), 82 deletions(-)

diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index f13a3d7bfee6e..19c11a1e6b8f1 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -2,91 +2,13 @@
 // Consider unify the read2() in libstd, cargo and this to prevent further code duplication.
 
 pub use self::imp::read2;
-use std::io;
+use std::io::{self, Write};
+use std::mem::replace;
 use std::process::{Child, Output};
 
 pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result<Output> {
-    use io::Write;
-    use std::mem::replace;
-
-    const HEAD_LEN: usize = 160 * 1024;
-    const TAIL_LEN: usize = 256 * 1024;
-    const EXCLUDED_PLACEHOLDER_LEN: isize = 32;
-
-    enum ProcOutput {
-        Full { bytes: Vec<u8>, excluded_len: isize },
-        Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
-    }
-
-    impl ProcOutput {
-        fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) {
-            let new_self = match *self {
-                ProcOutput::Full { ref mut bytes, ref mut excluded_len } => {
-                    let old_len = bytes.len();
-                    bytes.extend_from_slice(data);
-
-                    // We had problems in the past with tests failing only in some environments,
-                    // due to the length of the base path pushing the output size over the limit.
-                    //
-                    // To make those failures deterministic across all environments we ignore known
-                    // paths when calculating the string length, while still including the full
-                    // path in the output. This could result in some output being larger than the
-                    // threshold, but it's better than having nondeterministic failures.
-                    //
-                    // The compiler emitting only excluded strings is addressed by adding a
-                    // placeholder size for each excluded segment, which will eventually reach
-                    // the configured threshold.
-                    for pattern in exclude_from_len {
-                        let pattern_bytes = pattern.as_bytes();
-                        // We start matching `pattern_bytes - 1` into the previously loaded data,
-                        // to account for the fact a pattern might be included across multiple
-                        // `extend` calls. Starting from `- 1` avoids double-counting patterns.
-                        let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..])
-                            .windows(pattern_bytes.len())
-                            .filter(|window| window == &pattern_bytes)
-                            .count();
-                        *excluded_len += matches as isize
-                            * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize);
-                    }
-
-                    let new_len = bytes.len();
-                    if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN {
-                        return;
-                    }
-
-                    let mut head = replace(bytes, Vec::new());
-                    let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice();
-                    let skipped = new_len - HEAD_LEN - TAIL_LEN;
-                    ProcOutput::Abbreviated { head, skipped, tail }
-                }
-                ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
-                    *skipped += data.len();
-                    if data.len() <= TAIL_LEN {
-                        tail[..data.len()].copy_from_slice(data);
-                        tail.rotate_left(data.len());
-                    } else {
-                        tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
-                    }
-                    return;
-                }
-            };
-            *self = new_self;
-        }
-
-        fn into_bytes(self) -> Vec<u8> {
-            match self {
-                ProcOutput::Full { bytes, .. } => bytes,
-                ProcOutput::Abbreviated { mut head, skipped, tail } => {
-                    write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
-                    head.extend_from_slice(&tail);
-                    head
-                }
-            }
-        }
-    }
-
-    let mut stdout = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 };
-    let mut stderr = ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 };
+    let mut stdout = ProcOutput::new();
+    let mut stderr = ProcOutput::new();
 
     drop(child.stdin.take());
     read2(
@@ -102,6 +24,86 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
     Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
 }
 
+const HEAD_LEN: usize = 160 * 1024;
+const TAIL_LEN: usize = 256 * 1024;
+const EXCLUDED_PLACEHOLDER_LEN: isize = 32;
+
+enum ProcOutput {
+    Full { bytes: Vec<u8>, excluded_len: isize },
+    Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
+}
+
+impl ProcOutput {
+    fn new() -> Self {
+        ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }
+    }
+
+    fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) {
+        let new_self = match *self {
+            ProcOutput::Full { ref mut bytes, ref mut excluded_len } => {
+                let old_len = bytes.len();
+                bytes.extend_from_slice(data);
+
+                // We had problems in the past with tests failing only in some environments,
+                // due to the length of the base path pushing the output size over the limit.
+                //
+                // To make those failures deterministic across all environments we ignore known
+                // paths when calculating the string length, while still including the full
+                // path in the output. This could result in some output being larger than the
+                // threshold, but it's better than having nondeterministic failures.
+                //
+                // The compiler emitting only excluded strings is addressed by adding a
+                // placeholder size for each excluded segment, which will eventually reach
+                // the configured threshold.
+                for pattern in exclude_from_len {
+                    let pattern_bytes = pattern.as_bytes();
+                    // We start matching `pattern_bytes - 1` into the previously loaded data,
+                    // to account for the fact a pattern might be included across multiple
+                    // `extend` calls. Starting from `- 1` avoids double-counting patterns.
+                    let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..])
+                        .windows(pattern_bytes.len())
+                        .filter(|window| window == &pattern_bytes)
+                        .count();
+                    *excluded_len += matches as isize
+                        * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize);
+                }
+
+                let new_len = bytes.len();
+                if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN {
+                    return;
+                }
+
+                let mut head = replace(bytes, Vec::new());
+                let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice();
+                let skipped = new_len - HEAD_LEN - TAIL_LEN;
+                ProcOutput::Abbreviated { head, skipped, tail }
+            }
+            ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
+                *skipped += data.len();
+                if data.len() <= TAIL_LEN {
+                    tail[..data.len()].copy_from_slice(data);
+                    tail.rotate_left(data.len());
+                } else {
+                    tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
+                }
+                return;
+            }
+        };
+        *self = new_self;
+    }
+
+    fn into_bytes(self) -> Vec<u8> {
+        match self {
+            ProcOutput::Full { bytes, .. } => bytes,
+            ProcOutput::Abbreviated { mut head, skipped, tail } => {
+                write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
+                head.extend_from_slice(&tail);
+                head
+            }
+        }
+    }
+}
+
 #[cfg(not(any(unix, windows)))]
 mod imp {
     use std::io::{self, Read};

From da139693f6375415567725b949c26ddb0367b73c Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Mon, 2 May 2022 09:51:38 +0200
Subject: [PATCH 5/8] fix existing bug when the first write was always included
 in full

---
 src/tools/compiletest/src/read2.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index 19c11a1e6b8f1..bd5f3593b6554 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -74,7 +74,8 @@ impl ProcOutput {
                 }
 
                 let mut head = replace(bytes, Vec::new());
-                let tail = head.split_off(new_len - TAIL_LEN).into_boxed_slice();
+                let mut middle = head.split_off(HEAD_LEN);
+                let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice();
                 let skipped = new_len - HEAD_LEN - TAIL_LEN;
                 ProcOutput::Abbreviated { head, skipped, tail }
             }

From 95e1bd0df83e7595dfc0ef9d4415b7e601d8fa4f Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Mon, 2 May 2022 10:09:08 +0200
Subject: [PATCH 6/8] add tests

---
 src/tools/compiletest/src/read2.rs       |   3 +
 src/tools/compiletest/src/read2/tests.rs | 123 +++++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 src/tools/compiletest/src/read2/tests.rs

diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index bd5f3593b6554..313508a796678 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -1,6 +1,9 @@
 // FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs`
 // Consider unify the read2() in libstd, cargo and this to prevent further code duplication.
 
+#[cfg(test)]
+mod tests;
+
 pub use self::imp::read2;
 use std::io::{self, Write};
 use std::mem::replace;
diff --git a/src/tools/compiletest/src/read2/tests.rs b/src/tools/compiletest/src/read2/tests.rs
new file mode 100644
index 0000000000000..d66b501fc2b9f
--- /dev/null
+++ b/src/tools/compiletest/src/read2/tests.rs
@@ -0,0 +1,123 @@
+use crate::read2::{ProcOutput, EXCLUDED_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};
+
+#[test]
+fn test_abbreviate_short_string() {
+    let mut out = ProcOutput::new();
+    out.extend(b"Hello world!", &[]);
+    assert_eq!(b"Hello world!", &*out.into_bytes());
+}
+
+#[test]
+fn test_abbreviate_short_string_multiple_steps() {
+    let mut out = ProcOutput::new();
+
+    out.extend(b"Hello ", &[]);
+    out.extend(b"world!", &[]);
+
+    assert_eq!(b"Hello world!", &*out.into_bytes());
+}
+
+#[test]
+fn test_abbreviate_long_string() {
+    let mut out = ProcOutput::new();
+
+    let data = vec![b'.'; HEAD_LEN + TAIL_LEN + 16];
+    out.extend(&data, &[]);
+
+    let mut expected = vec![b'.'; HEAD_LEN];
+    expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 16 BYTES >>>>>>\n\n");
+    expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);
+
+    // We first check the length to avoid endless terminal output if the length differs, since
+    // `out` is hundreds of KBs in size.
+    let out = out.into_bytes();
+    assert_eq!(expected.len(), out.len());
+    assert_eq!(expected, out);
+}
+
+#[test]
+fn test_abbreviate_long_string_multiple_steps() {
+    let mut out = ProcOutput::new();
+
+    out.extend(&vec![b'.'; HEAD_LEN], &[]);
+    out.extend(&vec![b'.'; TAIL_LEN], &[]);
+    // Also test whether the rotation works
+    out.extend(&vec![b'!'; 16], &[]);
+    out.extend(&vec![b'?'; 16], &[]);
+
+    let mut expected = vec![b'.'; HEAD_LEN];
+    expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 32 BYTES >>>>>>\n\n");
+    expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 32]);
+    expected.extend_from_slice(&vec![b'!'; 16]);
+    expected.extend_from_slice(&vec![b'?'; 16]);
+
+    // We first check the length to avoid endless terminal output if the length differs, since
+    // `out` is hundreds of KBs in size.
+    let out = out.into_bytes();
+    assert_eq!(expected.len(), out.len());
+    assert_eq!(expected, out);
+}
+
+#[test]
+fn test_abbreviate_exclusions_are_detected() {
+    let mut out = ProcOutput::new();
+    let exclusions = &["foo".to_string(), "quux".to_string()];
+
+    out.extend(b"Hello foo", exclusions);
+    // Check items from a previous extension are not double-counted.
+    out.extend(b"! This is a qu", exclusions);
+    // Check items are detected across extensions.
+    out.extend(b"ux.", exclusions);
+
+    match out {
+        ProcOutput::Full { excluded_len, .. } => assert_eq!(
+            excluded_len,
+            EXCLUDED_PLACEHOLDER_LEN * exclusions.len() as isize
+                - exclusions.iter().map(|i| i.len() as isize).sum::<isize>()
+        ),
+        ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"),
+    }
+
+    assert_eq!(b"Hello foo! This is a quux.", &*out.into_bytes());
+}
+
+#[test]
+fn test_abbreviate_exclusions_avoid_abbreviations() {
+    let mut out = ProcOutput::new();
+    let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];
+
+    let mut expected = vec![b'.'; HEAD_LEN - EXCLUDED_PLACEHOLDER_LEN as usize];
+    expected.extend_from_slice(exclusions[0].as_bytes());
+    expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);
+
+    out.extend(&expected, exclusions);
+
+    // We first check the length to avoid endless terminal output if the length differs, since
+    // `out` is hundreds of KBs in size.
+    let out = out.into_bytes();
+    assert_eq!(expected.len(), out.len());
+    assert_eq!(expected, out);
+}
+
+#[test]
+fn test_abbreviate_exclusions_can_still_cause_abbreviations() {
+    let mut out = ProcOutput::new();
+    let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];
+
+    let mut input = vec![b'.'; HEAD_LEN];
+    input.extend_from_slice(&vec![b'.'; TAIL_LEN]);
+    input.extend_from_slice(exclusions[0].as_bytes());
+
+    let mut expected = vec![b'.'; HEAD_LEN];
+    expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n");
+    expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]);
+    expected.extend_from_slice(&vec![b'a'; 64]);
+
+    out.extend(&input, exclusions);
+
+    // We first check the length to avoid endless terminal output if the length differs, since
+    // `out` is hundreds of KBs in size.
+    let out = out.into_bytes();
+    assert_eq!(expected.len(), out.len());
+    assert_eq!(expected, out);
+}

From e257f38160b361f738e9c42c3b76665fbbf13aa4 Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Sat, 4 Jun 2022 19:24:41 +0200
Subject: [PATCH 7/8] address review comments

---
 src/tools/compiletest/src/read2.rs   | 47 +++++++++++++++++-----------
 src/tools/compiletest/src/runtest.rs | 12 +++----
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs
index 313508a796678..7640e65174428 100644
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -9,7 +9,7 @@ use std::io::{self, Write};
 use std::mem::replace;
 use std::process::{Child, Output};
 
-pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result<Output> {
+pub fn read2_abbreviated(mut child: Child, filter_paths_from_len: &[String]) -> io::Result<Output> {
     let mut stdout = ProcOutput::new();
     let mut stderr = ProcOutput::new();
 
@@ -18,7 +18,7 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
         child.stdout.take().unwrap(),
         child.stderr.take().unwrap(),
         &mut |is_stdout, data, _| {
-            if is_stdout { &mut stdout } else { &mut stderr }.extend(data, exclude_from_len);
+            if is_stdout { &mut stdout } else { &mut stderr }.extend(data, filter_paths_from_len);
             data.clear();
         },
     )?;
@@ -29,23 +29,30 @@ pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::R
 
 const HEAD_LEN: usize = 160 * 1024;
 const TAIL_LEN: usize = 256 * 1024;
-const EXCLUDED_PLACEHOLDER_LEN: isize = 32;
+
+// Whenever a path is filtered when counting the length of the output, we need to add some
+// placeholder length to ensure a compiler emitting only filtered paths doesn't cause a OOM.
+//
+// 32 was chosen semi-arbitrarily: it was the highest power of two that still allowed the test
+// suite to pass at the moment of implementing path filtering.
+const FILTERED_PATHS_PLACEHOLDER_LEN: usize = 32;
 
 enum ProcOutput {
-    Full { bytes: Vec<u8>, excluded_len: isize },
+    Full { bytes: Vec<u8>, filtered_len: usize },
     Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
 }
 
 impl ProcOutput {
     fn new() -> Self {
-        ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }
+        ProcOutput::Full { bytes: Vec::new(), filtered_len: 0 }
     }
 
-    fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) {
+    fn extend(&mut self, data: &[u8], filter_paths_from_len: &[String]) {
         let new_self = match *self {
-            ProcOutput::Full { ref mut bytes, ref mut excluded_len } => {
+            ProcOutput::Full { ref mut bytes, ref mut filtered_len } => {
                 let old_len = bytes.len();
                 bytes.extend_from_slice(data);
+                *filtered_len += data.len();
 
                 // We had problems in the past with tests failing only in some environments,
                 // due to the length of the base path pushing the output size over the limit.
@@ -58,21 +65,25 @@ impl ProcOutput {
                 // The compiler emitting only excluded strings is addressed by adding a
                 // placeholder size for each excluded segment, which will eventually reach
                 // the configured threshold.
-                for pattern in exclude_from_len {
-                    let pattern_bytes = pattern.as_bytes();
-                    // We start matching `pattern_bytes - 1` into the previously loaded data,
-                    // to account for the fact a pattern might be included across multiple
-                    // `extend` calls. Starting from `- 1` avoids double-counting patterns.
-                    let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..])
-                        .windows(pattern_bytes.len())
-                        .filter(|window| window == &pattern_bytes)
+                for path in filter_paths_from_len {
+                    let path_bytes = path.as_bytes();
+                    // We start matching `path_bytes - 1` into the previously loaded data,
+                    // to account for the fact a path_bytes might be included across multiple
+                    // `extend` calls. Starting from `- 1` avoids double-counting paths.
+                    let matches = (&bytes[(old_len.saturating_sub(path_bytes.len() - 1))..])
+                        .windows(path_bytes.len())
+                        .filter(|window| window == &path_bytes)
                         .count();
-                    *excluded_len += matches as isize
-                        * (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize);
+                    *filtered_len -= matches * path_bytes.len();
+
+                    // We can't just remove the length of the filtered path from the output lenght,
+                    // otherwise a compiler emitting only filtered paths would OOM compiletest. Add
+                    // a fixed placeholder length for each path to prevent that.
+                    *filtered_len += matches * FILTERED_PATHS_PLACEHOLDER_LEN;
                 }
 
                 let new_len = bytes.len();
-                if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN {
+                if *filtered_len <= HEAD_LEN + TAIL_LEN {
                     return;
                 }
 
diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs
index fe8d451da4914..1d39d24524803 100644
--- a/src/tools/compiletest/src/runtest.rs
+++ b/src/tools/compiletest/src/runtest.rs
@@ -1736,25 +1736,25 @@ impl<'test> TestCx<'test> {
     }
 
     fn read2_abbreviated(&self, child: Child) -> Output {
-        let mut exclude_from_len = Vec::new();
+        let mut filter_paths_from_len = Vec::new();
         let mut add_path = |path: &Path| {
             let path = path.display().to_string();
             let windows = path.replace("\\", "\\\\");
             if windows != path {
-                exclude_from_len.push(windows);
+                filter_paths_from_len.push(windows);
             }
-            exclude_from_len.push(path);
+            filter_paths_from_len.push(path);
         };
 
-        // List of strings that will not be measured when determining whether the output is larger
+        // List of paths that will not be measured when determining whether the output is larger
         // than the output truncation threshold.
         //
-        // Note: avoid adding a subdirectory of an already excluded directory here, otherwise the
+        // Note: avoid adding a subdirectory of an already filtered directory here, otherwise the
         // same slice of text will be double counted and the truncation might not happen.
         add_path(&self.config.src_base);
         add_path(&self.config.build_base);
 
-        read2_abbreviated(child, &exclude_from_len).expect("failed to read output")
+        read2_abbreviated(child, &filter_paths_from_len).expect("failed to read output")
     }
 
     fn compose_and_run(

From 8ea95988df4d493ec3f556ab0de6b0b812ed27be Mon Sep 17 00:00:00 2001
From: Pietro Albini <pietro.albini@ferrous-systems.com>
Date: Sat, 4 Jun 2022 19:38:51 +0200
Subject: [PATCH 8/8] update tests

---
 src/tools/compiletest/src/read2/tests.rs | 40 ++++++++++++------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/tools/compiletest/src/read2/tests.rs b/src/tools/compiletest/src/read2/tests.rs
index d66b501fc2b9f..1ca682a46aaaf 100644
--- a/src/tools/compiletest/src/read2/tests.rs
+++ b/src/tools/compiletest/src/read2/tests.rs
@@ -1,4 +1,4 @@
-use crate::read2::{ProcOutput, EXCLUDED_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};
+use crate::read2::{ProcOutput, FILTERED_PATHS_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};
 
 #[test]
 fn test_abbreviate_short_string() {
@@ -59,21 +59,21 @@ fn test_abbreviate_long_string_multiple_steps() {
 }
 
 #[test]
-fn test_abbreviate_exclusions_are_detected() {
+fn test_abbreviate_filterss_are_detected() {
     let mut out = ProcOutput::new();
-    let exclusions = &["foo".to_string(), "quux".to_string()];
+    let filters = &["foo".to_string(), "quux".to_string()];
 
-    out.extend(b"Hello foo", exclusions);
+    out.extend(b"Hello foo", filters);
     // Check items from a previous extension are not double-counted.
-    out.extend(b"! This is a qu", exclusions);
+    out.extend(b"! This is a qu", filters);
     // Check items are detected across extensions.
-    out.extend(b"ux.", exclusions);
+    out.extend(b"ux.", filters);
 
-    match out {
-        ProcOutput::Full { excluded_len, .. } => assert_eq!(
-            excluded_len,
-            EXCLUDED_PLACEHOLDER_LEN * exclusions.len() as isize
-                - exclusions.iter().map(|i| i.len() as isize).sum::<isize>()
+    match &out {
+        ProcOutput::Full { bytes, filtered_len } => assert_eq!(
+            *filtered_len,
+            bytes.len() + FILTERED_PATHS_PLACEHOLDER_LEN * filters.len()
+                - filters.iter().map(|i| i.len()).sum::<usize>()
         ),
         ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"),
     }
@@ -82,15 +82,15 @@ fn test_abbreviate_exclusions_are_detected() {
 }
 
 #[test]
-fn test_abbreviate_exclusions_avoid_abbreviations() {
+fn test_abbreviate_filters_avoid_abbreviations() {
     let mut out = ProcOutput::new();
-    let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];
+    let filters = &[std::iter::repeat('a').take(64).collect::<String>()];
 
-    let mut expected = vec![b'.'; HEAD_LEN - EXCLUDED_PLACEHOLDER_LEN as usize];
-    expected.extend_from_slice(exclusions[0].as_bytes());
+    let mut expected = vec![b'.'; HEAD_LEN - FILTERED_PATHS_PLACEHOLDER_LEN as usize];
+    expected.extend_from_slice(filters[0].as_bytes());
     expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);
 
-    out.extend(&expected, exclusions);
+    out.extend(&expected, filters);
 
     // We first check the length to avoid endless terminal output if the length differs, since
     // `out` is hundreds of KBs in size.
@@ -100,20 +100,20 @@ fn test_abbreviate_exclusions_avoid_abbreviations() {
 }
 
 #[test]
-fn test_abbreviate_exclusions_can_still_cause_abbreviations() {
+fn test_abbreviate_filters_can_still_cause_abbreviations() {
     let mut out = ProcOutput::new();
-    let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];
+    let filters = &[std::iter::repeat('a').take(64).collect::<String>()];
 
     let mut input = vec![b'.'; HEAD_LEN];
     input.extend_from_slice(&vec![b'.'; TAIL_LEN]);
-    input.extend_from_slice(exclusions[0].as_bytes());
+    input.extend_from_slice(filters[0].as_bytes());
 
     let mut expected = vec![b'.'; HEAD_LEN];
     expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n");
     expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]);
     expected.extend_from_slice(&vec![b'a'; 64]);
 
-    out.extend(&input, exclusions);
+    out.extend(&input, filters);
 
     // We first check the length to avoid endless terminal output if the length differs, since
     // `out` is hundreds of KBs in size.