Rollup merge of #106978 - mejrs:mir_build3, r=davidtwco

[rust.git] / src / tools / compiletest / src / read2.rs
diff --git a/src/tools/compiletest/src/read2.rs b/src/tools/compiletest/src/read2.rs

index 897b9dd4007939b07acded6460dc01737eb7c4d8..a5dc6859732a3d2c5348fb8084e00e0afbd876a5 100644 (file)
--- a/src/tools/compiletest/src/read2.rs
+++ b/src/tools/compiletest/src/read2.rs
@@ -1,77 +1,131 @@
  // FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs`
  // Consider unify the read2() in libstd, cargo and this to prevent further code duplication.
  
+#[cfg(test)]
+mod tests;
+
  pub use self::imp::read2;
-use std::io;
+use std::io::{self, Write};
+use std::mem::replace;
  use std::process::{Child, Output};
  
-pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
-    use io::Write;
-    use std::mem::replace;
+pub fn read2_abbreviated(mut child: Child, filter_paths_from_len: &[String]) -> io::Result<Output> {
+    let mut stdout = ProcOutput::new();
+    let mut stderr = ProcOutput::new();
  
-    const HEAD_LEN: usize = 160 * 1024;
-    const TAIL_LEN: usize = 256 * 1024;
+    drop(child.stdin.take());
+    read2(
+        child.stdout.take().unwrap(),
+        child.stderr.take().unwrap(),
+        &mut |is_stdout, data, _| {
+            if is_stdout { &mut stdout } else { &mut stderr }.extend(data, filter_paths_from_len);
+            data.clear();
+        },
+    )?;
+    let status = child.wait()?;
  
-    enum ProcOutput {
-        Full(Vec<u8>),
-        Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
+    Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
+}
+
+const HEAD_LEN: usize = 160 * 1024;
+const TAIL_LEN: usize = 256 * 1024;
+
+// Whenever a path is filtered when counting the length of the output, we need to add some
+// placeholder length to ensure a compiler emitting only filtered paths doesn't cause an OOM.
+//
+// 32 was chosen semi-arbitrarily: it was the highest power of two that still allowed the test
+// suite to pass at the moment of implementing path filtering.
+const FILTERED_PATHS_PLACEHOLDER_LEN: usize = 32;
+
+enum ProcOutput {
+    Full { bytes: Vec<u8>, filtered_len: usize },
+    Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
+}
+
+impl ProcOutput {
+    fn new() -> Self {
+        ProcOutput::Full { bytes: Vec::new(), filtered_len: 0 }
      }
  
-    impl ProcOutput {
-        fn extend(&mut self, data: &[u8]) {
-            let new_self = match *self {
-                ProcOutput::Full(ref mut bytes) => {
-                    bytes.extend_from_slice(data);
-                    let new_len = bytes.len();
-                    if new_len <= HEAD_LEN + TAIL_LEN {
-                        return;
-                    }
-                    let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice();
-                    let head = replace(bytes, Vec::new());
-                    let skipped = new_len - HEAD_LEN - TAIL_LEN;
-                    ProcOutput::Abbreviated { head, skipped, tail }
+    fn extend(&mut self, data: &[u8], filter_paths_from_len: &[String]) {
+        let new_self = match *self {
+            ProcOutput::Full { ref mut bytes, ref mut filtered_len } => {
+                let old_len = bytes.len();
+                bytes.extend_from_slice(data);
+                *filtered_len += data.len();
+
+                // We had problems in the past with tests failing only in some environments,
+                // due to the length of the base path pushing the output size over the limit.
+                //
+                // To make those failures deterministic across all environments we ignore known
+                // paths when calculating the string length, while still including the full
+                // path in the output. This could result in some output being larger than the
+                // threshold, but it's better than having nondeterministic failures.
+                //
+                // The compiler emitting only excluded strings is addressed by adding a
+                // placeholder size for each excluded segment, which will eventually reach
+                // the configured threshold.
+                for path in filter_paths_from_len {
+                    let path_bytes = path.as_bytes();
+                    // We start matching `path_bytes.len() - 1` bytes before the end of the
+                    // previously loaded data, since a path might be split across multiple
+                    // `extend` calls. Starting from `- 1` avoids double-counting paths.
+                    let matches = (&bytes[(old_len.saturating_sub(path_bytes.len() - 1))..])
+                        .windows(path_bytes.len())
+                        .filter(|window| window == &path_bytes)
+                        .count();
+                    *filtered_len -= matches * path_bytes.len();
+
+                    // We can't just remove the length of the filtered path from the output length,
+                    // otherwise a compiler emitting only filtered paths would OOM compiletest. Add
+                    // a fixed placeholder length for each path to prevent that.
+                    *filtered_len += matches * FILTERED_PATHS_PLACEHOLDER_LEN;
                  }
-                ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
-                    *skipped += data.len();
-                    if data.len() <= TAIL_LEN {
-                        tail[..data.len()].copy_from_slice(data);
-                        tail.rotate_left(data.len());
-                    } else {
-                        tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
-                    }
+
+                let new_len = bytes.len();
+                if *filtered_len <= HEAD_LEN + TAIL_LEN {
                      return;
                  }
-            };
-            *self = new_self;
-        }
  
-        fn into_bytes(self) -> Vec<u8> {
-            match self {
-                ProcOutput::Full(bytes) => bytes,
-                ProcOutput::Abbreviated { mut head, skipped, tail } => {
-                    write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
-                    head.extend_from_slice(&tail);
-                    head
+                let mut head = replace(bytes, Vec::new());
+                let mut middle = head.split_off(HEAD_LEN);
+                let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice();
+                let skipped = new_len - HEAD_LEN - TAIL_LEN;
+                ProcOutput::Abbreviated { head, skipped, tail }
+            }
+            ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
+                *skipped += data.len();
+                if data.len() <= TAIL_LEN {
+                    tail[..data.len()].copy_from_slice(data);
+                    tail.rotate_left(data.len());
+                } else {
+                    tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
                  }
+                return;
              }
-        }
+        };
+        *self = new_self;
      }
  
-    let mut stdout = ProcOutput::Full(Vec::new());
-    let mut stderr = ProcOutput::Full(Vec::new());
+    fn into_bytes(self) -> Vec<u8> {
+        match self {
+            ProcOutput::Full { bytes, .. } => bytes,
+            ProcOutput::Abbreviated { mut head, mut skipped, tail } => {
+                let mut tail = &*tail;
  
-    drop(child.stdin.take());
-    read2(
-        child.stdout.take().unwrap(),
-        child.stderr.take().unwrap(),
-        &mut |is_stdout, data, _| {
-            if is_stdout { &mut stdout } else { &mut stderr }.extend(data);
-            data.clear();
-        },
-    )?;
-    let status = child.wait()?;
+                // Skip over '{' at the start of the tail, so we don't later wrongfully consider this as json.
+                // See <https://rust-lang.zulipchat.com/#narrow/stream/182449-t-compiler.2Fhelp/topic/Weird.20CI.20failure/near/321797811>
+                while tail.get(0) == Some(&b'{') {
+                    tail = &tail[1..];
+                    skipped += 1;
+                }
  
-    Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
+                write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
+                head.extend_from_slice(tail);
+                head
+            }
+        }
+    }
  }
  
  #[cfg(not(any(unix, windows)))]