rust-lang-ci
diff --git a/‎Cargo.lock
+1 b/‎Cargo.lock
+1
diff --git a/‎src/bootstrap/src/core/build_steps/check.rs
+67 b/‎src/bootstrap/src/core/build_steps/check.rs
+67
diff --git a/‎src/bootstrap/src/core/build_steps/run.rs
+28 b/‎src/bootstrap/src/core/build_steps/run.rs
+28
diff --git a/‎src/bootstrap/src/core/build_steps/test.rs
+1 b/‎src/bootstrap/src/core/build_steps/test.rs
+1
diff --git a/‎src/bootstrap/src/core/builder/mod.rs
+2 b/‎src/bootstrap/src/core/builder/mod.rs
+2
diff --git a/‎src/tools/coverage-dump/Cargo.toml
+1 b/‎src/tools/coverage-dump/Cargo.toml
+1
diff --git a/‎src/tools/coverage-dump/src/covfun.rs
+57-29 b/‎src/tools/coverage-dump/src/covfun.rs
+57-29
diff --git a/‎src/tools/coverage-dump/src/covfun/tests.rs
+53 b/‎src/tools/coverage-dump/src/covfun/tests.rs
+53
diff --git a/‎src/tools/coverage-dump/src/covmap.rs
+75 b/‎src/tools/coverage-dump/src/covmap.rs
+75
@@ -777,6 +777,7 @@ name = "coverage-dump"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "itertools",
  "leb128",
  "md-5",
  "miniz_oxide 0.7.4",
 
@@ -527,3 +527,70 @@ tool_check_step!(Bootstrap { path: "src/bootstrap", default: false });
 // `run-make-support` will be built as part of suitable run-make compiletest test steps, but support
 // check to make it easier to work on.
 tool_check_step!(RunMakeSupport { path: "src/tools/run-make-support", default: false });
+
+/// Check step for the `coverage-dump` bootstrap tool. The coverage-dump tool
+/// is used internally by coverage tests.
+///
+/// FIXME(Zalathar): This is temporarily separate from the other tool check
+/// steps so that it can use the stage 0 compiler instead of `top_stage`,
+/// without introducing conflicts with the stage 0 redesign (#119899).
+///
+/// After the stage 0 redesign lands, we can look into using the stage 0
+/// compiler to check all bootstrap tools (#139170).
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct CoverageDump; // Unit struct: this step carries no parameters.
+
+impl CoverageDump {
+    const PATH: &str = "src/tools/coverage-dump"; // Shared by `should_run` and `run`.
+}
+
+impl Step for CoverageDump {
+    type Output = ();
+
+    /// Most contributors won't care about coverage-dump, so don't make their
+    /// check builds slower unless they opt in and check it explicitly.
+    const DEFAULT: bool = false;
+    const ONLY_HOSTS: bool = true;
+
+    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
+        run.path(Self::PATH)
+    }
+
+    fn make_run(run: RunConfig<'_>) {
+        run.builder.ensure(Self {});
+    }
+
+    fn run(self, builder: &Builder<'_>) -> Self::Output {
+        // Make sure we haven't forgotten any fields, if there are any.
+        let Self {} = self;
+        let display_name = "coverage-dump";
+        let host = builder.config.build;
+        let target = host; // Always the same as `host`; this step never cross-checks.
+        let mode = Mode::ToolBootstrap;
+
+        let compiler = builder.compiler(0, host); // Stage 0 is hard-coded, not `top_stage`.
+        let cargo = prepare_tool_cargo(
+            builder,
+            compiler,
+            mode,
+            target,
+            builder.kind,
+            Self::PATH,
+            SourceType::InTree,
+            &[],
+        );
+
+        let stamp = BuildStamp::new(&builder.cargo_out(compiler, mode, target))
+            .with_prefix(&format!("{display_name}-check")); // i.e. "coverage-dump-check"
+
+        let _guard = builder.msg_tool(
+            builder.kind,
+            mode,
+            display_name,
+            compiler.stage,
+            &compiler.host,
+            &target,
+        );
+        run_cargo(builder, cargo, builder.config.free_args.clone(), &stamp, vec![], true, false);
+    }
+}
@@ -392,3 +392,31 @@ impl Step for CyclicStep {
         builder.ensure(CyclicStep { n: self.n.saturating_sub(1) })
     }
 }
+
+/// Step to manually run the coverage-dump tool (`./x run coverage-dump`).
+///
+/// The coverage-dump tool is an internal detail of coverage tests, so this run
+/// step is only needed when testing coverage-dump manually.
+#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)]
+pub struct CoverageDump; // Unit struct: this step carries no parameters.
+
+impl Step for CoverageDump {
+    type Output = ();
+
+    const DEFAULT: bool = false; // Not a default step; it is meant to be invoked manually.
+    const ONLY_HOSTS: bool = true;
+
+    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
+        run.path("src/tools/coverage-dump")
+    }
+
+    fn make_run(run: RunConfig<'_>) {
+        run.builder.ensure(Self {});
+    }
+
+    fn run(self, builder: &Builder<'_>) {
+        let mut cmd = builder.tool_cmd(Tool::CoverageDump);
+        cmd.args(&builder.config.free_args); // Hand the configured `free_args` to the tool.
+        cmd.run(builder);
+    }
+}
@@ -54,6 +54,7 @@ impl Step for CrateBootstrap {
         run.path("src/tools/jsondoclint")
             .path("src/tools/suggest-tests")
             .path("src/tools/replace-version-placeholder")
+            .path("src/tools/coverage-dump")
             // We want `./x test tidy` to _run_ the tidy tool, not its tests.
             // So we need a separate alias to test the tidy tool itself.
             .alias("tidyselftest")
 
@@ -961,6 +961,7 @@ impl<'a> Builder<'a> {
                 check::RunMakeSupport,
                 check::Compiletest,
                 check::FeaturesStatusDump,
+                check::CoverageDump,
             ),
             Kind::Test => describe!(
                 crate::core::build_steps::toolstate::ToolStateCheck,
@@ -1114,6 +1115,7 @@ impl<'a> Builder<'a> {
                 run::UnicodeTableGenerator,
                 run::FeaturesStatusDump,
                 run::CyclicStep,
+                run::CoverageDump,
             ),
             Kind::Setup => {
                 describe!(setup::Profile, setup::Hook, setup::Link, setup::Editor)
 
@@ -7,6 +7,7 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0.71"
+itertools = "0.12"
 leb128 = "0.2.5"
 md5 = { package = "md-5" , version = "0.10.5" }
 miniz_oxide = "0.7.1"
 
@@ -1,23 +1,33 @@
 use std::collections::HashMap;
 use std::fmt::{self, Debug, Write as _};
-use std::sync::OnceLock;
+use std::sync::LazyLock;
 
-use anyhow::{Context, anyhow};
+use anyhow::{Context, anyhow, bail, ensure};
+use itertools::Itertools;
 use regex::Regex;
 
-use crate::parser::{Parser, unescape_llvm_string_contents};
+use crate::covmap::FilenameTables;
+use crate::llvm_utils::unescape_llvm_string_contents;
+use crate::parser::Parser;
+
+#[cfg(test)]
+mod tests;
 
 pub(crate) fn dump_covfun_mappings(
     llvm_ir: &str,
+    filename_tables: &FilenameTables,
     function_names: &HashMap<u64, String>,
 ) -> anyhow::Result<()> {
     // Extract function coverage entries from the LLVM IR assembly, and associate
     // each entry with its (demangled) name.
     let mut covfun_entries = llvm_ir
         .lines()
-        .filter_map(covfun_line_data)
-        .map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
-        .collect::<Vec<_>>();
+        .filter(|line| is_covfun_line(line))
+        .map(parse_covfun_line)
+        .map_ok(|line_data| {
+            (function_names.get(&line_data.name_hash).map(String::as_str), line_data)
+        })
+        .collect::<Result<Vec<_>, _>>()?;
     covfun_entries.sort_by(|a, b| {
         // Sort entries primarily by name, to help make the order consistent
         // across platforms and relatively insensitive to changes.
@@ -41,8 +51,12 @@ pub(crate) fn dump_covfun_mappings(
         println!("Number of files: {num_files}");
 
         for i in 0..num_files {
-            let global_file_id = parser.read_uleb128_u32()?;
-            println!("- file {i} => global file {global_file_id}");
+            let global_file_id = parser.read_uleb128_usize()?;
+            let &CovfunLineData { filenames_hash, .. } = line_data;
+            let Some(filename) = filename_tables.lookup(filenames_hash, global_file_id) else {
+                bail!("couldn't resolve global file: {filenames_hash}, {global_file_id}");
+            };
+            println!("- file {i} => {filename}");
         }
 
         let num_expressions = parser.read_uleb128_u32()?;
@@ -107,36 +121,50 @@ pub(crate) fn dump_covfun_mappings(
     Ok(())
 }
 
+#[derive(Debug, PartialEq, Eq)] // Lets the unit tests `assert_eq!` on parsed values.
 struct CovfunLineData {
-    name_hash: u64,
     is_used: bool,
+    name_hash: u64, // Parsed from the first `i64` field of the record.
+    filenames_hash: u64, // Parsed from the last `i64` field; passed to `FilenameTables::lookup`.
     payload: Vec<u8>,
 }
 
-/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
-/// entry, and if so extracts relevant data in a `CovfunLineData`.
-fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
-    let re = {
-        // We cheat a little bit and match variable names `@__covrec_[HASH]u`
-        // rather than the section name, because the section name is harder to
-        // extract and differs across Linux/Windows/macOS. We also extract the
-        // symbol name hash from the variable name rather than the data, since
-        // it's easier and both should match.
-        static RE: OnceLock<Regex> = OnceLock::new();
-        RE.get_or_init(|| {
-            Regex::new(
-                r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
-            )
-            .unwrap()
-        })
-    };
+fn is_covfun_line(line: &str) -> bool {
+    line.starts_with("@__covrec_") // Cheap prefix test; `parse_covfun_line` does the real work.
+}
 
-    let captures = re.captures(line)?;
-    let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
+/// Parses a line of LLVM IR assembly that should contain an `__llvm_covfun` entry,
+/// extracting its data as a `CovfunLineData`. Hash values outside `i64` range are an error.
+fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
+    ensure!(is_covfun_line(line));
+
+    // We cheat a little bit and match variable names `@__covrec_[HASH]u`
+    // rather than the section name, because the section name is harder to
+    // extract and differs across Linux/Windows/macOS.
+    const RE_STRING: &str = r#"(?x)^
+        @__covrec_[0-9A-Z]+(?<is_used>u)?
+        \ = \ # (trailing space)
+        .*
+        <\{
+            \ i64 \ (?<name_hash> -? [0-9]+),
+            \ i32 \ -? [0-9]+, # (length of payload; currently unused)
+            \ i64 \ -? [0-9]+, # (source hash; currently unused)
+            \ i64 \ (?<filenames_hash> -? [0-9]+),
+            \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
+            \ # (trailing space)
+        }>
+        .*$
+    "#;
+    static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
+
+    let captures =
+        RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;
     let is_used = captures.name("is_used").is_some();
+    let name_hash = captures["name_hash"].parse::<i64>()? as u64; // Signed text, unsigned bits.
+    let filenames_hash = captures["filenames_hash"].parse::<i64>()? as u64; // `Err` on overflow.
     let payload = unescape_llvm_string_contents(&captures["payload"]);
 
-    Some(CovfunLineData { name_hash, is_used, payload })
+    Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
 }
 
 // Extra parser methods only needed when parsing `covfun` payloads.
 
@@ -0,0 +1,53 @@
+use super::{CovfunLineData, parse_covfun_line};
+
+/// Integers in LLVM IR are not inherently signed/unsigned, and the text format tends
+/// to emit them in signed form, so this helper function converts `i64` to `u64`.
+fn as_u64(x: i64) -> u64 {
+    x as u64 // Same bits, reinterpreted: negative inputs map to values above `i64::MAX`.
+}
+
+#[test]
+fn parse_covfun_line_data() {
+    struct Case {
+        line: &'static str,
+        expected: CovfunLineData,
+    }
+    let cases = &[
+        // Copied from `trivial.ll`:
+        Case {
+            line: r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
+            expected: CovfunLineData {
+                is_used: true, // The variable name in `line` ends with the `u` suffix.
+                name_hash: as_u64(5307978893922758273),
+                filenames_hash: as_u64(6343436898695299756),
+                payload: b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D".to_vec(),
+            },
+        },
+        // Copied from `on-off-sandwich.ll`:
+        Case {
+            line: r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
+            expected: CovfunLineData {
+                is_used: true,
+                name_hash: as_u64(-3400688559180533350), // Negative in the IR text.
+                filenames_hash: as_u64(892196767019953100),
+                payload: b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06".to_vec(),
+            },
+        },
+        // Copied from `no-core.ll`:
+        Case {
+            line: r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
+            expected: CovfunLineData {
+                is_used: true,
+                name_hash: as_u64(1116917981370409222),
+                filenames_hash: as_u64(-3625186110715410276), // Negative in the IR text.
+                payload: b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D".to_vec(),
+            },
+        },
+    ];
+
+    for &Case { line, ref expected } in cases {
+        println!("- {line}"); // Names the case being checked in the test's output.
+        let line_data = parse_covfun_line(line).map_err(|e| e.to_string());
+        assert_eq!(line_data.as_ref(), Ok(expected)); // Errors were stringified above.
+    }
+}
@@ -0,0 +1,75 @@
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+use anyhow::{Context, ensure};
+use regex::Regex;
+
+use crate::llvm_utils::{truncated_md5, unescape_llvm_string_contents};
+use crate::parser::Parser;
+
+#[derive(Debug, Default)]
+pub(crate) struct FilenameTables {
+    map: HashMap<u64, Vec<String>>, // Filenames hash -> filenames, indexed by global file ID.
+}
+
+impl FilenameTables {
+    pub(crate) fn lookup(&self, filenames_hash: u64, global_file_id: usize) -> Option<&str> {
+        let table = self.map.get(&filenames_hash)?; // `None` if no table has this hash.
+        let filename = table.get(global_file_id)?; // `None` if the ID is past the table's end.
+        Some(filename)
+    }
+}
+
+struct CovmapLineData {
+    payload: Vec<u8>, // Result of unescaping the line's `c"..."` string contents.
+}
+
+pub(crate) fn make_filename_tables(llvm_ir: &str) -> anyhow::Result<FilenameTables> {
+    let mut map = HashMap::default();
+
+    for line in llvm_ir.lines().filter(|line| is_covmap_line(line)) {
+        let CovmapLineData { payload } = parse_covmap_line(line)?;
+
+        let mut parser = Parser::new(&payload);
+        let n_filenames = parser.read_uleb128_usize()?; // The filename count comes first.
+        let uncompressed_bytes = parser.read_chunk_to_uncompressed_bytes()?;
+        parser.ensure_empty()?; // Presumably errors on leftover payload bytes.
+
+        let mut filenames_table = vec![];
+
+        let mut parser = Parser::new(&uncompressed_bytes); // Shadows the payload parser.
+        for _ in 0..n_filenames {
+            let len = parser.read_uleb128_usize()?; // Each entry: a length, ...
+            let bytes = parser.read_n_bytes(len)?; // ... then that many bytes, ...
+            let filename = str::from_utf8(bytes)?; // ... which must be valid UTF-8.
+            filenames_table.push(filename.to_owned());
+        }
+
+        let filenames_hash = truncated_md5(&payload); // Key is derived from `payload` itself.
+        map.insert(filenames_hash, filenames_table); // A repeated hash overwrites.
+    }
+
+    Ok(FilenameTables { map })
+}
+
+fn is_covmap_line(line: &str) -> bool {
+    line.starts_with("@__llvm_coverage_mapping ") // The trailing space is part of the prefix.
+}
+
+fn parse_covmap_line(line: &str) -> anyhow::Result<CovmapLineData> {
+    ensure!(is_covmap_line(line)); // Errors out early on lines of the wrong kind.
+
+    const RE_STRING: &str = r#"(?x)^
+        @__llvm_coverage_mapping \ =
+        .*
+        \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
+        .*$
+    "#; // `(?x)` is verbose mode: unescaped whitespace in the pattern is ignored.
+    static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
+
+    let captures =
+        RE.captures(line).with_context(|| format!("couldn't parse covmap line: {line:?}"))?;
+    let payload = unescape_llvm_string_contents(&captures["payload"]); // Escaped text -> bytes.
+
+    Ok(CovmapLineData { payload })
+}