Skip to content

Commit 4d20660

Browse files
authored
Unrolled build for rust-lang#140251
Rollup merge of rust-lang#140251 - Zalathar:coverage-dump-path, r=clubby789

coverage-dump: Resolve global file IDs to filenames

The coverage-dump tool, used by coverage tests, currently includes “global file ID” numbers in its dump output.

This PR adds support for parsing coverage filename information from LLVM assembly `.ll` files, and resolving those file IDs to the corresponding filename, for inclusion in dump output. This makes dump output more informative, especially for test cases involving multiple files, and will be important for testing expansion region support in the future.

---

The bootstrap changes don't necessarily have to land at the same time (e.g. they could be deferred to after the stage0 redesign if requested), but I would prefer to land them now if possible.
2 parents 7295b08 + c53a767 commit 4d20660

File tree

105 files changed

+684
-410
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+684
-410
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,7 @@ name = "coverage-dump"
777777
version = "0.1.0"
778778
dependencies = [
779779
"anyhow",
780+
"itertools",
780781
"leb128",
781782
"md-5",
782783
"miniz_oxide 0.7.4",

src/bootstrap/src/core/build_steps/check.rs

+67
Original file line numberDiff line numberDiff line change
@@ -527,3 +527,70 @@ tool_check_step!(Bootstrap { path: "src/bootstrap", default: false });
527527
// `run-make-support` will be built as part of suitable run-make compiletest test steps, but support
528528
// check to make it easier to work on.
529529
tool_check_step!(RunMakeSupport { path: "src/tools/run-make-support", default: false });
530+
531+
/// Check step for the `coverage-dump` bootstrap tool. The coverage-dump tool
532+
/// is used internally by coverage tests.
533+
///
534+
/// FIXME(Zalathar): This is temporarily separate from the other tool check
535+
/// steps so that it can use the stage 0 compiler instead of `top_stage`,
536+
/// without introducing conflicts with the stage 0 redesign (#119899).
537+
///
538+
/// After the stage 0 redesign lands, we can look into using the stage 0
539+
/// compiler to check all bootstrap tools (#139170).
540+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
541+
pub(crate) struct CoverageDump;
542+
543+
impl CoverageDump {
544+
const PATH: &str = "src/tools/coverage-dump";
545+
}
546+
547+
impl Step for CoverageDump {
548+
type Output = ();
549+
550+
/// Most contributors won't care about coverage-dump, so don't make their
551+
/// check builds slower unless they opt in and check it explicitly.
552+
const DEFAULT: bool = false;
553+
const ONLY_HOSTS: bool = true;
554+
555+
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
556+
run.path(Self::PATH)
557+
}
558+
559+
fn make_run(run: RunConfig<'_>) {
560+
run.builder.ensure(Self {});
561+
}
562+
563+
fn run(self, builder: &Builder<'_>) -> Self::Output {
564+
// Make sure we haven't forgotten any fields, if there are any.
565+
let Self {} = self;
566+
let display_name = "coverage-dump";
567+
let host = builder.config.build;
568+
let target = host;
569+
let mode = Mode::ToolBootstrap;
570+
571+
let compiler = builder.compiler(0, host);
572+
let cargo = prepare_tool_cargo(
573+
builder,
574+
compiler,
575+
mode,
576+
target,
577+
builder.kind,
578+
Self::PATH,
579+
SourceType::InTree,
580+
&[],
581+
);
582+
583+
let stamp = BuildStamp::new(&builder.cargo_out(compiler, mode, target))
584+
.with_prefix(&format!("{display_name}-check"));
585+
586+
let _guard = builder.msg_tool(
587+
builder.kind,
588+
mode,
589+
display_name,
590+
compiler.stage,
591+
&compiler.host,
592+
&target,
593+
);
594+
run_cargo(builder, cargo, builder.config.free_args.clone(), &stamp, vec![], true, false);
595+
}
596+
}

src/bootstrap/src/core/build_steps/run.rs

+28
Original file line numberDiff line numberDiff line change
@@ -392,3 +392,31 @@ impl Step for CyclicStep {
392392
builder.ensure(CyclicStep { n: self.n.saturating_sub(1) })
393393
}
394394
}
395+
396+
/// Step to manually run the coverage-dump tool (`./x run coverage-dump`).
397+
///
398+
/// The coverage-dump tool is an internal detail of coverage tests, so this run
399+
/// step is only needed when testing coverage-dump manually.
400+
#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)]
401+
pub struct CoverageDump;
402+
403+
impl Step for CoverageDump {
404+
type Output = ();
405+
406+
const DEFAULT: bool = false;
407+
const ONLY_HOSTS: bool = true;
408+
409+
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
410+
run.path("src/tools/coverage-dump")
411+
}
412+
413+
fn make_run(run: RunConfig<'_>) {
414+
run.builder.ensure(Self {});
415+
}
416+
417+
fn run(self, builder: &Builder<'_>) {
418+
let mut cmd = builder.tool_cmd(Tool::CoverageDump);
419+
cmd.args(&builder.config.free_args);
420+
cmd.run(builder);
421+
}
422+
}

src/bootstrap/src/core/build_steps/test.rs

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ impl Step for CrateBootstrap {
5454
run.path("src/tools/jsondoclint")
5555
.path("src/tools/suggest-tests")
5656
.path("src/tools/replace-version-placeholder")
57+
.path("src/tools/coverage-dump")
5758
// We want `./x test tidy` to _run_ the tidy tool, not its tests.
5859
// So we need a separate alias to test the tidy tool itself.
5960
.alias("tidyselftest")

src/bootstrap/src/core/builder/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,7 @@ impl<'a> Builder<'a> {
961961
check::RunMakeSupport,
962962
check::Compiletest,
963963
check::FeaturesStatusDump,
964+
check::CoverageDump,
964965
),
965966
Kind::Test => describe!(
966967
crate::core::build_steps::toolstate::ToolStateCheck,
@@ -1114,6 +1115,7 @@ impl<'a> Builder<'a> {
11141115
run::UnicodeTableGenerator,
11151116
run::FeaturesStatusDump,
11161117
run::CyclicStep,
1118+
run::CoverageDump,
11171119
),
11181120
Kind::Setup => {
11191121
describe!(setup::Profile, setup::Hook, setup::Link, setup::Editor)

src/tools/coverage-dump/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77

88
[dependencies]
99
anyhow = "1.0.71"
10+
itertools = "0.12"
1011
leb128 = "0.2.5"
1112
md5 = { package = "md-5" , version = "0.10.5" }
1213
miniz_oxide = "0.7.1"

src/tools/coverage-dump/src/covfun.rs

+57-29
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,33 @@
11
use std::collections::HashMap;
22
use std::fmt::{self, Debug, Write as _};
3-
use std::sync::OnceLock;
3+
use std::sync::LazyLock;
44

5-
use anyhow::{Context, anyhow};
5+
use anyhow::{Context, anyhow, bail, ensure};
6+
use itertools::Itertools;
67
use regex::Regex;
78

8-
use crate::parser::{Parser, unescape_llvm_string_contents};
9+
use crate::covmap::FilenameTables;
10+
use crate::llvm_utils::unescape_llvm_string_contents;
11+
use crate::parser::Parser;
12+
13+
#[cfg(test)]
14+
mod tests;
915

1016
pub(crate) fn dump_covfun_mappings(
1117
llvm_ir: &str,
18+
filename_tables: &FilenameTables,
1219
function_names: &HashMap<u64, String>,
1320
) -> anyhow::Result<()> {
1421
// Extract function coverage entries from the LLVM IR assembly, and associate
1522
// each entry with its (demangled) name.
1623
let mut covfun_entries = llvm_ir
1724
.lines()
18-
.filter_map(covfun_line_data)
19-
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
20-
.collect::<Vec<_>>();
25+
.filter(|line| is_covfun_line(line))
26+
.map(parse_covfun_line)
27+
.map_ok(|line_data| {
28+
(function_names.get(&line_data.name_hash).map(String::as_str), line_data)
29+
})
30+
.collect::<Result<Vec<_>, _>>()?;
2131
covfun_entries.sort_by(|a, b| {
2232
// Sort entries primarily by name, to help make the order consistent
2333
// across platforms and relatively insensitive to changes.
@@ -41,8 +51,12 @@ pub(crate) fn dump_covfun_mappings(
4151
println!("Number of files: {num_files}");
4252

4353
for i in 0..num_files {
44-
let global_file_id = parser.read_uleb128_u32()?;
45-
println!("- file {i} => global file {global_file_id}");
54+
let global_file_id = parser.read_uleb128_usize()?;
55+
let &CovfunLineData { filenames_hash, .. } = line_data;
56+
let Some(filename) = filename_tables.lookup(filenames_hash, global_file_id) else {
57+
bail!("couldn't resolve global file: {filenames_hash}, {global_file_id}");
58+
};
59+
println!("- file {i} => {filename}");
4660
}
4761

4862
let num_expressions = parser.read_uleb128_u32()?;
@@ -107,36 +121,50 @@ pub(crate) fn dump_covfun_mappings(
107121
Ok(())
108122
}
109123

124+
/// Data extracted from one `@__covrec_*` line of LLVM IR assembly.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// True when the record's variable name ends with the `u` suffix.
    is_used: bool,
    /// First `i64` field of the record, reinterpreted as unsigned.
    name_hash: u64,
    /// Last `i64` field of the record, reinterpreted as unsigned; used to
    /// look up the filename table that the record's file IDs refer to.
    filenames_hash: u64,
    /// Unescaped bytes of the record's trailing `[N x i8]` string constant.
    payload: Vec<u8>,
}

116-
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
117-
/// entry, and if so extracts relevant data in a `CovfunLineData`.
118-
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
119-
let re = {
120-
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
121-
// rather than the section name, because the section name is harder to
122-
// extract and differs across Linux/Windows/macOS. We also extract the
123-
// symbol name hash from the variable name rather than the data, since
124-
// it's easier and both should match.
125-
static RE: OnceLock<Regex> = OnceLock::new();
126-
RE.get_or_init(|| {
127-
Regex::new(
128-
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
129-
)
130-
.unwrap()
131-
})
132-
};
132+
/// Reports whether a line of LLVM IR assembly begins with the `@__covrec_`
/// variable-name prefix that marks a function coverage record.
fn is_covfun_line(line: &str) -> bool {
    line.strip_prefix("@__covrec_").is_some()
}
133135

134-
let captures = re.captures(line)?;
135-
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
136+
/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
137+
/// entry, parses it to extract relevant data in a `CovfunLineData`.
138+
fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
139+
ensure!(is_covfun_line(line));
140+
141+
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
142+
// rather than the section name, because the section name is harder to
143+
// extract and differs across Linux/Windows/macOS.
144+
const RE_STRING: &str = r#"(?x)^
145+
@__covrec_[0-9A-Z]+(?<is_used>u)?
146+
\ = \ # (trailing space)
147+
.*
148+
<\{
149+
\ i64 \ (?<name_hash> -? [0-9]+),
150+
\ i32 \ -? [0-9]+, # (length of payload; currently unused)
151+
\ i64 \ -? [0-9]+, # (source hash; currently unused)
152+
\ i64 \ (?<filenames_hash> -? [0-9]+),
153+
\ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
154+
\ # (trailing space)
155+
}>
156+
.*$
157+
"#;
158+
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
159+
160+
let captures =
161+
RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;
136162
let is_used = captures.name("is_used").is_some();
163+
let name_hash = i64::from_str_radix(&captures["name_hash"], 10).unwrap() as u64;
164+
let filenames_hash = i64::from_str_radix(&captures["filenames_hash"], 10).unwrap() as u64;
137165
let payload = unescape_llvm_string_contents(&captures["payload"]);
138166

139-
Some(CovfunLineData { name_hash, is_used, payload })
167+
Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
140168
}
141169

142170
// Extra parser methods only needed when parsing `covfun` payloads.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
use super::{CovfunLineData, parse_covfun_line};
2+
3+
/// LLVM IR integers carry no signedness of their own, and the textual format
/// usually prints them as signed values. This helper reinterprets the bits of
/// an `i64` as a `u64`, leaving the bit pattern untouched.
fn as_u64(x: i64) -> u64 {
    u64::from_ne_bytes(x.to_ne_bytes())
}
8+
9+
/// Checks `parse_covfun_line` against `@__covrec_*` lines copied from real
/// `.ll` files, including hashes that LLVM printed as negative numbers.
#[test]
fn parse_covfun_line_data() {
    /// Builds the expected parse result for a record whose name has the `u` suffix.
    fn used_record(name_hash: i64, filenames_hash: i64, payload: &[u8]) -> CovfunLineData {
        CovfunLineData {
            is_used: true,
            name_hash: as_u64(name_hash),
            filenames_hash: as_u64(filenames_hash),
            payload: payload.to_vec(),
        }
    }

    // Each entry pairs an input line with the data it should parse to.
    let cases: Vec<(&'static str, CovfunLineData)> = vec![
        // Copied from `trivial.ll`:
        (
            r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            used_record(
                5307978893922758273,
                6343436898695299756,
                b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D",
            ),
        ),
        // Copied from `on-off-sandwich.ll`:
        (
            r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            used_record(
                -3400688559180533350,
                892196767019953100,
                b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06",
            ),
        ),
        // Copied from `no-core.ll`:
        (
            r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            used_record(
                1116917981370409222,
                -3625186110715410276,
                b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D",
            ),
        ),
    ];

    for &(line, ref expected) in &cases {
        // Print the input first so a failing case is identifiable in test output.
        println!("- {line}");
        // Errors are stringified so that the result can be compared with `assert_eq!`.
        let line_data = parse_covfun_line(line).map_err(|e| e.to_string());
        assert_eq!(line_data.as_ref(), Ok(expected));
    }
}

src/tools/coverage-dump/src/covmap.rs

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
use std::collections::HashMap;
2+
use std::sync::LazyLock;
3+
4+
use anyhow::{Context, ensure};
5+
use regex::Regex;
6+
7+
use crate::llvm_utils::{truncated_md5, unescape_llvm_string_contents};
8+
use crate::parser::Parser;
9+
10+
/// Filename tables keyed by a 64-bit filenames hash; each table lists the
/// filenames in order, so a global file ID is an index into it.
#[derive(Debug, Default)]
pub(crate) struct FilenameTables {
    map: HashMap<u64, Vec<String>>,
}

impl FilenameTables {
    /// Resolves `global_file_id` within the table identified by `filenames_hash`.
    ///
    /// Returns `None` if no table has that hash, or if the ID is past the end
    /// of the table.
    pub(crate) fn lookup(&self, filenames_hash: u64, global_file_id: usize) -> Option<&str> {
        self.map.get(&filenames_hash)?.get(global_file_id).map(String::as_str)
    }
}
22+
23+
/// Data extracted from one `@__llvm_coverage_mapping` line of LLVM IR assembly.
struct CovmapLineData {
    /// Unescaped bytes of the line's `[N x i8]` string constant.
    payload: Vec<u8>,
}
26+
27+
pub(crate) fn make_filename_tables(llvm_ir: &str) -> anyhow::Result<FilenameTables> {
28+
let mut map = HashMap::default();
29+
30+
for line in llvm_ir.lines().filter(|line| is_covmap_line(line)) {
31+
let CovmapLineData { payload } = parse_covmap_line(line)?;
32+
33+
let mut parser = Parser::new(&payload);
34+
let n_filenames = parser.read_uleb128_usize()?;
35+
let uncompressed_bytes = parser.read_chunk_to_uncompressed_bytes()?;
36+
parser.ensure_empty()?;
37+
38+
let mut filenames_table = vec![];
39+
40+
let mut parser = Parser::new(&uncompressed_bytes);
41+
for _ in 0..n_filenames {
42+
let len = parser.read_uleb128_usize()?;
43+
let bytes = parser.read_n_bytes(len)?;
44+
let filename = str::from_utf8(bytes)?;
45+
filenames_table.push(filename.to_owned());
46+
}
47+
48+
let filenames_hash = truncated_md5(&payload);
49+
map.insert(filenames_hash, filenames_table);
50+
}
51+
52+
Ok(FilenameTables { map })
53+
}
54+
55+
/// Reports whether a line of LLVM IR assembly begins with the
/// `@__llvm_coverage_mapping` variable name followed by a space.
fn is_covmap_line(line: &str) -> bool {
    line.strip_prefix("@__llvm_coverage_mapping ").is_some()
}
58+
59+
fn parse_covmap_line(line: &str) -> anyhow::Result<CovmapLineData> {
60+
ensure!(is_covmap_line(line));
61+
62+
const RE_STRING: &str = r#"(?x)^
63+
@__llvm_coverage_mapping \ =
64+
.*
65+
\[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
66+
.*$
67+
"#;
68+
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
69+
70+
let captures =
71+
RE.captures(line).with_context(|| format!("couldn't parse covmap line: {line:?}"))?;
72+
let payload = unescape_llvm_string_contents(&captures["payload"]);
73+
74+
Ok(CovmapLineData { payload })
75+
}

0 commit comments

Comments
 (0)