1
1
use std:: collections:: HashMap ;
2
2
use std:: fmt:: { self , Debug , Write as _} ;
3
- use std:: sync:: OnceLock ;
3
+ use std:: sync:: LazyLock ;
4
4
5
- use anyhow:: { Context , anyhow} ;
5
+ use anyhow:: { Context , anyhow, bail, ensure} ;
6
+ use itertools:: Itertools ;
6
7
use regex:: Regex ;
7
8
8
- use crate :: parser:: { Parser , unescape_llvm_string_contents} ;
9
+ use crate :: covmap:: FilenameTables ;
10
+ use crate :: llvm_utils:: unescape_llvm_string_contents;
11
+ use crate :: parser:: Parser ;
12
+
13
+ #[ cfg( test) ]
14
+ mod tests;
9
15
10
16
pub ( crate ) fn dump_covfun_mappings (
11
17
llvm_ir : & str ,
18
+ filename_tables : & FilenameTables ,
12
19
function_names : & HashMap < u64 , String > ,
13
20
) -> anyhow:: Result < ( ) > {
14
21
// Extract function coverage entries from the LLVM IR assembly, and associate
15
22
// each entry with its (demangled) name.
16
23
let mut covfun_entries = llvm_ir
17
24
. lines ( )
18
- . filter_map ( covfun_line_data)
19
- . map ( |line_data| ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data) )
20
- . collect :: < Vec < _ > > ( ) ;
25
+ . filter ( |line| is_covfun_line ( line) )
26
+ . map ( parse_covfun_line)
27
+ . map_ok ( |line_data| {
28
+ ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data)
29
+ } )
30
+ . collect :: < Result < Vec < _ > , _ > > ( ) ?;
21
31
covfun_entries. sort_by ( |a, b| {
22
32
// Sort entries primarily by name, to help make the order consistent
23
33
// across platforms and relatively insensitive to changes.
@@ -41,8 +51,12 @@ pub(crate) fn dump_covfun_mappings(
41
51
println ! ( "Number of files: {num_files}" ) ;
42
52
43
53
for i in 0 ..num_files {
44
- let global_file_id = parser. read_uleb128_u32 ( ) ?;
45
- println ! ( "- file {i} => global file {global_file_id}" ) ;
54
+ let global_file_id = parser. read_uleb128_usize ( ) ?;
55
+ let & CovfunLineData { filenames_hash, .. } = line_data;
56
+ let Some ( filename) = filename_tables. lookup ( filenames_hash, global_file_id) else {
57
+ bail ! ( "couldn't resolve global file: {filenames_hash}, {global_file_id}" ) ;
58
+ } ;
59
+ println ! ( "- file {i} => {filename}" ) ;
46
60
}
47
61
48
62
let num_expressions = parser. read_uleb128_u32 ( ) ?;
@@ -107,36 +121,50 @@ pub(crate) fn dump_covfun_mappings(
107
121
Ok ( ( ) )
108
122
}
109
123
124
/// Data extracted from a single `@__covrec_` line of LLVM IR assembly.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// Whether the record's variable name carries the trailing `u` marker.
    is_used: bool,
    /// Hash used as the key when looking up the function's name.
    name_hash: u64,
    /// Hash used, together with a global file index, to look up filenames.
    filenames_hash: u64,
    /// Unescaped bytes of the record's `c"..."` string payload.
    payload: Vec<u8>,
}
115
131
116
- /// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
117
- /// entry, and if so extracts relevant data in a `CovfunLineData`.
118
- fn covfun_line_data ( line : & str ) -> Option < CovfunLineData > {
119
- let re = {
120
- // We cheat a little bit and match variable names `@__covrec_[HASH]u`
121
- // rather than the section name, because the section name is harder to
122
- // extract and differs across Linux/Windows/macOS. We also extract the
123
- // symbol name hash from the variable name rather than the data, since
124
- // it's easier and both should match.
125
- static RE : OnceLock < Regex > = OnceLock :: new ( ) ;
126
- RE . get_or_init ( || {
127
- Regex :: new (
128
- r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"# ,
129
- )
130
- . unwrap ( )
131
- } )
132
- } ;
132
/// Returns `true` when `line` begins with the `@__covrec_` variable-name
/// prefix, which is how function coverage records are recognised.
fn is_covfun_line(line: &str) -> bool {
    const COVREC_PREFIX: &str = "@__covrec_";
    line.strip_prefix(COVREC_PREFIX).is_some()
}
133
135
134
- let captures = re. captures ( line) ?;
135
- let name_hash = u64:: from_str_radix ( & captures[ "name_hash" ] , 16 ) . unwrap ( ) ;
136
+ /// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
137
+ /// entry, parses it to extract relevant data in a `CovfunLineData`.
138
+ fn parse_covfun_line ( line : & str ) -> anyhow:: Result < CovfunLineData > {
139
+ ensure ! ( is_covfun_line( line) ) ;
140
+
141
+ // We cheat a little bit and match variable names `@__covrec_[HASH]u`
142
+ // rather than the section name, because the section name is harder to
143
+ // extract and differs across Linux/Windows/macOS.
144
+ const RE_STRING : & str = r#"(?x)^
145
+ @__covrec_[0-9A-Z]+(?<is_used>u)?
146
+ \ = \ # (trailing space)
147
+ .*
148
+ <\{
149
+ \ i64 \ (?<name_hash> -? [0-9]+),
150
+ \ i32 \ -? [0-9]+, # (length of payload; currently unused)
151
+ \ i64 \ -? [0-9]+, # (source hash; currently unused)
152
+ \ i64 \ (?<filenames_hash> -? [0-9]+),
153
+ \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
154
+ \ # (trailing space)
155
+ }>
156
+ .*$
157
+ "# ;
158
+ static RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( RE_STRING ) . unwrap ( ) ) ;
159
+
160
+ let captures =
161
+ RE . captures ( line) . with_context ( || format ! ( "couldn't parse covfun line: {line:?}" ) ) ?;
136
162
let is_used = captures. name ( "is_used" ) . is_some ( ) ;
163
+ let name_hash = i64:: from_str_radix ( & captures[ "name_hash" ] , 10 ) . unwrap ( ) as u64 ;
164
+ let filenames_hash = i64:: from_str_radix ( & captures[ "filenames_hash" ] , 10 ) . unwrap ( ) as u64 ;
137
165
let payload = unescape_llvm_string_contents ( & captures[ "payload" ] ) ;
138
166
139
- Some ( CovfunLineData { name_hash, is_used , payload } )
167
+ Ok ( CovfunLineData { is_used , name_hash, filenames_hash , payload } )
140
168
}
141
169
142
170
// Extra parser methods only needed when parsing `covfun` payloads.
0 commit comments