Skip to content

Commit ae031ef

Browse files
committed
coverage: llvm-cov expects column numbers to be bytes, not code points
1 parent 5db359c commit ae031ef

File tree

4 files changed

+58
-24
lines changed

compiler/rustc_mir_transform/src/coverage/mod.rs

+46-13
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use rustc_middle::mir::{
2323
use rustc_middle::ty::TyCtxt;
2424
use rustc_span::def_id::LocalDefId;
2525
use rustc_span::source_map::SourceMap;
26-
use rustc_span::{ExpnKind, Span, Symbol};
26+
use rustc_span::{BytePos, ExpnKind, Pos, RelativeBytePos, Span, Symbol};
2727

2828
/// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected
2929
/// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen
@@ -258,6 +258,10 @@ fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb
258258

259259
/// Convert the Span into its file name, start line and column, and end line and column.
260260
///
261+
/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
262+
/// the compiler, these column numbers are measured in **bytes**, because that's what
263+
/// LLVM's `llvm-cov` tool expects to see in coverage maps.
264+
///
261265
/// Returns `None` if the conversion failed for some reason. There is no known example
262266
/// of code that would cause this to happen, but it's hard to rule out entirely
263267
/// (especially in the presence of complex macros or other expansions), and if it does
@@ -276,20 +280,49 @@ fn make_code_region(
276280
source_map.span_to_diagnostic_string(body_span)
277281
);
278282

279-
let (file, mut start_line, mut start_col, mut end_line, mut end_col) =
280-
source_map.span_to_location_info(span);
281-
if span.hi() == span.lo() {
282-
// Extend an empty span by one character so the region will be counted.
283-
if span.hi() == body_span.hi() {
284-
start_col = start_col.saturating_sub(1);
285-
} else {
286-
end_col = start_col + 1;
287-
}
283+
let lo = span.lo();
284+
let hi = span.hi();
285+
286+
let file = source_map.lookup_source_file(lo);
287+
288+
// Column numbers need to be in bytes, so we can't use the more convenient
289+
// `SourceMap` methods for looking up file coordinates.
290+
let rpos_and_line_and_byte_column = |pos: BytePos| -> Option<(RelativeBytePos, usize, usize)> {
291+
let rpos = file.relative_position(pos);
292+
let line_index = file.lookup_line(rpos)?;
293+
let line_start = file.lines()[line_index];
294+
// Line numbers and column numbers are 1-based, so add 1 to each.
295+
Some((rpos, line_index + 1, (rpos - line_start).to_usize() + 1))
288296
};
289-
if let Some(file) = file {
290-
start_line = source_map.doctest_offset_line(&file.name, start_line);
291-
end_line = source_map.doctest_offset_line(&file.name, end_line);
297+
298+
let (lo_rpos, mut start_line, mut start_col) = rpos_and_line_and_byte_column(lo)?;
299+
let (hi_rpos, mut end_line, mut end_col) = rpos_and_line_and_byte_column(hi)?;
300+
301+
// If the span is empty, try to expand it by one character so that it is
302+
// more visible in `llvm-cov` reports. (LLVM measures columns in bytes,
303+
// so "one character" might be multiple bytes.)
304+
if lo == hi
305+
&& let Some(src) = &file.src
306+
{
307+
// Prefer to expand the end position, if it won't go outside the body span.
308+
if hi < body_span.hi() {
309+
let hi_rpos = hi_rpos.to_usize();
310+
let nudge_bytes = src.ceil_char_boundary(hi_rpos + 1) - hi_rpos;
311+
end_col += nudge_bytes;
312+
} else if lo > body_span.lo() {
313+
let lo_rpos = lo_rpos.to_usize();
314+
let nudge_bytes = lo_rpos - src.floor_char_boundary(lo_rpos - 1);
315+
// Subtract the nudge, but don't go below column 1.
316+
start_col = start_col.saturating_sub(nudge_bytes).max(1);
317+
}
318+
// If neither nudge could be applied, stick with the empty span coordinates.
292319
}
320+
321+
// Apply an offset so that code in doctests has correct line numbers.
322+
// FIXME(#79417): Currently we have no way to offset doctest _columns_.
323+
start_line = source_map.doctest_offset_line(&file.name, start_line);
324+
end_line = source_map.doctest_offset_line(&file.name, end_line);
325+
293326
Some(CodeRegion {
294327
file_name,
295328
start_line: start_line as u32,

compiler/rustc_mir_transform/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#![feature(min_specialization)]
1111
#![feature(never_type)]
1212
#![feature(option_get_or_insert_default)]
13+
#![feature(round_char_boundary)]
1314
#![feature(trusted_step)]
1415
#![feature(try_blocks)]
1516
#![feature(yeet_expr)]

tests/coverage/unicode.cov-map

+9-9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Function name: unicode::main
2-
Raw bytes (67): 0x[01, 01, 09, 01, 05, 03, 05, 1e, 0d, 22, 09, 03, 05, 11, 1b, 1e, 0d, 22, 09, 03, 05, 09, 01, 0c, 01, 00, 0b, 05, 01, 09, 00, 0b, 03, 00, 0f, 00, 18, 05, 00, 19, 00, 24, 22, 02, 08, 00, 13, 09, 00, 17, 00, 22, 11, 00, 23, 02, 06, 1b, 02, 06, 00, 07, 17, 02, 05, 01, 02]
2+
Raw bytes (67): 0x[01, 01, 09, 01, 05, 03, 05, 1e, 0d, 22, 09, 03, 05, 11, 1b, 1e, 0d, 22, 09, 03, 05, 09, 01, 0c, 01, 00, 0b, 05, 01, 09, 00, 0c, 03, 00, 10, 00, 1b, 05, 00, 1c, 00, 28, 22, 02, 08, 00, 25, 09, 00, 29, 00, 46, 11, 00, 47, 02, 06, 1b, 02, 06, 00, 07, 17, 02, 05, 01, 02]
33
Number of files: 1
44
- file 0 => global file 1
55
Number of expressions: 9
@@ -14,26 +14,26 @@ Number of expressions: 9
1414
- expression 8 operands: lhs = Expression(0, Add), rhs = Counter(1)
1515
Number of file 0 mappings: 9
1616
- Code(Counter(0)) at (prev + 12, 1) to (start + 0, 11)
17-
- Code(Counter(1)) at (prev + 1, 9) to (start + 0, 11)
18-
- Code(Expression(0, Add)) at (prev + 0, 15) to (start + 0, 24)
17+
- Code(Counter(1)) at (prev + 1, 9) to (start + 0, 12)
18+
- Code(Expression(0, Add)) at (prev + 0, 16) to (start + 0, 27)
1919
= (c0 + c1)
20-
- Code(Counter(1)) at (prev + 0, 25) to (start + 0, 36)
21-
- Code(Expression(8, Sub)) at (prev + 2, 8) to (start + 0, 19)
20+
- Code(Counter(1)) at (prev + 0, 28) to (start + 0, 40)
21+
- Code(Expression(8, Sub)) at (prev + 2, 8) to (start + 0, 37)
2222
= ((c0 + c1) - c1)
23-
- Code(Counter(2)) at (prev + 0, 23) to (start + 0, 34)
24-
- Code(Counter(4)) at (prev + 0, 35) to (start + 2, 6)
23+
- Code(Counter(2)) at (prev + 0, 41) to (start + 0, 70)
24+
- Code(Counter(4)) at (prev + 0, 71) to (start + 2, 6)
2525
- Code(Expression(6, Add)) at (prev + 2, 6) to (start + 0, 7)
2626
= ((((c0 + c1) - c1) - c2) + c3)
2727
- Code(Expression(5, Add)) at (prev + 2, 5) to (start + 1, 2)
2828
= (c4 + ((((c0 + c1) - c1) - c2) + c3))
2929

3030
Function name: unicode::サビ
31-
Raw bytes (9): 0x[01, 01, 00, 01, 01, 1c, 12, 00, 14]
31+
Raw bytes (9): 0x[01, 01, 00, 01, 01, 1c, 14, 00, 18]
3232
Number of files: 1
3333
- file 0 => global file 1
3434
Number of expressions: 0
3535
Number of file 0 mappings: 1
36-
- Code(Counter(0)) at (prev + 28, 18) to (start + 0, 20)
36+
- Code(Counter(0)) at (prev + 28, 20) to (start + 0, 24)
3737

3838
Function name: unicode::申し訳ございません
3939
Raw bytes (9): 0x[01, 01, 00, 01, 01, 16, 01, 02, 02]

tests/coverage/unicode.coverage

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
LL| |
1212
LL| 1|fn main() {
1313
LL| 33| for _İ in 'А'..='Я' { /* Я */ }
14-
^32 ^32
14+
^32 ^32
1515
LL| |
1616
LL| 1| if 申し訳ございません() && 申し訳ございません() {
17-
^0
17+
^0
1818
LL| 0| println!("true");
1919
LL| 1| }
2020
LL| |

0 commit comments

Comments
 (0)