|
8 | 8 | //! The output types are defined in `rustc_session::config::ErrorOutputType`.
|
9 | 9 |
|
10 | 10 | use rustc_span::source_map::SourceMap;
|
11 |
| -use rustc_span::{FileLines, FileName, SourceFile, Span}; |
| 11 | +use rustc_span::{char_width, FileLines, FileName, SourceFile, Span}; |
12 | 12 |
|
13 | 13 | use crate::snippet::{
|
14 | 14 | Annotation, AnnotationColumn, AnnotationType, Line, MultilineAnnotation, Style, StyledString,
|
@@ -677,10 +677,7 @@ impl HumanEmitter {
|
677 | 677 | .skip(left)
|
678 | 678 | .take_while(|ch| {
|
679 | 679 | // Make sure that the trimming on the right will fall within the terminal width.
|
680 |
| - // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` |
681 |
| - // is. For now, just accept that sometimes the code line will be longer than |
682 |
| - // desired. |
683 |
| - let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1); |
| 680 | + let next = char_width(*ch); |
684 | 681 | if taken + next > right - left {
|
685 | 682 | return false;
|
686 | 683 | }
|
@@ -742,11 +739,7 @@ impl HumanEmitter {
|
742 | 739 | let left = margin.left(source_string.len());
|
743 | 740 |
|
744 | 741 | // Account for unicode characters of width !=0 that were removed.
|
745 |
| - let left = source_string |
746 |
| - .chars() |
747 |
| - .take(left) |
748 |
| - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) |
749 |
| - .sum(); |
| 742 | + let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum(); |
750 | 743 |
|
751 | 744 | self.draw_line(
|
752 | 745 | buffer,
|
@@ -2039,7 +2032,7 @@ impl HumanEmitter {
|
2039 | 2032 | let sub_len: usize =
|
2040 | 2033 | if is_whitespace_addition { &part.snippet } else { part.snippet.trim() }
|
2041 | 2034 | .chars()
|
2042 |
| - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) |
| 2035 | + .map(|ch| char_width(ch)) |
2043 | 2036 | .sum();
|
2044 | 2037 |
|
2045 | 2038 | let offset: isize = offsets
|
@@ -2076,11 +2069,8 @@ impl HumanEmitter {
|
2076 | 2069 | }
|
2077 | 2070 |
|
2078 | 2071 | // length of the code after substitution
|
2079 |
| - let full_sub_len = part |
2080 |
| - .snippet |
2081 |
| - .chars() |
2082 |
| - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) |
2083 |
| - .sum::<usize>() as isize; |
| 2072 | + let full_sub_len = |
| 2073 | + part.snippet.chars().map(|ch| char_width(ch)).sum::<usize>() as isize; |
2084 | 2074 |
|
2085 | 2075 | // length of the code to be substituted
|
2086 | 2076 | let snippet_len = span_end_pos as isize - span_start_pos as isize;
|
@@ -2568,18 +2558,53 @@ fn num_decimal_digits(num: usize) -> usize {
|
2568 | 2558 | }
|
2569 | 2559 |
|
2570 | 2560 | // We replace some characters so the CLI output is always consistent and underlines aligned.
|
| 2561 | +// Keep the following list in sync with `rustc_span::char_width`. |
2571 | 2562 | const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
|
2572 |
| - ('\t', " "), // We do our own tab replacement |
| 2563 | + ('\t', " "), // We do our own tab replacement |
2573 | 2564 | ('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
|
2574 |
| - ('\u{202A}', ""), // The following unicode text flow control characters are inconsistently |
2575 |
| - ('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk |
2576 |
| - ('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always. |
2577 |
| - ('\u{202E}', ""), |
2578 |
| - ('\u{2066}', ""), |
2579 |
| - ('\u{2067}', ""), |
2580 |
| - ('\u{2068}', ""), |
2581 |
| - ('\u{202C}', ""), |
2582 |
| - ('\u{2069}', ""), |
| 2565 | + ('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently |
| 2566 | + ('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk |
| 2567 | + ('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always. |
| 2568 | + ('\u{202E}', "�"), |
| 2569 | + ('\u{2066}', "�"), |
| 2570 | + ('\u{2067}', "�"), |
| 2571 | + ('\u{2068}', "�"), |
| 2572 | + ('\u{202C}', "�"), |
| 2573 | + ('\u{2069}', "�"), |
| 2574 | + // In terminals without Unicode support the following will be garbled, but in *all* terminals |
| 2575 | + // the underlying codepoint will be as well. We could gate this replacement behind a "unicode |
| 2576 | + // support" gate. |
| 2577 | + ('\u{0000}', "␀"), |
| 2578 | + ('\u{0001}', "␁"), |
| 2579 | + ('\u{0002}', "␂"), |
| 2580 | + ('\u{0003}', "␃"), |
| 2581 | + ('\u{0004}', "␄"), |
| 2582 | + ('\u{0005}', "␅"), |
| 2583 | + ('\u{0006}', "␆"), |
| 2584 | + ('\u{0007}', "␇"), |
| 2585 | + ('\u{0008}', "␈"), |
| 2586 | + ('\u{000B}', "␋"), |
| 2587 | + ('\u{000C}', "␌"), |
| 2588 | + ('\u{000D}', "␍"), |
| 2589 | + ('\u{000E}', "␎"), |
| 2590 | + ('\u{000F}', "␏"), |
| 2591 | + ('\u{0010}', "␐"), |
| 2592 | + ('\u{0011}', "␑"), |
| 2593 | + ('\u{0012}', "␒"), |
| 2594 | + ('\u{0013}', "␓"), |
| 2595 | + ('\u{0014}', "␔"), |
| 2596 | + ('\u{0015}', "␕"), |
| 2597 | + ('\u{0016}', "␖"), |
| 2598 | + ('\u{0017}', "␗"), |
| 2599 | + ('\u{0018}', "␘"), |
| 2600 | + ('\u{0019}', "␙"), |
| 2601 | + ('\u{001A}', "␚"), |
| 2602 | + ('\u{001B}', "␛"), |
| 2603 | + ('\u{001C}', "␜"), |
| 2604 | + ('\u{001D}', "␝"), |
| 2605 | + ('\u{001E}', "␞"), |
| 2606 | + ('\u{001F}', "␟"), |
| 2607 | + ('\u{007F}', "␡"), |
2583 | 2608 | ];
|
2584 | 2609 |
|
2585 | 2610 | fn normalize_whitespace(str: &str) -> String {
|
|
0 commit comments