Treat control characters as width 1, fixes #16 (#19)

Aetf · web-flow · commit 66e84b3ad81e · 2024-06-23T18:12:51.000-07:00
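This is consistent with how unicode-width computes the width of a string as opposed to a single char: the `str` width counts a control character as 1 column, while the `char` width returns `None` for it. See also unicode-rs/unicode-width#45.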
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -44,3 +44,5 @@ harness = false
 codegen-units = 1
 lto = true
 
+[profile.test]
+debug-assertions = true
diff --git a/src/lib.rs b/src/lib.rs
@@ -152,7 +152,9 @@ impl UnicodeTruncateStr for str {
         let (byte_index, new_width) = self
             .char_indices()
             // map to byte index and the width of char start at the index
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters (where `char.width()` is `None`) are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // chain a final element representing the position past the last char
             .chain(core::iter::once((self.len(), 0)))
             // fold to byte index and the width up to the index
@@ -182,7 +184,9 @@ impl UnicodeTruncateStr for str {
             // instead of start checking from the start do so from the end
             .rev()
             // map to byte index and the width of char start at the index
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters (where `char.width()` is `None`) are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // skip any position with zero width, the cut won't happen at these points
             // this also helps with not including zero width char at the beginning
             .filter(|&(_, char_width)| char_width > 0)
@@ -223,7 +227,9 @@ impl UnicodeTruncateStr for str {
 
         let from_start = self
             .char_indices()
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters (where `char.width()` is `None`) are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // skip any position with zero width, the cut won't happen at these points
             // this also helps with removing zero width char at the beginning
             .filter(|&(_, char_width)| char_width > 0)
@@ -242,7 +248,9 @@ impl UnicodeTruncateStr for str {
 
         let from_end = self
             .char_indices()
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters (where `char.width()` is `None`) are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // skip any position with zero width, the cut won't happen at these points
             // this also helps with keeping zero width char at the end
             .filter(|&(_, char_width)| char_width > 0)
@@ -511,6 +519,13 @@ mod tests {
                 ("b\u{0306}y\u{0306}", 2)
             );
         }
+
+        #[test]
+        fn control_char() {
+            assert_eq!("\u{0019}".width(), 1);
+            assert_eq!('\u{0019}'.width(), None);
+            assert_eq!("\u{0019}".unicode_truncate(2), ("\u{0019}", 1));
+        }
     }
 
     #[test]