Skip to content

Commit 66e84b3

Browse files
authored
Treat control characters as width 1, fixes #16 (#19)
This is consistent with how unicode-width handles string width vs char width. See also unicode-rs/unicode-width#45
1 parent 9e49ef4 commit 66e84b3

File tree

3 files changed

+23
-6
lines changed

3 files changed

+23
-6
lines changed

Cargo.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,5 @@ harness = false
4444
codegen-units = 1
4545
lto = true
4646

47+
[profile.test]
48+
debug-assertions = true

src/lib.rs

+19-4
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,9 @@ impl UnicodeTruncateStr for str {
152152
let (byte_index, new_width) = self
153153
.char_indices()
154154
// map to the byte index and the width of the char starting at that index
155-
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
155+
// control characters are treated as having width 1
156+
// https://github.com/unicode-rs/unicode-width/pull/45
157+
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
156158
// chain a final element representing the position past the last char
157159
.chain(core::iter::once((self.len(), 0)))
158160
// fold to byte index and the width up to the index
@@ -182,7 +184,9 @@ impl UnicodeTruncateStr for str {
182184
// instead of start checking from the start do so from the end
183185
.rev()
184186
// map to the byte index and the width of the char starting at that index
185-
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
187+
// control characters are treated as having width 1
188+
// https://github.com/unicode-rs/unicode-width/pull/45
189+
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
186190
// skip any position with zero width, the cut won't happen at these points
187191
// this also helps with not including zero width char at the beginning
188192
.filter(|&(_, char_width)| char_width > 0)
@@ -223,7 +227,9 @@ impl UnicodeTruncateStr for str {
223227

224228
let from_start = self
225229
.char_indices()
226-
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
230+
// control characters are treated as having width 1
231+
// https://github.com/unicode-rs/unicode-width/pull/45
232+
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
227233
// skip any position with zero width, the cut won't happen at these points
228234
// this also helps with removing zero width char at the beginning
229235
.filter(|&(_, char_width)| char_width > 0)
@@ -242,7 +248,9 @@ impl UnicodeTruncateStr for str {
242248

243249
let from_end = self
244250
.char_indices()
245-
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
251+
// control characters are treated as having width 1
252+
// https://github.com/unicode-rs/unicode-width/pull/45
253+
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
246254
// skip any position with zero width, the cut won't happen at these points
247255
// this also helps with keeping zero width char at the end
248256
.filter(|&(_, char_width)| char_width > 0)
@@ -511,6 +519,13 @@ mod tests {
511519
("b\u{0306}y\u{0306}", 2)
512520
);
513521
}
522+
523+
// Checks how the control character U+0019 is measured and truncated.
#[test]
524+
fn control_char() {
525+
assert_eq!("\u{0019}".width(), 1); // measured as a string, the control character has width 1
526+
assert_eq!('\u{0019}'.width(), None); // measured as a single char, its width is `None`
527+
assert_eq!("\u{0019}".unicode_truncate(2), ("\u{0019}", 1)); // truncation keeps the character and reports width 1
528+
}
514529
}
515530

516531
#[test]

0 commit comments

Comments
 (0)