Skip to content

Commit 78aa72f

Browse files
authored
fix versionsort chunk split on non-ASCII numerics (#6407)
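Replaces `char::is_numeric()` with `char::is_ascii_digit()` in `VersionChunkIter::parse_str_chunk()`, because the Style Guide explicitly defines a numeric chunk as a sequence of ASCII digits. As a result, non-ASCII numeric characters such as '๙' (U+0E59 THAI DIGIT NINE) no longer end a string chunk: `"x๙v"` is now parsed as a single string chunk instead of being split at the digit.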
1 parent 0a32a02 commit 78aa72f

5 files changed

+90
-1
lines changed

src/sort.rs

+10-1
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ impl<'a> VersionChunkIter<'a> {
6565
break;
6666
}
6767

68-
if !c.is_numeric() {
68+
if !c.is_ascii_digit() {
6969
continue;
7070
}
7171

@@ -283,6 +283,10 @@ mod test {
283283
source: "009"
284284
})
285285
);
286+
287+
// '๙' = U+0E59 THAI DIGIT NINE, General Category Nd
288+
let mut iter = VersionChunkIter::new("x๙v");
289+
assert_eq!(iter.next(), Some(VersionChunk::Str("x๙v")));
286290
}
287291

288292
#[test]
@@ -297,6 +301,11 @@ mod test {
297301
input.sort_by(|a, b| version_sort(a, b));
298302
assert_eq!(input, expected);
299303

304+
let mut input = vec!["x๙x", "xéx", "x0x"];
305+
let expected = vec!["x0x", "xéx", "x๙x"];
306+
input.sort_by(|a, b| version_sort(a, b));
307+
assert_eq!(input, expected);
308+
300309
let mut input = vec!["applesauce", "apple"];
301310
let expected = vec!["apple", "applesauce"];
302311
input.sort_by(|a, b| version_sort(a, b));
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
// rustfmt-style_edition: 2015
2+
3+
// ascii-betically sorted
4+
pub use print๙msg;
5+
pub use print0msg;
6+
pub use printémsg;
7+
8+
fn main() {}
9+
10+
/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, sorts third)
11+
mod print๙msg {}
12+
13+
/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, sorts first)
14+
mod print0msg {}
15+
16+
/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, sorts second)
17+
mod printémsg {}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
// rustfmt-style_edition: 2024
2+
3+
// versionsorted
4+
pub use print๙msg;
5+
pub use print0msg;
6+
pub use printémsg;
7+
8+
fn main() {}
9+
10+
/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, one string chunk)
11+
///
12+
/// U+0E59 > U+00E9, sorts third
13+
mod print๙msg {}
14+
15+
/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, splits into 3 chunks ("print",0,"msg"))
16+
///
17+
/// shortest chunk "print", sorts first
18+
mod print0msg {}
19+
20+
/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, one string chunk)
21+
///
22+
/// U+00E9 < U+0E59, sorts second
23+
mod printémsg {}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
// rustfmt-style_edition: 2015
2+
3+
// ascii-betically sorted
4+
pub use print0msg;
5+
pub use printémsg;
6+
pub use print๙msg;
7+
8+
fn main() {}
9+
10+
/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, sorts third)
11+
mod print๙msg {}
12+
13+
/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, sorts first)
14+
mod print0msg {}
15+
16+
/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, sorts second)
17+
mod printémsg {}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
// rustfmt-style_edition: 2024
2+
3+
// versionsorted
4+
pub use print0msg;
5+
pub use printémsg;
6+
pub use print๙msg;
7+
8+
fn main() {}
9+
10+
/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, one string chunk)
11+
///
12+
/// U+0E59 > U+00E9, sorts third
13+
mod print๙msg {}
14+
15+
/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, splits into 3 chunks ("print",0,"msg"))
16+
///
17+
/// shortest chunk "print", sorts first
18+
mod print0msg {}
19+
20+
/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, one string chunk)
21+
///
22+
/// U+00E9 < U+0E59, sorts second
23+
mod printémsg {}

0 commit comments

Comments
 (0)