Skip to content

Commit fcfe10a

Browse files
committed
add proptests to test against panics
add proptests to test against panics; handle voiced kana
1 parent 57547b7 commit fcfe10a

File tree

5 files changed

+77
-14
lines changed

5 files changed

+77
-14
lines changed

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,9 @@ path = "src/bin/to_kana.rs"
3232
name = "to_romaji"
3333
path = "src/bin/to_romaji.rs"
3434

35+
[dev-dependencies]
36+
proptest = "1.5.0"
37+
3538
# docs.rs-specific configuration
3639
[package.metadata.docs.rs]
3740
# document all features

src/lib.rs

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,11 +90,53 @@ pub mod traits;
9090
pub use traits::{ConvertJapanese, IsJapaneseChar, IsJapaneseStr};
9191

9292
#[cfg(test)]
93-
mod tests {
93+
mod proptests {
9494
use super::*;
9595

9696
#[test]
9797
fn test_regression_11() {
9898
assert_eq!("シークヮーサー".to_hiragana(), "しいくゎあさあ");
9999
}
100+
101+
use proptest::prelude::*;
102+
103+
proptest! {
104+
#[test]
105+
fn to_kana_does_not_panic(input in ".*") {
106+
let _ = input.to_kana();
107+
}
108+
109+
#[test]
110+
fn to_romaji_does_not_panic(input in ".*") {
111+
let _ = &input.to_romaji();
112+
}
113+
114+
#[test]
115+
fn to_romaji_does_not_panic_kana_katakana_input(
116+
input in r"[ぁ-ゟァ-ヿー]{1,100}"
117+
) {
118+
let _ = &input.to_romaji();
119+
}
120+
121+
#[test]
122+
fn to_katakana_does_not_panic(input in ".*") {
123+
let _ = &input.to_katakana();
124+
}
125+
126+
#[test]
127+
fn to_hiragana_does_not_panic(input in ".*") {
128+
let _ = &input.to_hiragana();
129+
}
130+
131+
#[test]
132+
fn mixed_romaji_kana_katakana_input_does_not_panic(
133+
input in r"[A-Za-zぁ-ゟァ-ヿー]{1,100}"
134+
) {
135+
let _ = input.to_kana();
136+
let _ = input.to_romaji();
137+
let _ = input.to_katakana();
138+
let _ = input.to_hiragana();
139+
}
140+
141+
}
100142
}

src/to_romaji.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -232,4 +232,9 @@ mod tests {
232232
fn check_panic_issue_13() {
233233
assert_eq!(to_romaji("ウーッー"), "uu");
234234
}
235+
236+
#[test]
237+
fn check_panic_proptest1() {
238+
assert_eq!(to_romaji("ヷー"), "waa");
239+
}
235240
}

src/to_romaji_node_tree.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,13 @@ impl Node {
4242
lazy_static! {
4343
pub(crate) static ref TO_ROMAJI_NODE_TREE: Node = {
4444
let transitions = Some(vec![
45+
(
46+
'ヷ',
47+
Node {
48+
transitions: None,
49+
output: "va",
50+
},
51+
),
4552
(
4653
'\u{3000}',
4754
Node {

src/utils/katakana_to_hiragana.rs

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -59,17 +59,12 @@ pub(crate) fn katakana_to_hiragana_with_opt(input: &str, is_destination_romaji:
5959
(previous_kana, is_char_inner_long_dash(input_char, index))
6060
{
6161
// Transform previous_kana back to romaji, and slice off the vowel
62-
let romaji = TO_ROMAJI_NODE_TREE
63-
.find_transition_node(previous_kana)
64-
.unwrap_or_else(|| {
65-
panic!(
66-
"Failed to find transition node for previous kana {}",
67-
previous_kana
68-
)
69-
})
70-
.output;
62+
let Some(node) = TO_ROMAJI_NODE_TREE.find_transition_node(previous_kana) else {
63+
hira.push(input_char);
64+
continue;
65+
};
7166

72-
let romaji_opt = romaji.chars().last();
67+
let romaji_opt = node.output.chars().last();
7368
// However, ensure 'オー' => 'おお' => 'oo' if this is a transform on the way to romaji
7469
if let Some(prev_char) = input.chars().nth(index - 1) {
7570
if is_char_katakana(prev_char) && romaji_opt == Some('o') && is_destination_romaji {
@@ -82,9 +77,20 @@ pub(crate) fn katakana_to_hiragana_with_opt(input: &str, is_destination_romaji:
8277
hira.push(*hit);
8378
}
8479
} else if !is_char_long_dash(input_char) && is_char_katakana(input_char) {
85-
// Shift charcode.
86-
let code = input_char as i32 + (HIRAGANA_START as i32 - KATAKANA_START as i32);
87-
let hira_char = std::char::from_u32(code as u32).unwrap();
80+
let hira_char = match input_char {
81+
// rare special cases
82+
'ヷ' => 'わ', // wa with a voiced mark
83+
'ヸ' => 'ゐ', // wi with a voiced mark
84+
'ヹ' => 'ゑ', // we with a voiced mark
85+
'ヺ' => 'を', // wo with a voiced mark
86+
_ => {
87+
// Shift charcode.
88+
let code = input_char as i32 + (HIRAGANA_START as i32 - KATAKANA_START as i32);
89+
// the fallback shouldn't normally happen
90+
std::char::from_u32(code as u32).unwrap_or(input_char)
91+
}
92+
};
93+
8894
hira.push(hira_char);
8995
previous_kana = Some(hira_char);
9096
} else {

0 commit comments

Comments
 (0)