Skip to content

Commit 57547b7

Browse files
committed
fix panic on to_romaji
Properly handle the case where a transition node doesn't contain romaji. Fixes #13.
1 parent c26ed30 commit 57547b7

File tree

3 files changed

+32
-16
lines changed

3 files changed

+32
-16
lines changed

src/to_romaji.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,4 +227,9 @@ mod tests {
227227
assert_eq!(to_romaji("シンヨ"), "shin'yo");
228228
}
229229
}
230+
231+
#[test]
232+
fn check_panic_issue_13() {
233+
assert_eq!(to_romaji("ウーッー"), "uu");
234+
}
230235
}

src/trim_okurigana.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@ pub fn trim_okurigana(input: &str) -> &str {
2323
}
2424

2525
pub(crate) fn is_leading_without_initial_kana(input: &str, trim_from_start: bool) -> bool {
26+
if input.is_empty() {
27+
return false;
28+
}
2629
trim_from_start && !is_char_kana(input.chars().next().unwrap())
2730
}
2831

2932
#[inline]
3033
pub(crate) fn is_trailing_without_final_kana(input: &str, trim_from_start: bool) -> bool {
34+
if input.is_empty() {
35+
return false;
36+
}
3137
!trim_from_start && !is_char_kana(input.chars().last().unwrap())
3238
}
3339

@@ -77,7 +83,9 @@ pub fn trim_okurigana_with_opt<'a>(
7783
} else {
7884
tokenize(input)
7985
};
80-
dbg!(&tokens);
86+
if tokens.is_empty() {
87+
return input;
88+
}
8189

8290
if trim_from_start {
8391
input.trim_start_matches(&tokens[0])

src/utils/katakana_to_hiragana.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,46 +47,49 @@ pub fn katakana_to_hiragana(input: &str) -> String {
4747
pub(crate) fn katakana_to_hiragana_with_opt(input: &str, is_destination_romaji: bool) -> String {
4848
let mut hira = Vec::with_capacity(input.chars().count());
4949
let mut previous_kana: Option<char> = None;
50-
for (index, char) in input.chars().enumerate() {
50+
for (index, input_char) in input.chars().enumerate() {
5151
// Short circuit to avoid incorrect codeshift for 'ー' and '・'
52-
if is_char_slash_dot(char)
53-
|| is_char_initial_long_dash(char, index)
54-
|| is_kana_as_symbol(char)
52+
if is_char_slash_dot(input_char)
53+
|| is_char_initial_long_dash(input_char, index)
54+
|| is_kana_as_symbol(input_char)
5555
{
56-
hira.push(char);
56+
hira.push(input_char);
5757
// Transform long vowels: 'オー' to 'おう'
5858
} else if let (Some(previous_kana), true) =
59-
(previous_kana, is_char_inner_long_dash(char, index))
59+
(previous_kana, is_char_inner_long_dash(input_char, index))
6060
{
6161
// Transform previous_kana back to romaji, and slice off the vowel
6262
let romaji = TO_ROMAJI_NODE_TREE
6363
.find_transition_node(previous_kana)
64-
.unwrap()
64+
.unwrap_or_else(|| {
65+
panic!(
66+
"Failed to find transition node for previous kana {}",
67+
previous_kana
68+
)
69+
})
6570
.output;
6671

67-
let romaji = romaji.chars().last().unwrap_or_else(|| {
68-
panic!("could not find kana {:?} in TO_ROMAJI map", previous_kana)
69-
});
72+
let romaji_opt = romaji.chars().last();
7073
// However, ensure 'オー' => 'おお' => 'oo' if this is a transform on the way to romaji
7174
if let Some(prev_char) = input.chars().nth(index - 1) {
72-
if is_char_katakana(prev_char) && romaji == 'o' && is_destination_romaji {
75+
if is_char_katakana(prev_char) && romaji_opt == Some('o') && is_destination_romaji {
7376
hira.push('お');
7477
continue;
7578
}
7679
}
7780

78-
if let Some(hit) = LONG_VOWELS.get(&romaji) {
81+
if let Some(hit) = romaji_opt.and_then(|romaji| LONG_VOWELS.get(&romaji)) {
7982
hira.push(*hit);
8083
}
81-
} else if !is_char_long_dash(char) && is_char_katakana(char) {
84+
} else if !is_char_long_dash(input_char) && is_char_katakana(input_char) {
8285
// Shift charcode.
83-
let code = char as i32 + (HIRAGANA_START as i32 - KATAKANA_START as i32);
86+
let code = input_char as i32 + (HIRAGANA_START as i32 - KATAKANA_START as i32);
8487
let hira_char = std::char::from_u32(code as u32).unwrap();
8588
hira.push(hira_char);
8689
previous_kana = Some(hira_char);
8790
} else {
8891
// Pass non katakana chars through
89-
hira.push(char);
92+
hira.push(input_char);
9093
previous_kana = None;
9194
}
9295
}

0 commit comments

Comments
 (0)