Skip to content

Commit 8dc9e9f

Browse files
committed
Fix use of wrong UNICHARSET
Signed-off-by: Stefan Weil <[email protected]>
1 parent 0e43ae5 commit 8dc9e9f

File tree

6 files changed

+23
-12
lines changed

6 files changed

+23
-12
lines changed

src/api/baseapi.h

+1
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ class Trie;
7575
class Wordrec;
7676

7777
typedef int (Dict::*DictFunc)(void* void_dawg_args,
78+
const UNICHARSET& unicharset,
7879
UNICHAR_ID unichar_id, bool word_end) const;
7980
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
8081
const char* context,

src/dict/dict.cpp

+8 -3
Original file line number | Diff line number | Diff line change
@@ -361,10 +361,13 @@ void Dict::End() {
361361
// according to at least one of the dawgs in the dawgs_ vector.
362362
// See more extensive comments in dict.h where this function is declared.
363363
int Dict::def_letter_is_okay(void* void_dawg_args,
364+
const UNICHARSET& unicharset,
364365
UNICHAR_ID unichar_id,
365366
bool word_end) const {
366367
DawgArgs *dawg_args = static_cast<DawgArgs *>(void_dawg_args);
367368

369+
ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
370+
368371
if (dawg_debug_level >= 3) {
369372
tprintf("def_letter_is_okay: current unichar=%s word_end=%d"
370373
" num active dawgs=%d\n",
@@ -410,7 +413,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
410413
for (int s = 0; s < slist.length(); ++s) {
411414
int sdawg_index = slist[s];
412415
const Dawg *sdawg = dawgs_[sdawg_index];
413-
UNICHAR_ID ch = char_for_dawg(unichar_id, sdawg);
416+
UNICHAR_ID ch = char_for_dawg(unicharset, unichar_id, sdawg);
414417
EDGE_REF dawg_edge = sdawg->edge_char_of(0, ch, word_end);
415418
if (dawg_edge != NO_EDGE) {
416419
if (dawg_debug_level >=3) {
@@ -477,7 +480,8 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
477480
// Find the edge out of the node for the unichar_id.
478481
NODE_REF node = GetStartingNode(dawg, pos.dawg_ref);
479482
EDGE_REF edge = (node == NO_EDGE) ? NO_EDGE
480-
: dawg->edge_char_of(node, char_for_dawg(unichar_id, dawg), word_end);
483+
: dawg->edge_char_of(node, char_for_dawg(unicharset, unichar_id, dawg),
484+
word_end);
481485

482486
if (dawg_debug_level >= 3) {
483487
tprintf("Active dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n",
@@ -759,7 +763,8 @@ int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
759763
int last_index = word_ptr->length() - 1;
760764
// Call letter_is_okay for each letter in the word.
761765
for (int i = hyphen_base_size(); i <= last_index; ++i) {
762-
if (!((this->*letter_is_okay_)(&dawg_args, word_ptr->unichar_id(i),
766+
if (!((this->*letter_is_okay_)(&dawg_args, *word_ptr->unicharset(),
767+
word_ptr->unichar_id(i),
763768
i == last_index))) break;
764769
// Swap active_dawgs, constraints with the corresponding updated vector.
765770
if (dawg_args.updated_dawgs == &(active_dawgs[1])) {

src/dict/dict.h

+8 -5
Original file line number | Diff line number | Diff line change
@@ -351,15 +351,17 @@ class Dict {
351351
*/
352352

353353
//
354-
int def_letter_is_okay(void* void_dawg_args,
354+
int def_letter_is_okay(void* void_dawg_args, const UNICHARSET& unicharset,
355355
UNICHAR_ID unichar_id, bool word_end) const;
356356

357357
int (Dict::*letter_is_okay_)(void* void_dawg_args,
358+
const UNICHARSET& unicharset,
358359
UNICHAR_ID unichar_id, bool word_end) const;
359360
/// Calls letter_is_okay_ member function.
360-
int LetterIsOkay(void* void_dawg_args,
361+
int LetterIsOkay(void* void_dawg_args, const UNICHARSET& unicharset,
361362
UNICHAR_ID unichar_id, bool word_end) const {
362-
return (this->*letter_is_okay_)(void_dawg_args, unichar_id, word_end);
363+
return (this->*letter_is_okay_)(void_dawg_args,
364+
unicharset, unichar_id, word_end);
363365
}
364366

365367

@@ -428,11 +430,12 @@ class Dict {
428430
// Given a unichar from a string and a given dawg, return the unichar
429431
// we should use to match in that dawg type. (for example, in the number
430432
// dawg, all numbers are transformed to kPatternUnicharId).
431-
inline UNICHAR_ID char_for_dawg(UNICHAR_ID ch, const Dawg *dawg) const {
433+
UNICHAR_ID char_for_dawg(const UNICHARSET& unicharset, UNICHAR_ID ch,
434+
const Dawg *dawg) const {
432435
if (!dawg) return ch;
433436
switch (dawg->type()) {
434437
case DAWG_TYPE_NUMBER:
435-
return getUnicharset().get_isdigit(ch) ? Dawg::kPatternUnicharID : ch;
438+
return unicharset.get_isdigit(ch) ? Dawg::kPatternUnicharID : ch;
436439
default:
437440
return ch;
438441
}

src/dict/permdawg.cpp

+3 -2
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ void Dict::go_deeper_dawg_fxn(
8888
++num_unigrams;
8989
word->append_unichar_id(uch_id, 1, 0.0, 0.0);
9090
unigrams_ok = (this->*letter_is_okay_)(
91-
&unigram_dawg_args,
91+
&unigram_dawg_args, *word->unicharset(),
9292
word->unichar_id(word_index+num_unigrams-1),
9393
word_ending && i == encoding.size() - 1);
9494
(*unigram_dawg_args.active_dawgs) = *(unigram_dawg_args.updated_dawgs);
@@ -111,7 +111,8 @@ void Dict::go_deeper_dawg_fxn(
111111
// Check which dawgs from the dawgs_ vector contain the word
112112
// up to and including the current unichar.
113113
if (checked_unigrams || (this->*letter_is_okay_)(
114-
more_args, word->unichar_id(word_index), word_ending)) {
114+
more_args, *word->unicharset(), word->unichar_id(word_index),
115+
word_ending)) {
115116
// Add a new word choice
116117
if (word_ending) {
117118
if (dawg_debug_level) {

src/lstm/recodebeam.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -771,7 +771,8 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
771771
return; // Can't continue if not a dict word.
772772
}
773773
PermuterType permuter = static_cast<PermuterType>(
774-
dict_->def_letter_is_okay(&dawg_args, unichar_id, false));
774+
dict_->def_letter_is_okay(&dawg_args,
775+
dict_->getUnicharset(), unichar_id, false));
775776
if (permuter != NO_PERM) {
776777
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
777778
word_start, dawg_args.valid_end, false, cert, prev,

src/wordrec/language_model.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -853,7 +853,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
853853
if (language_model_debug_level > 2)
854854
tprintf("Test Letter OK for unichar %d, normed %d\n",
855855
b.unichar_id(), normed_ids[i]);
856-
dict_->LetterIsOkay(&dawg_args_, normed_ids[i],
856+
dict_->LetterIsOkay(&dawg_args_, dict_->getUnicharset(), normed_ids[i],
857857
word_end && i == normed_ids.size() - 1);
858858
if (dawg_args_.permuter == NO_PERM) {
859859
break;

0 commit comments

Comments
 (0)