Skip to content

Commit 5deebe6

Browse files
committed
Fixed multilang for LSTM, pushed cube to one side without actually deleting it
1 parent 798d79a commit 5deebe6

14 files changed

+139
-124
lines changed

api/tesseractmain.cpp

+3-4
Original file line number | Diff line number | Diff line change
@@ -123,10 +123,9 @@ void PrintHelpForOEM() {
123123
const char* msg =
124124
"OCR Engine modes:\n"
125125
" 0 Original Tesseract only.\n"
126-
" 1 Cube only.\n"
127-
" 2 Tesseract + cube.\n"
128-
" 3 Default, based on what is available.\n"
129-
" 4 Neural nets (LSTM) only.\n";
126+
" 1 Neural nets LSTM only.\n"
127+
" 2 Tesseract + LSTM.\n"
128+
" 3 Default, based on what is available.\n";
130129

131130
printf("%s", msg);
132131
}

ccmain/control.cpp

+32-29
Original file line number | Diff line number | Diff line change
@@ -31,21 +31,22 @@
3131
#include <errno.h>
3232
#endif
3333
#include <ctype.h>
34-
#include "ocrclass.h"
35-
#include "werdit.h"
34+
#include "callcpp.h"
35+
#include "control.h"
36+
#include "docqual.h"
3637
#include "drawfx.h"
37-
#include "tessbox.h"
38-
#include "tessvars.h"
39-
#include "pgedit.h"
40-
#include "reject.h"
4138
#include "fixspace.h"
42-
#include "docqual.h"
43-
#include "control.h"
44-
#include "output.h"
45-
#include "callcpp.h"
4639
#include "globals.h"
40+
#include "lstmrecognizer.h"
41+
#include "ocrclass.h"
42+
#include "output.h"
43+
#include "pgedit.h"
44+
#include "reject.h"
4745
#include "sorthelper.h"
46+
#include "tessbox.h"
4847
#include "tesseractclass.h"
48+
#include "tessvars.h"
49+
#include "werdit.h"
4950

5051
#define MIN_FONT_ROW_COUNT 8
5152
#define MAX_XHEIGHT_DIFF 3
@@ -192,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
192193
WERD_RES* word_res = new WERD_RES;
193194
word_res->InitForRetryRecognition(*word->word);
194195
word->lang_words.push_back(word_res);
195-
// Cube doesn't get setup for pass2.
196-
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
196+
// LSTM doesn't get setup for pass2.
197+
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
197198
word_res->SetupForRecognition(
198199
lang_t->unicharset, lang_t, BestPix(),
199200
lang_t->tessedit_ocr_engine_mode, NULL,
@@ -301,16 +302,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
301302
const TBOX* target_word_box,
302303
const char* word_config,
303304
int dopasses) {
304-
// PSM_RAW_LINE is a special-case mode in which the layout analysis is
305-
// completely ignored and LSTM is run on the raw image. There is no hope
306-
// of running normal tesseract in this situation or of integrating output.
307-
#ifndef ANDROID_BUILD
308-
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY &&
309-
tessedit_pageseg_mode == PSM_RAW_LINE) {
310-
RecogRawLine(page_res);
311-
return true;
312-
}
313-
#endif
314305
PAGE_RES_IT page_res_it(page_res);
315306

316307
if (tessedit_minimal_rej_pass1) {
@@ -397,8 +388,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
397388
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
398389
}
399390

400-
// The next passes can only be run if tesseract has been used, as cube
401-
// doesn't set all the necessary outputs in WERD_RES.
391+
// The next passes are only required for Tess-only.
402392
if (AnyTessLang() && !AnyLSTMLang()) {
403393
// ****************** Pass 3 *******************
404394
// Fix fuzzy spaces.
@@ -451,8 +441,13 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
451441
for (page_res_it.restart_page(); page_res_it.word() != NULL;
452442
page_res_it.forward()) {
453443
WERD_RES* word = page_res_it.word();
454-
if (word->best_choice == NULL || word->best_choice->length() == 0)
444+
POLY_BLOCK* pb = page_res_it.block()->block != NULL
445+
? page_res_it.block()->block->poly_block()
446+
: NULL;
447+
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
448+
(word->best_choice->IsAllSpaces() && (pb == NULL || pb->IsText()))) {
455449
page_res_it.DeleteCurrentWord();
450+
}
456451
}
457452

458453
if (monitor != NULL) {
@@ -1376,12 +1371,20 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
13761371
cube_word_pass1(block, row, *in_word);
13771372
return;
13781373
}
1379-
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
1380-
if (!(*in_word)->odd_size) {
1374+
#endif
1375+
#ifndef ANDROID_BUILD
1376+
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
1377+
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
1378+
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
13811379
LSTMRecognizeWord(*block, row, *in_word, out_words);
13821380
if (!out_words->empty())
13831381
return; // Successful lstm recognition.
13841382
}
1383+
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
1384+
// No fallback allowed, so use a fake.
1385+
(*in_word)->SetupFake(lstm_recognizer_->GetUnicharset());
1386+
return;
1387+
}
13851388
// Fall back to tesseract for failed words or odd words.
13861389
(*in_word)->SetupForRecognition(unicharset, this, BestPix(),
13871390
OEM_TESSERACT_ONLY, NULL,
@@ -1523,7 +1526,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
15231526
WERD_RES** in_word,
15241527
PointerVector<WERD_RES>* out_words) {
15251528
// Return if we do not want to run Tesseract.
1526-
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
1529+
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
15271530
return;
15281531
}
15291532
ROW* row = word_data.row;
@@ -1908,7 +1911,7 @@ static void find_modal_font( //good chars in word
19081911
* Get the fonts for the word.
19091912
*/
19101913
void Tesseract::set_word_fonts(WERD_RES *word) {
1911-
// Don't try to set the word fonts for a cube word, as the configs
1914+
// Don't try to set the word fonts for an lstm word, as the configs
19121915
// will be meaningless.
19131916
if (word->chopped_word == NULL) return;
19141917
ASSERT_HOST(word->best_choice != NULL);

ccmain/linerec.cpp

+17-23
Original file line number | Diff line number | Diff line change
@@ -219,19 +219,6 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
219219
}
220220

221221
#ifndef ANDROID_BUILD
222-
// Top-level function recognizes a single raw line.
223-
void Tesseract::RecogRawLine(PAGE_RES* page_res) {
224-
PAGE_RES_IT it(page_res);
225-
PointerVector<WERD_RES> words;
226-
LSTMRecognizeWord(*it.block()->block, it.row()->row, it.word(), &words);
227-
if (getDict().stopper_debug_level >= 1) {
228-
for (int w = 0; w < words.size(); ++w) {
229-
words[w]->DebugWordChoices(true, NULL);
230-
}
231-
}
232-
it.ReplaceCurrentWord(&words);
233-
}
234-
235222
// Recognizes a word or group of words, converting to WERD_RES in *words.
236223
// Analogous to classify_word_pass1, but can handle a group of words as well.
237224
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
@@ -268,7 +255,17 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
268255
// for each of the output words.
269256
// If we drop a word as junk, then there is always a space in front of the
270257
// next.
271-
bool deleted_prev = false;
258+
const Dict* stopper_dict = lstm_recognizer_->GetDict();
259+
if (stopper_dict == nullptr) stopper_dict = &getDict();
260+
bool any_nonspace_delimited = false;
261+
for (int w = 0; w < words->size(); ++w) {
262+
WERD_RES* word = (*words)[w];
263+
if (word->best_choice != nullptr &&
264+
word->best_choice->ContainsAnyNonSpaceDelimited()) {
265+
any_nonspace_delimited = true;
266+
break;
267+
}
268+
}
272269
for (int w = 0; w < words->size(); ++w) {
273270
WERD_RES* word = (*words)[w];
274271
if (word->best_choice == NULL) {
@@ -284,9 +281,7 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
284281
}
285282
if (word->best_choice == NULL) {
286283
// It is a dud.
287-
words->remove(w);
288-
--w;
289-
deleted_prev = true;
284+
word->SetupFake(lstm_recognizer_->GetUnicharset());
290285
} else {
291286
// Set the best state.
292287
for (int i = 0; i < word->best_choice->length(); ++i) {
@@ -314,22 +309,21 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
314309
word->best_choice->print();
315310
}
316311
// Discard words that are impossibly bad, but allow a bit more for
317-
// dictionary words.
312+
// dictionary words, and keep bad words in non-space-delimited langs.
318313
if (word_certainty >= RecodeBeamSearch::kMinCertainty ||
314+
any_nonspace_delimited ||
319315
(word_certainty >= kWorstDictCertainty &&
320316
Dict::valid_word_permuter(word->best_choice->permuter(), true))) {
321-
word->best_choice->set_certainty(word_certainty);
322-
if (deleted_prev) word->word->set_blanks(1);
317+
word->tess_accepted = stopper_dict->AcceptableResult(word);
323318
} else {
324319
if (getDict().stopper_debug_level >= 1) {
325320
tprintf("Deleting word with certainty %g\n", word_certainty);
326321
word->best_choice->print();
327322
}
328323
// It is a dud.
329-
words->remove(w);
330-
--w;
331-
deleted_prev = true;
324+
word->SetupFake(lstm_recognizer_->GetUnicharset());
332325
}
326+
word->best_choice->set_certainty(word_certainty);
333327
}
334328
}
335329
}

ccmain/tessedit.cpp

+41-34
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ bool Tesseract::init_tesseract_lang_data(
161161
// Determine which ocr engine(s) should be loaded and used for recognition.
162162
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
163163
if (tessdata_manager_debug_level) {
164-
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
164+
tprintf("Loading Tesseract/LSTM with tessedit_ocr_engine_mode %d\n",
165165
static_cast<int>(tessedit_ocr_engine_mode));
166166
}
167167

@@ -174,9 +174,37 @@ bool Tesseract::init_tesseract_lang_data(
174174
return true;
175175
}
176176

177+
// The various OcrEngineMode settings (see publictypes.h) determine which
178+
// engine-specific data files need to be loaded. Currently everything needs
179+
// the base tesseract data, which supplies other useful information, but
180+
// alternative engines, such as LSTM are optional.
181+
#ifndef ANDROID_BUILD
182+
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
183+
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
184+
if (tessdata_manager.swap()) {
185+
tprintf("Error: LSTM requested on big-endian hardware!!\n");
186+
tprintf("Big-endian not yet supported! Loading tesseract.\n");
187+
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
188+
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
189+
lstm_recognizer_ = new LSTMRecognizer;
190+
TFile fp;
191+
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
192+
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
193+
if (lstm_use_matrix)
194+
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
195+
} else {
196+
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
197+
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
198+
}
199+
}
200+
#endif
201+
177202
// Load the unicharset
178-
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
179-
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
203+
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
204+
// Avoid requiring a unicharset when we aren't running base tesseract.
205+
unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
206+
} else if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
207+
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
180208
return false;
181209
}
182210
if (unicharset.size() > MAX_NUM_CLASSES) {
@@ -203,11 +231,6 @@ bool Tesseract::init_tesseract_lang_data(
203231
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
204232
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
205233
}
206-
207-
// The various OcrEngineMode settings (see publictypes.h) determine which
208-
// engine-specific data files need to be loaded. Currently everything needs
209-
// the base tesseract data, which supplies other useful information, but
210-
// alternative engines, such as cube and LSTM are optional.
211234
#ifndef NO_CUBE_BUILD
212235
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
213236
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
@@ -217,22 +240,6 @@ bool Tesseract::init_tesseract_lang_data(
217240
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
218241
if (tessdata_manager_debug_level)
219242
tprintf("Loaded Cube with combiner\n");
220-
} else if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
221-
if (tessdata_manager.swap()) {
222-
tprintf("Error: LSTM requested on big-endian hardware!!\n");
223-
tprintf("Big-endian not yet supported! Loading tesseract.\n");
224-
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
225-
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
226-
lstm_recognizer_ = new LSTMRecognizer;
227-
TFile fp;
228-
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
229-
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
230-
if (lstm_use_matrix)
231-
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
232-
} else {
233-
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
234-
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
235-
}
236243
}
237244
#endif
238245
// Init ParamsModel.
@@ -425,16 +432,16 @@ int Tesseract::init_tesseract_internal(
425432
tessdata_manager.End();
426433
return 0;
427434
}
428-
// If only Cube will be used, skip loading Tesseract classifier's
429-
// pre-trained templates.
430-
bool init_tesseract_classifier =
431-
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
432-
// If only Cube will be used and if it has its own Unicharset,
433-
// skip initializing permuter and loading Tesseract Dawgs.
434-
bool init_dict =
435-
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
436-
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
437-
program_editup(textbase, init_tesseract_classifier, init_dict);
435+
// If only LSTM will be used, skip loading Tesseract classifier's
436+
// pre-trained templates and dictionary.
437+
bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY &&
438+
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
439+
bool init_dict = init_tesseract;
440+
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
441+
!tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
442+
init_dict = true;
443+
}
444+
program_editup(textbase, init_tesseract, init_dict);
438445
tessdata_manager.End();
439446
return 0; //Normal exit
440447
}

ccmain/tesseract_cube_combiner.cpp

+6-6
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
// the recognition results of Tesseract and Cube at the word level
2222

2323
#include <algorithm>
24+
#include <string>
25+
#include <vector>
2426
#include <wctype.h>
2527

2628
#include "tesseract_cube_combiner.h"
@@ -125,12 +127,10 @@ bool TesseractCubeCombiner::ValidWord(const string &str) {
125127
// Public method for computing the combiner features. The agreement
126128
// output parameter will be true if both answers are identical,
127129
// and false otherwise.
128-
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
129-
int tess_confidence,
130-
CubeObject *cube_obj,
131-
WordAltList *cube_alt_list,
132-
vector<double> *features,
133-
bool *agreement) {
130+
bool TesseractCubeCombiner::ComputeCombinerFeatures(
131+
const string &tess_str, int tess_confidence, CubeObject *cube_obj,
132+
WordAltList *cube_alt_list, std::vector<double> *features,
133+
bool *agreement) {
134134
features->clear();
135135
*agreement = false;
136136
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)

ccmain/tesseractclass.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -81,9 +81,9 @@ Tesseract::Tesseract()
8181
" (Values from PageSegMode enum in publictypes.h)",
8282
this->params()),
8383
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
84-
"Which OCR engine(s) to run (Tesseract, Cube, both)."
84+
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
8585
" Defaults to loading and running only Tesseract"
86-
" (no Cube,no combiner)."
86+
" (no LSTM,no combiner)."
8787
" Values from OcrEngineMode enum in tesseractclass.h)",
8888
this->params()),
8989
STRING_MEMBER(tessedit_char_blacklist, "",

0 commit comments

Comments
 (0)