28
28
namespace tesseract {
29
29
30
30
// Constructs the iterator: every argument is forwarded unchanged to the
// PageIterator base, and the line and paragraph separators are both
// initialized to the same default string.
// NOTE(review): the separator literals appear space-padded in this extract,
// while the setter comments document "\n" as the default - confirm against
// the original file before relying on the exact literal.
LTRResultIterator::LTRResultIterator (PAGE_RES* page_res, Tesseract* tesseract,
31
- int scale, int scaled_yres,
32
- int rect_left, int rect_top,
33
- int rect_width, int rect_height)
34
- : PageIterator(page_res, tesseract, scale, scaled_yres,
35
- rect_left, rect_top, rect_width, rect_height),
36
- line_separator_ (" \n " ),
37
- paragraph_separator_(" \n " ) {
38
- }
31
+ int scale, int scaled_yres, int rect_left,
32
+ int rect_top, int rect_width,
33
+ int rect_height)
34
+ : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top,
35
+ rect_width, rect_height),
36
+ line_separator_ (" \n " ),
37
+ paragraph_separator_(" \n " ) {}
39
38
40
39
// Destructor.
41
40
// It is defined here, so the compiler can create a single vtable
@@ -57,9 +56,9 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
57
56
} else {
58
57
bool eol = false ; // end of line?
59
58
bool eop = false ; // end of paragraph?
60
- do { // for each paragraph in a block
61
- do { // for each text line in a paragraph
62
- do { // for each word in a text line
59
+ do { // for each paragraph in a block
60
+ do { // for each text line in a paragraph
61
+ do { // for each word in a text line
63
62
best_choice = res_it.word ()->best_choice ;
64
63
ASSERT_HOST (best_choice != nullptr );
65
64
text += best_choice->unichar_string ();
@@ -70,7 +69,7 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
70
69
text.truncate_at (text.length () - 1 );
71
70
text += line_separator_;
72
71
eop = res_it.block () != res_it.prev_block () ||
73
- res_it.row ()->row ->para () != res_it.prev_row ()->row ->para ();
72
+ res_it.row ()->row ->para () != res_it.prev_row ()->row ->para ();
74
73
} while (level != RIL_TEXTLINE && !eop);
75
74
if (eop) text += paragraph_separator_;
76
75
} while (level == RIL_BLOCK && res_it.block () == res_it.prev_block ());
@@ -82,12 +81,12 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
82
81
}
83
82
84
83
// Set the string inserted at the end of each text line. "\n" by default.
85
- void LTRResultIterator::SetLineSeparator (const char * new_line) {
84
+ void LTRResultIterator::SetLineSeparator (const char * new_line) {
86
85
line_separator_ = new_line;
87
86
}
88
87
89
88
// Set the string inserted at the end of each paragraph. "\n" by default.
90
- void LTRResultIterator::SetParagraphSeparator (const char * new_para) {
89
+ void LTRResultIterator::SetParagraphSeparator (const char * new_para) {
91
90
paragraph_separator_ = new_para;
92
91
}
93
92
@@ -131,7 +130,7 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
131
130
break ;
132
131
case RIL_WORD:
133
132
mean_certainty += best_choice->certainty ();
134
- ++certainty_count;
133
+ ++certainty_count;
135
134
break ;
136
135
case RIL_SYMBOL:
137
136
mean_certainty += best_choice->certainty (blob_index_);
@@ -163,26 +162,23 @@ void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
163
162
// the iterator itself, ie rendered invalid by various members of
164
163
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
165
164
// Pointsize is returned in printer's points (1/72 inch).
166
- const char * LTRResultIterator::WordFontAttributes (bool * is_bold,
167
- bool * is_italic,
168
- bool * is_underlined,
169
- bool * is_monospace,
170
- bool * is_serif,
171
- bool * is_smallcaps,
172
- int * pointsize,
173
- int * font_id) const {
165
+ const char * LTRResultIterator::WordFontAttributes (
166
+ bool * is_bold, bool * is_italic, bool * is_underlined, bool * is_monospace,
167
+ bool * is_serif, bool * is_smallcaps, int * pointsize, int * font_id) const {
174
168
const char * result = nullptr ;
175
169
176
170
if (it_->word () == nullptr ) {
177
171
// Already at the end!
178
172
*pointsize = 0 ;
179
173
} else {
180
174
float row_height = it_->row ()->row ->x_height () +
181
- it_->row ()->row ->ascenders () - it_->row ()->row ->descenders ();
175
+ it_->row ()->row ->ascenders () -
176
+ it_->row ()->row ->descenders ();
182
177
// Convert from pixels to printers points.
183
- *pointsize = scaled_yres_ > 0
184
- ? static_cast <int >(row_height * kPointsPerInch / scaled_yres_ + 0.5 )
185
- : 0 ;
178
+ *pointsize =
179
+ scaled_yres_ > 0
180
+ ? static_cast <int >(row_height * kPointsPerInch / scaled_yres_ + 0.5 )
181
+ : 0 ;
186
182
const FontInfo* font_info = it_->word ()->fontinfo ;
187
183
if (font_info) {
188
184
// Font information available.
@@ -212,7 +208,8 @@ const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
212
208
213
209
// Returns the name of the language used to recognize this word.
214
210
const char * LTRResultIterator::WordRecognitionLanguage () const {
215
- if (it_->word () == nullptr || it_->word ()->tesseract == nullptr ) return nullptr ;
211
+ if (it_->word () == nullptr || it_->word ()->tesseract == nullptr )
212
+ return nullptr ;
216
213
return it_->word ()->tesseract ->lang .string ();
217
214
}
218
215
@@ -221,12 +218,9 @@ StrongScriptDirection LTRResultIterator::WordDirection() const {
221
218
if (it_->word () == nullptr ) return DIR_NEUTRAL;
222
219
bool has_rtl = it_->word ()->AnyRtlCharsInWord ();
223
220
bool has_ltr = it_->word ()->AnyLtrCharsInWord ();
224
- if (has_rtl && !has_ltr)
225
- return DIR_RIGHT_TO_LEFT;
226
- if (has_ltr && !has_rtl)
227
- return DIR_LEFT_TO_RIGHT;
228
- if (!has_ltr && !has_rtl)
229
- return DIR_NEUTRAL;
221
+ if (has_rtl && !has_ltr) return DIR_RIGHT_TO_LEFT;
222
+ if (has_ltr && !has_rtl) return DIR_LEFT_TO_RIGHT;
223
+ if (!has_ltr && !has_rtl) return DIR_NEUTRAL;
230
224
return DIR_MIX;
231
225
}
232
226
@@ -259,20 +253,21 @@ bool LTRResultIterator::HasBlamerInfo() const {
259
253
260
254
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
261
255
// of the current word.
262
- const void *LTRResultIterator::GetParamsTrainingBundle () const {
263
- return (it_->word () != nullptr && it_->word ()->blamer_bundle != nullptr ) ?
264
- &(it_->word ()->blamer_bundle ->params_training_bundle ()) : nullptr ;
256
+ const void * LTRResultIterator::GetParamsTrainingBundle () const {
257
+ return (it_->word () != nullptr && it_->word ()->blamer_bundle != nullptr )
258
+ ? &(it_->word ()->blamer_bundle ->params_training_bundle ())
259
+ : nullptr ;
265
260
}
266
261
267
262
// Returns the pointer to the string with blamer information for this word.
268
263
// Assumes that the word's blamer_bundle is not nullptr.
269
- const char * LTRResultIterator::GetBlamerDebug () const {
264
+ const char * LTRResultIterator::GetBlamerDebug () const {
270
265
return it_->word ()->blamer_bundle ->debug ().string ();
271
266
}
272
267
273
268
// Returns the pointer to the string with misadaption information for this word.
274
269
// Assumes that the word's blamer_bundle is not nullptr.
275
- const char * LTRResultIterator::GetBlamerMisadaptionDebug () const {
270
+ const char * LTRResultIterator::GetBlamerMisadaptionDebug () const {
276
271
return it_->word ()->blamer_bundle ->misadaption_debug ().string ();
277
272
}
278
273
@@ -288,7 +283,7 @@ bool LTRResultIterator::HasTruthString() const {
288
283
289
284
// Returns true if the given string is equivalent to the truth string for
290
285
// the current word.
291
- bool LTRResultIterator::EquivalentToTruth (const char * str) const {
286
+ bool LTRResultIterator::EquivalentToTruth (const char * str) const {
292
287
if (!HasTruthString ()) return false ;
293
288
ASSERT_HOST (it_->word ()->uch_set != nullptr );
294
289
WERD_CHOICE str_wd (str, *(it_->word ()->uch_set ));
@@ -312,7 +307,7 @@ char* LTRResultIterator::WordNormedUTF8Text() const {
312
307
if (it_->word () == nullptr ) return nullptr ; // Already at the end!
313
308
STRING ocr_text;
314
309
WERD_CHOICE* best_choice = it_->word ()->best_choice ;
315
- const UNICHARSET * unicharset = it_->word ()->uch_set ;
310
+ const UNICHARSET* unicharset = it_->word ()->uch_set ;
316
311
ASSERT_HOST (best_choice != nullptr );
317
312
for (int i = 0 ; i < best_choice->length (); ++i) {
318
313
ocr_text += unicharset->get_normed_unichar (best_choice->unichar_id (i));
@@ -325,7 +320,7 @@ char* LTRResultIterator::WordNormedUTF8Text() const {
325
320
326
321
// Returns a pointer to serialized choice lattice.
327
322
// Fills lattice_size with the number of bytes in lattice data.
328
- const char * LTRResultIterator::WordLattice (int * lattice_size) const {
323
+ const char * LTRResultIterator::WordLattice (int * lattice_size) const {
329
324
if (it_->word () == nullptr ) return nullptr ; // Already at the end!
330
325
if (it_->word ()->blamer_bundle == nullptr ) return nullptr ;
331
326
*lattice_size = it_->word ()->blamer_bundle ->lattice_size ();
@@ -338,7 +333,7 @@ const char *LTRResultIterator::WordLattice(int *lattice_size) const {
338
333
bool LTRResultIterator::SymbolIsSuperscript () const {
339
334
if (cblob_it_ == nullptr && it_->word () != nullptr )
340
335
return it_->word ()->best_choice ->BlobPosition (blob_index_) ==
341
- SP_SUPERSCRIPT;
336
+ SP_SUPERSCRIPT;
342
337
return false ;
343
338
}
344
339
@@ -372,40 +367,49 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
372
367
} else {
373
368
choice_it_ = nullptr ;
374
369
}
370
+ if (&word_res_->symbol_steps != nullptr && !word_res_->symbol_steps .empty ()) {
371
+ symbol_step_it_ = word_res_->symbol_steps .begin ();
372
+ }
375
373
}
376
374
377
- ChoiceIterator::~ChoiceIterator () {
378
- delete choice_it_;
379
- }
375
+ ChoiceIterator::~ChoiceIterator () { delete choice_it_; }
380
376
381
377
// Moves to the next choice for the symbol and returns false if there
382
378
// are none left.
383
379
bool ChoiceIterator::Next () {
384
- if (choice_it_ == nullptr )
385
- return false ;
380
+ if (choice_it_ == nullptr ) return false ;
381
+ if (&word_res_->symbol_steps != nullptr ) {
382
+ if (symbol_step_it_ == word_res_->symbol_steps .end ()) {
383
+ symbol_step_it_ = word_res_->symbol_steps .begin ();
384
+ } else {
385
+ symbol_step_it_++;
386
+ }
387
+ }
386
388
choice_it_->forward ();
387
389
return !choice_it_->cycled_list ();
388
390
}
389
391
390
392
// Returns the null terminated UTF-8 encoded text string for the current
391
393
// choice. Do NOT use delete [] to free after use.
392
394
const char * ChoiceIterator::GetUTF8Text () const {
393
- if (choice_it_ == nullptr )
394
- return nullptr ;
395
+ if (choice_it_ == nullptr ) return nullptr ;
395
396
UNICHAR_ID id = choice_it_->data ()->unichar_id ();
396
397
return word_res_->uch_set ->id_to_unichar_ext (id);
397
398
}
398
399
399
400
// Returns the confidence of the current choice.
400
401
// The number should be interpreted as a percent probability. (0.0f-100.0f)
401
402
float ChoiceIterator::Confidence () const {
402
- if (choice_it_ == nullptr )
403
- return 0 .0f ;
403
+ if (choice_it_ == nullptr ) return 0 .0f ;
404
404
float confidence = 100 + 5 * choice_it_->data ()->certainty ();
405
405
if (confidence < 0 .0f ) confidence = 0 .0f ;
406
406
if (confidence > 100 .0f ) confidence = 100 .0f ;
407
407
return confidence;
408
408
}
409
409
410
-
410
+ std::vector<std::vector<std::pair<const char *, float >>>*
411
+ ChoiceIterator::Timesteps () const {
412
+ if (&word_res_->symbol_steps == nullptr ) return nullptr ;
413
+ return &*symbol_step_it_;
414
+ }
411
415
} // namespace tesseract.
0 commit comments