@@ -122,29 +122,24 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
122
122
123
123
const int box_count = boxes.size ();
124
124
int box_failures = 0 ;
125
- // Add an empty everything to the end.
126
- boxes.push_back (TBOX ());
127
- texts.push_back (STRING ());
128
- full_texts.push_back (STRING ());
129
125
130
126
// In word mode, we use the boxes to make a word for each box, but
131
127
// in blob mode we use the existing words and maximally chop them first.
132
128
PAGE_RES* page_res = find_segmentation ?
133
129
nullptr : SetupApplyBoxes (boxes, block_list);
134
130
clear_any_old_text (block_list);
135
131
136
- for (int i = 0 ; i < boxes. size () - 1 ; i++) {
132
+ for (int i = 0 ; i < box_count ; i++) {
137
133
bool foundit = false ;
138
134
if (page_res != nullptr ) {
139
- if (i == 0 ) {
140
- foundit = ResegmentCharBox (page_res, nullptr , boxes[i], boxes[i + 1 ],
141
- full_texts[i].string ());
142
- } else {
143
- foundit = ResegmentCharBox (page_res, &boxes[i-1 ], boxes[i],
144
- boxes[i + 1 ], full_texts[i].string ());
145
- }
135
+ foundit = ResegmentCharBox (page_res,
136
+ (i == 0 ) ? nullptr : &boxes[i - 1 ],
137
+ boxes[i],
138
+ (i == box_count - 1 ) ? nullptr : &boxes[i + 1 ],
139
+ full_texts[i].string ());
146
140
} else {
147
- foundit = ResegmentWordBox (block_list, boxes[i], boxes[i + 1 ],
141
+ foundit = ResegmentWordBox (block_list, boxes[i],
142
+ (i == box_count - 1 ) ? nullptr : &boxes[i + 1 ],
148
143
texts[i].string ());
149
144
}
150
145
if (!foundit) {
@@ -339,8 +334,8 @@ static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
339
334
///
340
335
/// This means that occasionally, blobs may be incorrectly segmented if the
341
336
/// chopper fails to find a suitable chop point.
342
- bool Tesseract::ResegmentCharBox (PAGE_RES* page_res, const TBOX * prev_box,
343
- const TBOX& box, const TBOX& next_box,
337
+ bool Tesseract::ResegmentCharBox (PAGE_RES* page_res, const TBOX* prev_box,
338
+ const TBOX& box, const TBOX* next_box,
344
339
const char * correct_text) {
345
340
if (applybox_debug > 1 ) {
346
341
tprintf (" \n APPLY_BOX: in ResegmentCharBox() for %s\n " , correct_text);
@@ -365,24 +360,26 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
365
360
break ;
366
361
if (word_res->correct_text [i + blob_count].length () > 0 )
367
362
break ; // Blob is claimed already.
368
- const double current_box_miss_metric = BoxMissMetric (blob_box, box);
369
- const double next_box_miss_metric = BoxMissMetric (blob_box, next_box);
370
- if (applybox_debug > 2 ) {
371
- tprintf (" Checking blob:" );
372
- blob_box.print ();
373
- tprintf (" Current miss metric = %g, next = %g\n " ,
374
- current_box_miss_metric, next_box_miss_metric);
363
+ if (next_box != nullptr ) {
364
+ const double current_box_miss_metric = BoxMissMetric (blob_box, box);
365
+ const double next_box_miss_metric = BoxMissMetric (blob_box, *next_box);
366
+ if (applybox_debug > 2 ) {
367
+ tprintf (" Checking blob:" );
368
+ blob_box.print ();
369
+ tprintf (" Current miss metric = %g, next = %g\n " ,
370
+ current_box_miss_metric, next_box_miss_metric);
371
+ }
372
+ if (current_box_miss_metric > next_box_miss_metric)
373
+ break ; // Blob is a better match for next box.
375
374
}
376
- if (current_box_miss_metric > next_box_miss_metric)
377
- break ; // Blob is a better match for next box.
378
375
char_box += blob_box;
379
376
}
380
377
if (blob_count > 0 ) {
381
378
if (applybox_debug > 1 ) {
382
379
tprintf (" Index [%d, %d) seem good.\n " , i, i + blob_count);
383
380
}
384
381
if (!char_box.almost_equal (box, 3 ) &&
385
- (box.x_gap (next_box) < -3 ||
382
+ ((next_box != nullptr && box.x_gap (* next_box) < -3 ) ||
386
383
(prev_box != nullptr && prev_box->x_gap (box) < -3 ))) {
387
384
return false ;
388
385
}
@@ -398,8 +395,10 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
398
395
word_res->box_word ->BlobBox (i).print ();
399
396
tprintf (" Matches box:" );
400
397
box.print ();
401
- tprintf (" With next box:" );
402
- next_box.print ();
398
+ if (next_box != nullptr ) {
399
+ tprintf (" With next box:" );
400
+ next_box->print ();
401
+ }
403
402
}
404
403
// Eliminated best_state and correct_text entries for the consumed
405
404
// blobs.
@@ -438,7 +437,7 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
438
437
/// @return false if the box was in error, which can only be caused by
439
438
/// failing to find an overlapping blob for a box.
440
439
bool Tesseract::ResegmentWordBox (BLOCK_LIST *block_list,
441
- const TBOX& box, const TBOX& next_box,
440
+ const TBOX& box, const TBOX* next_box,
442
441
const char * correct_text) {
443
442
if (applybox_debug > 1 ) {
444
443
tprintf (" \n APPLY_BOX: in ResegmentWordBox() for %s\n " , correct_text);
@@ -472,23 +471,27 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
472
471
TBOX blob_box = blob->bounding_box ();
473
472
if (!blob_box.major_overlap (box))
474
473
continue ;
475
- const double current_box_miss_metric = BoxMissMetric (blob_box, box);
476
- const double next_box_miss_metric = BoxMissMetric (blob_box, next_box);
477
- if (applybox_debug > 2 ) {
478
- tprintf (" Checking blob:" );
479
- blob_box.print ();
480
- tprintf (" Current miss metric = %g, next = %g\n " ,
481
- current_box_miss_metric, next_box_miss_metric);
474
+ if (next_box != nullptr ) {
475
+ const double current_box_miss_metric = BoxMissMetric (blob_box, box);
476
+ const double next_box_miss_metric = BoxMissMetric (blob_box, *next_box);
477
+ if (applybox_debug > 2 ) {
478
+ tprintf (" Checking blob:" );
479
+ blob_box.print ();
480
+ tprintf (" Current miss metric = %g, next = %g\n " ,
481
+ current_box_miss_metric, next_box_miss_metric);
482
+ }
483
+ if (current_box_miss_metric > next_box_miss_metric)
484
+ continue ; // Blob is a better match for next box.
482
485
}
483
- if (current_box_miss_metric > next_box_miss_metric)
484
- continue ; // Blob is a better match for next box.
485
486
if (applybox_debug > 2 ) {
486
487
tprintf (" Blob match: blob:" );
487
488
blob_box.print ();
488
489
tprintf (" Matches box:" );
489
490
box.print ();
490
- tprintf (" With next box:" );
491
- next_box.print ();
491
+ if (next_box != nullptr ) {
492
+ tprintf (" With next box:" );
493
+ next_box->print ();
494
+ }
492
495
}
493
496
if (new_word == nullptr ) {
494
497
// Make a new word with a single blob.
0 commit comments