Skip to content

Commit b08966a

Browse files
committed
Fix assertion caused by access to default TBOX
Instead of appending an empty TBOX to the end of the box list, the last-box corner case is now handled by passing a nullptr as the next box (as was already done with the previous box for the first box in the list). This avoids calling BoxMissMetric with a default (empty) TBOX, which raised an assertion there (b == 0). Signed-off-by: Stefan Weil <[email protected]>
1 parent 97f6864 commit b08966a

File tree

2 files changed

+46
-43
lines changed

2 files changed

+46
-43
lines changed

src/ccmain/applybox.cpp

+42-39
Original file line number | Diff line number | Diff line change
@@ -122,29 +122,24 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
122122

123123
const int box_count = boxes.size();
124124
int box_failures = 0;
125-
// Add an empty everything to the end.
126-
boxes.push_back(TBOX());
127-
texts.push_back(STRING());
128-
full_texts.push_back(STRING());
129125

130126
// In word mode, we use the boxes to make a word for each box, but
131127
// in blob mode we use the existing words and maximally chop them first.
132128
PAGE_RES* page_res = find_segmentation ?
133129
nullptr : SetupApplyBoxes(boxes, block_list);
134130
clear_any_old_text(block_list);
135131

136-
for (int i = 0; i < boxes.size() - 1; i++) {
132+
for (int i = 0; i < box_count; i++) {
137133
bool foundit = false;
138134
if (page_res != nullptr) {
139-
if (i == 0) {
140-
foundit = ResegmentCharBox(page_res, nullptr, boxes[i], boxes[i + 1],
141-
full_texts[i].string());
142-
} else {
143-
foundit = ResegmentCharBox(page_res, &boxes[i-1], boxes[i],
144-
boxes[i + 1], full_texts[i].string());
145-
}
135+
foundit = ResegmentCharBox(page_res,
136+
(i == 0) ? nullptr : &boxes[i - 1],
137+
boxes[i],
138+
(i == box_count - 1) ? nullptr : &boxes[i + 1],
139+
full_texts[i].string());
146140
} else {
147-
foundit = ResegmentWordBox(block_list, boxes[i], boxes[i + 1],
141+
foundit = ResegmentWordBox(block_list, boxes[i],
142+
(i == box_count - 1) ? nullptr : &boxes[i + 1],
148143
texts[i].string());
149144
}
150145
if (!foundit) {
@@ -339,8 +334,8 @@ static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
339334
///
340335
/// This means that occasionally, blobs may be incorrectly segmented if the
341336
/// chopper fails to find a suitable chop point.
342-
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
343-
const TBOX& box, const TBOX& next_box,
337+
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
338+
const TBOX& box, const TBOX* next_box,
344339
const char* correct_text) {
345340
if (applybox_debug > 1) {
346341
tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text);
@@ -365,24 +360,26 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
365360
break;
366361
if (word_res->correct_text[i + blob_count].length() > 0)
367362
break; // Blob is claimed already.
368-
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
369-
const double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
370-
if (applybox_debug > 2) {
371-
tprintf("Checking blob:");
372-
blob_box.print();
373-
tprintf("Current miss metric = %g, next = %g\n",
374-
current_box_miss_metric, next_box_miss_metric);
363+
if (next_box != nullptr) {
364+
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
365+
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
366+
if (applybox_debug > 2) {
367+
tprintf("Checking blob:");
368+
blob_box.print();
369+
tprintf("Current miss metric = %g, next = %g\n",
370+
current_box_miss_metric, next_box_miss_metric);
371+
}
372+
if (current_box_miss_metric > next_box_miss_metric)
373+
break; // Blob is a better match for next box.
375374
}
376-
if (current_box_miss_metric > next_box_miss_metric)
377-
break; // Blob is a better match for next box.
378375
char_box += blob_box;
379376
}
380377
if (blob_count > 0) {
381378
if (applybox_debug > 1) {
382379
tprintf("Index [%d, %d) seem good.\n", i, i + blob_count);
383380
}
384381
if (!char_box.almost_equal(box, 3) &&
385-
(box.x_gap(next_box) < -3 ||
382+
((next_box != nullptr && box.x_gap(*next_box) < -3)||
386383
(prev_box != nullptr && prev_box->x_gap(box) < -3))) {
387384
return false;
388385
}
@@ -398,8 +395,10 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
398395
word_res->box_word->BlobBox(i).print();
399396
tprintf("Matches box:");
400397
box.print();
401-
tprintf("With next box:");
402-
next_box.print();
398+
if (next_box != nullptr) {
399+
tprintf("With next box:");
400+
next_box->print();
401+
}
403402
}
404403
// Eliminated best_state and correct_text entries for the consumed
405404
// blobs.
@@ -438,7 +437,7 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
438437
/// @return false if the box was in error, which can only be caused by
439438
/// failing to find an overlapping blob for a box.
440439
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
441-
const TBOX& box, const TBOX& next_box,
440+
const TBOX& box, const TBOX* next_box,
442441
const char* correct_text) {
443442
if (applybox_debug > 1) {
444443
tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text);
@@ -472,23 +471,27 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
472471
TBOX blob_box = blob->bounding_box();
473472
if (!blob_box.major_overlap(box))
474473
continue;
475-
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
476-
const double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
477-
if (applybox_debug > 2) {
478-
tprintf("Checking blob:");
479-
blob_box.print();
480-
tprintf("Current miss metric = %g, next = %g\n",
481-
current_box_miss_metric, next_box_miss_metric);
474+
if (next_box != nullptr) {
475+
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
476+
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
477+
if (applybox_debug > 2) {
478+
tprintf("Checking blob:");
479+
blob_box.print();
480+
tprintf("Current miss metric = %g, next = %g\n",
481+
current_box_miss_metric, next_box_miss_metric);
482+
}
483+
if (current_box_miss_metric > next_box_miss_metric)
484+
continue; // Blob is a better match for next box.
482485
}
483-
if (current_box_miss_metric > next_box_miss_metric)
484-
continue; // Blob is a better match for next box.
485486
if (applybox_debug > 2) {
486487
tprintf("Blob match: blob:");
487488
blob_box.print();
488489
tprintf("Matches box:");
489490
box.print();
490-
tprintf("With next box:");
491-
next_box.print();
491+
if (next_box != nullptr) {
492+
tprintf("With next box:");
493+
next_box->print();
494+
}
492495
}
493496
if (new_word == nullptr) {
494497
// Make a new word with a single blob.

src/ccmain/tesseractclass.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -744,17 +744,17 @@ class Tesseract : public Wordrec {
744744
// failing to find an appropriate blob for a box.
745745
// This means that occasionally, blobs may be incorrectly segmented if the
746746
// chopper fails to find a suitable chop point.
747-
bool ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
748-
const TBOX& box, const TBOX& next_box,
747+
bool ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
748+
const TBOX& box, const TBOX* next_box,
749749
const char* correct_text);
750750
// Consume all source blobs that strongly overlap the given box,
751751
// putting them into a new word, with the correct_text label.
752752
// Fights over which box owns which blobs are settled by
753753
// applying the blobs to box or next_box with the least non-overlap.
754754
// Returns false if the box was in error, which can only be caused by
755755
// failing to find an overlapping blob for a box.
756-
bool ResegmentWordBox(BLOCK_LIST *block_list,
757-
const TBOX& box, const TBOX& next_box,
756+
bool ResegmentWordBox(BLOCK_LIST* block_list,
757+
const TBOX& box, const TBOX* next_box,
758758
const char* correct_text);
759759
// Resegments the words by running the classifier in an attempt to find the
760760
// correct segmentation that produces the required string.

0 commit comments

Comments
 (0)