Skip to content

Commit 3110536

Browse files
committed
put common code in AddBoxToLSTM
1 parent b51c1bf commit 3110536

File tree

1 file changed

+25
-27
lines changed

1 file changed

+25
-27
lines changed

src/api/lstmboxrenderer.cpp

+25-27
Original file line numberDiff line numberDiff line change
@@ -27,61 +27,59 @@ namespace tesseract {
2727
* page_number is a 0-based page index that will appear in the box file.
2828
* Returned string must be freed with the delete [] operator.
2929
*/
30+
static void AddBoxToLSTM(int right, int bottom, int top,
31+
int image_height_, int page_num,
32+
STRING* text) {
33+
text->add_str_int(" ", image_height_ - bottom);
34+
text->add_str_int(" ", right + 5);
35+
text->add_str_int(" ", image_height_ - top);
36+
text->add_str_int(" ", page_num);
37+
}
3038

3139
char* TessBaseAPI::GetLSTMBOXText(int page_number) {
3240
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
3341
return nullptr;
3442

3543
STRING lstm_box_str("");
36-
3744
int page_num = page_number;
3845
bool first_word = true;
39-
46+
int left, top, right, bottom;
47+
4048
LTRResultIterator* res_it = GetLTRIterator();
4149
while (!res_it->Empty(RIL_BLOCK)) {
4250
if (res_it->Empty(RIL_SYMBOL)) {
4351
res_it->Next(RIL_SYMBOL);
4452
continue;
4553
}
46-
47-
int left, top, right, bottom;
48-
4954
if (!first_word) {
55+
if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
5056
if (res_it->IsAtBeginningOf(RIL_WORD)) {
5157
lstm_box_str.add_str_int(" ", left);
52-
lstm_box_str.add_str_int(" ", image_height_ - bottom);
53-
lstm_box_str.add_str_int(" ", right + 5);
54-
lstm_box_str.add_str_int(" ", image_height_ - top);
55-
lstm_box_str.add_str_int(" ", page_num); // - word
58+
AddBoxToLSTM(right, bottom, top, image_height_, page_num, &lstm_box_str);
5659
lstm_box_str += "\n"; // end of row for word
57-
}
60+
} // word
61+
} else {
5862
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
5963
lstm_box_str.add_str_int("\t ", left);
60-
lstm_box_str.add_str_int(" ", image_height_ - bottom);
61-
lstm_box_str.add_str_int(" ", right + 5);
62-
lstm_box_str.add_str_int(" ", image_height_ - top);
63-
lstm_box_str.add_str_int(" ", page_num); // - line
64+
AddBoxToLSTM(right, bottom, top, image_height_, page_num, &lstm_box_str);
6465
lstm_box_str += "\n"; // end of row for line
65-
}
66-
}
66+
} // line
67+
}
68+
} // not first word
6769
first_word=false;
68-
// Use bounding box for whole line for every character
70+
// Use bounding box for whole line for everything
6971
res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
70-
71-
do {
72-
lstm_box_str +=
72+
do { lstm_box_str +=
7373
std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
7474
res_it->Next(RIL_SYMBOL);
7575
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
76-
7776
lstm_box_str.add_str_int(" ", left);
78-
lstm_box_str.add_str_int(" ", image_height_ - bottom);
79-
lstm_box_str.add_str_int(" ", right + 5);
80-
lstm_box_str.add_str_int(" ", image_height_ - top);
81-
lstm_box_str.add_str_int(" ", page_num); // symbol
82-
lstm_box_str += "\n"; // end of row
83-
77+
AddBoxToLSTM(right, bottom, top, image_height_, page_num, &lstm_box_str);
78+
lstm_box_str += "\n"; // end of row for symbol
8479
}
80+
lstm_box_str.add_str_int("\t ", left);
81+
AddBoxToLSTM(right, bottom, top, image_height_, page_num, &lstm_box_str);
82+
lstm_box_str += "\n"; // end of PAGE
8583
char* ret = new char[lstm_box_str.length() + 1];
8684
strcpy(ret, lstm_box_str.string());
8785
delete res_it;

0 commit comments

Comments
 (0)