Skip to content

Commit 0f42fd8

Browse files
committed
Change to use the TEXTLINE bounding-box coordinates for every character
(cherry picked from commit 049db10)
1 parent 9c89cd5 commit 0f42fd8

File tree

1 file changed

+11
-14
lines changed

1 file changed

+11
-14
lines changed

src/api/lstmboxrenderer.cpp

+11-14
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616
*
1717
**********************************************************************/
1818

19-
20-
#include <locale> // for std::locale::classic
21-
#include <memory> // for std::unique_ptr
22-
#include <sstream> // for std::stringstream
2319
#include "baseapi.h" // for TessBaseAPI
2420
#include "renderer.h"
2521
#include "tesseractclass.h" // for Tesseract
@@ -54,37 +50,38 @@ char* TessBaseAPI::GetLSTMBOXText(int page_number) {
5450
if (res_it->IsAtBeginningOf(RIL_WORD)) {
5551
lstm_box_str.add_str_int(" ", left);
5652
lstm_box_str.add_str_int(" ", image_height_ - bottom);
57-
lstm_box_str.add_str_int(" ", right + 2);
53+
lstm_box_str.add_str_int(" ", right + 5);
5854
lstm_box_str.add_str_int(" ", image_height_ - top);
59-
lstm_box_str.add_str_int(" ", page_num); // level 5 - word
55+
lstm_box_str.add_str_int(" ", page_num); // - word
6056
lstm_box_str += "\n"; // end of row for word
6157
}
6258
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
6359
lstm_box_str.add_str_int("\t ", left);
6460
lstm_box_str.add_str_int(" ", image_height_ - bottom);
6561
lstm_box_str.add_str_int(" ", right + 5);
6662
lstm_box_str.add_str_int(" ", image_height_ - top);
67-
lstm_box_str.add_str_int(" ", page_num); // level 4 - line
63+
lstm_box_str.add_str_int(" ", page_num); // - line
6864
lstm_box_str += "\n"; // end of row for line
6965
}
7066
}
7167
first_word=false;
72-
res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
68+
// Use bounding box for whole line for every character
69+
res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
7370

7471
do {
75-
lstm_box_str +=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
72+
lstm_box_str +=
73+
std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
7674
res_it->Next(RIL_SYMBOL);
77-
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
75+
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
7876

7977
lstm_box_str.add_str_int(" ", left);
8078
lstm_box_str.add_str_int(" ", image_height_ - bottom);
81-
lstm_box_str.add_str_int(" ", right);
79+
lstm_box_str.add_str_int(" ", right + 5);
8280
lstm_box_str.add_str_int(" ", image_height_ - top);
83-
lstm_box_str.add_str_int(" ", page_num); // level 6 - symbol
81+
lstm_box_str.add_str_int(" ", page_num); // symbol
8482
lstm_box_str += "\n"; // end of row
85-
83+
8684
}
87-
8885
char* ret = new char[lstm_box_str.length() + 1];
8986
strcpy(ret, lstm_box_str.string());
9087
delete res_it;

0 commit comments

Comments
 (0)