|
16 | 16 | *
|
17 | 17 | **********************************************************************/
|
18 | 18 |
|
19 |
| - |
20 |
| -#include <locale> // for std::locale::classic |
21 |
| -#include <memory> // for std::unique_ptr |
22 |
| -#include <sstream> // for std::stringstream |
23 | 19 | #include "baseapi.h" // for TessBaseAPI
|
24 | 20 | #include "renderer.h"
|
25 | 21 | #include "tesseractclass.h" // for Tesseract
|
@@ -54,37 +50,38 @@ char* TessBaseAPI::GetLSTMBOXText(int page_number) {
|
54 | 50 | if (res_it->IsAtBeginningOf(RIL_WORD)) {
|
55 | 51 | lstm_box_str.add_str_int(" ", left);
|
56 | 52 | lstm_box_str.add_str_int(" ", image_height_ - bottom);
|
57 |
| - lstm_box_str.add_str_int(" ", right + 2); |
| 53 | + lstm_box_str.add_str_int(" ", right + 5); |
58 | 54 | lstm_box_str.add_str_int(" ", image_height_ - top);
|
59 |
| - lstm_box_str.add_str_int(" ", page_num); // level 5 - word |
| 55 | + lstm_box_str.add_str_int(" ", page_num); // - word |
60 | 56 | lstm_box_str += "\n"; // end of row for word
|
61 | 57 | }
|
62 | 58 | if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
|
63 | 59 | lstm_box_str.add_str_int("\t ", left);
|
64 | 60 | lstm_box_str.add_str_int(" ", image_height_ - bottom);
|
65 | 61 | lstm_box_str.add_str_int(" ", right + 5);
|
66 | 62 | lstm_box_str.add_str_int(" ", image_height_ - top);
|
67 |
| - lstm_box_str.add_str_int(" ", page_num); // level 4 - line |
| 63 | + lstm_box_str.add_str_int(" ", page_num); // - line |
68 | 64 | lstm_box_str += "\n"; // end of row for line
|
69 | 65 | }
|
70 | 66 | }
|
71 | 67 | first_word=false;
|
72 |
| - res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom); |
| 68 | + // Use bounding box for whole line for every character |
| 69 | + res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom); |
73 | 70 |
|
74 | 71 | do {
|
75 |
| - lstm_box_str +=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get(); |
| 72 | + lstm_box_str += |
| 73 | + std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get(); |
76 | 74 | res_it->Next(RIL_SYMBOL);
|
77 |
| - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL)); |
| 75 | + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL)); |
78 | 76 |
|
79 | 77 | lstm_box_str.add_str_int(" ", left);
|
80 | 78 | lstm_box_str.add_str_int(" ", image_height_ - bottom);
|
81 |
| - lstm_box_str.add_str_int(" ", right); |
| 79 | + lstm_box_str.add_str_int(" ", right + 5); |
82 | 80 | lstm_box_str.add_str_int(" ", image_height_ - top);
|
83 |
| - lstm_box_str.add_str_int(" ", page_num); // level 6 - symbol |
| 81 | + lstm_box_str.add_str_int(" ", page_num); // symbol |
84 | 82 | lstm_box_str += "\n"; // end of row
|
85 |
| - |
| 83 | + |
86 | 84 | }
|
87 |
| - |
88 | 85 | char* ret = new char[lstm_box_str.length() + 1];
|
89 | 86 | strcpy(ret, lstm_box_str.string());
|
90 | 87 | delete res_it;
|
|
0 commit comments