Skip to content

Commit c13371d

Browse files
committed
Renamed GetGlyphConfidences() to GetChoices() and glyph_confidences to lstm_choice_mode
Renamed the global attribute glyph_confidences to lstm_choice_mode and the method GetGlyphConfidences() to GetChoices(). All variables and comments contained in related methods were renamed as well. Signed-off-by: Noah Metzger <[email protected]>
1 parent e93e8f0 commit c13371d

11 files changed

+73
-75
lines changed

src/api/baseapi.cpp

+8-8
Original file line number | Diff line number | Diff line change
@@ -1560,8 +1560,8 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
15601560

15611561
// Now, process the word...
15621562
std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
1563-
if (tesseract_->glyph_confidences) {
1564-
confidencemap = res_it->GetGlyphConfidences();
1563+
if (tesseract_->lstm_choice_mode) {
1564+
confidencemap = res_it->GetChoices();
15651565
}
15661566
hocr_str += "\n <span class='ocrx_word'";
15671567
AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
@@ -1621,16 +1621,16 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
16211621
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
16221622
if (italic) hocr_str += "</em>";
16231623
if (bold) hocr_str += "</strong>";
1624-
// If glyph confidence is required it is added here
1625-
if (tesseract_->glyph_confidences == 1 && confidencemap != nullptr) {
1624+
// If the lstm choice mode is required it is added here
1625+
if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
16261626
for (size_t i = 0; i < confidencemap->size(); i++) {
16271627
hocr_str += "\n <span class='ocrx_cinfo'";
16281628
AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
16291629
hocr_str += ">";
16301630
std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
16311631
for (std::pair<const char*, float> conf : timestep) {
16321632
hocr_str += "<span class='ocr_glyph'";
1633-
AddIdTohOCR(&hocr_str, "glyph", page_id, wcnt, gcnt);
1633+
AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
16341634
hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
16351635
hocr_str += "'";
16361636
hocr_str += ">";
@@ -1641,18 +1641,18 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
16411641
hocr_str += "</span>";
16421642
tcnt++;
16431643
}
1644-
} else if (tesseract_->glyph_confidences == 2 && confidencemap != nullptr) {
1644+
} else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
16451645
for (size_t i = 0; i < confidencemap->size(); i++) {
16461646
std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
16471647
if (timestep.size() > 0) {
16481648
hocr_str += "\n <span class='ocrx_cinfo'";
1649-
AddIdTohOCR(&hocr_str, "alternative_glyphs", page_id, wcnt, tcnt);
1649+
AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
16501650
hocr_str += " chosen='";
16511651
hocr_str += timestep[0].first;
16521652
hocr_str += "'>";
16531653
for (size_t j = 1; j < timestep.size(); j++) {
16541654
hocr_str += "<span class='ocr_glyph'";
1655-
AddIdTohOCR(&hocr_str, "glyph", page_id, wcnt, gcnt);
1655+
AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
16561656
hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
16571657
hocr_str += "'";
16581658
hocr_str += ">";

src/ccmain/linerec.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
239239
if (im_data == nullptr) return;
240240
lstm_recognizer_->RecognizeLine(*im_data, true, classify_debug_level > 0,
241241
kWorstDictCertainty / kCertaintyScale,
242-
word_box, words, glyph_confidences);
242+
word_box, words, lstm_choice_mode);
243243
delete im_data;
244244
SearchWords(words);
245245
}

src/ccmain/resultiterator.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -604,7 +604,7 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
604604
return result;
605605
}
606606

607-
std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetGlyphConfidences() const {
607+
std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetChoices() const {
608608
if (it_->word() != nullptr) {
609609
return &it_->word()->timesteps;
610610
} else {

src/ccmain/resultiterator.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -98,9 +98,9 @@ class TESS_API ResultIterator : public LTRResultIterator {
9898
virtual char* GetUTF8Text(PageIteratorLevel level) const;
9999

100100
/**
101-
* Returns the glyph confidences for every LSTM timestep for the current Word
101+
* Returns the lstm choices for every LSTM timestep for the current Word
102102
*/
103-
virtual std::vector<std::vector<std::pair<const char*, float>>>* GetGlyphConfidences() const;
103+
virtual std::vector<std::vector<std::pair<const char*, float>>>* GetChoices() const;
104104

105105
/**
106106
* Return whether the current paragraph's dominant reading direction

src/ccmain/tesseractclass.cpp

+6-6
Original file line number | Diff line number | Diff line change
@@ -514,12 +514,12 @@ Tesseract::Tesseract()
514514
STRING_MEMBER(page_separator, "\f",
515515
"Page separator (default is form feed control character)",
516516
this->params()),
517-
INT_MEMBER(glyph_confidences, 0,
518-
"Allows to include glyph confidences in the hOCR output. "
519-
"Valid input values are 0, 1 and 2. 0 is the default value. "
520-
"With 1 the glyph confidences of all timesteps are included. "
521-
"With 2 the glyph confidences are accumulated per charakter.",
522-
this->params()),
517+
INT_MEMBER(lstm_choice_mode, 0,
518+
"Allows to include alternative symbols choices in the hOCR output. "
519+
"Valid input values are 0, 1 and 2. 0 is the default value. "
520+
"With 1 the alternative symbol choices per timestep are included. "
521+
"With 2 the alternative symbol choices are accumulated per character.",
522+
this->params()),
523523

524524
backup_config_file_(nullptr),
525525
pix_binary_(nullptr),

src/ccmain/tesseractclass.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -1118,11 +1118,11 @@ class Tesseract : public Wordrec {
11181118
"Preserve multiple interword spaces");
11191119
STRING_VAR_H(page_separator, "\f",
11201120
"Page separator (default is form feed control character)");
1121-
INT_VAR_H(glyph_confidences, 0,
1122-
"Allows to include glyph confidences in the hOCR output. "
1121+
INT_VAR_H(lstm_choice_mode, 0,
1122+
"Allows to include alternative symbols choices in the hOCR output. "
11231123
"Valid input values are 0, 1 and 2. 0 is the default value. "
1124-
"With 1 the glyph confidences of all timesteps are included. "
1125-
"With 2 the glyph confidences are accumulated per charakter.");
1124+
"With 1 the alternative symbol choices per timestep are included. "
1125+
"With 2 the alternative symbol choices are accumulated per character.");
11261126

11271127
//// ambigsrecog.cpp /////////////////////////////////////////////////////////
11281128
FILE *init_recog_training(const STRING &fname);

src/ccstruct/pageres.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ class WERD_RES : public ELIST_LINK {
220220
// Gaps between blobs in chopped_word. blob_gaps[i] is the gap between
221221
// blob i and blob i+1.
222222
GenericVector<int> blob_gaps;
223-
// Stores the glyph confidences of every timestep of the lstm
223+
// Stores the lstm choices of every timestep
224224
std::vector<std::vector<std::pair<const char*, float>>> timesteps;
225225
// Ratings matrix contains classifier choices for each classified combination
226226
// of blobs. The dimension is the same as the number of blobs in chopped_word

src/lstm/lstmrecognizer.cpp

+3-4
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
173173
bool debug, double worst_dict_cert,
174174
const TBOX& line_box,
175175
PointerVector<WERD_RES>* words,
176-
int glyph_confidences) {
176+
int lstm_choice_mode) {
177177
NetworkIO outputs;
178178
float scale_factor;
179179
NetworkIO inputs;
@@ -185,10 +185,9 @@ void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
185185
new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_);
186186
}
187187
search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert,
188-
&GetUnicharset(), glyph_confidences);
188+
&GetUnicharset(), lstm_choice_mode);
189189
search_->ExtractBestPathAsWords(line_box, scale_factor, debug,
190-
&GetUnicharset(), words,
191-
glyph_confidences);
190+
&GetUnicharset(), words, lstm_choice_mode);
192191
}
193192

194193
// Helper computes min and mean best results in the output.

src/lstm/lstmrecognizer.h

+1-2
Original file line number | Diff line number | Diff line change
@@ -184,8 +184,7 @@ class LSTMRecognizer {
184184
// will be used in a dictionary word.
185185
void RecognizeLine(const ImageData& image_data, bool invert, bool debug,
186186
double worst_dict_cert, const TBOX& line_box,
187-
PointerVector<WERD_RES>* words,
188-
int glyph_confidences = 0);
187+
PointerVector<WERD_RES>* words, int lstm_choice_mode = 0);
189188

190189
// Helper computes min and mean best results in the output.
191190
void OutputStats(const NetworkIO& outputs,

src/lstm/recodebeam.cpp

+41-41
Original file line number | Diff line number | Diff line change
@@ -81,17 +81,17 @@ RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress& recoder,
8181
// Decodes the set of network outputs, storing the lattice internally.
8282
void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio,
8383
double cert_offset, double worst_dict_cert,
84-
const UNICHARSET* charset, int glyph_confidence) {
84+
const UNICHARSET* charset, int lstm_choice_mode) {
8585
beam_size_ = 0;
8686
int width = output.Width();
87-
if (glyph_confidence)
87+
if (lstm_choice_mode)
8888
timesteps.clear();
8989
for (int t = 0; t < width; ++t) {
9090
ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
9191
DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
9292
charset);
93-
if (glyph_confidence) {
94-
SaveMostCertainGlyphs(output.f(t), output.NumFeatures(), charset, t);
93+
if (lstm_choice_mode) {
94+
SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
9595
}
9696
}
9797
}
@@ -107,33 +107,33 @@ void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float>& output,
107107
}
108108
}
109109

110-
void RecodeBeamSearch::SaveMostCertainGlyphs(const float* outputs,
110+
void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs,
111111
int num_outputs,
112112
const UNICHARSET* charset,
113113
int xCoord) {
114-
std::vector<std::pair<const char*, float>> glyphs;
114+
std::vector<std::pair<const char*, float>> choices;
115115
int pos = 0;
116116
for (int i = 0; i < num_outputs; ++i) {
117117
if (outputs[i] >= 0.01f) {
118-
const char* charakter;
118+
const char* character;
119119
if (i + 2 >= num_outputs) {
120-
charakter = "";
120+
character = "";
121121
} else if (i > 0) {
122-
charakter = charset->id_to_unichar_ext(i + 2);
122+
character = charset->id_to_unichar_ext(i + 2);
123123
} else {
124-
charakter = charset->id_to_unichar_ext(i);
124+
character = charset->id_to_unichar_ext(i);
125125
}
126126
pos = 0;
127-
//order the possible glyphs within one timestep
127+
//order the possible choices within one timestep
128128
//beginning with the most likely
129-
while (glyphs.size() > pos && glyphs[pos].second > outputs[i]) {
129+
while (choices.size() > pos && choices[pos].second > outputs[i]) {
130130
pos++;
131131
}
132-
glyphs.insert(glyphs.begin() + pos,
133-
std::pair<const char*, float>(charakter, outputs[i]));
132+
choices.insert(choices.begin() + pos,
133+
std::pair<const char*, float>(character, outputs[i]));
134134
}
135135
}
136-
timesteps.push_back(glyphs);
136+
timesteps.push_back(choices);
137137
}
138138

139139
// Returns the best path as labels/scores/xcoords similar to simple CTC.
@@ -179,15 +179,15 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
179179
float scale_factor, bool debug,
180180
const UNICHARSET* unicharset,
181181
PointerVector<WERD_RES>* words,
182-
int glyph_confidence) {
182+
int lstm_choice_mode) {
183183
words->truncate(0);
184184
GenericVector<int> unichar_ids;
185185
GenericVector<float> certs;
186186
GenericVector<float> ratings;
187187
GenericVector<int> xcoords;
188188
GenericVector<const RecodeNode*> best_nodes;
189189
GenericVector<const RecodeNode*> second_nodes;
190-
std::deque<std::pair<int,int>> best_glyphs;
190+
std::deque<std::pair<int,int>> best_choices;
191191
ExtractBestPaths(&best_nodes, &second_nodes);
192192
if (debug) {
193193
DebugPath(unicharset, best_nodes);
@@ -199,15 +199,15 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
199199
}
200200
int current_char;
201201
int timestepEnd = 0;
202-
//if glyph confidence is required in granularity level 2 it stores the x
203-
//Coordinates of every chosen character to match the alternative glyphs to it
204-
if (glyph_confidence == 2) {
202+
//if lstm choice mode is required in granularity level 2 it stores the x
203+
//Coordinates of every chosen character to match the alternative choices to it
204+
if (lstm_choice_mode == 2) {
205205
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
206-
&xcoords, &best_glyphs);
207-
if (best_glyphs.size() > 0) {
208-
current_char = best_glyphs.front().first;
209-
timestepEnd = best_glyphs.front().second;
210-
best_glyphs.pop_front();
206+
&xcoords, &best_choices);
207+
if (best_choices.size() > 0) {
208+
current_char = best_choices.front().first;
209+
timestepEnd = best_choices.front().second;
210+
best_choices.pop_front();
211211
}
212212
} else {
213213
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
@@ -243,25 +243,25 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
243243
WERD_RES* word_res = InitializeWord(
244244
leading_space, line_box, word_start, word_end,
245245
std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
246-
if (glyph_confidence == 1) {
246+
if (lstm_choice_mode == 1) {
247247
for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
248248
word_res->timesteps.push_back(timesteps[i]);
249249
}
250250
timestepEnd = xcoords[word_end];
251-
} else if (glyph_confidence == 2) {
251+
} else if (lstm_choice_mode == 2) {
252252
float sum = 0;
253-
std::vector<std::pair<const char*, float>> glyph_pairs;
253+
std::vector<std::pair<const char*, float>> choice_pairs;
254254
for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
255-
for (std::pair<const char*, float> glyph : timesteps[i]) {
256-
if (std::strcmp(glyph.first, "") != 0) {
257-
sum += glyph.second;
258-
glyph_pairs.push_back(glyph);
255+
for (std::pair<const char*, float> choice : timesteps[i]) {
256+
if (std::strcmp(choice.first, "") != 0) {
257+
sum += choice.second;
258+
choice_pairs.push_back(choice);
259259
}
260260
}
261-
if (best_glyphs.size() > 0 && i == best_glyphs.front().second-1
261+
if (best_choices.size() > 0 && i == best_choices.front().second - 1
262262
|| i == xcoords[word_end]-1) {
263263
std::map<const char*, float> summed_propabilities;
264-
for(auto it = glyph_pairs.begin(); it != glyph_pairs.end(); ++it) {
264+
for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
265265
summed_propabilities[it->first] += it->second;
266266
}
267267
std::vector<std::pair<const char*, float>> accumulated_timestep;
@@ -282,11 +282,11 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
282282
std::pair<const char*,float>(it->first,
283283
it->second));
284284
}
285-
if (best_glyphs.size() > 0) {
286-
current_char = best_glyphs.front().first;
287-
best_glyphs.pop_front();
285+
if (best_choices.size() > 0) {
286+
current_char = best_choices.front().first;
287+
best_choices.pop_front();
288288
}
289-
glyph_pairs.clear();
289+
choice_pairs.clear();
290290
word_res->timesteps.push_back(accumulated_timestep);
291291
sum = 0;
292292
}
@@ -366,7 +366,7 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
366366
const GenericVector<const RecodeNode*>& best_nodes,
367367
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
368368
GenericVector<float>* ratings, GenericVector<int>* xcoords,
369-
std::deque<std::pair<int,int>>* best_glyphs) {
369+
std::deque<std::pair<int, int>>* best_choices) {
370370
unichar_ids->truncate(0);
371371
certs->truncate(0);
372372
ratings->truncate(0);
@@ -395,8 +395,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
395395
}
396396
unichar_ids->push_back(unichar_id);
397397
xcoords->push_back(t);
398-
if(best_glyphs != nullptr) {
399-
best_glyphs->push_back(std::pair<int,int>(unichar_id,t));
398+
if (best_choices != nullptr) {
399+
best_choices->push_back(std::pair<int, int>(unichar_id, t));
400400
}
401401
do {
402402
double cert = best_nodes[t++]->certainty;

src/lstm/recodebeam.h

+5-5
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ class RecodeBeamSearch {
186186
// If charset is not null, it enables detailed debugging of the beam search.
187187
void Decode(const NetworkIO& output, double dict_ratio, double cert_offset,
188188
double worst_dict_cert, const UNICHARSET* charset,
189-
int glyph_confidence = 0);
189+
int lstm_choice_mode = 0);
190190
void Decode(const GENERIC_2D_ARRAY<float>& output, double dict_ratio,
191191
double cert_offset, double worst_dict_cert,
192192
const UNICHARSET* charset);
@@ -206,7 +206,7 @@ class RecodeBeamSearch {
206206
void ExtractBestPathAsWords(const TBOX& line_box, float scale_factor,
207207
bool debug, const UNICHARSET* unicharset,
208208
PointerVector<WERD_RES>* words,
209-
int glyph_confidence = 0);
209+
int lstm_choice_mode = 0);
210210

211211
// Generates debug output of the content of the beams after a Decode.
212212
void DebugBeams(const UNICHARSET& unicharset) const;
@@ -282,7 +282,7 @@ class RecodeBeamSearch {
282282
const GenericVector<const RecodeNode*>& best_nodes,
283283
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
284284
GenericVector<float>* ratings, GenericVector<int>* xcoords,
285-
std::deque<std::pair<int,int>>* best_glyphs = nullptr);
285+
std::deque<std::pair<int,int>>* best_choices = nullptr);
286286

287287
// Sets up a word with the ratings matrix and fake blobs with boxes in the
288288
// right places.
@@ -303,8 +303,8 @@ class RecodeBeamSearch {
303303
double cert_offset, double worst_dict_cert,
304304
const UNICHARSET* charset, bool debug = false);
305305

306-
//Saves the most certain glyphs for the current time-step
307-
void SaveMostCertainGlyphs(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord);
306+
//Saves the most certain choices for the current time-step
307+
void SaveMostCertainChoices(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord);
308308

309309
// Adds to the appropriate beams the legal (according to recoder)
310310
// continuations of context prev, which is from the given index to beams_,

0 commit comments

Comments
 (0)