Skip to content

Commit a303ab9

Browse files
committed
Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.
1 parent d00d833 commit a303ab9

16 files changed

+19140
-21637
lines changed

api/pdfrenderer.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -636,9 +636,9 @@ bool TessPDFRenderer::BeginDocumentHandler() {
636636
" /Length1 %ld\n"
637637
">>\n"
638638
"stream\n", size, size);
639-
if (n >= sizeof(buf)) {
640-
delete[] buffer;
641-
return false;
639+
if (n >= sizeof(buf)) {
640+
delete[] buffer;
641+
return false;
642642
}
643643
AppendString(buf);
644644
objsize = strlen(buf);

ccmain/pgedit.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,7 @@ void Tesseract::do_re_display(
314314
image_win->Image(pix_binary_, 0, 0);
315315
}
316316

317+
image_win->Brush(ScrollView::NONE);
317318
PAGE_RES_IT pr_it(current_page_res);
318319
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
319320
(this->*word_painter)(&pr_it);

ccmain/tessedit.cpp

+5-3
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
/**********************************************************************
22
* File: tessedit.cpp (Formerly tessedit.c)
3-
* Description: Main program for merge of tess and editor.
4-
* Author: Ray Smith
5-
* Created: Tue Jan 07 15:21:46 GMT 1992
3+
* Description: (Previously) Main program for merge of tess and editor.
4+
* Now just code to load the language model and various
5+
* engine-specific data files.
6+
* Author: Ray Smith
7+
* Created: Tue Jan 07 15:21:46 GMT 1992
68
*
79
* (C) Copyright 1992, Hewlett-Packard Ltd.
810
** Licensed under the Apache License, Version 2.0 (the "License");

ccstruct/matrix.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,11 @@ MATRIX* MATRIX::DeepCopy() const {
9696
int band_width = bandwidth();
9797
MATRIX* result = new MATRIX(dim, band_width);
9898
for (int col = 0; col < dim; ++col) {
99-
for (int row = col; row < col + band_width; ++row) {
99+
for (int row = col; row < dim && row < col + band_width; ++row) {
100100
BLOB_CHOICE_LIST* choices = get(col, row);
101101
if (choices != NULL) {
102102
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
103-
choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy);
103+
copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
104104
result->put(col, row, copy_choices);
105105
}
106106
}

ccstruct/werd.cpp

+2-5
Original file line number | Diff line number | Diff line change
@@ -50,17 +50,14 @@ WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text)
5050
flags(0),
5151
script_id_(0),
5252
correct(text) {
53-
C_BLOB_IT start_it = blob_list;
54-
C_BLOB_IT end_it = blob_list;
53+
C_BLOB_IT start_it = &cblobs;
5554
C_BLOB_IT rej_cblob_it = &rej_cblobs;
5655
C_OUTLINE_IT c_outline_it;
5756
inT16 inverted_vote = 0;
5857
inT16 non_inverted_vote = 0;
5958

6059
// Move blob_list's elements into cblobs.
61-
while (!end_it.at_last())
62-
end_it.forward();
63-
cblobs.assign_to_sublist(&start_it, &end_it);
60+
start_it.add_list_after(blob_list);
6461

6562
/*
6663
Set white on black flag for the WERD, moving any duff blobs onto the

ccutil/unicharset.cpp

+68-75
Original file line number | Diff line number | Diff line change
@@ -99,12 +99,12 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
9999
max_bottom = MAX_UINT8;
100100
min_top = 0;
101101
max_top = MAX_UINT8;
102-
min_width = 0;
103-
max_width = MAX_INT16;
104-
min_bearing = 0;
105-
max_bearing = MAX_INT16;
106-
min_advance = 0;
107-
max_advance = MAX_INT16;
102+
width = 0.0f;
103+
width_sd = 0.0f;
104+
bearing = 0.0f;
105+
bearing_sd = 0.0f;
106+
advance = 0.0f;
107+
advance_sd = 0.0f;
108108
}
109109

110110
// Sets all ranges to empty. Used before expanding with font-based data.
@@ -113,20 +113,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
113113
max_bottom = 0;
114114
min_top = MAX_UINT8;
115115
max_top = 0;
116-
min_width = MAX_INT16;
117-
max_width = 0;
118-
min_bearing = MAX_INT16;
119-
max_bearing = 0;
120-
min_advance = MAX_INT16;
121-
max_advance = 0;
116+
width = 0.0f;
117+
width_sd = 0.0f;
118+
bearing = 0.0f;
119+
bearing_sd = 0.0f;
120+
advance = 0.0f;
121+
advance_sd = 0.0f;
122122
}
123123

124-
// Returns true if any of the top/bottom/width/bearing/advance ranges is
125-
// empty.
124+
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
125+
// is empty.
126126
bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
127-
return min_bottom > max_bottom || min_top > max_top ||
128-
min_width > max_width || min_bearing > max_bearing ||
129-
min_advance > max_advance;
127+
return width == 0.0f || advance == 0.0f;
130128
}
131129

132130
// Expands the ranges with the ranges from the src properties.
@@ -136,12 +134,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
136134
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
137135
UpdateRange(src.min_top, &min_top, &max_top);
138136
UpdateRange(src.max_top, &min_top, &max_top);
139-
UpdateRange(src.min_width, &min_width, &max_width);
140-
UpdateRange(src.max_width, &min_width, &max_width);
141-
UpdateRange(src.min_bearing, &min_bearing, &max_bearing);
142-
UpdateRange(src.max_bearing, &min_bearing, &max_bearing);
143-
UpdateRange(src.min_advance, &min_advance, &max_advance);
144-
UpdateRange(src.max_advance, &min_advance, &max_advance);
137+
if (src.width_sd > width_sd) {
138+
width = src.width;
139+
width_sd = src.width_sd;
140+
}
141+
if (src.bearing_sd > bearing_sd) {
142+
bearing = src.bearing;
143+
bearing_sd = src.bearing_sd;
144+
}
145+
if (src.advance_sd > advance_sd) {
146+
advance = src.advance;
147+
advance_sd = src.advance_sd;
148+
}
145149
}
146150

147151
// Copies the properties from src into this.
@@ -430,8 +434,6 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
430434
}
431435
unichars[ch].properties.CopyFrom(properties);
432436
set_normed_ids(ch);
433-
} else {
434-
tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
435437
}
436438
}
437439
}
@@ -473,15 +475,15 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
473475
for (int ch = 0; ch < src.size_used; ++ch) {
474476
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
475477
const char* utf8 = src.id_to_unichar(ch);
476-
if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
478+
if (ch >= SPECIAL_UNICHAR_CODES_COUNT && src_props.AnyRangeEmpty()) {
477479
// Only use fully valid entries.
478480
tprintf("Bad properties for index %d, char %s: "
479-
"%d,%d %d,%d %d,%d %d,%d %d,%d\n",
481+
"%d,%d %d,%d %g,%g %g,%g %g,%g\n",
480482
ch, utf8, src_props.min_bottom, src_props.max_bottom,
481483
src_props.min_top, src_props.max_top,
482-
src_props.min_width, src_props.max_width,
483-
src_props.min_bearing, src_props.max_bearing,
484-
src_props.min_advance, src_props.max_advance);
484+
src_props.width, src_props.width_sd,
485+
src_props.bearing, src_props.bearing_sd,
486+
src_props.advance, src_props.advance_sd);
485487
continue;
486488
}
487489
int id = size_used;
@@ -564,8 +566,6 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
564566
UNICHAR_PROPERTIES* props) const {
565567
props->Init();
566568
props->SetRangesEmpty();
567-
props->min_advance = 0;
568-
props->max_advance = 0;
569569
int total_unicodes = 0;
570570
GenericVector<UNICHAR_ID> encoding;
571571
if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
@@ -586,21 +586,16 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
586586
UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
587587
UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
588588
UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
589-
int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
590-
-MAX_INT16, MAX_INT16);
591-
if (total_unicodes == 0 || bearing < props->min_bearing)
592-
props->min_bearing = bearing;
593-
bearing = ClipToRange(props->max_advance + src_props.max_bearing,
594-
-MAX_INT16, MAX_INT16);
595-
if (total_unicodes == 0 || bearing < props->max_bearing)
596-
props->max_bearing = bearing;
597-
props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
598-
-MAX_INT16, MAX_INT16);
599-
props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
600-
-MAX_INT16, MAX_INT16);
589+
float bearing = props->advance + src_props.bearing;
590+
if (total_unicodes == 0 || bearing < props->bearing) {
591+
props->bearing = bearing;
592+
props->bearing_sd = props->advance_sd + src_props.bearing_sd;
593+
}
594+
props->advance += src_props.advance;
595+
props->advance_sd += src_props.advance_sd;
601596
// With a single width, just use the widths stored in the unicharset.
602-
props->min_width = src_props.min_width;
603-
props->max_width = src_props.max_width;
597+
props->width = src_props.width;
598+
props->width_sd = src_props.width_sd;
604599
// Use the first script id, other_case, mirror, direction.
605600
// Note that these will need translation, except direction.
606601
if (total_unicodes == 0) {
@@ -616,10 +611,8 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
616611
}
617612
if (total_unicodes > 1) {
618613
// Estimate the total widths from the advance - bearing.
619-
props->min_width = ClipToRange(props->min_advance - props->max_bearing,
620-
-MAX_INT16, MAX_INT16);
621-
props->max_width = ClipToRange(props->max_advance - props->min_bearing,
622-
-MAX_INT16, MAX_INT16);
614+
props->width = props->advance - props->bearing;
615+
props->width_sd = props->advance_sd + props->bearing_sd;
623616
}
624617
return total_unicodes > 0;
625618
}
@@ -707,23 +700,23 @@ bool UNICHARSET::save_to_string(STRING *str) const {
707700
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
708701
int min_bottom, max_bottom, min_top, max_top;
709702
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
710-
int min_width, max_width;
711-
get_width_range(id, &min_width, &max_width);
712-
int min_bearing, max_bearing;
713-
get_bearing_range(id, &min_bearing, &max_bearing);
714-
int min_advance, max_advance;
715-
get_advance_range(id, &min_advance, &max_advance);
703+
float width, width_sd;
704+
get_width_stats(id, &width, &width_sd);
705+
float bearing, bearing_sd;
706+
get_bearing_stats(id, &bearing, &bearing_sd);
707+
float advance, advance_sd;
708+
get_advance_stats(id, &advance, &advance_sd);
716709
unsigned int properties = this->get_properties(id);
717710
if (strcmp(this->id_to_unichar(id), " ") == 0) {
718711
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
719712
this->get_script_from_script_id(this->get_script(id)),
720713
this->get_other_case(id));
721714
} else {
722715
snprintf(buffer, kFileBufSize,
723-
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
716+
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
724717
this->id_to_unichar(id), properties,
725-
min_bottom, max_bottom, min_top, max_top, min_width, max_width,
726-
min_bearing, max_bearing, min_advance, max_advance,
718+
min_bottom, max_bottom, min_top, max_top, width, width_sd,
719+
bearing, bearing_sd, advance, advance_sd,
727720
this->get_script_from_script_id(this->get_script(id)),
728721
this->get_other_case(id), this->get_direction(id),
729722
this->get_mirror(id), this->get_normed_unichar(id),
@@ -821,12 +814,12 @@ bool UNICHARSET::load_via_fgets(
821814
int max_bottom = MAX_UINT8;
822815
int min_top = 0;
823816
int max_top = MAX_UINT8;
824-
int min_width = 0;
825-
int max_width = MAX_INT16;
826-
int min_bearing = 0;
827-
int max_bearing = MAX_INT16;
828-
int min_advance = 0;
829-
int max_advance = MAX_INT16;
817+
float width = 0.0f;
818+
float width_sd = 0.0f;
819+
float bearing = 0.0f;
820+
float bearing_sd = 0.0f;
821+
float advance = 0.0f;
822+
float advance_sd = 0.0f;
830823
// TODO(eger): check that this default is ok
831824
// after enabling BiDi iterator for Arabic+Cube.
832825
int direction = UNICHARSET::U_LEFT_TO_RIGHT;
@@ -836,19 +829,19 @@ bool UNICHARSET::load_via_fgets(
836829
int v = -1;
837830
if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
838831
((v = sscanf(buffer,
839-
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s",
832+
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
840833
unichar, &properties,
841834
&min_bottom, &max_bottom, &min_top, &max_top,
842-
&min_width, &max_width, &min_bearing, &max_bearing,
843-
&min_advance, &max_advance, script, &other_case,
835+
&width, &width_sd, &bearing, &bearing_sd,
836+
&advance, &advance_sd, script, &other_case,
844837
&direction, &mirror, normed)) != 17 &&
845838
(v = sscanf(buffer,
846-
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d",
839+
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
847840
unichar, &properties,
848841
&min_bottom, &max_bottom, &min_top, &max_top,
849-
&min_width, &max_width, &min_bearing, &max_bearing,
850-
&min_advance, &max_advance,
851-
script, &other_case, &direction, &mirror)) != 16 &&
842+
&width, &width_sd, &bearing, &bearing_sd,
843+
&advance, &advance_sd, script, &other_case,
844+
&direction, &mirror)) != 16 &&
852845
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
853846
unichar, &properties,
854847
&min_bottom, &max_bottom, &min_top, &max_top,
@@ -888,9 +881,9 @@ bool UNICHARSET::load_via_fgets(
888881
this->set_script(id, script);
889882
this->unichars[id].properties.enabled = true;
890883
this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
891-
this->set_width_range(id, min_width, max_width);
892-
this->set_bearing_range(id, min_bearing, max_bearing);
893-
this->set_advance_range(id, min_advance, max_advance);
884+
this->set_width_stats(id, width, width_sd);
885+
this->set_bearing_stats(id, bearing, bearing_sd);
886+
this->set_advance_stats(id, advance, advance_sd);
894887
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
895888
ASSERT_HOST(other_case < unicharset_size);
896889
this->set_other_case(id, (v>3) ? other_case : id);

0 commit comments

Comments
 (0)