Skip to content

Commit 6a0f8e8

Browse files
committed
ColPartition: Rename median_size_ -> median_height_
This implements a TODO. Also rename some related items. Signed-off-by: Stefan Weil <[email protected]>
1 parent 4370714 commit 6a0f8e8

File tree

6 files changed

+50
-51
lines changed

6 files changed

+50
-51
lines changed

src/textord/colfind.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1177,12 +1177,12 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
11771177
}
11781178
}
11791179
if (best_part != nullptr &&
1180-
best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
1180+
best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) {
11811181
// Close enough to merge.
11821182
if (debug) {
11831183
tprintf("Adding noise blob with distance %d, thr=%g:box:",
11841184
best_distance,
1185-
kMaxDistToPartSizeRatio * best_part->median_size());
1185+
kMaxDistToPartSizeRatio * best_part->median_height());
11861186
blob->bounding_box().print();
11871187
tprintf("To partition:");
11881188
best_part->Print();

src/textord/colpartition.cpp

+20-20
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ const int kMaxColorDistance = 900;
8080
// Vertical is the direction of logical vertical on the possibly skewed image.
8181
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
8282
: left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83-
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_size_(0),
83+
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
8484
median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
8585
blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
8686
good_width_(false), good_column_(false),
@@ -163,7 +163,7 @@ ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type,
163163
part->bounding_box_ = TBOX(left, bottom, right, top);
164164
part->median_bottom_ = bottom;
165165
part->median_top_ = top;
166-
part->median_size_ = top - bottom;
166+
part->median_height_ = top - bottom;
167167
part->median_left_ = left;
168168
part->median_right_ = right;
169169
part->median_width_ = right - left;
@@ -416,7 +416,7 @@ bool ColPartition::MatchingSizes(const ColPartition& other) const {
416416
if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
417417
return !TabFind::DifferentSizes(median_width_, other.median_width_);
418418
else
419-
return !TabFind::DifferentSizes(median_size_, other.median_size_);
419+
return !TabFind::DifferentSizes(median_height_, other.median_height_);
420420
}
421421

422422
// Returns true if there is no tabstop violation in merging this and other.
@@ -904,14 +904,14 @@ void ColPartition::ComputeLimits() {
904904
blob_type() == BRT_POLYIMAGE) {
905905
median_top_ = bounding_box_.top();
906906
median_bottom_ = bounding_box_.bottom();
907-
median_size_ = bounding_box_.height();
907+
median_height_ = bounding_box_.height();
908908
median_left_ = bounding_box_.left();
909909
median_right_ = bounding_box_.right();
910910
median_width_ = bounding_box_.width();
911911
} else {
912912
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
913913
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
914-
STATS size_stats(0, bounding_box_.height() + 1);
914+
STATS height_stats(0, bounding_box_.height() + 1);
915915
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
916916
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
917917
STATS width_stats(0, bounding_box_.width() + 1);
@@ -922,15 +922,15 @@ void ColPartition::ComputeLimits() {
922922
int area = box.area();
923923
top_stats.add(box.top(), area);
924924
bottom_stats.add(box.bottom(), area);
925-
size_stats.add(box.height(), area);
925+
height_stats.add(box.height(), area);
926926
left_stats.add(box.left(), area);
927927
right_stats.add(box.right(), area);
928928
width_stats.add(box.width(), area);
929929
}
930930
}
931931
median_top_ = static_cast<int>(top_stats.median() + 0.5);
932932
median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
933-
median_size_ = static_cast<int>(size_stats.median() + 0.5);
933+
median_height_ = static_cast<int>(height_stats.median() + 0.5);
934934
median_left_ = static_cast<int>(left_stats.median() + 0.5);
935935
median_right_ = static_cast<int>(right_stats.median() + 0.5);
936936
median_width_ = static_cast<int>(width_stats.median() + 0.5);
@@ -1492,23 +1492,23 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
14921492
" sizes %d %d %d\n",
14931493
part->top_spacing(), part->bottom_spacing(),
14941494
next_part->top_spacing(), next_part->bottom_spacing(),
1495-
part->median_size(), next_part->median_size(),
1496-
third_part != nullptr ? third_part->median_size() : 0);
1495+
part->median_height(), next_part->median_height(),
1496+
third_part != nullptr ? third_part->median_height() : 0);
14971497
}
14981498
// We can only consider adding the next line to the block if the sizes
14991499
// match and the lines are close enough for their size.
15001500
if (part->SizesSimilar(*next_part) &&
1501-
next_part->median_size() * kMaxSameBlockLineSpacing >
1501+
next_part->median_height() * kMaxSameBlockLineSpacing >
15021502
part->bottom_spacing() &&
1503-
part->median_size() * kMaxSameBlockLineSpacing >
1503+
part->median_height() * kMaxSameBlockLineSpacing >
15041504
part->top_spacing()) {
15051505
// Even now, we can only add it as long as the third line doesn't
15061506
// match in the same way and have a smaller bottom spacing.
15071507
if (third_part == nullptr ||
15081508
!next_part->SizesSimilar(*third_part) ||
1509-
third_part->median_size() * kMaxSameBlockLineSpacing <=
1509+
third_part->median_height() * kMaxSameBlockLineSpacing <=
15101510
next_part->bottom_spacing() ||
1511-
next_part->median_size() * kMaxSameBlockLineSpacing <=
1511+
next_part->median_height() * kMaxSameBlockLineSpacing <=
15121512
next_part->top_spacing() ||
15131513
next_part->bottom_spacing() > part->bottom_spacing()) {
15141514
// Add to the current block.
@@ -1532,7 +1532,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
15321532
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
15331533
part->top_spacing(), part->bottom_spacing(),
15341534
next_part->top_spacing(), next_part->bottom_spacing(),
1535-
part->median_size(), next_part->median_size());
1535+
part->median_height(), next_part->median_height());
15361536
}
15371537
}
15381538
}
@@ -1647,7 +1647,7 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
16471647
// put the average spacing in each partition, so we can just take the
16481648
// linespacing from the first partition.
16491649
int line_spacing = part->bottom_spacing();
1650-
if (line_spacing < part->median_size())
1650+
if (line_spacing < part->median_height())
16511651
line_spacing = part->bounding_box().height();
16521652
ICOORDELT_LIST vertices;
16531653
ICOORDELT_IT vert_it(&vertices);
@@ -1715,7 +1715,7 @@ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft,
17151715
TO_ROW* ColPartition::MakeToRow() {
17161716
BLOBNBOX_C_IT blob_it(&boxes_);
17171717
TO_ROW* row = nullptr;
1718-
int line_size = IsVerticalType() ? median_width_ : median_size_;
1718+
int line_size = IsVerticalType() ? median_width_ : median_height_;
17191719
// Add all the blobs to a single TO_ROW.
17201720
for (; !blob_it.empty(); blob_it.forward()) {
17211721
BLOBNBOX* blob = blob_it.extract();
@@ -1746,7 +1746,7 @@ ColPartition* ColPartition::ShallowCopy() const {
17461746
sizeof(special_blobs_densities_));
17471747
part->median_bottom_ = median_bottom_;
17481748
part->median_top_ = median_top_;
1749-
part->median_size_ = median_size_;
1749+
part->median_height_ = median_height_;
17501750
part->median_left_ = median_left_;
17511751
part->median_right_ = median_right_;
17521752
part->median_width_ = median_width_;
@@ -2398,15 +2398,15 @@ int ColPartition::BottomSpacingMargin(int resolution) const {
23982398
// Returns a suitable spacing margin that can be applied to tops of
23992399
// text lines, based on the resolution and the stored side_step_.
24002400
int ColPartition::TopSpacingMargin(int resolution) const {
2401-
return static_cast<int>(kMaxTopSpacingFraction * median_size_ + 0.5) +
2401+
return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) +
24022402
BottomSpacingMargin(resolution);
24032403
}
24042404

24052405
// Returns true if the median text sizes of this and other agree to within
24062406
// a reasonable multiplicative factor.
24072407
bool ColPartition::SizesSimilar(const ColPartition& other) const {
2408-
return median_size_ <= other.median_size_ * kMaxSizeRatio &&
2409-
other.median_size_ <= median_size_ * kMaxSizeRatio;
2408+
return median_height_ <= other.median_height_ * kMaxSizeRatio &&
2409+
other.median_height_ <= median_height_ * kMaxSizeRatio;
24102410
}
24112411

24122412
// Helper updates margin_left and margin_right, being the bounds of the left

src/textord/colpartition.h

+5-6
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,11 @@ class ColPartition : public ELIST2_LINK {
134134
int median_right() const {
135135
return median_right_;
136136
}
137-
int median_size() const {
138-
return median_size_;
137+
int median_height() const {
138+
return median_height_;
139139
}
140-
void set_median_size(int size) {
141-
median_size_ = size;
140+
void set_median_height(int height) {
141+
median_height_ = height;
142142
}
143143
int median_width() const {
144144
return median_width_;
@@ -839,8 +839,7 @@ class ColPartition : public ELIST2_LINK {
839839
int median_bottom_;
840840
int median_top_;
841841
// Median height of blobs in this partition.
842-
// TODO(rays) rename median_height_.
843-
int median_size_;
842+
int median_height_;
844843
// Median left and right of blobs in this partition.
845844
int median_left_;
846845
int median_right_;

src/textord/colpartitiongrid.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
694694
// Get metrics from the row that will be used for the block.
695695
TBOX box = part->bounding_box();
696696
int median_width = part->median_width();
697-
int median_height = part->median_size();
697+
int median_height = part->median_height();
698698
// Turn the partition into a TO_ROW.
699699
TO_ROW* row = part->MakeToRow();
700700
if (row == nullptr) {

src/textord/strokewidth.cpp

+10-10
Original file line numberDiff line numberDiff line change
@@ -297,21 +297,21 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
297297
box.bottom());
298298
// Find the largest object in the search box not equal to bbox.
299299
BlobGridSearch rsearch(this);
300-
int max_size = 0;
300+
int max_height = 0;
301301
BLOBNBOX* n;
302302
rsearch.StartRectSearch(search_box);
303303
while ((n = rsearch.NextRectSearch()) != nullptr) {
304304
if (n == bbox) continue;
305305
TBOX nbox = n->bounding_box();
306-
if (nbox.height() > max_size) {
307-
max_size = nbox.height();
306+
if (nbox.height() > max_height) {
307+
max_height = nbox.height();
308308
}
309309
}
310310
if (debug) {
311-
tprintf("Max neighbour size=%d for candidate line box at:", max_size);
311+
tprintf("Max neighbour size=%d for candidate line box at:", max_height);
312312
box.print();
313313
}
314-
if (max_size * kLineResidueSizeRatio < box.height()) {
314+
if (max_height * kLineResidueSizeRatio < box.height()) {
315315
#ifndef GRAPHICS_DISABLED
316316
if (leaders_win_ != nullptr) {
317317
// We are debugging, so display deleted in pink blobs in the same
@@ -582,7 +582,7 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
582582
BLOBNBOX_LIST* blobs = &block->blobs;
583583
int median_height = UpperQuartileCJKSize(gridsize(), blobs);
584584
int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
585-
int max_size = static_cast<int>(median_height * kCJKAspectRatio);
585+
int max_height = static_cast<int>(median_height * kCJKAspectRatio);
586586
int num_fixed = 0;
587587
BLOBNBOX_IT blob_it(blobs);
588588

@@ -594,12 +594,12 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
594594
bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
595595
bbox.bottom());
596596
if (debug) {
597-
tprintf("Checking for Broken CJK (max size=%d):", max_size);
597+
tprintf("Checking for Broken CJK (max size=%d):", max_height);
598598
bbox.print();
599599
}
600600
// Generate a list of blobs that overlap or are near enough to merge.
601601
BLOBNBOX_CLIST overlapped_blobs;
602-
AccumulateOverlaps(blob, debug, max_size, max_dist,
602+
AccumulateOverlaps(blob, debug, max_height, max_dist,
603603
&bbox, &overlapped_blobs);
604604
if (!overlapped_blobs.empty()) {
605605
// There are overlapping blobs, so qualify them as being satisfactory
@@ -1596,10 +1596,10 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) {
15961596
if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
15971597
x_gap, y_gap, total_distance);
15981598
if (total_distance >
1599-
neighbour->owner()->median_size() * kMaxDiacriticDistanceRatio) {
1599+
neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) {
16001600
if (debug) {
16011601
tprintf("Neighbour with median size %d too far away:",
1602-
neighbour->owner()->median_size());
1602+
neighbour->owner()->median_height());
16031603
neighbour->bounding_box().print();
16041604
}
16051605
continue; // Diacritics must not be too distant.

src/textord/tablefind.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ const double kMaxBlobOverlapFactor = 4.0;
8181
const double kMaxTableCellXheight = 2.0;
8282

8383
// Maximum line spacing between a table column header and column contents
84-
// for merging the two (as a multiple of the partition's median_size).
84+
// for merging the two (as a multiple of the partition's median_height).
8585
const int kMaxColumnHeaderDistance = 4;
8686

8787
// Minimum ratio of num_table_partitions to num_text_partitions in a column
@@ -493,7 +493,7 @@ bool TableFinder::AllowTextPartition(const ColPartition& part) const {
493493
const int median_area = global_median_xheight_ * global_median_blob_width_;
494494
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
495495
// Keep comparisons strictly greater to disallow 0!
496-
return part.median_size() > kHeightRequired &&
496+
return part.median_height() > kHeightRequired &&
497497
part.median_width() > kWidthRequired &&
498498
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
499499
}
@@ -724,7 +724,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) {
724724
// table find runs. Alternative solution.
725725
// part->ComputeLimits();
726726
if (part->IsTextType()) {
727-
// xheight_stats.add(part->median_size(), part->boxes_count());
727+
// xheight_stats.add(part->median_height(), part->boxes_count());
728728
// width_stats.add(part->median_width(), part->boxes_count());
729729

730730
// This loop can be removed when above issues are fixed.
@@ -835,7 +835,7 @@ void TableFinder::MarkPartitionsUsingLocalInformation() {
835835
if (!part->IsTextType()) // Only consider text partitions
836836
continue;
837837
// Only consider partitions in dominant font size or smaller
838-
if (part->median_size() > kMaxTableCellXheight * global_median_xheight_)
838+
if (part->median_height() > kMaxTableCellXheight * global_median_xheight_)
839839
continue;
840840
// Mark partitions with a large gap, or no significant gap as
841841
// table partitions.
@@ -863,7 +863,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
863863
BLOBNBOX_C_IT it(part_boxes);
864864
// Check if this is a relatively small partition (such as a single word)
865865
if (part->bounding_box().width() <
866-
kMinBoxesInTextPartition * part->median_size() &&
866+
kMinBoxesInTextPartition * part->median_height() &&
867867
part_boxes->length() < kMinBoxesInTextPartition)
868868
return true;
869869

@@ -876,8 +876,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
876876
// Text partition gap limits. If this is text (and not a table),
877877
// there should be at least one gap larger than min_gap and no gap
878878
// larger than max_gap.
879-
const double max_gap = kMaxGapInTextPartition * part->median_size();
880-
const double min_gap = kMinMaxGapInTextPartition * part->median_size();
879+
const double max_gap = kMaxGapInTextPartition * part->median_height();
880+
const double min_gap = kMinMaxGapInTextPartition * part->median_height();
881881

882882
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
883883
BLOBNBOX* blob = it.data();
@@ -895,7 +895,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
895895
// More likely case, the blobs slightly overlap. This can happen
896896
// with diacritics (accents) or broken alphabet symbols (characters).
897897
// Merge boxes together by taking max of right sides.
898-
if (-gap < part->median_size() * kMaxBlobOverlapFactor) {
898+
if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
899899
previous_x1 = std::max(previous_x1, current_x1);
900900
continue;
901901
}
@@ -918,7 +918,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
918918
// Since no large gap was found, return false if the partition is too
919919
// long to be a data cell
920920
if (part->bounding_box().width() >
921-
kMaxBoxesInDataPartition * part->median_size() ||
921+
kMaxBoxesInDataPartition * part->median_height() ||
922922
part_boxes->length() > kMaxBoxesInDataPartition)
923923
return false;
924924

@@ -1051,7 +1051,7 @@ void TableFinder::FilterParagraphEndings() {
10511051
// TODO(nbeato): This would be untrue if the text was right aligned.
10521052
// How often is that?
10531053
if (part->space_to_left() >
1054-
kMaxParagraphEndingLeftSpaceMultiple * part->median_size())
1054+
kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
10551055
continue;
10561056
// The line above it should be right aligned (assuming justified format).
10571057
// Since we can't assume justified text, we compare whitespace to text.
@@ -1647,7 +1647,7 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part,
16471647
extra_space_to_left++;
16481648
continue;
16491649
}
1650-
int space_threshold = kSideSpaceMargin * part.median_size();
1650+
int space_threshold = kSideSpaceMargin * part.median_height();
16511651
if (extra_part->space_to_right() > space_threshold)
16521652
extra_space_to_right++;
16531653
if (extra_part->space_to_left() > space_threshold)
@@ -1672,7 +1672,7 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) {
16721672
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
16731673
// Max distance to find a table heading.
16741674
const int max_distance = kMaxColumnHeaderDistance *
1675-
neighbor->median_size();
1675+
neighbor->median_height();
16761676
int table_top = table_box->top();
16771677
const TBOX& box = neighbor->bounding_box();
16781678
// Do not continue if the next box is way above

0 commit comments

Comments
 (0)