20# include "config_auto.h"
41#ifndef GRAPHICS_DISABLED
42static INT_VAR(textord_tabfind_show_strokewidths, 0,
"Show stroke widths (ScrollView)");
44static INT_VAR(textord_tabfind_show_strokewidths, 0,
"Show stroke widths");
46static BOOL_VAR(textord_tabfind_only_strokewidths,
false,
"Only run stroke widths");
116 , nontext_map_(nullptr)
117 , projection_(nullptr)
119 , grid_box_(bleft, tright)
120 , rerotation_(1.0f, 0.0f) {
124#ifndef GRAPHICS_DISABLED
125 if (widths_win_ !=
nullptr) {
127 if (textord_tabfind_only_strokewidths) {
133 delete initial_widths_win_;
135 delete textlines_win_;
136 delete smoothed_win_;
137 delete diacritics_win_;
147 BLOBNBOX_IT blob_it(&block->
blobs);
148 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
149 SetNeighbours(
false,
false, blob_it.data());
161 InsertBlobs(input_block);
163 while (cjk_merge && FixBrokenCJK(input_block)) {
166 FindTextlineFlowDirection(pageseg_mode,
false);
// Walks input_blobs and distributes the blobs over three caller-owned lists.
// For each blob the box aspect ratio is computed both ways (y_x = height/width
// and x_y = 1/y_x) and the larger of the two is kept in ratio.
// Blobs judged vertical are appended to vertical_blobs and counted in
// *num_vertical_blobs; blobs judged horizontal are appended to
// horizontal_blobs and counted in *num_horizontal_blobs; any other blob for
// which ok_blob holds is appended to nondescript_blobs.
// The two counters are incremented, never reset, so successive calls
// accumulate into the same totals.
// NOTE(review): the vertical/horizontal decision presumably compares the
// ratios against a threshold - confirm.
172static void CollectHorizVertBlobs(BLOBNBOX_LIST *input_blobs,
int *num_vertical_blobs,
173 int *num_horizontal_blobs, BLOBNBOX_CLIST *vertical_blobs,
174 BLOBNBOX_CLIST *horizontal_blobs,
175 BLOBNBOX_CLIST *nondescript_blobs) {
176 BLOBNBOX_C_IT v_it(vertical_blobs);
177 BLOBNBOX_C_IT h_it(horizontal_blobs);
178 BLOBNBOX_C_IT n_it(nondescript_blobs);
179 BLOBNBOX_IT blob_it(input_blobs);
180 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
183 float y_x =
static_cast<float>(box.
height()) / box.
width();
184 float x_y = 1.0f / y_x;
186 float ratio = x_y > y_x ? x_y : y_x;
190 ++*num_vertical_blobs;
192 v_it.add_after_then_move(blob);
195 ++*num_horizontal_blobs;
197 h_it.add_after_then_move(blob);
199 }
else if (ok_blob) {
200 n_it.add_after_then_move(blob);
212 BLOBNBOX_CLIST *osd_blobs) {
213 int vertical_boxes = 0;
214 int horizontal_boxes = 0;
216 BLOBNBOX_CLIST vertical_blobs;
217 BLOBNBOX_CLIST horizontal_blobs;
218 BLOBNBOX_CLIST nondescript_blobs;
219 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes, &vertical_blobs,
220 &horizontal_blobs, &nondescript_blobs);
221 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes, &vertical_blobs,
222 &horizontal_blobs, &nondescript_blobs);
224 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", horizontal_boxes,
225 vertical_boxes, horizontal_blobs.length(), vertical_blobs.length(),
226 nondescript_blobs.length());
228 if (osd_blobs !=
nullptr && vertical_boxes == 0 && horizontal_boxes == 0) {
230 BLOBNBOX_C_IT osd_it(osd_blobs);
231 osd_it.add_list_after(&nondescript_blobs);
235 static_cast<int>((vertical_boxes + horizontal_boxes) * find_vertical_text_ratio);
236 if (vertical_boxes >= min_vert_boxes) {
237 if (osd_blobs !=
nullptr) {
238 BLOBNBOX_C_IT osd_it(osd_blobs);
239 osd_it.add_list_after(&vertical_blobs);
243 if (osd_blobs !=
nullptr) {
244 BLOBNBOX_C_IT osd_it(osd_blobs);
245 osd_it.add_list_after(&horizontal_blobs);
255 rerotation_.
set_x(rotation.
x());
256 rerotation_.
set_y(-rotation.
y());
263 ColPartition_LIST leader_parts;
264 FindLeadersAndMarkNoise(block, &leader_parts);
268 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
271 MarkLeaderNeighbours(part,
LR_LEFT);
272 MarkLeaderNeighbours(part,
LR_RIGHT);
294 TBOX search_box = box;
295 search_box.
pad(padding, padding);
307 if (nbox.
height() > max_height) {
308 max_height = nbox.
height();
312 tprintf(
"Max neighbour size=%d for candidate line box at:", max_height);
316#ifndef GRAPHICS_DISABLED
317 if (leaders_win_ !=
nullptr) {
347 BLOBNBOX_LIST *diacritic_blobs,
349 ColPartition_LIST *big_parts) {
350 nontext_map_ = nontext_pix;
351 projection_ = projection;
362 FindTextlineFlowDirection(pageseg_mode,
false);
364#ifndef GRAPHICS_DISABLED
365 if (textord_tabfind_show_strokewidths) {
377 FindTextlineFlowDirection(pageseg_mode,
true);
379 diacritic_blobs, part_grid, big_parts, &skew);
381 tprintf(
"Detected %d diacritics\n", diacritic_blobs->length());
385 FindTextlineFlowDirection(pageseg_mode,
true);
386 r = FindInitialPartitions(pageseg_mode, rerotation,
false, block, diacritic_blobs, part_grid,
389 nontext_map_ =
nullptr;
390 projection_ =
nullptr;
// Debug helper: prints the bounding box of neighbour followed by three
// stroke-width figures labelled h-width, v-width and p-width.
// All three widths are printed with one decimal place; the last field used to
// be written "%1.f" (minimum width 1, precision 0), which dropped the
// fractional part and was inconsistent with the other two fields.
394static void PrintBoxWidths(
BLOBNBOX *neighbour) {
396 tprintf(
"Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n", nbox.
left(),
409 FCOORD click(
static_cast<float>(
x),
static_cast<float>(
y));
413 PrintBoxWidths(neighbour);
429 "Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
430 "Good= %d %d %d %d\n",
// Finds leader partitions in the grid and re-files blobs between the block's
// lists.
//  1. A full grid search sets the neighbours of every blob with leaders=true
//     and the line trap disabled.
//  2. A second full search builds candidate partitions (BRT_UNKNOWN) from
//     chains of blobs whose flow is BTFT_NONE; candidates for which
//     MarkAsLeaderIfMonospaced() succeeds are appended to leader_parts.
//  3. With graphics enabled and textord_tabfind_show_strokewidths set, the
//     "LeaderNeighbours" debug window is created.
//  4. Blobs extracted through small_it have their neighbours cleared and are
//     appended to block->blobs; blobs reached through noise_it that are
//     BTFT_LEADER or joined to the previous blob are appended through small_it.
// NOTE(review): small_it / noise_it presumably iterate block->small_blobs and
// block->noise_blobs - confirm.
449void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK *block, ColPartition_LIST *leader_parts) {
455 gsearch.StartFullSearch();
456 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
457 SetNeighbours(
true,
false, bbox);
459 ColPartition_IT part_it(leader_parts);
460 gsearch.StartFullSearch();
461 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
467 auto *part =
new ColPartition(
BRT_UNKNOWN, ICOORD(0, 1));
469 for (blob = bbox; blob !=
nullptr && blob->flow() ==
BTFT_NONE;
477 if (part->MarkAsLeaderIfMonospaced()) {
478 part_it.add_after_then_move(part);
484#ifndef GRAPHICS_DISABLED
485 if (textord_tabfind_show_strokewidths) {
486 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
491 BLOBNBOX_IT blob_it(&block->
blobs);
493 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
494 BLOBNBOX *blob = small_it.data();
499 blob->ClearNeighbours();
500 blob_it.add_to_end(small_it.extract());
507 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
508 BLOBNBOX *blob = noise_it.data();
509 if (blob->flow() ==
BTFT_LEADER || blob->joined_to_prev()) {
510 small_it.add_to_end(noise_it.extract());
513 blob->ClearNeighbours();
522void StrokeWidth::InsertBlobs(TO_BLOCK *block) {
// Finds the blob nearest to the given side of a leader partition and flags it
// as having a leader beside it.
// A side search starts from the left or right edge of the partition box
// (chosen by side) over the partition's vertical extent. Blobs that do not
// overlap the partition in y are passed over; among the rest the one with the
// smallest x_gap to the partition becomes best_blob.
// If a best blob exists it is marked with set_leader_on_right(true) or
// set_leader_on_left(true) (NOTE(review): presumably selected by side -
// confirm), and, when the leaders debug window exists, its box is drawn.
530void StrokeWidth::MarkLeaderNeighbours(
const ColPartition *part,
LeftOrRight side) {
531 const TBOX &part_box = part->bounding_box();
534 BLOBNBOX *best_blob =
nullptr;
536 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.left() : part_box.right(),
537 part_box.bottom(), part_box.top());
539 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) !=
nullptr) {
540 const TBOX &blob_box = blob->bounding_box();
541 if (!blob_box.y_overlap(part_box)) {
544 int x_gap = blob_box.x_gap(part_box);
547 }
else if (best_blob ==
nullptr || x_gap < best_gap) {
552 if (best_blob !=
nullptr) {
554 best_blob->set_leader_on_right(
true);
556 best_blob->set_leader_on_left(
true);
558#ifndef GRAPHICS_DISABLED
559 if (leaders_win_ !=
nullptr) {
561 const TBOX &blob_box = best_blob->bounding_box();
562 leaders_win_->
Rectangle(blob_box.left(), blob_box.bottom(), blob_box.right(), blob_box.top());
// Returns the 0.75 quantile of the blob heights accumulated in sizes, rounded
// to the nearest integer by adding 0.5 before truncation.
// Each blob's box width and height are read; heights are added to sizes with
// a count of 1.
// NOTE(review): gridsize and width presumably take part in a filter on which
// blobs contribute - confirm.
569static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST *blobs) {
571 BLOBNBOX_IT it(blobs);
572 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
573 BLOBNBOX *blob = it.data();
574 int width = blob->bounding_box().width();
575 int height = blob->bounding_box().height();
577 sizes.add(height, 1);
580 return static_cast<int>(sizes.ile(0.75f) + 0.5);
// Tries to repair CJK characters that were split into several blobs.
// The reference size comes from UpperQuartileCJKSize() over block->blobs
// (stored in median_height despite being an upper-quartile value).
// For each blob, AccumulateOverlaps() gathers nearby merge candidates into
// overlapped_blobs. The candidate list is then checked (the debug messages
// name a bad final aspect ratio and too many neighbours as reasons to give
// up) and the surviving neighbours are folded into blob with really_merge().
// When rerotation_ is not the identity (1, 0) the blob's box is rotated by it
// after merging.
// A final pass visits blobs that still have a non-empty outline list (tracked
// with num_remaining) and block->DeleteUnownedNoise() is called.
// NOTE(review): blobs with no cblob or an empty outline list are presumably
// skipped in the main loop - confirm.
588bool StrokeWidth::FixBrokenCJK(TO_BLOCK *block) {
589 BLOBNBOX_LIST *blobs = &block->blobs;
590 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
594 BLOBNBOX_IT blob_it(blobs);
596 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
597 BLOBNBOX *blob = blob_it.data();
598 if (blob->cblob() ==
nullptr || blob->cblob()->out_list()->empty()) {
604 tprintf(
"Checking for Broken CJK (max size=%d):", max_height);
608 BLOBNBOX_CLIST overlapped_blobs;
609 AccumulateOverlaps(blob, debug, max_height, max_dist, &bbox, &overlapped_blobs);
610 if (!overlapped_blobs.empty()) {
617 tprintf(
"Bad final aspectratio:");
625 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
630 BLOBNBOX_C_IT n_it(&overlapped_blobs);
631 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
632 BLOBNBOX *neighbour =
nullptr;
633 neighbour = n_it.data();
638 if (!n_it.cycled_list()) {
641 PrintBoxWidths(blob);
651 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
652 BLOBNBOX *neighbour = n_it.data();
656 blob->really_merge(neighbour);
657 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
658 blob->rotate_box(rerotation_);
670 int num_remaining = 0;
671 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
672 BLOBNBOX *blob = blob_it.data();
673 if (blob->cblob() !=
nullptr && !blob->cblob()->out_list()->empty()) {
679 block->DeleteUnownedNoise();
// Decides whether nbox may be merged into bbox while repairing broken CJK
// characters.
// Always writes the horizontal and vertical gaps between the two boxes to
// *x_gap and *y_gap, so callers can use them even when the merge is refused.
// A merge is only considered when both gaps are at most max_dist and the
// merged box is at most max_size in both width and height. In that case the
// width/height ratios of the old box and of the merged box are computed and
// inverted where necessary so that both are >= 1.
// NOTE(review): the two normalised ratios are presumably compared to make the
// final decision - confirm.
688static bool AcceptableCJKMerge(
const TBOX &bbox,
const TBOX &nbox,
bool debug,
int max_size,
689 int max_dist,
int *x_gap,
int *y_gap) {
690 *x_gap = bbox.x_gap(nbox);
691 *y_gap = bbox.y_gap(nbox);
695 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
698 if (*x_gap <= max_dist && *y_gap <= max_dist && merged.width() <= max_size &&
699 merged.height() <= max_size) {
701 double old_ratio =
static_cast<double>(bbox.width()) / bbox.height();
702 if (old_ratio < 1.0) {
703 old_ratio = 1.0 / old_ratio;
705 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
706 if (new_ratio < 1.0) {
707 new_ratio = 1.0 / new_ratio;
// Collects into blobs the neighbours of not_this that are acceptable to merge
// with the box *bbox.
// A radius search is run around the centre of *bbox. not_this itself is
// excluded. Each neighbour is tested with AcceptableCJKMerge(); accepted ones
// are added to blobs sorted by left edge (unique entries only).
// After an acceptance, every entry still held in nearests[] is re-tested and,
// if it now qualifies, is added too and its slot is reset to nullptr.
// A rejected neighbour may instead be remembered in nearests[dir] when it is
// closer (smaller y_gap for x-overlapping boxes, smaller x_gap for
// y-overlapping boxes) than the blob currently stored for that direction.
// Finally each remaining nearest blob is checked against *bbox: if the final
// box overlaps one of them, the candidate list is emptied with
// shallow_clear().
720void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX *not_this,
bool debug,
int max_size,
721 int max_dist,
TBOX *bbox, BLOBNBOX_CLIST *blobs) {
727 for (
auto &nearest : nearests) {
730 int x = (bbox->left() + bbox->right()) / 2;
731 int y = (bbox->bottom() + bbox->top()) / 2;
736 while ((neighbour = radsearch.NextRadSearch()) !=
nullptr) {
737 if (neighbour == not_this) {
740 TBOX nbox = neighbour->bounding_box();
742 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, &x_gap, &y_gap)) {
745 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
751 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
752 if (nearests[dir] ==
nullptr) {
755 nbox = nearests[dir]->bounding_box();
756 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, &x_gap, &y_gap)) {
759 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
764 nearests[dir] =
nullptr;
768 }
else if (x_gap < 0 && x_gap <= y_gap) {
771 if (nearests[dir] ==
nullptr || y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
772 nearests[dir] = neighbour;
774 }
else if (y_gap < 0 && y_gap <= x_gap) {
777 if (nearests[dir] ==
nullptr || x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
778 nearests[dir] = neighbour;
787 for (
auto &nearest : nearests) {
788 if (nearest ==
nullptr) {
791 const TBOX &nbox = nearest->bounding_box();
793 tprintf(
"Testing for overlap with:");
796 if (bbox->overlap(nbox)) {
797 blobs->shallow_clear();
799 tprintf(
"Final box overlaps nearest\n");
// Estimates the text-flow direction of every blob in the grid through a fixed
// sequence of full-grid passes:
//  1. SetNeighbours() with leaders=false; the line trap is active only when
//     display_if_debugging is true.
//  2. SimplifyObviousNeighbours().
//  3. When pageseg_mode restricts the search to vertical-only or
//     horizontal-only text the vert/horz flags are forced accordingly;
//     SetNeighbourFlows() is called in this pass.
//  4. Three SmoothNeighbourTypes() passes: the first with reset_all=false,
//     the last two with reset_all=true.
// With graphics enabled, the "InitialStrokewidths" window is created after
// pass 3 and the "ImprovedStrokewidths" window after pass 4, when
// textord_tabfind_show_strokewidths is non-zero and display_if_debugging is
// true, or when the variable is greater than 1.
813void StrokeWidth::FindTextlineFlowDirection(
PageSegMode pageseg_mode,
bool display_if_debugging) {
817 gsearch.StartFullSearch();
818 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
819 SetNeighbours(
false, display_if_debugging, bbox);
822 gsearch.StartFullSearch();
823 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
824 SimplifyObviousNeighbours(bbox);
827 gsearch.StartFullSearch();
828 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
829 if (FindingVerticalOnly(pageseg_mode)) {
830 bbox->set_vert_possible(
true);
831 bbox->set_horz_possible(
false);
832 }
else if (FindingHorizontalOnly(pageseg_mode)) {
833 bbox->set_vert_possible(
false);
834 bbox->set_horz_possible(
true);
836 SetNeighbourFlows(bbox);
839#ifndef GRAPHICS_DISABLED
840 if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
841 textord_tabfind_show_strokewidths > 1) {
842 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
846 gsearch.StartFullSearch();
847 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
848 SmoothNeighbourTypes(pageseg_mode,
false, bbox);
851 gsearch.StartFullSearch();
852 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
853 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
856 gsearch.StartFullSearch();
857 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
858 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
860#ifndef GRAPHICS_DISABLED
861 if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
862 textord_tabfind_show_strokewidths > 1) {
863 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
// Sets the neighbours of blob in every direction (0 .. BND_COUNT-1) by calling
// FindGoodNeighbour() and summing the line-trap counts it returns.
// leaders is forwarded to FindGoodNeighbour().
// If the summed count is positive and activate_line_trap is true, the blob is
// treated as trapped: its neighbours are cleared again and its bounding box is
// read for further handling.
872void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap, BLOBNBOX *blob) {
873 int line_trap_count = 0;
874 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
876 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
878 if (line_trap_count > 0 && activate_line_trap) {
880 blob->ClearNeighbours();
881 const TBOX &box = blob->bounding_box();
// Finds the best neighbour of blob in direction dir, records it with
// blob->set_neighbour(dir, best, best_is_good) and returns line_trap_count.
//
// Overlap thresholds: for left/right searches they are fractions of the blob
// height (1/2 for "good", 1/3 for "decent"), for above/below searches the
// same fractions of the width. Both can be overridden to 1
// (NOTE(review): presumably when leaders is set - confirm).
//
// Candidates come from a rectangle search over the blob box padded by
// search_pad. For each candidate:
//  - the blob itself is ignored;
//  - candidates whose centre x lies outside the blob's left/right rules are
//    tested separately;
//  - boxes whose smaller dimension exceeds line_trap_min and whose larger one
//    is below line_trap_max are tested for the line trap;
//  - overlap is measured along the axis perpendicular to the search direction
//    and gap along the search direction; a candidate wholly inside the
//    overlap range and elongated along the search axis uses its long side as
//    perp_overlap;
//  - candidates with -gap > overlap ("Overlaps wrong way") or perp_overlap
//    below min_decent_overlap ("Doesn't overlap enough") trigger the
//    corresponding debug message;
//  - the score is goodness = (1 + is_good) * overlap / gap, and the candidate
//    with the highest score wins.
892int StrokeWidth::FindGoodNeighbour(
BlobNeighbourDir dir,
bool leaders, BLOBNBOX *blob) {
894 TBOX blob_box = blob->bounding_box();
897 tprintf(
"FGN in dir %d for blob:", dir);
900 int top = blob_box.top();
901 int bottom = blob_box.bottom();
902 int left = blob_box.left();
903 int right = blob_box.right();
904 int width = right - left;
905 int height = top - bottom;
913 int line_trap_count = 0;
915 int min_good_overlap = (dir ==
BND_LEFT || dir ==
BND_RIGHT) ? height / 2 : width / 2;
916 int min_decent_overlap = (dir ==
BND_LEFT || dir ==
BND_RIGHT) ? height / 3 : width / 3;
918 min_good_overlap = min_decent_overlap = 1;
926 TBOX search_box = blob_box;
930 search_box.set_left(search_box.left() - search_pad);
933 search_box.set_right(search_box.right() + search_pad);
936 search_box.set_bottom(search_box.bottom() - search_pad);
939 search_box.set_top(search_box.top() + search_pad);
946 rectsearch.StartRectSearch(search_box);
947 BLOBNBOX *best_neighbour =
nullptr;
948 double best_goodness = 0.0;
949 bool best_is_good =
false;
951 while ((neighbour = rectsearch.NextRectSearch()) !=
nullptr) {
952 TBOX nbox = neighbour->bounding_box();
953 if (neighbour == blob) {
956 int mid_x = (nbox.left() + nbox.right()) / 2;
957 if (mid_x < blob->left_rule() || mid_x > blob->right_rule()) {
967 int n_width = nbox.width();
968 int n_height = nbox.height();
969 if (std::min(n_width, n_height) > line_trap_min &&
970 std::max(n_width, n_height) < line_trap_max) {
993 overlap = std::min(
static_cast<int>(nbox.top()), top) -
994 std::max(
static_cast<int>(nbox.bottom()), bottom);
995 if (overlap == nbox.height() && nbox.width() > nbox.height()) {
996 perp_overlap = nbox.width();
998 perp_overlap = overlap;
1000 gap = dir ==
BND_LEFT ? left - nbox.left() : nbox.right() - right;
1009 overlap = std::min(
static_cast<int>(nbox.right()), right) -
1010 std::max(
static_cast<int>(nbox.left()), left);
1011 if (overlap == nbox.width() && nbox.height() > nbox.width()) {
1012 perp_overlap = nbox.height();
1014 perp_overlap = overlap;
1016 gap = dir ==
BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top;
1025 if (-gap > overlap) {
1027 tprintf(
"Overlaps wrong way\n");
1031 if (perp_overlap < min_decent_overlap) {
1033 tprintf(
"Doesn't overlap enough\n");
1040 overlap >= min_good_overlap && !bad_sizes &&
1048 double goodness = (1.0 + is_good) * overlap / gap;
1050 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n", goodness, best_goodness,
1051 is_good, overlap, gap);
1053 if (goodness > best_goodness) {
1054 best_neighbour = neighbour;
1055 best_goodness = goodness;
1056 best_is_good = is_good;
1059 blob->set_neighbour(dir, best_neighbour, best_is_good);
1060 return line_trap_count;
// Adds every non-null direct neighbour of blob (one per direction,
// 0 .. BND_COUNT-1) to neighbours. Entries are inserted sorted by the left
// edge of their box, with the unique flag set so a blob is listed only once.
1064static void ListNeighbours(
const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) {
1065 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1067 BLOBNBOX *neighbour = blob->neighbour(bnd);
1068 if (neighbour !=
nullptr) {
1069 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
// Adds the neighbours of blob up to second order to neighbours: first the
// direct neighbours of blob itself, then the direct neighbours of each of its
// non-null neighbours (via ListNeighbours()).
1075static void List2ndNeighbours(
const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) {
1076 ListNeighbours(blob, neighbours);
1077 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1079 BLOBNBOX *neighbour = blob->neighbour(bnd);
1080 if (neighbour !=
nullptr) {
1081 ListNeighbours(neighbour, neighbours);
// Adds the neighbours of blob up to third order to neighbours: the
// second-order set of blob itself, then the second-order set of each of its
// non-null direct neighbours (via List2ndNeighbours()).
1087static void List3rdNeighbours(
const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) {
1088 List2ndNeighbours(blob, neighbours);
1089 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1091 BLOBNBOX *neighbour = blob->neighbour(bnd);
1092 if (neighbour !=
nullptr) {
1093 List2ndNeighbours(neighbour, neighbours);
// Classifies each blob in neighbours by its clipped gap ranges, obtained from
// MinMaxGapsClipped() as [h_min, h_max] and [v_min, v_max].
//  - h_max < v_min, or a leader on the left or right of the blob, selects the
//    first branch;
//  - otherwise v_max < h_min selects the second branch.
// When debug is set the gap ranges and the blob's bounding box are printed.
// NOTE(review): the two branches presumably increment *pure_h_count and
// *pure_v_count respectively - confirm.
1100static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST *neighbours,
int *pure_h_count,
1101 int *pure_v_count) {
1105 BLOBNBOX_C_IT it(neighbours);
1106 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1107 BLOBNBOX *blob = it.data();
1108 int h_min, h_max, v_min, v_max;
1109 blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max);
1111 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1113 if (h_max < v_min || blob->leader_on_left() || blob->leader_on_right()) {
1119 }
else if (v_max < h_min) {
1131 blob->bounding_box().print();
// Sets the vert_possible / horz_possible flags of blob from the gap
// statistics of its neighbourhood.
// Blobs for which DefiniteIndividualFlow() holds are handled up front
// (NOTE(review): presumably by returning early - confirm).
// Otherwise the neighbours out to third order are listed and
// CountNeighbourGaps() produces pure_h_count and pure_v_count.
// With a non-empty neighbour list both flags start as true; vertical is then
// ruled out when pure_h_count > 2 * pure_v_count, and horizontal is ruled out
// when pure_v_count > 2 * pure_h_count.
// The trailing pair of set_*_possible(false) calls clears both flags
// (NOTE(review): presumably the empty-neighbour-list case - confirm).
1139void StrokeWidth::SetNeighbourFlows(BLOBNBOX *blob) {
1140 if (blob->DefiniteIndividualFlow()) {
1146 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:", blob->flow(), blob->region_type());
1147 blob->bounding_box().print();
1149 BLOBNBOX_CLIST neighbours;
1150 List3rdNeighbours(blob, &neighbours);
1152 int pure_h_count = 0;
1153 int pure_v_count = 0;
1154 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1156 HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1);
1157 tprintf(
"SetFlows: h_count=%d, v_count=%d\n", pure_h_count, pure_v_count);
1159 if (!neighbours.empty()) {
1160 blob->set_vert_possible(
true);
1161 blob->set_horz_possible(
true);
1162 if (pure_h_count > 2 * pure_v_count) {
1164 blob->set_vert_possible(
false);
1165 }
else if (pure_v_count > 2 * pure_h_count) {
1167 blob->set_horz_possible(
false);
1171 blob->set_vert_possible(
false);
1172 blob->set_horz_possible(
false);
// Walks neighbours and tests each blob with UniquelyHorizontal() and
// UniquelyVertical(). The two tests are independent if-statements.
// NOTE(review): a hit presumably increments *pure_h_count or *pure_v_count
// respectively - confirm.
1177static void CountNeighbourTypes(BLOBNBOX_CLIST *neighbours,
int *pure_h_count,
int *pure_v_count) {
1178 BLOBNBOX_C_IT it(neighbours);
1179 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1180 BLOBNBOX *blob = it.data();
1181 if (blob->UniquelyHorizontal()) {
1184 if (blob->UniquelyVertical()) {
// Drops neighbours of blob that are clearly in the wrong direction by setting
// them to nullptr.
// Shape rules: the blob is first tested for both box dimensions exceeding
// 3x its area stroke width. A box more than 4x wider than tall loses its
// above/below neighbours; a box more than 4x taller than wide loses its
// left/right neighbours.
// Gap rules: using the clipped gap ranges from MinMaxGapsClipped(), the
// above/below neighbours are dropped when h_max + margin < v_min and
// h_max < margin / 2, or when the blob has a leader on either side; otherwise
// the left/right neighbours are dropped when v_max + margin < h_min and
// v_max < margin / 2.
// NOTE(review): the shape rules presumably apply only inside the 3x
// stroke-width test and return before the gap rules - confirm.
1193void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX *blob) {
1196 if ((blob->bounding_box().width() > 3 * blob->area_stroke_width() &&
1197 blob->bounding_box().height() > 3 * blob->area_stroke_width())) {
1199 if (blob->bounding_box().width() > 4 * blob->bounding_box().height()) {
1201 blob->set_neighbour(
BND_ABOVE,
nullptr,
false);
1202 blob->set_neighbour(
BND_BELOW,
nullptr,
false);
1205 if (blob->bounding_box().height() > 4 * blob->bounding_box().width()) {
1207 blob->set_neighbour(
BND_LEFT,
nullptr,
false);
1208 blob->set_neighbour(
BND_RIGHT,
nullptr,
false);
1215 int h_min, h_max, v_min, v_max;
1216 blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max);
1217 if ((h_max + margin < v_min && h_max < margin / 2) || blob->leader_on_left() ||
1218 blob->leader_on_right()) {
1220 blob->set_neighbour(
BND_ABOVE,
nullptr,
false);
1221 blob->set_neighbour(
BND_BELOW,
nullptr,
false);
1222 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1224 blob->set_neighbour(
BND_LEFT,
nullptr,
false);
1225 blob->set_neighbour(
BND_RIGHT,
nullptr,
false);
// Smooths the vert/horz flags of blob using the types of its neighbours.
// Acts only when the blob is still ambiguous (both flags set) or when
// reset_all is true. The neighbours out to second order are listed and
// CountNeighbourTypes() produces pure_h_count and pure_v_count:
//  - more horizontal than vertical neighbours, and the page mode is not
//    vertical-only: the blob becomes horizontal-only;
//  - more vertical than horizontal neighbours, and the page mode is not
//    horizontal-only: the blob becomes vertical-only.
// Ties leave the flags untouched. Debug output goes through HandleClick() and
// tprintf() for blobs selected by a debug condition.
1232void StrokeWidth::SmoothNeighbourTypes(
PageSegMode pageseg_mode,
bool reset_all, BLOBNBOX *blob) {
1233 if ((blob->vert_possible() && blob->horz_possible()) || reset_all) {
1235 BLOBNBOX_CLIST neighbours;
1236 List2ndNeighbours(blob, &neighbours);
1238 int pure_h_count = 0;
1239 int pure_v_count = 0;
1240 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1242 blob->bounding_box().bottom())) {
1243 HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1);
1244 tprintf(
"pure_h=%d, pure_v=%d\n", pure_h_count, pure_v_count);
1246 if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
1248 blob->set_vert_possible(
false);
1249 blob->set_horz_possible(
true);
1250 }
else if (pure_v_count > pure_h_count && !FindingHorizontalOnly(pageseg_mode)) {
1252 blob->set_horz_possible(
false);
1253 blob->set_vert_possible(
true);
1256 blob->bounding_box().bottom())) {
1257 HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1);
1258 tprintf(
"Clean on pass 3!\n");
1275 PageSegMode pageseg_mode,
const FCOORD &rerotation,
bool find_problems, TO_BLOCK *block,
1276 BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts,
1277 FCOORD *skew_angle) {
1278 if (!FindingHorizontalOnly(pageseg_mode)) {
1279 FindVerticalTextChains(part_grid);
1281 if (!FindingVerticalOnly(pageseg_mode)) {
1282 FindHorizontalTextChains(part_grid);
1284#ifndef GRAPHICS_DISABLED
1285 if (textord_tabfind_show_strokewidths) {
1286 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1287 part_grid->DisplayBoxes(chains_win_);
1291 if (find_problems) {
1295 part_grid->SplitOverlappingPartitions(big_parts);
1296 EasyMerges(part_grid);
1297 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1299 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box, rerotation)) {
1302 while (part_grid->GridSmoothNeighbours(
BTFT_NEIGHBOURS, nontext_map_, grid_box, rerotation)) {
1305 int pre_overlap = part_grid->ComputeTotalOverlap(
nullptr);
1306 TestDiacritics(part_grid, block);
1307 MergeDiacritics(block, part_grid);
1308 if (find_problems && diacritic_blobs !=
nullptr &&
1309 DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid, diacritic_blobs)) {
1312#ifndef GRAPHICS_DISABLED
1313 if (textord_tabfind_show_strokewidths) {
1314 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1315 part_grid->DisplayBoxes(textlines_win_);
1316 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1319 PartitionRemainingBlobs(pageseg_mode, part_grid);
1320 part_grid->SplitOverlappingPartitions(big_parts);
1321 EasyMerges(part_grid);
1322 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box, rerotation)) {
1325 while (part_grid->GridSmoothNeighbours(
BTFT_NEIGHBOURS, nontext_map_, grid_box, rerotation)) {
1329 while (part_grid->GridSmoothNeighbours(
BTFT_STRONG_CHAIN, nontext_map_, grid_box, rerotation)) {
1332#ifndef GRAPHICS_DISABLED
1333 if (textord_tabfind_show_strokewidths) {
1334 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1335 part_grid->DisplayBoxes(smoothed_win_);
// Checks whether merging diacritics made partition overlap worse and, if so,
// moves diacritics that sit in noisy areas out of the noise list.
// post_overlap is obtained from part_grid->ComputeTotalOverlap(), which can
// also hand back a grid of noisy areas through noise_grid. pre_overlap == 0
// is special-cased before any comparison is made.
// When a noise grid exists it can be displayed in the "Noise Areas" debug
// window. On the removal path, non-leader partitions are deleted from
// part_grid and each blob of block->noise_blobs has its neighbours cleared;
// a blob whose box hits a partition in a rectangle search is made to own its
// cblob, gets its bounding box recomputed and is moved to diacritic_blobs.
// noise_grid->DeleteParts() is called on both visible paths.
// NOTE(review): blobs that are not diacritics or already have an owner are
// presumably skipped, and the rectangle search presumably runs over
// noise_grid - confirm.
1345bool StrokeWidth::DetectAndRemoveNoise(
int pre_overlap,
const TBOX &grid_box, TO_BLOCK *block,
1346 ColPartitionGrid *part_grid,
1347 BLOBNBOX_LIST *diacritic_blobs) {
1348 ColPartitionGrid *noise_grid =
nullptr;
1349 int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
1350 if (pre_overlap == 0) {
1353 BLOBNBOX_IT diacritic_it(diacritic_blobs);
1354 if (noise_grid !=
nullptr) {
1358#ifndef GRAPHICS_DISABLED
1359 if (textord_tabfind_show_strokewidths) {
1360 ScrollView *noise_win =
MakeWindow(1000, 500,
"Noise Areas");
1361 noise_grid->DisplayBoxes(noise_win);
1364 part_grid->DeleteNonLeaderParts();
1365 BLOBNBOX_IT blob_it(&block->noise_blobs);
1367 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1368 BLOBNBOX *blob = blob_it.data();
1369 blob->ClearNeighbours();
1370 if (!blob->IsDiacritic() || blob->owner() !=
nullptr) {
1373 TBOX search_box(blob->bounding_box());
1375 rsearch.StartRectSearch(search_box);
1376 ColPartition *part = rsearch.NextRectSearch();
1377 if (part !=
nullptr) {
1379 blob->set_owns_cblob(
true);
1380 blob->compute_bounding_box();
1381 diacritic_it.add_after_then_move(blob_it.extract());
1384 noise_grid->DeleteParts();
1388 noise_grid->DeleteParts();
// Helper for building vertical text chains. Looks up the neighbour of blob in
// direction dir and tests it in two steps:
//  1. the neighbour is missing, already owned by a partition, or uniquely
//     horizontal;
//  2. the neighbour's own neighbour in the opposite direction is blob, i.e.
//     the link is mutual.
// NOTE(review): step 1 presumably returns nullptr and step 2 presumably
// returns the neighbour - confirm.
1398static BLOBNBOX *MutualUnusedVNeighbour(
const BLOBNBOX *blob,
BlobNeighbourDir dir) {
1399 BLOBNBOX *next_blob = blob->
neighbour(dir);
1400 if (next_blob ==
nullptr || next_blob->owner() !=
nullptr || next_blob->UniquelyHorizontal()) {
1403 if (next_blob->neighbour(
DirOtherWay(dir)) == blob) {
// Builds partitions from chains of mutually linked vertical blobs.
// A full grid search looks for seed blobs that are unowned, uniquely vertical
// and have a mutual unused vertical neighbour above. From each seed the chain
// is followed upwards with MutualUnusedVNeighbour(..., BND_ABOVE) until it
// ends, then downwards from the seed with BND_BELOW. The resulting partition
// is handed to CompletePartition() for insertion into part_grid.
1410void StrokeWidth::FindVerticalTextChains(ColPartitionGrid *part_grid) {
1416 gsearch.StartFullSearch();
1417 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1421 if (bbox->owner() ==
nullptr && bbox->UniquelyVertical() &&
1422 (blob = MutualUnusedVNeighbour(bbox,
BND_ABOVE)) !=
nullptr) {
1426 while (blob !=
nullptr) {
1428 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
1430 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1431 while (blob !=
nullptr) {
1433 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1435 CompletePartition(pageseg_mode, part, part_grid);
// Helper for building horizontal text chains. Looks up the neighbour of blob
// in direction dir and tests it in two steps:
//  1. the neighbour is missing, already owned by a partition, or uniquely
//     vertical;
//  2. the neighbour's own neighbour in the opposite direction is blob, i.e.
//     the link is mutual.
// NOTE(review): step 1 presumably returns nullptr and step 2 presumably
// returns the neighbour - confirm.
1444static BLOBNBOX *MutualUnusedHNeighbour(
const BLOBNBOX *blob,
BlobNeighbourDir dir) {
1445 BLOBNBOX *next_blob = blob->
neighbour(dir);
1446 if (next_blob ==
nullptr || next_blob->owner() !=
nullptr || next_blob->UniquelyVertical()) {
1449 if (next_blob->neighbour(
DirOtherWay(dir)) == blob) {
// Builds partitions from chains of mutually linked horizontal blobs.
// A full grid search looks for seed blobs that are unowned, uniquely
// horizontal and have a mutual unused horizontal neighbour to the right. From
// each seed a BRT_TEXT partition is created; the chain is followed rightwards
// with MutualUnusedHNeighbour(..., BND_RIGHT) until it ends, then leftwards
// from the seed with BND_LEFT. The partition is handed to CompletePartition()
// for insertion into part_grid.
//
// The leftward walk uses MutualUnusedHNeighbour for every step, like the
// rightward walk. It previously continued with MutualUnusedVNeighbour, which
// tests the next blob with UniquelyHorizontal() rather than
// UniquelyVertical(), i.e. it applies the vertical-chain criterion to a
// horizontal text line.
1456void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid *part_grid) {
1462 gsearch.StartFullSearch();
1463 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1465 if (bbox->owner() ==
nullptr && bbox->UniquelyHorizontal() &&
1466 (blob = MutualUnusedHNeighbour(bbox,
BND_RIGHT)) !=
nullptr) {
1468 auto *part =
new ColPartition(
BRT_TEXT, ICOORD(0, 1));
1470 while (blob !=
nullptr) {
1472 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
1474 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1475 while (blob !=
nullptr) {
1477 blob = MutualUnusedHNeighbour(blob,
BND_LEFT);
1479 CompletePartition(pageseg_mode, part, part_grid);
// Identifies diacritics among the noise blobs and the regular blobs of block.
// Both block->noise_blobs and block->blobs are inserted into a local grid
// (small_grid) used for the neighbourhood tests.
//  - Noise blobs that are unowned and not yet marked as diacritics are tested
//    with DiacriticBlob().
//  - Regular blobs already marked as diacritics are moved to the end of the
//    noise list.
//  - Unowned regular blobs that pass DiacriticBlob() are counted as medium
//    diacritics and moved to the end of the noise list.
//  - For a blob owned by a small partition (fewer than 3 boxes, not owned by
//    a block), every box of the partition is tested; if the whole partition
//    qualifies, its boxes are extracted and disowned, the blob is moved to
//    the noise list and the partition is removed from part_grid.
// With textord_tabfind_show_strokewidths set, the small/medium totals are
// printed.
1491void StrokeWidth::TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block) {
1493 small_grid.InsertBlobList(&block->noise_blobs);
1494 small_grid.InsertBlobList(&block->blobs);
1495 int medium_diacritics = 0;
1496 int small_diacritics = 0;
1497 BLOBNBOX_IT small_it(&block->noise_blobs);
1498 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1499 BLOBNBOX *blob = small_it.data();
1500 if (blob->owner() ==
nullptr && !blob->IsDiacritic() && DiacriticBlob(&small_grid, blob)) {
1504 BLOBNBOX_IT blob_it(&block->blobs);
1505 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1506 BLOBNBOX *blob = blob_it.data();
1507 if (blob->IsDiacritic()) {
1508 small_it.add_to_end(blob_it.extract());
1511 ColPartition *part = blob->owner();
1512 if (part ==
nullptr && DiacriticBlob(&small_grid, blob)) {
1513 ++medium_diacritics;
1515 small_it.add_to_end(blob_it.extract());
1516 }
else if (part !=
nullptr && !part->block_owned() && part->boxes_count() < 3) {
1522 BLOBNBOX_C_IT box_it(part->boxes());
1523 for (box_it.mark_cycle_pt();
1524 !box_it.cycled_list() && DiacriticBlob(&small_grid, box_it.data()); box_it.forward()) {
1527 if (box_it.cycled_list()) {
1529 while (!box_it.empty()) {
1535 BLOBNBOX *box = box_it.extract();
1536 box->set_owner(
nullptr);
1538 ++medium_diacritics;
1545 small_it.add_to_end(blob_it.extract());
1546 part_grid->RemoveBBox(part);
1550 blob->bounding_box().bottom())) {
1551 tprintf(
"Blob not available to be a diacritic at:");
1552 blob->bounding_box().print();
1555 if (textord_tabfind_show_strokewidths) {
1556 tprintf(
"Found %d small diacritics, %d medium\n", small_diacritics, medium_diacritics);
// Tests whether blob can be treated as a diacritic of a nearby base character
// found in small_grid.
// A rectangle search (unique mode) runs over the blob's box padded by x_pad
// and y_pad. Neighbours are examined for ownership ("not strong enough"),
// minimum height ("not big enough") and distance ("too far away"), each with
// its own debug message. Two candidates are tracked:
//  - best_x_overlap: the neighbour with the smallest y_gap, measured against
//    the neighbour's bounds restricted (BoundsWithinLimits) to the blob's x
//    range widened by the blob's own width on each side;
//  - best_y_overlap: among neighbours passing ConfirmNoTabViolation(), the one
//    with the smallest total distance.
// The x-overlap candidate wins when there is no y-overlap candidate or when
// best_xbox has a major y overlap with the y-overlap candidate's box. The
// y-overlap candidate is accepted only if DiacriticXGapFilled() and
// NoNoiseInBetween() both hold. The winner sets the blob's diacritic box and
// base character blob. If neither succeeds the debug output reports the best
// gaps found.
// NOTE(review): the return values for each outcome are presumed to be
// true on acceptance and false otherwise - confirm.
1566bool StrokeWidth::DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob) {
1570 TBOX small_box(blob->bounding_box());
1573 tprintf(
"Testing blob for diacriticness at:");
1576 int x = (small_box.left() + small_box.right()) / 2;
1577 int y = (small_box.bottom() + small_box.top()) / 2;
1580 int height = small_box.height();
1593 BLOBNBOX *best_x_overlap =
nullptr;
1594 BLOBNBOX *best_y_overlap =
nullptr;
1595 int best_total_dist = 0;
1599 TBOX search_box(small_box);
1602 search_box.pad(x_pad, y_pad);
1604 rsearch.SetUniqueMode(
true);
1606 rsearch.StartRectSearch(search_box);
1607 BLOBNBOX *neighbour;
1608 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1610 neighbour->owner() == blob->owner()) {
1613 TBOX nbox = neighbour->bounding_box();
1614 if (neighbour->owner() ==
nullptr || neighbour->owner()->IsVerticalType() ||
1617 tprintf(
"Neighbour not strong enough:");
1622 if (nbox.height() < min_height) {
1624 tprintf(
"Neighbour not big enough:");
1629 int x_gap = small_box.x_gap(nbox);
1630 int y_gap = small_box.y_gap(nbox);
1633 tprintf(
"xgap=%d, y=%d, total dist=%d\n", x_gap, y_gap, total_distance);
1637 tprintf(
"Neighbour with median size %d too far away:", neighbour->owner()->median_height());
1638 neighbour->bounding_box().print();
1644 tprintf(
"Computing reduced box for :");
1647 int left = small_box.left() - small_box.width();
1648 int right = small_box.right() + small_box.width();
1649 nbox = neighbour->BoundsWithinLimits(left, right);
1650 y_gap = small_box.y_gap(nbox);
1651 if (best_x_overlap ==
nullptr || y_gap < best_y_gap) {
1652 best_x_overlap = neighbour;
1660 tprintf(
"Shrunken box doesn't win:");
1663 }
else if (blob->ConfirmNoTabViolation(*neighbour)) {
1664 if (best_y_overlap ==
nullptr || total_distance < best_total_dist) {
1666 tprintf(
"New best y overlap:");
1669 best_y_overlap = neighbour;
1670 best_total_dist = total_distance;
1672 tprintf(
"New y overlap box doesn't win:");
1676 tprintf(
"Neighbour wrong side of a tab:");
1680 if (best_x_overlap !=
nullptr &&
1681 (best_y_overlap ==
nullptr || best_xbox.major_y_overlap(best_y_overlap->bounding_box()))) {
1682 blob->set_diacritic_box(best_xbox);
1683 blob->set_base_char_blob(best_x_overlap);
1685 tprintf(
"DiacriticBlob OK! (x-overlap:");
1691 if (best_y_overlap !=
nullptr &&
1692 DiacriticXGapFilled(small_grid, small_box, best_y_overlap->bounding_box()) &&
1693 NoNoiseInBetween(small_box, best_y_overlap->bounding_box())) {
1694 blob->set_diacritic_box(best_y_overlap->bounding_box());
1695 blob->set_base_char_blob(best_y_overlap);
1697 tprintf(
"DiacriticBlob OK! (y-overlap:");
1699 best_y_overlap->bounding_box().print();
1704 tprintf(
"DiacriticBlob fails:");
1706 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1707 if (best_y_overlap !=
nullptr) {
1708 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1709 DiacriticXGapFilled(small_grid, small_box, best_y_overlap->bounding_box()),
1710 NoNoiseInBetween(small_box, best_y_overlap->bounding_box()));
// Checks whether the horizontal gap between diacritic_box and base_box is
// bridged by other blobs in grid.
// occupied_box starts as base_box. While the x gap from the diacritic to
// occupied_box exceeds max_gap, a strip max_gap wide is searched on the side
// of occupied_box that faces the diacritic. Any blob found that is closer to
// the diacritic than the current gap extends occupied_box to cover its left
// and/or right edge, after which the gap is re-measured.
// NOTE(review): the neighbour == nullptr test after the inner search
// presumably detects that no blob advanced the occupied box and ends the
// outer loop - confirm, together with the final return expression.
1729bool StrokeWidth::DiacriticXGapFilled(BlobGrid *grid,
const TBOX &diacritic_box,
1730 const TBOX &base_box) {
1733 TBOX occupied_box(base_box);
1735 while ((diacritic_gap = diacritic_box.x_gap(occupied_box)) > max_gap) {
1736 TBOX search_box(occupied_box);
1737 if (diacritic_box.left() > search_box.right()) {
1739 search_box.set_left(search_box.right());
1740 search_box.set_right(search_box.left() + max_gap);
1743 search_box.set_right(search_box.left());
1744 search_box.set_left(search_box.left() - max_gap);
1747 rsearch.StartRectSearch(search_box);
1748 BLOBNBOX *neighbour;
1749 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1750 const TBOX &nbox = neighbour->bounding_box();
1751 if (nbox.x_gap(diacritic_box) < diacritic_gap) {
1752 if (nbox.left() < occupied_box.left()) {
1753 occupied_box.set_left(nbox.left());
1755 if (nbox.right() > occupied_box.right()) {
1756 occupied_box.set_right(nbox.right());
1761 if (neighbour ==
nullptr) {
// Attaches diacritic blobs from block->noise_blobs to the partition that owns
// their base character.
// For each noise blob with a base character blob, the owning partition of
// that base character is looked up. If the partition exists, is not owned by
// a block, and the blob itself is an unowned diacritic, the partition is
// removed from part_grid, the blob takes over the partition's blob type and
// flow and is given the partition as owner, and the partition is re-inserted
// into the grid (its box may have changed).
// The blob's base character link is reset to nullptr afterwards.
1769void StrokeWidth::MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid) {
1770 BLOBNBOX_IT small_it(&block->noise_blobs);
1771 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1772 BLOBNBOX *blob = small_it.data();
1773 if (blob->base_char_blob() !=
nullptr) {
1774 ColPartition *part = blob->base_char_blob()->owner();
1777 if (part !=
nullptr && !part->block_owned() && blob->owner() ==
nullptr &&
1778 blob->IsDiacritic()) {
1781 part_grid->RemoveBBox(part);
1783 blob->set_region_type(part->blob_type());
1784 blob->set_flow(part->flow());
1785 blob->set_owner(part);
1786 part_grid->InsertBBox(
true,
true, part);
1789 blob->set_base_char_blob(
nullptr);
// Walks block->large_blobs and looks at the partition that owns each blob.
// Blobs without an owner are tested first.
// NOTE(review): unowned blobs are presumably skipped, and owned ones
// presumably have their partition moved from part_grid to big_parts -
// confirm.
1797void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid,
1798 ColPartition_LIST *big_parts) {
1799 BLOBNBOX_IT large_it(&block->large_blobs);
1800 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1801 BLOBNBOX *blob = large_it.data();
1802 ColPartition *big_part = blob->owner();
1803 if (big_part ==
nullptr) {
// Puts every blob that is still unowned into a partition, working one grid
// cell at a time.
// During a full grid search the current cell is tracked through GridX() /
// GridY(). Whenever the cell changes, the blobs collected so far are turned
// into partitions by MakePartitionsFromCellList() and the per-cell state is
// reset (cell_all_noise back to true). Unowned blobs are appended to
// cell_list; cell_all_noise is cleared on two branches of the loop, and is
// passed as the combine flag. A final call flushes the last cell.
1813void StrokeWidth::PartitionRemainingBlobs(
PageSegMode pageseg_mode, ColPartitionGrid *part_grid) {
1816 int prev_grid_x = -1;
1817 int prev_grid_y = -1;
1818 BLOBNBOX_CLIST cell_list;
1819 BLOBNBOX_C_IT cell_it(&cell_list);
1820 bool cell_all_noise =
true;
1821 gsearch.StartFullSearch();
1822 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1823 int grid_x = gsearch.GridX();
1824 int grid_y = gsearch.GridY();
1825 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1827 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, &cell_list);
1828 cell_it.set_to_list(&cell_list);
1829 prev_grid_x = grid_x;
1830 prev_grid_y = grid_y;
1831 cell_all_noise =
true;
1833 if (bbox->owner() ==
nullptr) {
1834 cell_it.add_to_end(bbox);
1836 cell_all_noise =
false;
1839 cell_all_noise =
false;
1842 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, &cell_list);
// Turns the blobs in cell_list into partitions and empties the list.
// An empty list is handled up front. Two strategies are visible:
//  - one partition for the whole cell: the first extracted blob supplies the
//    region type and flow, every remaining blob is added with AddBox(), and
//    the partition is completed once;
//  - one partition per blob: each extracted blob gets its own partition with
//    its own region type and flow, completed individually.
// Every partition goes through CompletePartition(), which inserts it into
// part_grid.
// NOTE(review): the strategy is presumably selected by combine - confirm.
1847void StrokeWidth::MakePartitionsFromCellList(
PageSegMode pageseg_mode,
bool combine,
1848 ColPartitionGrid *part_grid,
1849 BLOBNBOX_CLIST *cell_list) {
1850 if (cell_list->empty()) {
1853 BLOBNBOX_C_IT cell_it(cell_list);
1855 BLOBNBOX *bbox = cell_it.extract();
1856 auto *part =
new ColPartition(bbox->region_type(), ICOORD(0, 1));
1858 part->set_flow(bbox->flow());
1859 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1860 part->AddBox(cell_it.extract());
1862 CompletePartition(pageseg_mode, part, part_grid);
1864 for (; !cell_it.empty(); cell_it.forward()) {
1865 BLOBNBOX *bbox = cell_it.extract();
1866 auto *part =
new ColPartition(bbox->region_type(), ICOORD(0, 1));
1867 part->set_flow(bbox->flow());
1869 CompletePartition(pageseg_mode, part, part_grid);
// Finalises a freshly built partition: computes its limits, derives a signed
// `value`, optionally overrides it according to the page segmentation mode,
// sets the partition's region/flow types from it, and inserts the partition
// into part_grid.
//   pageseg_mode - consulted through FindingVerticalOnly / FindingHorizontalOnly.
//   part         - partition to finalise.
//   part_grid    - grid that receives the partition.
// NOTE(review): the declaration and computation of `value` (original lines
// 1880-1882) and line 1889 are omitted from this listing.
1876void StrokeWidth::CompletePartition(
PageSegMode pageseg_mode, ColPartition *part,
1877 ColPartitionGrid *part_grid) {
1878 part->ComputeLimits();
1879 TBOX box = part->bounding_box();
// A positive value while only vertical text is sought is replaced by 0 for a
// single-box partition and by -2 otherwise.
1883 if (
value > 0 && FindingVerticalOnly(pageseg_mode)) {
1884 value = part->boxes_count() == 1 ? 0 : -2;
1885 }
// Mirror case: a negative value while only horizontal text is sought becomes
// 0 for a single-box partition and 2 otherwise.
else if (
value < 0 && FindingHorizontalOnly(pageseg_mode)) {
1886 value = part->boxes_count() == 1 ? 0 : 2;
1888 part->SetRegionAndFlowTypesFromProjectionValue(
value);
// Insert with both spread flags set to true.
1890 part_grid->InsertBBox(
true,
true, part);
// Asks part_grid to perform merges, supplying two callbacks bound to this
// StrokeWidth instance: OrientationSearchBox and ConfirmEasyMerge. The
// placeholders _1 and _2 forward the two arguments that Merges passes to each
// callback.
//   part_grid - grid whose Merges() routine is invoked.
1895void StrokeWidth::EasyMerges(ColPartitionGrid *part_grid) {
// Brings _1 and _2 into scope for the std::bind calls below.
1896 using namespace std::placeholders;
1897 part_grid->Merges(std::bind(&StrokeWidth::OrientationSearchBox,
this, _1, _2),
1898 std::bind(&StrokeWidth::ConfirmEasyMerge,
this, _1, _2));
// Enlarges *box in place according to the partition's orientation. For a
// vertical-type partition the top is raised and the bottom lowered by the box
// width; the other visible statements move the left and right edges outwards
// by the box height.
//   part - partition whose IsVerticalType() selects the direction.
//   box  - search box, modified in place.
// NOTE(review): the `else` line (1908) and the return statement are omitted
// from this listing; the left/right adjustment is presumably the non-vertical
// branch - TODO confirm.
1904bool StrokeWidth::OrientationSearchBox(ColPartition *part,
TBOX *box) {
1905 if (part->IsVerticalType()) {
// Grow vertically by one box width on each side.
1906 box->set_top(box->top() + box->width());
1907 box->set_bottom(box->bottom() - box->width());
// Grow horizontally by one box height on each side.
1909 box->set_left(box->left() - box->height());
1910 box->set_right(box->right() + box->height());
// Decides whether partitions p1 and p2 may be merged. Three guard conditions
// are evaluated in order; if control reaches the end, the result is whatever
// NoNoiseInBetween reports for the two bounding boxes.
//   p1, p2 - the candidate partitions.
// NOTE(review): the bodies of the three guards are omitted from this listing;
// presumably each one rejects the merge - TODO confirm.
1916bool StrokeWidth::ConfirmEasyMerge(
const ColPartition *p1,
const ColPartition *p2) {
// Guard 1: at least one partition is vertical type, their horizontal core
// overlap is not positive, and either both are non-singletons or their
// bounding boxes lack a major overlap.
1923 if ((p1->IsVerticalType() || p2->IsVerticalType()) && p1->HCoreOverlap(*p2) <= 0 &&
1924 ((!p1->IsSingleton() && !p2->IsSingleton()) ||
1925 !p1->bounding_box().major_overlap(p2->bounding_box()))) {
// Guard 2: the horizontal counterpart, using the vertical core overlap. When
// a singleton is involved, the guard additionally requires that neither
// partition is an OK diacritic merge for the other.
1928 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) && p1->VCoreOverlap(*p2) <= 0 &&
1929 ((!p1->IsSingleton() && !p2->IsSingleton()) ||
1930 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1931 !p1->OKDiacriticMerge(*p2,
false) && !p2->OKDiacriticMerge(*p1,
false)))) {
// Guard 3: p1 cannot confirm that merging with p2 is free of tab violations.
1934 if (!p1->ConfirmNoTabViolation(*p2)) {
// Final check on the region between the two bounding boxes.
1940 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
// Const predicate over two boxes; ConfirmEasyMerge returns its result for the
// bounding boxes of the two partitions being considered.
//   box1, box2 - the boxes to test.
// NOTE(review): the body is omitted from this listing, so the exact test is
// not visible here; presumably it reports whether the area between the boxes
// is free of noise - TODO confirm.
1944bool StrokeWidth::NoNoiseInBetween(
const TBOX &box1,
const TBOX &box2)
const {
1948#ifndef GRAPHICS_DISABLED
// Debug display: runs a full search over the grid and draws one rectangle per
// blob at its bounding box. Before drawing, the blob's GoodTextBlob() score and
// its UniquelyVertical() / UniquelyHorizontal() flags are examined.
//   window_name, x, y - not referenced in the visible lines; presumably used
//                       to create the window - TODO confirm.
// NOTE(review): window creation, the declarations of gsearch/bbox, the bodies
// of the conditionals (presumably pen/colour selection) and the return
// statement are omitted from this listing.
1953ScrollView *StrokeWidth::DisplayGoodBlobs(
const char *window_name,
int x,
int y) {
1960 gsearch.StartFullSearch();
1962 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
// Cache the box edges used for the rectangle below.
1963 const TBOX &box = bbox->bounding_box();
1964 int left_x = box.left();
1965 int right_x = box.right();
1966 int top_y = box.top();
1967 int bottom_y = box.bottom();
1968 int goodness = bbox->GoodTextBlob();
// Orientation checks; their bodies are not shown.
1970 if (bbox->UniquelyVertical()) {
1973 if (bbox->UniquelyHorizontal()) {
// Goodness levels 0 and 1 are distinguished from the remaining values.
1978 if (goodness == 0) {
1980 }
else if (goodness == 1) {
1987 window->Rectangle(left_x, bottom_y, right_x, top_y);
// Draws a vertical line in `window` at the horizontal midpoint of the blob's
// bounding box. The line runs from the higher of (blob top, base-char top)
// down to the lower of (blob bottom, base-char bottom).
//   blob   - blob supplying the bounding box and the base_char_top/bottom
//            values.
//   window - ScrollView that receives the line.
1993static void DrawDiacriticJoiner(
const BLOBNBOX *blob, ScrollView *window) {
1994 const TBOX &blob_box(blob->bounding_box());
// The box edges are cast to int so that both arguments of std::max / std::min
// have the same type (assumes base_char_top()/base_char_bottom() return int -
// TODO confirm).
1995 int top = std::max(
static_cast<int>(blob_box.top()), blob->base_char_top());
1996 int bottom = std::min(
static_cast<int>(blob_box.bottom()), blob->base_char_bottom());
// Integer midpoint of the left and right edges.
1997 int x = (blob_box.left() + blob_box.right()) / 2;
1998 window->Line(
x, top,
x, bottom);
// Debug display: draws every blob in block->blobs and then every blob in
// block->noise_blobs as a rectangle at its bounding box. Blobs flagged as
// diacritics additionally get a joiner line via DrawDiacriticJoiner.
//   window_name, x, y - not referenced in the visible lines; presumably used
//                       to create the window - TODO confirm.
//   block             - supplies the blobs and noise_blobs lists.
// NOTE(review): window creation, several pen-selection lines (e.g. 2011,
// 2023, 2026) and the return statement are omitted from this listing.
2002ScrollView *StrokeWidth::DisplayDiacritics(
const char *window_name,
int x,
int y, TO_BLOCK *block) {
// Pass 1: the block's regular blobs.
2007 BLOBNBOX_IT it(&block->blobs);
2008 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2009 BLOBNBOX *blob = it.data();
2010 if (blob->IsDiacritic()) {
2012 DrawDiacriticJoiner(blob, window);
// Pen set from the blob's own box colour; the branch this statement belongs to
// is not visible (line 2013 is omitted).
2014 window->Pen(blob->BoxColor());
2016 const TBOX &box = blob->bounding_box();
2017 window->Rectangle(box.left(), box.bottom(), box.right(), box.top());
// Pass 2: the same iterator is re-seated on the noise blobs.
2019 it.set_to_list(&block->noise_blobs);
2020 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2021 BLOBNBOX *blob = it.data();
2022 if (blob->IsDiacritic()) {
2024 DrawDiacriticJoiner(blob, window);
2028 const TBOX &box = blob->bounding_box();
2029 window->Rectangle(box.left(), box.bottom(), box.right(), box.top());
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
const double kMaxDiacriticDistanceRatio
const int kLineResiduePadRatio
const double kNoiseOverlapAreaFactor
const int kCJKMaxComponents
@ PSM_SINGLE_BLOCK_VERT_TEXT
@ PSM_SINGLE_COLUMN
Assume a single column of text of variable sizes.
const double kMinDiacriticSizeRatio
const double kCJKBrokenDistanceFraction
const int kLineTrapLongest
void tprintf(const char *format,...)
const double kCJKAspectRatio
int IntCastRounded(double x)
const double kStrokeWidthTolerance
const double kNoiseOverlapGrowthFactor
const double kCJKAspectRatioIncrease
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
const double kNeighbourSearchFactor
const double kLineResidueAspectRatio
int textord_debug_tabfind
const double kBrokenCJKIterationFraction
const int kMaxCJKSizeRatio
const double kDiacriticXPadRatio
const double kLineResidueSizeRatio
const double kMaxDiacriticGapToBaseCharHeight
const int kLineTrapShortest
const double kStrokeWidthFractionTolerance
const double kStrokeWidthFractionCJK
const double kStrokeWidthCJK
const double kDiacriticYPadRatio
BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir)
const float kSizeRatioToReject
const int kMostlyOneDirRatio
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
float vert_stroke_width() const
bool good_stroke_neighbour(BlobNeighbourDir n) const
const TBOX & bounding_box() const
void NeighbourGaps(int gaps[BND_COUNT]) const
bool UniquelyHorizontal() const
bool UniquelyVertical() const
bool vert_possible() const
BlobTextFlowType flow() const
BLOBNBOX * neighbour(BlobNeighbourDir n) const
float horz_stroke_width() const
bool horz_possible() const
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
static bool UnMergeableType(BlobRegionType type)
BLOBNBOX_LIST small_blobs
BLOBNBOX_LIST large_blobs
BLOBNBOX_LIST noise_blobs
void set_y(float yin)
rewrite function
void set_x(float xin)
rewrite function
TDimension height() const
TDimension bottom() const
void pad(int xpad, int ypad)
bool contains(const FCOORD pt) const
static bool WithinTestRegion(int detail_level, int x, int y)
void StartRadSearch(int x, int y, int max_radius)
void StartRectSearch(const TBOX &rect)
const ICOORD & bleft() const
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
const ICOORD & tright() const
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
virtual void HandleClick(int x, int y)
ScrollView * MakeWindow(int x, int y, const char *window_name)
void RemoveBBox(BLOBNBOX *bbox)
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBlobList(BLOBNBOX_LIST *blobs)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Image pix)
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
void HandleClick(int x, int y) override
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
static bool DifferentSizes(int size1, int size2)
static bool VeryDifferentSizes(int size1, int size2)
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map)
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
void DisplayProjection() const
std::unique_ptr< SVEvent > AwaitEvent(SVEventType type)
void Rectangle(int x1, int y1, int x2, int y2)