21# include "config_auto.h"
// File-local boolean parameters, all defaulting to false. Each one is tested
// with a plain `if (...)` elsewhere in this file to decide whether the
// corresponding intermediate image (input binary image, special-character
// image, seed image, merged image) is saved.
45static BOOL_VAR(equationdetect_save_bi_image,
false,
"Save input bi image");
46static BOOL_VAR(equationdetect_save_spt_image,
false,
"Save special character image");
47static BOOL_VAR(equationdetect_save_seed_image,
false,
"Save the seed image");
48static BOOL_VAR(equationdetect_save_merged_image,
false,
"Save the merged image");
// Comparator written in the C qsort shape: p1 and p2 each point at a
// `ColPartition *` element, and the result is an int.
// Orders partitions by the top edge of their bounding box, highest first:
// the result is positive when the second partition's top is above the
// first's, zero when the tops are equal, negative otherwise.
53static int SortCPByTopReverse(
const void *p1,
const void *p2) {
// Each argument is the address of an array slot holding a ColPartition*.
54 const ColPartition *cp1 = *
static_cast<ColPartition *
const *
>(p1);
55 const ColPartition *cp2 = *
static_cast<ColPartition *
const *
>(p2);
57 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box());
// Operands are swapped (box2 - box1) to get descending order by top.
58 return box2.top() - box1.top();
// Comparator written in the C qsort shape: p1 and p2 each point at a
// `ColPartition *` element, and the result is an int.
// Orders partitions by the bottom edge of their bounding box, lowest first:
// the result is negative when the first partition's bottom is below the
// second's, zero when equal, positive otherwise.
61static int SortCPByBottom(
const void *p1,
const void *p2) {
// Each argument is the address of an array slot holding a ColPartition*.
62 const ColPartition *cp1 = *
static_cast<ColPartition *
const *
>(p1);
63 const ColPartition *cp2 = *
static_cast<ColPartition *
const *
>(p2);
65 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box());
// Ascending order by bottom coordinate.
66 return box1.bottom() - box2.bottom();
// Comparator written in the C qsort shape: p1 and p2 each point at a
// `ColPartition *` element, and the result is an int.
// Orders partitions by bounding-box height, shortest first: the result is
// negative when the first partition is shorter, zero when the heights are
// equal, positive otherwise.
//
// NOTE(review): this file also passes &SortCPByHeight as the comparator of a
// std::sort over a std::vector<ColPartition *> (the text_parts median-height
// computation). std::sort calls the comparator with the elements themselves,
// not their addresses, and expects a bool strict-weak-ordering, whereas this
// function dereferences its arguments as `ColPartition *const *` and returns
// a signed difference (non-zero for any unequal pair). Confirm that call site
// and consider a `bool(const ColPartition *, const ColPartition *)` predicate
// there instead.
69static int SortCPByHeight(
const void *p1,
const void *p2) {
// Each argument is treated as the address of a slot holding a ColPartition*.
70 const ColPartition *cp1 = *
static_cast<ColPartition *
const *
>(p1);
71 const ColPartition *cp2 = *
static_cast<ColPartition *
const *
>(p2);
73 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box());
// Ascending order by height.
74 return box1.height() - box2.height();
99 const char *default_name =
"equ";
100 if (equ_name ==
nullptr) {
101 equ_name = default_name;
109 "Warning: equation region detection requested,"
110 " but %s failed to load from %s\n",
111 equ_name, equ_datapath);
130 if (to_block ==
nullptr) {
131 tprintf(
"Warning: input to_block is nullptr!\n");
135 std::vector<BLOBNBOX_LIST *> blob_lists;
136 blob_lists.push_back(&(to_block->
blobs));
138 for (
auto &blob_list : blob_lists) {
139 BLOBNBOX_IT bbox_it(blob_list);
140 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
141 bbox_it.data()->set_special_text_type(
BSTT_NONE);
156 BLOB_CHOICE_LIST ratings_equ, ratings_lang;
166 const float x_orig = (box.
left() + box.
right()) / 2.0f, y_orig = box.
bottom();
167 std::unique_ptr<TBLOB> normed_blob(
new TBLOB(*tblob));
168 normed_blob->Normalize(
nullptr,
nullptr,
nullptr, x_orig, y_orig, scaling, scaling, 0.0f,
177 BLOB_CHOICE *lang_choice =
nullptr, *equ_choice =
nullptr;
178 if (ratings_lang.length() > 0) {
179 BLOB_CHOICE_IT choice_it(&ratings_lang);
180 lang_choice = choice_it.data();
182 if (ratings_equ.length() > 0) {
183 BLOB_CHOICE_IT choice_it(&ratings_equ);
184 equ_choice = choice_it.data();
187 const float lang_score = lang_choice ? lang_choice->
certainty() : -FLT_MAX;
188 const float equ_score = equ_choice ? equ_choice->certainty() : -FLT_MAX;
190 const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
193 const float diff = std::fabs(lang_score - equ_score);
197 if (std::fmax(lang_score, equ_score) < kConfScoreTh) {
200 }
else if (diff > kConfDiffTh && equ_score > lang_score) {
204 }
else if (lang_choice) {
227 static std::vector<UNICHAR_ID> ids_to_exclude;
228 if (ids_to_exclude.empty()) {
229 static const char *kCharsToEx[] = {
"'",
"`",
"\"",
"\\",
",",
".",
230 "〈",
"〉",
"《",
"》",
"」",
"「"};
231 for (
auto &
i : kCharsToEx) {
234 std::sort(ids_to_exclude.begin(), ids_to_exclude.end());
236 auto found = std::binary_search(ids_to_exclude.begin(), ids_to_exclude.end(),
id);
242 static const char kDigitsChars[] =
"|";
243 if (unicharset.
get_isdigit(
id) || (s.length() == 1 && strchr(kDigitsChars, s[0]) !=
nullptr)) {
256 const int classify_class_pruner =
lang_tesseract_->classify_class_pruner_multiplier;
257 const int classify_integer_matcher =
lang_tesseract_->classify_integer_matcher_multiplier;
269 BLOBNBOX_C_IT bbox_it(part->
boxes());
271 std::vector<int> blob_heights;
272 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
273 if (bbox_it.data()->special_text_type() !=
BSTT_SKIP) {
274 blob_heights.push_back(bbox_it.data()->bounding_box().height());
277 std::sort(blob_heights.begin(), blob_heights.end());
278 const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2;
279 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
280 if (bbox_it.data()->special_text_type() !=
BSTT_SKIP) {
287 lang_tesseract_->classify_class_pruner_multiplier.set_value(classify_class_pruner);
288 lang_tesseract_->classify_integer_matcher_multiplier.set_value(classify_integer_matcher);
290 if (equationdetect_save_spt_image) {
299 BLOBNBOX_C_IT blob_it(part->
boxes());
301 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
305 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
314 BLOBNBOX_C_IT blob_it2 = blob_it;
316 while (!blob_it2.at_last()) {
317 BLOBNBOX *nextblob = blob_it2.forward();
319 if (nextblob_box.
left() >= blob_box.
right()) {
322 const float kWidthR = 0.4, kHeightR = 0.3;
324 yoverlap = blob_box.
y_overlap(nextblob_box);
325 const float widthR =
static_cast<float>(std::min(nextblob_box.
width(), blob_box.
width())) /
326 std::max(nextblob_box.
width(), blob_box.
width());
327 const float heightR =
static_cast<float>(std::min(nextblob_box.
height(), blob_box.
height())) /
330 if (xoverlap && yoverlap && widthR > kWidthR && heightR > kHeightR) {
334 blob_box += nextblob_box;
345 tprintf(
"Warning: lang_tesseract_ is nullptr!\n");
348 if (!part_grid || !best_columns) {
349 tprintf(
"part_grid/best_columns is nullptr!!\n");
359 if (equationdetect_save_bi_image) {
375 if (equationdetect_save_seed_image) {
382 std::vector<ColPartition *> seeds_expanded;
387 seeds_expanded.push_back(cp_seed);
391 for (
auto &
i : seeds_expanded) {
400 if (equationdetect_save_merged_image) {
412 std::vector<ColPartition *> parts_updated;
419 std::vector<ColPartition *> parts_to_merge;
421 if (parts_to_merge.empty()) {
427 for (
auto &
i : parts_to_merge) {
433 parts_updated.push_back(part);
436 if (parts_updated.empty()) {
441 for (
auto &
i : parts_updated) {
448 std::vector<ColPartition *> *parts_overlap) {
449 ASSERT_HOST(seed !=
nullptr && parts_overlap !=
nullptr);
455 const int kRadNeighborCells = 30;
457 (seed_box.
top() + seed_box.
bottom()) / 2, kRadNeighborCells);
458 search.SetUniqueMode(
true);
462 std::vector<ColPartition *> parts;
463 const float kLargeOverlapTh = 0.95;
464 const float kEquXOverlap = 0.4, kEquYOverlap = 0.5;
465 while ((part =
search.NextRadSearch()) !=
nullptr) {
476 if (x_overlap_fraction >= kLargeOverlapTh && y_overlap_fraction >= kLargeOverlapTh) {
479 if ((x_overlap_fraction > kEquXOverlap && y_overlap_fraction > 0.0) ||
480 (x_overlap_fraction > 0.0 && y_overlap_fraction > kEquYOverlap)) {
487 parts_overlap->push_back(part);
523 std::vector<ColPartition *> seeds1, seeds2;
525 std::vector<int> indented_texts_left;
527 std::vector<float> texts_foreground_density;
534 const int kTextBlobsTh = 20;
538 seeds1.push_back(part);
544 seeds2.push_back(part);
549 indented_texts_left.push_back(box.
left());
557 std::sort(indented_texts_left.begin(), indented_texts_left.end());
558 std::sort(texts_foreground_density.begin(), texts_foreground_density.end());
559 float foreground_density_th = 0.15;
560 if (!texts_foreground_density.empty()) {
562 foreground_density_th = 0.8 * texts_foreground_density[texts_foreground_density.size() / 2];
565 for (
auto &
i : seeds1) {
566 const TBOX &box =
i->bounding_box();
578 for (
auto &
i : seeds2) {
579 if (
CheckForSeed2(indented_texts_left, foreground_density_th,
i)) {
588 const int pix_height = pixGetHeight(pix_bi);
589 Box *box = boxCreate(tbox.
left(), pix_height - tbox.
top(), tbox.
width(), tbox.
height());
590 Image pix_sub = pixClipRectangle(pix_bi, box,
nullptr);
592 pixForegroundFraction(pix_sub, &fract);
603 std::vector<TBOX> sub_boxes;
605 float parts_passed = 0.0;
606 for (
auto &sub_boxe : sub_boxes) {
608 if (density < density_th) {
614 const float kSeedPartRatioTh = 0.3;
615 bool retval = (parts_passed / sub_boxes.size() >= kSeedPartRatioTh);
628 for (
auto data : *parts_splitted) {
631 parts_splitted->clear();
634 bool found_split =
true;
635 while (found_split) {
637 BLOBNBOX_C_IT box_it(right_part->
boxes());
642 int previous_right = INT32_MIN;
645 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
646 const TBOX &box = box_it.data()->bounding_box();
647 if (previous_right != INT32_MIN && box.
left() - previous_right > kThreshold) {
650 const int mid_x = (box.
left() + previous_right) / 2;
652 right_part = left_part->
SplitAt(mid_x);
654 parts_splitted->push_back(left_part);
661 previous_right = std::max(previous_right,
static_cast<int>(box.
right()));
667 parts_splitted->push_back(right_part);
672 splitted_boxes->clear();
684 int previous_right = INT32_MIN;
685 BLOBNBOX_C_IT box_it(part->
boxes());
686 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
687 const TBOX &box = box_it.data()->bounding_box();
688 if (previous_right != INT32_MIN && box.
left() - previous_right > kThreshold) {
690 splitted_boxes->push_back(union_box);
691 previous_right = INT32_MIN;
693 if (previous_right == INT32_MIN) {
699 previous_right = std::max(previous_right,
static_cast<int>(box.
right()));
703 if (previous_right != INT32_MIN) {
704 splitted_boxes->push_back(union_box);
709 const float foreground_density_th,
ColPartition *part) {
714 if (!indented_texts_left.empty() &&
728 if (sorted_vec.empty()) {
731 const int kDistTh =
static_cast<int>(std::round(0.03f *
resolution_));
732 auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val);
733 if (pos > sorted_vec.begin()) {
739 auto index = pos - sorted_vec.begin();
740 while (index >= 0 && abs(val - sorted_vec[index--]) < kDistTh) {
745 index = pos + 1 - sorted_vec.begin();
746 while (
static_cast<size_t>(index) < sorted_vec.size() && sorted_vec[index++] - val < kDistTh) {
774 std::vector<ColPartition *> new_seeds;
778 search.SetUniqueMode(
true);
782 const TBOX &part_box(part->bounding_box());
786 if (left_margin + kMarginDiffTh < right_margin && left_margin < kMarginDiffTh) {
789 right_to_left =
false;
790 }
else if (left_margin > cps_cx) {
794 right_to_left =
true;
796 new_seeds.push_back(part);
800 bool side_neighbor_found =
false;
801 while ((neighbor =
search.NextSideSearch(right_to_left)) !=
nullptr) {
808 side_neighbor_found =
true;
811 if (!side_neighbor_found) {
816 if (neighbor_box.
width() > part_box.
width() &&
820 new_seeds.push_back(part);
835 std::vector<int> ygaps;
840 if (prev !=
nullptr) {
842 const TBOX &prev_box = prev->bounding_box();
845 int gap = current_box.
y_gap(prev_box);
846 if (gap < std::min(current_box.
height(), prev_box.
height())) {
848 ygaps.push_back(gap);
855 if (ygaps.size() < 8) {
860 std::sort(ygaps.begin(), ygaps.end());
864 spacing += ygaps[
count];
866 return spacing /
count;
870 const int textparts_linespacing) {
882 std::vector<ColPartition *> new_seeds;
888 if (
IsInline(!top_to_bottom, textparts_linespacing, part)) {
891 new_seeds.push_back(part);
905 const float kYGapRatioTh = 1.0;
912 search.SetUniqueMode(
true);
913 while ((neighbor =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
915 if (part_box.
y_gap(neighbor_box) >
916 kYGapRatioTh * std::min(part_box.
height(), neighbor_box.
height())) {
925 const float kHeightRatioTh = 0.5;
926 const int kYGapTh = textparts_linespacing > 0
927 ? textparts_linespacing +
static_cast<int>(std::round(0.02f *
resolution_))
928 :
static_cast<int>(std::round(0.05f *
resolution_));
930 part_box.
y_gap(neighbor_box) <= kYGapTh &&
932 static_cast<float>(std::min(part_box.
height(), neighbor_box.
height())) /
946 const int kSeedMathBlobsCount = 2;
947 const int kSeedMathDigitBlobsCount = 5;
952 math_blobs + digit_blobs <= kSeedMathDigitBlobsCount) {
962 float math_digit_density =
965 if (math_digit_density > math_density_high) {
969 math_digit_density > math_density_low) {
982 const int kXGapTh =
static_cast<int>(std::round(0.5f *
resolution_));
983 const int kRadiusTh =
static_cast<int>(std::round(3.0f *
resolution_));
984 const int kYGapTh =
static_cast<int>(std::round(0.5f *
resolution_));
990 (part_box.
top() + part_box.
bottom()) / 2, kRadiusTh);
991 search.SetUniqueMode(
true);
992 bool left_indented =
false, right_indented =
false;
993 while ((neighbor =
search.NextRadSearch()) !=
nullptr && (!left_indented || !right_indented)) {
994 if (neighbor == part) {
1014 if (part_box.
y_gap(neighbor_box) < kYGapTh) {
1015 const int left_gap = part_box.
left() - neighbor_box.
left();
1016 const int right_gap = neighbor_box.
right() - part_box.
right();
1017 if (left_gap > kXGapTh) {
1018 left_indented =
true;
1020 if (right_gap > kXGapTh) {
1021 right_indented =
true;
1026 if (left_indented && right_indented) {
1029 if (left_indented) {
1032 if (right_indented) {
1039 if (seed ==
nullptr ||
1045 std::vector<ColPartition *> parts_to_merge;
1052 if (parts_to_merge.empty()) {
1060 for (
auto part : parts_to_merge) {
1065 if (part == cp_seed) {
1074 seed->
Absorb(part,
nullptr);
1081 std::vector<ColPartition *> *parts_to_merge) {
1082 ASSERT_HOST(seed !=
nullptr && parts_to_merge !=
nullptr);
1083 const float kYOverlapTh = 0.6;
1084 const int kXGapTh =
static_cast<int>(std::round(0.2f *
resolution_));
1088 const int x = search_left ? seed_box.
left() : seed_box.
right();
1090 search.SetUniqueMode(
true);
1094 while ((part =
search.NextSideSearch(search_left)) !=
nullptr) {
1099 if (part_box.
x_gap(seed_box) > kXGapTh) {
1104 if ((part_box.
left() >= seed_box.
left() && search_left) ||
1105 (part_box.
right() <= seed_box.
right() && !search_left)) {
1128 parts_to_merge->push_back(part);
1133 std::vector<ColPartition *> *parts_to_merge) {
1135 const float kXOverlapTh = 0.4;
1136 const int kYGapTh =
static_cast<int>(std::round(0.2f *
resolution_));
1140 const int y = search_bottom ? seed_box.
bottom() : seed_box.
top();
1142 search.SetUniqueMode(
true);
1146 std::vector<ColPartition *> parts;
1147 int skipped_min_top = std::numeric_limits<int>::max(), skipped_max_bottom = -1;
1148 while ((part =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
1154 if (part_box.
y_gap(seed_box) > kYGapTh) {
1159 if ((part_box.
bottom() >= seed_box.
bottom() && search_bottom) ||
1160 (part_box.
top() <= seed_box.
top() && !search_bottom)) {
1164 bool skip_part =
false;
1182 if (skipped_min_top > part_box.
top()) {
1183 skipped_min_top = part_box.
top();
1185 if (skipped_max_bottom < part_box.
bottom()) {
1186 skipped_max_bottom = part_box.
bottom();
1190 parts.push_back(part);
1201 for (
auto &part : parts) {
1203 if ((search_bottom && part_box.
top() <= skipped_max_bottom) ||
1204 (!search_bottom && part_box.
bottom() >= skipped_min_top)) {
1208 parts_to_merge->push_back(part);
1214 const int kXGapTh =
static_cast<int>(std::round(0.25f *
resolution_));
1215 const int kYGapTh =
static_cast<int>(std::round(0.05f *
resolution_));
1252 std::vector<ColPartition *> text_parts;
1257 text_parts.push_back(part);
1260 if (text_parts.empty()) {
1265 std::sort(text_parts.begin(), text_parts.end(), &SortCPByHeight);
1266 const TBOX &text_box = text_parts[text_parts.size() / 2]->bounding_box();
1267 int med_height = text_box.
height();
1268 if (text_parts.size() % 2 == 0 && text_parts.size() > 1) {
1269 const TBOX &text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box();
1270 med_height =
static_cast<int>(std::round(0.5f * (text_box.
height() + med_height)));
1274 for (
auto &text_part : text_parts) {
1275 const TBOX &text_box(text_part->bounding_box());
1276 if (text_box.
height() > med_height) {
1279 std::vector<ColPartition *> math_blocks;
1287 for (
auto &math_block : math_blocks) {
1289 text_part->Absorb(math_block,
nullptr);
1296 std::vector<ColPartition *> *math_blocks) {
1297 ASSERT_HOST(part !=
nullptr && math_blocks !=
nullptr);
1298 math_blocks->clear();
1302 int y_gaps[2] = {std::numeric_limits<int>::max(), std::numeric_limits<int>::max()};
1304 int neighbors_left = std::numeric_limits<int>::max(), neighbors_right = 0;
1305 for (
int i = 0;
i < 2; ++
i) {
1309 y_gaps[
i] = neighbor_box.
y_gap(part_box);
1310 if (neighbor_box.
left() < neighbors_left) {
1311 neighbors_left = neighbor_box.
left();
1313 if (neighbor_box.
right() > neighbors_right) {
1314 neighbors_right = neighbor_box.
right();
1318 if (neighbors[0] == neighbors[1]) {
1320 neighbors[1] =
nullptr;
1321 y_gaps[1] = std::numeric_limits<int>::max();
1325 if (part_box.
left() < neighbors_left || part_box.
right() > neighbors_right) {
1330 int index = y_gaps[0] < y_gaps[1] ? 0 : 1;
1334 math_blocks->push_back(neighbors[index]);
1343 math_blocks->push_back(neighbors[index]);
1351 ColPartition *nearest_neighbor =
nullptr, *neighbor =
nullptr;
1352 const int kYGapTh =
static_cast<int>(std::round(
resolution_ * 0.5f));
1355 search.SetUniqueMode(
true);
1357 int y = search_bottom ? part_box.
bottom() : part_box.
top();
1359 int min_y_gap = std::numeric_limits<int>::max();
1360 while ((neighbor =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
1364 const TBOX &neighbor_box(neighbor->bounding_box());
1365 int y_gap = neighbor_box.
y_gap(part_box);
1366 if (y_gap > kYGapTh) {
1370 (search_bottom && neighbor_box.
bottom() > part_box.
bottom()) ||
1371 (!search_bottom && neighbor_box.
top() < part_box.
top())) {
1374 if (y_gap < min_y_gap) {
1376 nearest_neighbor = neighbor;
1380 return nearest_neighbor;
1387 const int kYGapTh =
static_cast<int>(std::round(
resolution_ * 0.1f));
1394 snprintf(page,
sizeof(page),
"%04d",
page_count_);
1400 pix = pixConvertTo32(pixBi);
1405 BLOBNBOX_C_IT blob_it(part->
boxes());
1406 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1411 pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
1422 Box *box = boxCreate(tbox.
left(), pixGetHeight(pix) - tbox.
top(), tbox.
width(), tbox.
height());
1424 pixRenderBoxArb(pix, box, 5, 255, 0, 0);
1426 pixRenderBoxArb(pix, box, 5, 0, 255, 0);
1428 pixRenderBoxArb(pix, box, 5, 0, 0, 255);
1433 pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
1441 tprintf(
"Printing special blobs density values for ColParition (t=%d,b=%d) ", h - box.
top(),
#define BOOL_VAR(name, val, comment)
const float kUnclearDensityTh
const int kSeedBlobsCountTh
const int kLeftIndentAlignmentCountTh
bool IsTextOrEquationType(PolyBlockType type)
const float kMathDigitDensityTh2
void tprintf(const char *format,...)
int IntCastRounded(double x)
bool IsRightIndented(const EquationDetect::IndentType type)
LIST search(LIST list, void *key, int_compare is_equal)
bool PTIsTextType(PolyBlockType type)
const float kMathItalicDensityTh
const float kMathDigitDensityTh1
const int kBlnBaselineOffset
bool IsLeftIndented(const EquationDetect::IndentType type)
bool CheckSeedDensity(const float math_density_high, const float math_density_low, const ColPartition *part) const
EquationDetect(const char *equ_datapath, const char *equ_language)
int EstimateTextPartLineSpacing()
void ProcessMathBlockSatelliteParts()
bool CheckForSeed2(const std::vector< int > &indented_texts_left, const float foreground_density_th, ColPartition *part)
bool CheckSeedBlobsCount(ColPartition *part)
void SetResolution(const int resolution)
int LabelSpecialText(TO_BLOCK *to_block) override
int CountAlignment(const std::vector< int > &sorted_vec, const int val) const
bool CheckSeedNeighborDensity(const ColPartition *part) const
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const
IndentType IsIndented(ColPartition *part)
void SplitCPHor(ColPartition *part, std::vector< ColPartition * > *parts_splitted)
void SearchByOverlap(ColPartition *seed, std::vector< ColPartition * > *parts_overlap)
ColPartitionGrid * part_grid_
BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset, const UNICHAR_ID id) const
float ComputeForegroundDensity(const TBOX &tbox)
void ExpandSeedVertical(const bool search_bottom, ColPartition *seed, std::vector< ColPartition * > *parts_to_merge)
ColPartitionSet ** best_columns_
void InsertPartAfterAbsorb(ColPartition *part)
ColPartition * SearchNNVertical(const bool search_bottom, const ColPartition *part)
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const
void IdentifyInlinePartsVertical(const bool top_to_bottom, const int textPartsLineSpacing)
void ExpandSeedHorizontal(const bool search_left, ColPartition *seed, std::vector< ColPartition * > *parts_to_merge)
bool ExpandSeed(ColPartition *seed)
void PaintSpecialTexts(const std::string &outfile) const
void IdentifyInlineParts()
void ComputeCPsSuperBBox()
~EquationDetect() override
bool IsMathBlockSatellite(ColPartition *part, std::vector< ColPartition * > *math_blocks)
bool IsInline(const bool search_bottom, const int textPartsLineSpacing, ColPartition *part)
std::vector< ColPartition * > cp_seeds_
bool CheckSeedFgDensity(const float density_th, ColPartition *part)
int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns) override
void IdentifyBlobsToSkip(ColPartition *part)
void PaintColParts(const std::string &outfile) const
Tesseract * lang_tesseract_
void MergePartsByLocation()
void SplitCPHorLite(ColPartition *part, std::vector< TBOX > *splitted_boxes)
void PrintSpecialBlobsDensity(const ColPartition *part) const
void GetOutputTiffName(const char *name, std::string &image_name) const
void SetLangTesseract(Tesseract *lang_tesseract)
void IdentifySpecialText()
void IdentifyInlinePartsHorizontal()
int init_tesseract(const std::string &arg0, const std::string &textbase, const std::string &language, OcrEngineMode oem, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, TessdataManager *mgr)
int source_resolution() const
const TBOX & bounding_box() const
void set_special_text_type(BlobSpecialTextType new_type)
BlobSpecialTextType special_text_type() const
bool joined_to_prev() const
BLOBNBOX_LIST large_blobs
TBOX bounding_box() const
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
UNICHAR_ID unichar_id() const
int16_t fontinfo_id() const
bool major_y_overlap(const TBOX &box) const
int y_gap(const TBOX &box) const
TDimension height() const
double y_overlap_fraction(const TBOX &box) const
double x_overlap_fraction(const TBOX &box) const
int x_gap(const TBOX &box) const
bool major_x_overlap(const TBOX &box) const
TDimension bottom() const
bool y_overlap(const TBOX &box) const
bool x_overlap(const TBOX &box) const
std::string imagebasename
bool get_isalpha(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
bool get_isdigit(UNICHAR_ID unichar_id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool get_ispunctuation(UNICHAR_ID unichar_id) const
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
UnicityTable< FontInfo > & get_fontinfo_table()
void RepositionIterator()
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
void RemoveBBox(BBC *bbox)
BlobTextFlowType flow() const
float SpecialBlobsDensity(const BlobSpecialTextType type) const
int SpecialBlobsCount(const BlobSpecialTextType type)
PolyBlockType type() const
ColPartition * CopyButDontOwnBlobs()
ColPartition * SplitAt(int split_x)
BlobRegionType blob_type() const
void set_blob_type(BlobRegionType t)
const TBOX & bounding_box() const
bool IsVerticalType() const
void set_type(PolyBlockType t)
void ComputeSpecialBlobsDensity()
void SetPartitionType(int resolution, ColPartitionSet *columns)
void Absorb(ColPartition *other, const WidthCallback &cb)
void set_flow(BlobTextFlowType f)
static void RenderSpecialText(Image pix, BLOBNBOX *blob)