19#define _USE_MATH_DEFINES
22# include "config_auto.h"
27#include "arrayaccess.h"
52#include <allheaders.h>
61#define MAX_NEAREST_DIST 600
// NOTE(review): fragment of a function whose signature is not visible in this
// listing (presumably SetBlobStrokeWidth — confirm). Lines are missing wherever
// the embedded line numbers skip, e.g. the declarations of box, height and
// prev_pixel, and the closing braces of the loops.
70 int pix_height = pixGetHeight(pix);
72 int width = box.
width();
// Clip the blob's rectangle out of pix. The second argument of boxCreate is the
// image height minus the box top.
74 Box *blob_pix_box = boxCreate(box.
left(), pix_height - box.
top(), width, height);
75 Image pix_blob = pixClipRectangle(pix, blob_pix_box,
nullptr);
76 boxDestroy(&blob_pix_box);
// Distance image of the clipped blob; it is read below one byte per pixel via
// GET_DATA_BYTE, with wpl words between consecutive rows.
77 Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
80 uint32_t *data = pixGetData(dist_pix);
81 int wpl = pixGetWpl(dist_pix);
// Row scan: pixel starts as the value at column 0 and next_pixel is the value at
// column x, so the tests below concern column x - 1.
// NOTE(review): the updates of prev_pixel/pixel per iteration are not visible.
83 STATS h_stats(0, width);
84 for (
int y = 0;
y < height; ++
y) {
85 uint32_t *pixels = data +
y * wpl;
87 int pixel = GET_DATA_BYTE(pixels, 0);
88 for (
int x = 1;
x < width; ++
x) {
89 int next_pixel = GET_DATA_BYTE(pixels,
x);
// Only count a rise (prev_pixel < pixel) whose value also matches the rows
// directly above and below at column x - 1; the row tests are skipped on the
// first and last row.
92 if (prev_pixel < pixel && (
y == 0 || pixel == GET_DATA_BYTE(pixels - wpl,
x - 1)) &&
93 (
y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl,
x - 1))) {
// A strict peak is recorded as pixel * 2 - 1; two equal values followed by a
// lower one are recorded as pixel * 2.
94 if (pixel > next_pixel) {
96 h_stats.
add(pixel * 2 - 1, 1);
97 }
else if (pixel == next_pixel &&
x + 1 < width && pixel > GET_DATA_BYTE(pixels,
x + 1)) {
99 h_stats.
add(pixel * 2, 1);
// Column scan: same idea walking down each column. Here pixels is row y, and
// the neighbour tests read columns x - 1 and x + 1 of the row before it
// (pixels - wpl); they are skipped on the first and last column.
107 STATS v_stats(0, height);
108 for (
int x = 0;
x < width; ++
x) {
110 int pixel = GET_DATA_BYTE(data,
x);
111 for (
int y = 1;
y < height; ++
y) {
112 uint32_t *pixels = data +
y * wpl;
113 int next_pixel = GET_DATA_BYTE(pixels,
x);
116 if (prev_pixel < pixel && (
x == 0 || pixel == GET_DATA_BYTE(pixels - wpl,
x - 1)) &&
117 (
x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl,
x + 1))) {
118 if (pixel > next_pixel) {
120 v_stats.
add(pixel * 2 - 1, 1);
121 }
else if (pixel == next_pixel &&
y + 1 < height &&
122 pixel > GET_DATA_BYTE(pixels + wpl,
x)) {
124 v_stats.
add(pixel * 2, 1);
// Each histogram is only acted on when it holds at least (width + height) / 4
// samples. NOTE(review): the guarded bodies are not visible in this listing.
138 if (h_stats.
get_total() >= (width + height) / 4) {
140 if (v_stats.
get_total() >= (width + height) / 4) {
164 TO_BLOCK_LIST *port_blocks) {
// NOTE(review): only the tail of the signature is visible (presumably
// assign_blobs_to_blocks2 — confirm); lines are missing wherever the embedded
// line numbers skip, including the declaration of blob_it and the creation of
// newblob.
// For every BLOCK in blocks a new TO_BLOCK is created and appended to
// port_blocks once its blob lists have been filled.
165 BLOCK_IT block_it = blocks;
167 BLOBNBOX_IT port_box_it;
169 TO_BLOCK_IT port_block_it = port_blocks;
171 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
172 auto block = block_it.data();
173 auto port_block =
new TO_BLOCK(block);
// Entries extracted from block->blob_list() go into port_block->blobs.
176 port_box_it.set_to_list(&port_block->blobs);
177 blob_it.set_to_list(block->blob_list());
178 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
179 auto blob = blob_it.extract();
181 newblob->set_owns_cblob(
true);
183 port_box_it.add_after_then_move(newblob);
// Entries extracted from block->reject_blobs() go into port_block->noise_blobs.
189 port_box_it.set_to_list(&port_block->noise_blobs);
190 blob_it.set_to_list(block->reject_blobs());
191 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
192 auto blob = blob_it.extract();
194 newblob->set_owns_cblob(
true);
196 port_box_it.add_after_then_move(newblob);
199 port_block_it.add_after_then_move(port_block);
// NOTE(review): fragment; the signature is not visible in this listing
// (presumably find_components — confirm) and lines are missing wherever the
// embedded line numbers skip.
212 int width = pixGetWidth(pix);
213 int height = pixGetHeight(pix);
// Images wider or taller than INT16_MAX are reported through tprintf.
// NOTE(review): what happens after the message is not visible here.
214 if (width > INT16_MAX || height > INT16_MAX) {
215 tprintf(
"Input image too large! (%d, %d)\n", width, height);
// Per-block processing; the loop body beyond fetching the block is not visible.
219 BLOCK_IT block_it(blocks);
220 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
221 BLOCK *block = block_it.data();
// The page's top-right coordinate is taken from the image dimensions.
228 ICOORD page_tr(width, height);
239 TO_BLOCK_LIST *blocks,
241 TO_BLOCK_IT block_it = blocks;
// NOTE(review): fragment; most of the signature and body are missing from this
// listing (presumably filter_blobs — confirm). The visible lines walk every
// TO_BLOCK in blocks and, when graphics are compiled in, gate two code paths on
// textord_show_blobs / textord_show_boxes combined with testing_on.
244#ifndef GRAPHICS_DISABLED
250 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
251 block = block_it.data();
265#ifndef GRAPHICS_DISABLED
266 if (textord_show_blobs && testing_on) {
272 if (textord_show_boxes && testing_on) {
// Redistributes the blobs of src_list between src_list, noise_list, small_list
// and large_list. Blobs are moved with extract()/add_after_then_move(), and the
// height limits min_y and max_y are derived from the ile() statistic of the
// blob heights gathered in size_stats.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The tests guarding the first two extract() calls, the computation of max_x,
// the tail of the max_y expression and the return statement are not visible.
291float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list,
292 BLOBNBOX_LIST *noise_list,
293 BLOBNBOX_LIST *small_list,
294 BLOBNBOX_LIST *large_list) {
299 BLOBNBOX_IT src_it = src_list;
300 BLOBNBOX_IT noise_it = noise_list;
301 BLOBNBOX_IT small_it = small_list;
302 BLOBNBOX_IT large_it = large_list;
// Pass 1: move blobs out of src_list into the noise list or the small list;
// the second test involves textord_noise_area_ratio.
310 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
311 blob = src_it.data();
313 noise_it.add_after_then_move(src_it.extract());
316 textord_noise_area_ratio) {
317 small_it.add_after_then_move(src_it.extract());
// Pass 2: collect the heights of the blobs still in src_list.
320 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
321 size_stats.add(src_it.data()->bounding_box().height(), 1);
// First size estimate and the height limits derived from it.
323 initial_x = size_stats.ile(textord_initialx_ile);
324 max_y = ceil(initial_x *
328 min_y = std::floor(initial_x / 2);
// Pass 3: re-examine the small list. Blobs taller than max_y go to the large
// list; blobs of at least min_y go back to src_list.
330 small_it.move_to_first();
331 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
332 height = small_it.data()->bounding_box().height();
333 if (height > max_y) {
334 large_it.add_after_then_move(small_it.extract());
335 }
else if (height >= min_y) {
336 src_it.add_after_then_move(small_it.extract());
// Pass 4: re-examine src_list. Blobs shorter than min_y go to the small list,
// blobs taller than max_y or wider than max_x go to the large list, and heights
// are added to size_stats (presumably only for blobs that stay — the guarding
// else is not visible).
340 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
341 height = src_it.data()->bounding_box().height();
342 width = src_it.data()->bounding_box().width();
343 if (height < min_y) {
344 small_it.add_after_then_move(src_it.extract());
345 }
else if (height > max_y || width > max_x) {
346 large_it.add_after_then_move(src_it.extract());
348 size_stats.add(height, 1);
// The estimate is raised to the textord_initialasc_ile statistic when that is
// larger.
351 max_height = size_stats.ile(textord_initialasc_ile);
355 if (max_height > initial_x) {
356 initial_x = max_height;
// Makes sure a block has at least one row, and gives rows a word built from a
// single blob placed in a C_BLOB list.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The condition under which a word is added, the construction of blob and the
// body of the final word loop are not visible.
366void Textord::cleanup_nontext_block(BLOCK *block) {
368 ROW_IT row_it(block->row_list());
// No rows yet: build one from the block's bounding box. xstarts spans left to
// right and the coefficients are {0, 0, bottom}; the remaining ROW arguments
// are height / 2 and height / 4 (twice).
369 if (row_it.empty()) {
370 const TBOX &box = block->pdblk.bounding_box();
371 float height = box.height();
372 int32_t xstarts[2] = {box.left(), box.right()};
373 double coeffs[3] = {0.0, 0.0,
static_cast<double>(box.bottom())};
374 ROW *row =
new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f, height / 4.0f, 0, 1);
375 row_it.add_after_then_move(row);
378 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
379 ROW *row = row_it.data();
380 WERD_IT w_it(row->word_list());
// With exactly one row in the block the block's bounding box is used; otherwise
// the row's own bounding box.
383 TBOX box = block->row_list()->singleton() ? block->pdblk.bounding_box() : row->bounding_box();
386 C_BLOB_IT blob_it(&blobs);
387 blob_it.add_after_then_move(blob);
388 WERD *word =
new WERD(&blobs, 0,
nullptr);
389 w_it.add_after_then_move(word);
392 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
393 WERD *word = w_it.data();
398 row->recalc_bounding_box();
// Walks every block and row, removing noise from words and deleting rows and
// blocks that end up empty. Non-text blocks are handed to cleanup_nontext_block.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The use of clean_noise, the counter updates, the else arms and the body of
// the textord_blshift_maxshift test are not visible.
408void Textord::cleanup_blocks(
bool clean_noise, BLOCK_LIST *blocks) {
409 BLOCK_IT block_it = blocks;
413 int num_rows_all = 0;
415 int num_blocks_all = 0;
416 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
417 BLOCK *block = block_it.data();
// A block with a poly_block that is not text gets the non-text treatment.
418 if (block->pdblk.poly_block() !=
nullptr && !block->pdblk.poly_block()->IsText()) {
419 cleanup_nontext_block(block);
425 row_it.set_to_list(block->row_list());
426 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
427 ROW *row = row_it.data();
429 clean_small_noise_from_words(row);
// Delete the row when it has no words, or when row rejection is enabled and
// clean_noise_from_row() flags a non-empty row.
430 if ((textord_noise_rejrows && !row->word_list()->empty() && clean_noise_from_row(row)) ||
431 row->word_list()->empty()) {
432 delete row_it.extract();
// Word-level noise cleaning, enabled by textord_noise_rejwords.
434 if (textord_noise_rejwords) {
435 clean_noise_from_words(row_it.data());
437 if (textord_blshift_maxshift >= 0) {
// A block left without rows is deleted.
444 if (block->row_list()->empty()) {
445 delete block_it.extract();
// Debug summary of row and block counts.
450 if (textord_noise_debug) {
451 tprintf(
"cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all);
454 if (textord_noise_debug) {
455 tprintf(
"cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all);
// Classifies the blobs and outlines of a row by size relative to
// row->x_height() and returns whether the row is to be treated as noise: true
// when super_norm_count is below textord_noise_sncount, dot_count exceeds
// norm_count * textord_noise_rowratio, and dot_count is greater than 2.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The parameter list, most local declarations and the statements that
// increment dot_count, norm_count and super_norm_count are not visible, so the
// exact meaning of each counter should be confirmed against the full source.
465bool Textord::clean_noise_from_row(
474 int32_t trans_count = 0;
475 int32_t trans_threshold;
478 int32_t super_norm_count;
480 WERD_IT word_it = row->word_list();
488 super_norm_count = 0;
489 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
490 word = word_it.data();
492 blob_it.set_to_list(word->cblob_list());
493 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
494 blob = blob_it.data();
// Outline-level tests: blob_size is the larger of the outline's width and
// height.
497 out_it.set_to_list(blob->out_list());
498 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
499 outline = out_it.data();
500 blob_box = outline->bounding_box();
501 blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
502 if (blob_size < textord_noise_sizelimit * row->x_height()) {
// Outlines with children whose height and width both lie within the
// textord_noise_syfract / textord_noise_sxfract bands around the x-height.
505 if (!outline->child()->empty() &&
506 blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
507 blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
508 blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
509 blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
// Blob-level tests: mid-sized blobs (at least the size limit but under twice
// the x-height) are checked with count_transitions() against
// textord_noise_translimit.
516 blob_box = blob->bounding_box();
517 blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
518 if (blob_size >= textord_noise_sizelimit * row->x_height() &&
519 blob_size < row->x_height() * 2) {
520 trans_threshold = blob_size / textord_noise_sizefraction;
521 trans_count = blob->count_transitions(trans_threshold);
522 if (trans_count < textord_noise_translimit) {
525 }
// Blobs taller than twice the x-height, excluding the first blob of the first
// word.
else if (blob_box.height() > row->x_height() * 2 &&
526 (!word_it.at_first() || !blob_it.at_first())) {
530 tprintf(
"Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n", blob_box.left(),
531 blob_box.bottom(), blob_box.right(), blob_box.top(), blob->out_list()->length(),
532 trans_count, blob_box.bottom() - row->base_line(blob_box.left()));
// Debug summary for the row.
536 if (textord_noise_debug) {
537 tprintf(
"Row ending at (%d,%g):", blob_box.right(), row->base_line(blob_box.right()));
538 tprintf(
" R=%g, dc=%d, nc=%d, %s\n",
539 norm_count > 0 ?
static_cast<float>(dot_count) / norm_count : 9999, dot_count,
541 dot_count > norm_count * textord_noise_normratio && dot_count > 2 ?
"REJECTED"
544 return super_norm_count < textord_noise_sncount &&
545 dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
// Grades every word of a row with a code in word_dud (0, 1 or 2) from the same
// size tests used for whole rows, then calls WERD::CleanNoise on words selected
// by that code.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The parameter list, the counter increments, the maintenance of word_index and
// dud_words, and the body of the early-out test are not visible.
554void Textord::clean_noise_from_words(
563 int32_t trans_threshold;
570 WERD_IT word_it = row->word_list();
// ok_words starts as the number of words in the row; an empty row or
// textord_no_rejects triggers the early-out branch.
574 ok_words = word_it.length();
575 if (ok_words == 0 || textord_no_rejects) {
// One entry per word, sized from the initial word count.
579 std::vector<int8_t> word_dud(ok_words);
583 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
584 word = word_it.data();
588 blob_it.set_to_list(word->cblob_list());
589 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
590 blob = blob_it.data();
// Outline-level tests: blob_size is the larger of the outline's width and
// height.
593 out_it.set_to_list(blob->out_list());
594 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
595 outline = out_it.data();
596 blob_box = outline->bounding_box();
597 blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
598 if (blob_size < textord_noise_sizelimit * row->x_height()) {
601 if (!outline->child()->empty() &&
602 blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
603 blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
604 blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
605 blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
// Blob-level tests, using count_transitions() for mid-sized blobs.
612 blob_box = blob->bounding_box();
613 blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
614 if (blob_size >= textord_noise_sizelimit * row->x_height() &&
615 blob_size < row->x_height() * 2) {
616 trans_threshold = blob_size / textord_noise_sizefraction;
617 trans_count = blob->count_transitions(trans_threshold);
618 if (trans_count < textord_noise_translimit) {
621 }
else if (blob_box.height() > row->x_height() * 2 &&
622 (!word_it.at_first() || !blob_it.at_first())) {
// Grading: only words with more than two dots and without W_REP_CHAR can be
// marked. Code 2 needs dot_count above twice norm_count *
// textord_noise_normratio, code 1 above that product once; otherwise 0.
626 if (dot_count > 2 && !word->flag(
W_REP_CHAR)) {
627 if (dot_count > norm_count * textord_noise_normratio * 2) {
628 word_dud[word_index] = 2;
629 }
else if (dot_count > norm_count * textord_noise_normratio) {
630 word_dud[word_index] = 1;
632 word_dud[word_index] = 0;
635 word_dud[word_index] = 0;
637 if (word_dud[word_index] == 2) {
// Second pass: words graded 2 are always selected; words graded 1 only when
// dud_words exceeds ok_words. CleanNoise receives the size limit scaled by the
// row's x-height (presumably inside this test — the lines between are missing).
646 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
647 if (word_dud[word_index] == 2 || (word_dud[word_index] == 1 && dud_words > ok_words)) {
648 word = word_it.data();
652 word->CleanNoise(textord_noise_sizelimit * row->x_height());
// Removes small outlines from every word of a row, then deletes blobs left with
// no outlines and words left with no blobs.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The closing braces and the handling of next_word before a word is deleted
// are not visible.
660void Textord::clean_small_noise_from_words(ROW *row) {
661 WERD_IT word_it(row->word_list());
662 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
663 WERD *word = word_it.data();
// Size threshold: textord_noise_hfract times the word's bounding-box height,
// rounded to the nearest int by adding 0.5 before the cast.
664 int min_size =
static_cast<int>(textord_noise_hfract * word->bounding_box().height() + 0.5);
665 C_BLOB_IT blob_it(word->cblob_list());
666 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
667 C_BLOB *blob = blob_it.data();
668 C_OUTLINE_IT out_it(blob->out_list());
669 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
670 C_OUTLINE *outline = out_it.data();
// The iterator is passed along with the threshold to RemoveSmallRecursive.
671 outline->RemoveSmallRecursive(min_size, &out_it);
// A blob with no outlines left is deleted.
673 if (blob->out_list()->empty()) {
674 delete blob_it.extract();
// A word with no blobs left is deleted; unless it is the last word, the
// following word is looked at first.
677 if (word->cblob_list()->empty()) {
678 if (!word_it.at_last()) {
681 WERD *next_word = word_it.data_relative(1);
686 delete word_it.extract();
// Groups blocks whose angles differ by no more than kMaxAngleDiff, builds a
// word grid per group, and calls TransferDiacriticsToWords for each group with
// the group's rotation after negating its y component.
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The computation of block_angle and x_height, the declaration of best_g, the
// else arm of the grouping test, the body of the null-box test and the
// construction of word_grid are not visible.
716void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks) {
719 const double kMaxAngleDiff = 0.01;
720 std::vector<std::unique_ptr<BlockGroup>> groups;
721 BLOCK_IT bk_it(blocks);
722 for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) {
723 BLOCK *block = bk_it.data();
// Find the existing group whose angle is closest to this block's angle.
730 float best_angle_diff = FLT_MAX;
731 for (
const auto &
group : groups) {
732 double angle_diff = std::fabs(block_angle -
group->angle);
// Differences above pi are measured the other way round the circle.
733 if (angle_diff > M_PI) {
734 angle_diff = fabs(angle_diff - 2.0 * M_PI);
736 if (angle_diff < best_angle_diff) {
737 best_angle_diff = angle_diff;
// Index of the group, obtained by pointer difference from the first element.
738 best_g = &
group - &groups[0];
// No group is close enough: start a new one from this block. The following
// push_back onto groups[best_g] is presumably the else arm (not visible).
741 if (best_angle_diff > kMaxAngleDiff) {
742 groups.push_back(std::make_unique<BlockGroup>(block));
744 groups[best_g]->blocks.push_back(block);
// Track the smallest x_height seen in the group.
747 if (x_height < groups[best_g]->min_xheight) {
748 groups[best_g]->min_xheight = x_height;
// word_ptrs owns the WordWithBox objects; the grid only receives raw pointers.
753 std::vector<std::unique_ptr<WordWithBox>> word_ptrs;
754 for (
const auto &
group : groups) {
755 if (
group->bounding_box.null_box()) {
759 group->bounding_box.topright());
// Insert every word of every row of every block in the group into the grid.
760 for (
auto b :
group->blocks) {
761 ROW_IT row_it(b->row_list());
762 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
763 ROW *row = row_it.data();
765 WERD_IT w_it(row->word_list());
766 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
767 WERD *word = w_it.data();
768 auto box_word = std::make_unique<WordWithBox>(word);
769 word_grid.InsertBBox(
true,
true, box_word.get());
771 word_ptrs.emplace_back(std::move(box_word));
// Work on a copy of the group's rotation with its y component negated.
775 FCOORD rotation =
group->rotation;
777 rotation.set_y(-rotation.y());
778 TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid);
// For each blob in diacritic_blobs, rotates its bounding box by rotation,
// searches the word grid around it, picks the nearest word above and the
// nearest word at or below it, and appends a rotated copy of the blob to the
// RejBlobs() list of the chosen word(s).
// NOTE(review): this listing has gaps wherever the embedded line numbers skip.
// The third parameter, the construction of the searcher ws, the full x_distance
// handling, the creation of copied_blob and the tests guarding the two append
// sections are not visible.
785void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs,
const FCOORD &rotation,
788 BLOBNBOX_IT b_it(diacritic_blobs);
792 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
793 BLOBNBOX *blobnbox = b_it.data();
794 TBOX blob_box = blobnbox->bounding_box();
795 blob_box.rotate(rotation);
796 ws.StartRectSearch(blob_box);
// Best candidates on each side, with the distance that won.
801 WordWithBox *best_above_word =
nullptr;
802 WordWithBox *best_below_word =
nullptr;
803 int best_above_distance = 0;
804 int best_below_distance = 0;
805 for (WordWithBox *word = ws.NextRectSearch(); word !=
nullptr; word = ws.NextRectSearch()) {
809 TBOX word_box = word->true_bounding_box();
810 int x_distance = blob_box.x_gap(word_box);
811 int y_distance = blob_box.y_gap(word_box);
// A positive horizontal gap gets special handling; in the visible part the gap
// is added to y_distance.
812 if (x_distance > 0) {
818 if (word_box.major_y_overlap(blob_box) && blob_box.left() > word_box.right()) {
821 y_distance += x_distance;
// A word counts as "above" when its vertical middle is greater than the blob's,
// and as "below" otherwise; the smallest y_distance wins on each side.
823 if (word_box.y_middle() > blob_box.y_middle() &&
824 (best_above_word ==
nullptr || y_distance < best_above_distance)) {
825 best_above_word = word;
826 best_above_distance = y_distance;
828 if (word_box.y_middle() <= blob_box.y_middle() &&
829 (best_below_word ==
nullptr || y_distance < best_below_distance)) {
830 best_below_word = word;
831 best_below_distance = y_distance;
// A side is good when it has a candidate and either the other side has none or
// its distance is less than the other side's distance plus the blob height.
// below_good additionally requires the two candidates to be different words.
834 bool above_good = best_above_word !=
nullptr &&
835 (best_below_word ==
nullptr ||
836 best_above_distance < best_below_distance + blob_box.height());
837 bool below_good = best_below_word !=
nullptr && best_below_word != best_above_word &&
838 (best_above_word ==
nullptr ||
839 best_below_distance < best_above_distance + blob_box.height());
// Rotated copy appended to the below word's RejBlobs().
842 copied_blob->rotate(rotation);
844 C_BLOB_IT blob_it(best_below_word->RejBlobs());
845 blob_it.add_to_end(copied_blob);
// Rotated copy appended to the above word's RejBlobs().
849 copied_blob->rotate(rotation);
851 C_BLOB_IT blob_it(best_above_word->RejBlobs());
852 blob_it.add_to_end(copied_blob);
// NOTE(review): fragment of a function whose signature is not visible in this
// listing (presumably tweak_row_baseline — confirm). Lines are missing wherever
// the embedded line numbers skip, including the declarations, the counting of
// blob_count, the computation of ydiff and every update of src_index and
// dest_index.
// The visible lines rebuild row->baseline as a new QSPLINE from the xstarts and
// coeffs arrays filled below.
878 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
879 word = word_it.data();
883 if (blob_count == 0) {
// Room for one segment per blob plus the existing baseline segments; each
// segment takes three coefficients.
887 std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1);
889 std::vector<double> coeffs((blob_count + row->baseline.segments) * 3);
893 xstarts[0] = row->baseline.xcoords[0];
894 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
895 word = word_it.data();
898 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
899 blob = blob_it.data();
901 x_centre = (blob_box.
left() + blob_box.
right()) / 2.0;
// A blob qualifies when ydiff is under blshift_maxshift and its height relative
// to the row's x-height exceeds blshift_xfraction.
908 if (ydiff < blshift_maxshift && blob_box.
height() / row->
x_height() > blshift_xfraction) {
909 if (xstarts[dest_index] >= x_centre) {
910 xstarts[dest_index] = blob_box.
left();
// Constant segment at the blob's bottom: the first two coefficients are zero.
912 coeffs[dest_index * 3] = 0;
913 coeffs[dest_index * 3 + 1] = 0;
914 coeffs[dest_index * 3 + 2] = blob_box.
bottom();
917 xstarts[dest_index] = blob_box.
right() + 1;
// Otherwise copy segments of the existing baseline (a, b, c of each quadratic)
// up to the blob's centre.
919 if (xstarts[dest_index] <= x_centre) {
920 while (row->baseline.xcoords[src_index + 1] <= x_centre &&
921 src_index < row->
baseline.segments - 1) {
922 if (row->baseline.xcoords[src_index + 1] > xstarts[dest_index]) {
923 coeffs[dest_index * 3] = row->baseline.quadratics[src_index].
a;
924 coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].
b;
925 coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].
c;
927 xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
931 coeffs[dest_index * 3] = row->baseline.quadratics[src_index].
a;
932 coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].
b;
933 coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].
c;
935 xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
// After the blobs: loop over source segments whose end lies at or before the
// current start, then copy the remaining source segments.
940 while (src_index < row->
baseline.segments &&
941 row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) {
944 while (src_index < row->
baseline.segments) {
945 coeffs[dest_index * 3] = row->baseline.quadratics[src_index].
a;
946 coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].
b;
947 coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].
c;
950 xstarts[dest_index] = row->baseline.xcoords[src_index];
// Replace the row's baseline with the spline assembled above.
953 row->baseline =
QSPLINE(dest_index, &xstarts[0], &coeffs[0]);
@ W_DONT_CHOP
fixed pitch chopped
@ W_REP_CHAR
repeated character
@ W_FUZZY_NON
fuzzy nonspace
BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > WordGrid
void tprintf(const char *format,...)
double textord_excess_blobsize
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob)
bool textord_test_landscape
double textord_min_linesize
ScrollView * create_to_win(ICOORD page_tr)
double textord_width_limit
void assign_blobs_to_blocks2(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction)
void extract_edges(Image pix, BLOCK *block)
GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > WordSearch
const TBOX & bounding_box() const
int32_t enclosed_area() const
void set_horz_stroke_width(float width)
void set_vert_stroke_width(float width)
BLOBNBOX_LIST small_blobs
void plot_graded_blobs(ScrollView *to_win)
BLOBNBOX_LIST large_blobs
BLOBNBOX_LIST noise_blobs
static const double kXHeightCapRatio
static const double kXHeightFraction
static const double kDescenderFraction
static const double kAscenderFraction
FCOORD re_rotation() const
PDBLK pdblk
Page Description Block.
int32_t x_height() const
return xheight
float base_line(float xpos) const
POLY_BLOCK * poly_block() const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
float angle() const
find angle
TDimension height() const
TDimension bottom() const
void add(int32_t value, int32_t count)
int32_t get_total() const
double ile(double frac) const
static C_BLOB * FakeBlob(const TBOX &box)
TBOX bounding_box() const
static C_BLOB * deep_copy(const C_BLOB *src)
C_BLOB_LIST * cblob_list()
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
std::vector< BLOCK * > blocks