tesseract v5.3.3.20231005
tesseract::ColumnFinder Class Reference

#include <colfind.h>

Inheritance diagram for tesseract::ColumnFinder:
tesseract::ColumnFinder → tesseract::TabFind → tesseract::AlignedBlob → tesseract::BlobGrid → tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > → tesseract::GridBase

Public Member Functions

 ColumnFinder (int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
 
 ~ColumnFinder () override
 
const DENORM * denorm () const
 
const TextlineProjection * projection () const
 
void set_cjk_script (bool is_cjk)
 
void SetupAndFilterNoise (PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block)
 
bool IsVerticallyAlignedText (double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
 
void CorrectOrientation (TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
 
int FindBlocks (PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
 
void GetDeskewVectors (FCOORD *deskew, FCOORD *reskew)
 
void SetEquationDetect (EquationDetectBase *detect)
 
- Public Member Functions inherited from tesseract::TabFind
 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Image pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::TabFind
static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 
- Protected Member Functions inherited from tesseract::TabFind
TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 
- Static Protected Member Functions inherited from tesseract::TabFind
static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 
- Protected Attributes inherited from tesseract::TabFind
ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 50 of file colfind.h.

Constructor & Destructor Documentation

◆ ColumnFinder()

tesseract::ColumnFinder::ColumnFinder ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
bool  cjk_script,
double  aligned_gap_fraction,
TabVector_LIST *  vlines,
TabVector_LIST *  hlines,
int  vertical_x,
int  vertical_y 
)

Definition at line 78 of file colfind.cpp.

81 : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, resolution)
82 , cjk_script_(cjk_script)
83 , min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize))
84 , mean_column_gap_(tright.x() - bleft.x())
85 , tabfind_aligned_gap_fraction_(aligned_gap_fraction)
86 , deskew_(0.0f, 0.0f)
87 , reskew_(1.0f, 0.0f)
88 , rotation_(1.0f, 0.0f)
89 , rerotate_(1.0f, 0.0f)
90 , text_rotation_(0.0f, 0.0f)
91 , best_columns_(nullptr)
92 , stroke_width_(nullptr)
93 , part_grid_(gridsize, bleft, tright)
94 , nontext_map_(nullptr)
95 , projection_(resolution)
96 , denorm_(nullptr)
97 , equation_detect_(nullptr) {
98 TabVector_IT h_it(&horizontal_lines_);
99 h_it.add_list_after(hlines);
100}
const double kMinGutterWidthGrid
Definition: colfind.cpp:53
TDimension x() const
access function
Definition: points.h:58
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65

◆ ~ColumnFinder()

tesseract::ColumnFinder::~ColumnFinder ( )
override

Definition at line 102 of file colfind.cpp.

102 {
103 for (auto set : column_sets_) {
104 delete set;
105 }
106 delete[] best_columns_;
107 delete stroke_width_;
108#ifndef GRAPHICS_DISABLED
109 delete input_blobs_win_;
110#endif
111 nontext_map_.destroy();
112 while (denorm_ != nullptr) {
113 DENORM *dead_denorm = denorm_;
114 denorm_ = const_cast<DENORM *>(denorm_->predecessor());
115 delete dead_denorm;
116 }
117
118 // The ColPartitions are destroyed automatically, but any boxes in
119 // the noise_parts_ list are owned and need to be deleted explicitly.
120 ColPartition_IT part_it(&noise_parts_);
121 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
122 ColPartition *part = part_it.data();
123 part->DeleteBoxes();
124 }
125 // Likewise any boxes in the good_parts_ list need to be deleted.
126 // These are just the image parts. Text parts have already given their
127 // boxes on to the TO_BLOCK, and have empty lists.
128 part_it.set_to_list(&good_parts_);
129 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
130 ColPartition *part = part_it.data();
131 part->DeleteBoxes();
132 }
133 // Also, any blobs on the image_bblobs_ list need to have their cblobs
134 // deleted. This only happens if there has been an early return from
135 // FindColumns, as in a normal return, the blobs go into the grid and
136 // end up in noise_parts_, good_parts_ or the output blocks.
137 BLOBNBOX_IT bb_it(&image_bblobs_);
138 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
139 BLOBNBOX *bblob = bb_it.data();
140 delete bblob->cblob();
141 }
142}
void destroy()
Definition: image.cpp:32
const DENORM * predecessor() const
Definition: normalis.h:255

Member Function Documentation

◆ CorrectOrientation()

void tesseract::ColumnFinder::CorrectOrientation ( TO_BLOCK * block,
bool  vertical_text_lines,
int  recognition_rotation 
)

Definition at line 202 of file colfind.cpp.

203 {
204 const FCOORD anticlockwise90(0.0f, 1.0f);
205 const FCOORD clockwise90(0.0f, -1.0f);
206 const FCOORD rotation180(-1.0f, 0.0f);
207 const FCOORD norotation(1.0f, 0.0f);
208
209 text_rotation_ = norotation;
210 // Rotate the page to make the text upright, as implied by
211 // recognition_rotation.
212 rotation_ = norotation;
213 if (recognition_rotation == 1) {
214 rotation_ = anticlockwise90;
215 } else if (recognition_rotation == 2) {
216 rotation_ = rotation180;
217 } else if (recognition_rotation == 3) {
218 rotation_ = clockwise90;
219 }
220 // We infer text writing direction to be vertical if there are several
221 // vertical text lines detected, and horizontal if not. But if the page
222 // orientation was determined to be 90 or 270 degrees, the true writing
223 // direction is the opposite of what we inferred.
224 if (recognition_rotation & 1) {
225 vertical_text_lines = !vertical_text_lines;
226 }
227 // If we still believe the writing direction is vertical, we use the
228 // convention of rotating the page ccw 90 degrees to make the text lines
229 // horizontal, and mark the blobs for rotation cw 90 degrees for
230 // classification so that the text order is correct after recognition.
231 if (vertical_text_lines) {
232 rotation_.rotate(anticlockwise90);
233 text_rotation_.rotate(clockwise90);
234 }
235 // Set rerotate_ to the inverse of rotation_.
236 rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
237 if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) {
238 // Rotate all the blobs and tab vectors.
239 RotateBlobList(rotation_, &block->large_blobs);
240 RotateBlobList(rotation_, &block->blobs);
241 RotateBlobList(rotation_, &block->small_blobs);
242 RotateBlobList(rotation_, &block->noise_blobs);
243 TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, &min_gutter_width_);
244 part_grid_.Init(gridsize(), bleft(), tright());
245 // Reset all blobs to initial state and filter by size.
246 // Since they have rotated, the list they belong on could have changed.
247 block->ReSetAndReFilterBlobs();
248 SetBlockRuleEdges(block);
249 stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
250 }
251 if (textord_debug_tabfind) {
252 tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", vertical_text_lines,
253 recognition_rotation, rotation_.x(), rotation_.y(), text_rotation_.x(),
254 text_rotation_.y());
255 }
256 // Setup the denormalization.
257 ASSERT_HOST(denorm_ == nullptr);
258 denorm_ = new DENORM;
259 denorm_->SetupNormalization(nullptr, &rotation_, nullptr, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
260}
#define ASSERT_HOST(x)
Definition: errcode.h:54
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int textord_debug_tabfind
Definition: alignedblob.cpp:29
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:99
void rotate(const FCOORD vec)
Definition: points.h:712
float y() const
Definition: points.h:209
float x() const
Definition: points.h:206
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1278
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1323
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:128

◆ denorm()

const DENORM * tesseract::ColumnFinder::denorm ( ) const
inline

Definition at line 67 of file colfind.h.

67 {
68 return denorm_;
69 }

◆ FindBlocks()

int tesseract::ColumnFinder::FindBlocks ( PageSegMode  pageseg_mode,
Image  scaled_color,
int  scaled_factor,
TO_BLOCK * block,
Image  photo_mask_pix,
Image  thresholds_pix,
Image  grey_pix,
DebugPixa * pixa_debug,
BLOCK_LIST *  blocks,
BLOBNBOX_LIST *  diacritic_blobs,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 286 of file colfind.cpp.

289 {
290 photo_mask_pix |= nontext_map_;
291 stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
292 stroke_width_->RemoveLineResidue(&big_parts_);
293 FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_, input_block);
294 SetBlockRuleEdges(input_block);
295 stroke_width_->GradeBlobsIntoPartitions(pageseg_mode, rerotate_, input_block, nontext_map_,
296 denorm_, cjk_script_, &projection_, diacritic_blobs,
297 &part_grid_, &big_parts_);
298 if (!PSM_SPARSE(pageseg_mode)) {
299 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this,
300 pixa_debug, &part_grid_, &big_parts_);
301 ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, photo_mask_pix);
302 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this,
303 pixa_debug, &part_grid_, &big_parts_);
304 }
305 part_grid_.ReTypeBlobs(&image_bblobs_);
306 TidyBlobs(input_block);
307 Reset();
308 // TODO(rays) need to properly handle big_parts_.
309 ColPartition_IT p_it(&big_parts_);
310 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
311 p_it.data()->DisownBoxesNoAssert();
312 }
313 big_parts_.clear();
314 delete stroke_width_;
315 stroke_width_ = nullptr;
316 // Compute the edge offsets whether or not there is a grey_pix. It is done
317 // here as the c_blobs haven't been touched by rotation or anything yet,
318 // so no denorm is required, yet the text has been separated from image, so
319 // no time is wasted running it on image blobs.
320 input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
321
322 // A note about handling right-to-left scripts (Hebrew/Arabic):
323 // The columns must be reversed and come out in right-to-left instead of
324 // the normal left-to-right order. Because the left-to-right ordering
325 // is implicit in many data structures, it is simpler to fool the algorithms
326 // into thinking they are dealing with left-to-right text.
327 // To do this, we reflect the needed data in the y-axis and then reflect
328 // the blocks back after they have been created. This is a temporary
329 // arrangement that is confined to this function only, so the reflection
330 // is completely invisible in the output blocks.
331 // The only objects reflected are:
332 // The vertical separator lines that have already been found;
333 // The bounding boxes of all BLOBNBOXES on all lists on the input_block
334 // plus the image_bblobs. The outlines are not touched, since they are
335 // not looked at.
336 bool input_is_rtl = input_block->block->right_to_left();
337 if (input_is_rtl) {
338 // Reflect the vertical separator lines (member of TabFind).
339 ReflectInYAxis();
340 // Reflect the blob boxes.
341 ReflectForRtl(input_block, &image_bblobs_);
342 part_grid_.ReflectInYAxis();
343 }
344
345 if (!PSM_SPARSE(pageseg_mode)) {
346 if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
347 // No tab stops needed. Just the grid that FindTabVectors makes.
348 DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
349 } else {
350 SetBlockRuleEdges(input_block);
351 // Find the tab stops, estimate skew, and deskew the tabs, blobs and
352 // part_grid_.
353 FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, min_gutter_width_,
354 tabfind_aligned_gap_fraction_, &part_grid_, &deskew_, &reskew_);
355 // Add the deskew to the denorm_.
356 auto *new_denorm = new DENORM;
357 new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f,
358 0.0f);
359 denorm_ = new_denorm;
360 }
361 SetBlockRuleEdges(input_block);
362 part_grid_.SetTabStops(this);
363
364 // Make the column_sets_.
365 if (!MakeColumns(false)) {
366 tprintf("Empty page!!\n");
367 part_grid_.DeleteParts();
368 return 0; // This is an empty page.
369 }
370
371 // Refill the grid using rectangular spreading, and get the benefit
372 // of the completed tab vectors marking the rule edges of each blob.
373 Clear();
374#ifndef GRAPHICS_DISABLED
375 if (textord_tabfind_show_reject_blobs) {
376 ScrollView *rej_win = MakeWindow(500, 300, "Rejected blobs");
377 input_block->plot_graded_blobs(rej_win);
378 }
379#endif // !GRAPHICS_DISABLED
380 InsertBlobsToGrid(false, false, &image_bblobs_, this);
381 InsertBlobsToGrid(true, true, &input_block->blobs, this);
382
383 part_grid_.GridFindMargins(best_columns_);
384 // Split and merge the partitions by looking at local neighbours.
385 GridSplitPartitions();
386 // Resolve unknown partitions by adding to an existing partition, fixing
387 // the type, or declaring them noise.
388 part_grid_.GridFindMargins(best_columns_);
389 GridMergePartitions();
390 // Insert any unused noise blobs that are close enough to an appropriate
391 // partition.
392 InsertRemainingNoise(input_block);
393 // Add horizontal line separators as partitions.
394 GridInsertHLinePartitions();
395 GridInsertVLinePartitions();
396 // Recompute margins based on a local neighbourhood search.
397 part_grid_.GridFindMargins(best_columns_);
398 SetPartitionTypes();
399 }
400#ifndef GRAPHICS_DISABLED
401 if (textord_tabfind_show_initial_partitions) {
402 ScrollView *part_win = MakeWindow(100, 300, "InitialPartitions");
403 part_grid_.DisplayBoxes(part_win);
404 DisplayTabVectors(part_win);
405 }
406#endif
407 if (!PSM_SPARSE(pageseg_mode)) {
408#ifndef DISABLED_LEGACY_ENGINE
409 if (equation_detect_) {
410 equation_detect_->FindEquationParts(&part_grid_, best_columns_);
411 }
412#endif
413 if (textord_tabfind_find_tables) {
414 TableFinder table_finder;
415 table_finder.Init(gridsize(), bleft(), tright());
416 table_finder.set_resolution(resolution_);
417 table_finder.set_left_to_right_language(!input_block->block->right_to_left());
418 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
419 // insert dot-like noise into period_grid_
420 table_finder.InsertCleanPartitions(&part_grid_, input_block);
421 // Get Table Regions
422 table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
423 }
424 GridRemoveUnderlinePartitions();
425 part_grid_.DeleteUnknownParts(input_block);
426
427 // Build the partitions into chains that belong in the same block and
428 // refine into one-to-one links, then smooth the types within each chain.
429 part_grid_.FindPartitionPartners();
430 part_grid_.FindFigureCaptions();
431 part_grid_.RefinePartitionPartners(true);
432 SmoothPartnerRuns();
433
434#ifndef GRAPHICS_DISABLED
435 if (textord_tabfind_show_partitions) {
436 ScrollView *window = MakeWindow(400, 300, "Partitions");
437 if (window != nullptr) {
438 part_grid_.DisplayBoxes(window);
439 if (!textord_debug_printable) {
440 DisplayTabVectors(window);
441 }
442 if (window != nullptr && textord_tabfind_show_partitions > 1) {
443 window->AwaitEvent(SVET_DESTROY);
444 }
445 }
446 }
447#endif // !GRAPHICS_DISABLED
448 part_grid_.AssertNoDuplicates();
449 }
450 // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
451 // and ownership of the BLOBNBOXes moves to the ColPartitions.
452 // (They were previously owned by the block or the image_bblobs list.)
453 ReleaseBlobsAndCleanupUnused(input_block);
454 // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
455 // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
456 // from the ColPartitions to the output TO_BLOCK. In non-text, the
457 // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
458 if (PSM_SPARSE(pageseg_mode)) {
459 part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks);
460 } else {
461 TransformToBlocks(blocks, to_blocks);
462 }
463 if (textord_debug_tabfind) {
464 tprintf("Found %d blocks, %d to_blocks\n", blocks->length(), to_blocks->length());
465 }
466
467#ifndef GRAPHICS_DISABLED
468 if (textord_tabfind_show_blocks) {
469 DisplayBlocks(blocks);
470 }
471#endif
472 RotateAndReskewBlocks(input_is_rtl, to_blocks);
473 int result = 0;
474#ifndef GRAPHICS_DISABLED
475 if (blocks_win_ != nullptr) {
476 bool waiting = false;
477 do {
478 waiting = false;
479 auto event = blocks_win_->AwaitEvent(SVET_ANY);
480 if (event->type == SVET_INPUT && event->parameter != nullptr) {
481 if (*event->parameter == 'd') {
482 result = -1;
483 } else {
484 blocks->clear();
485 }
486 } else if (event->type == SVET_DESTROY) {
487 blocks_win_ = nullptr;
488 } else {
489 waiting = true;
490 }
491 } while (waiting);
492 }
493#endif // !GRAPHICS_DISABLED
494 return result;
495}
@ SVET_DESTROY
Definition: scrollview.h:54
@ SVET_INPUT
Definition: scrollview.h:58
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:192
bool textord_debug_printable
Definition: alignedblob.cpp:43
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:195
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:649
void AssertNoDuplicates()
Definition: bbgrid.h:674
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:633
void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void SetTabStops(TabFind *tabgrid)
void RefinePartitionPartners(bool get_desperate)
void GridFindMargins(ColPartitionSet **best_columns)
void DeleteUnknownParts(TO_BLOCK *block)
void ReTypeBlobs(BLOBNBOX_LIST *im_blobs)
virtual int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns)=0
static void FindImagePartitions(Image image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: imagefind.cpp:1141
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Image image_mask)
Definition: imagefind.cpp:1088
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:89
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:449
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:346
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:422
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:462
WidthCallback WidthCB()
Definition: tabfind.h:152
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:512
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:495
std::unique_ptr< SVEvent > AwaitEvent(SVEventType type)
Definition: scrollview.cpp:432

◆ GetDeskewVectors()

void tesseract::ColumnFinder::GetDeskewVectors ( FCOORD * deskew,
FCOORD * reskew 
)

Definition at line 498 of file colfind.cpp.

498 {
499 *reskew = reskew_;
500 *deskew = reskew_;
501 deskew->set_y(-deskew->y());
502}
void set_y(float yin)
rewrite function
Definition: points.h:217

◆ IsVerticallyAlignedText()

bool tesseract::ColumnFinder::IsVerticallyAlignedText ( double  find_vertical_text_ratio,
TO_BLOCK * block,
BLOBNBOX_CLIST *  osd_blobs 
)

Definition at line 186 of file colfind.cpp.

187 {
188 return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, block, osd_blobs);
189}
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)

◆ projection()

const TextlineProjection * tesseract::ColumnFinder::projection ( ) const
inline

Definition at line 70 of file colfind.h.

70 {
71 return &projection_;
72 }

◆ set_cjk_script()

void tesseract::ColumnFinder::set_cjk_script ( bool  is_cjk)
inline

Definition at line 73 of file colfind.h.

73 {
74 cjk_script_ = is_cjk;
75 }

◆ SetEquationDetect()

void tesseract::ColumnFinder::SetEquationDetect ( EquationDetectBase * detect)

Definition at line 505 of file colfind.cpp.

505 {
506 equation_detect_ = detect;
507}

◆ SetupAndFilterNoise()

void tesseract::ColumnFinder::SetupAndFilterNoise ( PageSegMode  pageseg_mode,
Image  photo_mask_pix,
TO_BLOCK * input_block 
)

Definition at line 151 of file colfind.cpp.

152 {
153 part_grid_.Init(gridsize(), bleft(), tright());
154 delete stroke_width_;
155 stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
156 min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
157 input_block->ReSetAndReFilterBlobs();
158#ifndef GRAPHICS_DISABLED
159 if (textord_tabfind_show_blocks) {
160 input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
161 input_block->plot_graded_blobs(input_blobs_win_);
162 }
163#endif // !GRAPHICS_DISABLED
164 SetBlockRuleEdges(input_block);
165 nontext_map_.destroy();
166 // Run a preliminary strokewidth neighbour detection on the medium blobs.
167 stroke_width_->SetNeighboursOnMediumBlobs(input_block);
168 CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
169 // Remove obvious noise and make the initial non-text map.
170 nontext_map_ =
171 nontext_detect.ComputeNonTextMask(textord_debug_tabfind, photo_mask_pix, input_block);
172 stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, input_block);
173 // Clear the strokewidth grid ready for rotation or leader finding.
174 stroke_width_->Clear();
175}
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)

The documentation for this class was generated from the following files: