tesseract v5.3.3.20231005
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Image pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines and tabstop boundaries (when available), so, as the holder of the list of TabVectors, this class provides the search functions.

Definition at line 52 of file tabfind.h.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 65 of file tabfind.cpp.

67 : AlignedBlob(gridsize, bleft, tright)
68 , resolution_(resolution)
69 , image_origin_(0, tright.y() - 1)
70 , v_it_(&vectors_) {
71 width_cb_ = nullptr;
72 v_it_.add_list_after(vlines);
73 SetVerticalSkewAndParallelize(vertical_x, vertical_y);
74 using namespace std::placeholders; // for _1
75 width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
76}
TDimension y() const
access function
Definition: points.h:62
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
bool CommonWidth(int width)
Definition: tabfind.cpp:393
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:346

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
override default

Member Function Documentation

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 393 of file tabfind.cpp.

393 {
394 width /= kColumnWidthFactor;
395 ICOORDELT_IT it(&column_widths_);
396 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
397 ICOORDELT *w = it.data();
398 if (w->x() - 1 <= width && width <= w->y() + 1) {
399 return true;
400 }
401 }
402 return false;
403}
const double y
const int kColumnWidthFactor
Definition: tabfind.h:41

◆ dead_vectors()

TabVector_LIST * tesseract::TabFind::dead_vectors ( )
inline protected

Definition at line 170 of file tabfind.h.

170 {
171 return &dead_vectors_;
172 }

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 407 of file tabfind.cpp.

407 {
408 return size1 > size2 * 2 || size2 > size1 * 2;
409}

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 495 of file tabfind.cpp.

495 {
496 // For every vector, display it.
497 TabVector_IT it(&vectors_);
498 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
499 TabVector *vector = it.data();
500 vector->Display(tab_win);
501 }
502 tab_win->Update();
503 return tab_win;
504}

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Definition at line 449 of file tabfind.cpp.

450 {
451 InsertBlobsToGrid(false, false, image_blobs, this);
452 InsertBlobsToGrid(true, false, &block->blobs, this);
453 deskew->set_x(1.0f);
454 deskew->set_y(0.0f);
455 reskew->set_x(1.0f);
456 reskew->set_y(0.0f);
457}
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:89

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK *  block 
)
protected

Definition at line 512 of file tabfind.cpp.

513 {
514#ifndef GRAPHICS_DISABLED
515 if (textord_tabfind_show_initialtabs) {
516 ScrollView *line_win = MakeWindow(0, 0, "VerticalLines");
517 line_win = DisplayTabVectors(line_win);
518 }
519#endif
520 // Prepare the grid.
521 if (image_blobs != nullptr) {
522 InsertBlobsToGrid(true, false, image_blobs, this);
523 }
524 InsertBlobsToGrid(true, false, &block->blobs, this);
525 ScrollView *initial_win = FindTabBoxes(min_gutter_width, tabfind_aligned_gap_fraction);
526 FindAllTabVectors(min_gutter_width);
527
528 TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
529 SortVectors();
530 EvaluateTabs();
531#ifndef GRAPHICS_DISABLED
532 if (textord_tabfind_show_initialtabs && initial_win != nullptr) {
533 initial_win = DisplayTabVectors(initial_win);
534 }
535#endif
536 MarkVerticalText();
537 return initial_win;
538}
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:633
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:345
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:495
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:352

◆ FindTabVectors()

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of parameter textord_tabfind_aligned_gap_fraction.

Definition at line 422 of file tabfind.cpp.

424 {
425 ScrollView *tab_win =
426 FindInitialTabVectors(image_blobs, min_gutter_width, tabfind_aligned_gap_fraction, block);
427 ComputeColumnWidths(tab_win, part_grid);
429 SortVectors();
430 CleanupTabs();
431 if (!Deskew(hlines, image_blobs, block, deskew, reskew)) {
432 return false; // Skew angle is too large.
433 }
434 part_grid->Deskew(*deskew);
435 ApplyTabConstraints();
436#ifndef GRAPHICS_DISABLED
437 if (textord_tabfind_show_finaltabs) {
438 tab_win = MakeWindow(640, 50, "FinalTabs");
439 DisplayBoxes(tab_win);
440 DisplayTabs("FinalTabs", tab_win);
441 tab_win = DisplayTabVectors(tab_win);
442 }
443#endif // !GRAPHICS_DISABLED
444 return true;
445}
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:512

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 156 of file tabfind.cpp.

157 {
158 bool right_to_left = v.IsLeftTab();
159 int bottom_x = v.XAtY(bottom_y);
160 int top_x = v.XAtY(top_y);
161 int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
162 BlobGridSearch sidesearch(this);
163 sidesearch.StartSideSearch(start_x, bottom_y, top_y);
164 int min_gap = max_gutter_width;
165 *required_shift = 0;
166 BLOBNBOX *blob = nullptr;
167 while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
168 const TBOX &box = blob->bounding_box();
169 if (box.bottom() >= top_y || box.top() <= bottom_y) {
170 continue; // Doesn't overlap enough.
171 }
172 if (box.height() >= gridsize() * 2 && box.height() > box.width() * kLineFragmentAspectRatio) {
173 // Skip likely separator line residue.
174 continue;
175 }
176 if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type())) {
177 continue; // Skip non-text if required.
178 }
179 int mid_y = (box.bottom() + box.top()) / 2;
180 // We use the x at the mid-y so that the required_shift guarantees
181 // to clear all the blobs on the tab-stop. If we use the min/max
182 // of x at top/bottom of the blob, then exactness would be required,
183 // which is not a good thing.
184 int tab_x = v.XAtY(mid_y);
185 int gap;
186 if (right_to_left) {
187 gap = tab_x - box.right();
188 if (gap < 0 && box.left() - tab_x < *required_shift) {
189 *required_shift = box.left() - tab_x;
190 }
191 } else {
192 gap = box.left() - tab_x;
193 if (gap < 0 && box.right() - tab_x > *required_shift) {
194 *required_shift = box.right() - tab_x;
195 }
196 }
197 if (gap > 0 && gap < min_gap) {
198 min_gap = gap;
199 }
200 }
201 // Result may be negative, in which case, this is a really bad tabstop.
202 return min_gap - abs(*required_shift);
203}
@ TBOX
STL namespace.
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:30
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:447

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 206 of file tabfind.cpp.

207 {
208 const TBOX &box = bbox->bounding_box();
209 // The gutter and internal sides of the box.
210 int gutter_x = left ? box.left() : box.right();
211 int internal_x = left ? box.right() : box.left();
212 // On ragged edges, the gutter side of the box is away from the tabstop.
213 int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
214 *gutter_width = max_gutter;
215 // If the box is away from the tabstop, we need to increase
216 // the allowed gutter width.
217 if (tab_gap > 0) {
218 *gutter_width += tab_gap;
219 }
220 bool debug = WithinTestRegion(2, box.left(), box.bottom());
221 if (debug) {
222 tprintf("Looking in gutter\n");
223 }
224 // Find the nearest blob on the outside of the column.
225 BLOBNBOX *gutter_bbox = AdjacentBlob(bbox, left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
226 *gutter_width, box.top(), box.bottom());
227 if (gutter_bbox != nullptr) {
228 const TBOX &gutter_box = gutter_bbox->bounding_box();
229 *gutter_width = left ? tab_x - gutter_box.right() : gutter_box.left() - tab_x;
230 }
231 if (*gutter_width >= max_gutter) {
232 // If there is no box because a tab was in the way, get the tab coord.
233 TBOX gutter_box(box);
234 if (left) {
235 gutter_box.set_left(tab_x - max_gutter - 1);
236 gutter_box.set_right(tab_x - max_gutter);
237 int tab_gutter = RightEdgeForBox(gutter_box, true, false);
238 if (tab_gutter < tab_x - 1) {
239 *gutter_width = tab_x - tab_gutter;
240 }
241 } else {
242 gutter_box.set_left(tab_x + max_gutter);
243 gutter_box.set_right(tab_x + max_gutter + 1);
244 int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
245 if (tab_gutter > tab_x + 1) {
246 *gutter_width = tab_gutter - tab_x;
247 }
248 }
249 }
250 if (*gutter_width > max_gutter) {
251 *gutter_width = max_gutter;
252 }
253 // Now look for a neighbour on the inside.
254 if (debug) {
255 tprintf("Looking for neighbour\n");
256 }
257 BLOBNBOX *neighbour = AdjacentBlob(bbox, !left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
258 *gutter_width, box.top(), box.bottom());
259 int neighbour_edge = left ? RightEdgeForBox(box, true, false) : LeftEdgeForBox(box, true, false);
260 if (neighbour != nullptr) {
261 const TBOX &n_box = neighbour->bounding_box();
262 if (debug) {
263 tprintf("Found neighbour:");
264 n_box.print();
265 }
266 if (left && n_box.left() < neighbour_edge) {
267 neighbour_edge = n_box.left();
268 } else if (!left && n_box.right() > neighbour_edge) {
269 neighbour_edge = n_box.right();
270 }
271 }
272 *neighbour_gap = left ? neighbour_edge - internal_x : internal_x - neighbour_edge;
273}
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116
static bool WithinTestRegion(int detail_level, int x, int y)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:284
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:279

◆ image_origin()

const ICOORD & tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 159 of file tabfind.h.

159 {
160 return image_origin_;
161 }

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 113 of file tabfind.cpp.

114 {
115 TBOX box = blob->bounding_box();
116 blob->set_left_rule(LeftEdgeForBox(box, false, false));
117 blob->set_right_rule(RightEdgeForBox(box, false, false));
118 blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
119 blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
120 if (blob->joined_to_prev()) {
121 return false;
122 }
123 grid->InsertBBox(h_spread, v_spread, blob);
124 return true;
125}
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 89 of file tabfind.cpp.

90 {
91 BLOBNBOX_IT blob_it(blobs);
92 int b_count = 0;
93 int reject_count = 0;
94 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
95 BLOBNBOX *blob = blob_it.data();
96 // if (InsertBlob(true, true, blob, grid)) {
97 if (InsertBlob(h_spread, v_spread, blob, grid)) {
98 ++b_count;
99 } else {
100 ++reject_count;
101 }
102 }
103 if (textord_debug_tabfind) {
104 tprintf("Inserted %d blobs into grid, %d rejected.\n", b_count, reject_count);
105 }
106}
int textord_debug_tabfind
Definition: alignedblob.cpp:29
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:113

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 284 of file tabfind.cpp.

284 {
285 TabVector *v = LeftTabForBox(box, crossing, extended);
286 return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
287}
TDimension x() const
access function
Definition: points.h:58
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:347

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 347 of file tabfind.cpp.

347 {
348 if (v_it_.empty()) {
349 return nullptr;
350 }
351 int top_y = box.top();
352 int bottom_y = box.bottom();
353 int mid_y = (top_y + bottom_y) / 2;
354 int left = crossing ? (box.left() + box.right()) / 2 : box.left();
355 int min_key, max_key;
356 SetupTabSearch(left, mid_y, &min_key, &max_key);
357 // Position the iterator at the last TabVector with sort_key <= max_key.
358 while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key) {
359 v_it_.forward();
360 }
361 while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362 v_it_.backward();
363 }
364 // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365 TabVector *best_v = nullptr;
366 int best_x = -1;
367 int key_limit = -1;
368 do {
369 TabVector *v = v_it_.data();
370 int x = v->XAtY(mid_y);
371 if (x <= left && (v->VOverlap(top_y, bottom_y) > 0 ||
372 (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
373 if (best_v == nullptr || x > best_x) {
374 best_v = v;
375 best_x = x;
376 // We can guarantee that no better vector can be found if the
377 // sort key is less than that of the best by max_key - min_key.
378 key_limit = v->sort_key() - (max_key - min_key);
379 }
380 }
381 // Break when the search is done to avoid wrapping the iterator and
382 // thereby potentially slowing the next search.
383 if (v_it_.at_first() || (best_v != nullptr && v->sort_key() < key_limit)) {
384 break; // Prevent restarting list for next call.
385 }
386 v_it_.backward();
387 } while (!v_it_.at_last());
388 return best_v;
389}
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:486

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1380 of file tabfind.cpp.

1380 {
1381 TabVector_LIST temp_list;
1382 TabVector_IT temp_it(&temp_list);
1383 v_it_.move_to_first();
1384 // The TabVector list only contains vertical lines, but they need to be
1385 // reflected and the list needs to be reversed, so they are still in
1386 // sort_key order.
1387 while (!v_it_.empty()) {
1388 TabVector *v = v_it_.extract();
1389 v_it_.forward();
1390 v->ReflectInYAxis();
1391 temp_it.add_before_then_move(v);
1392 }
1393 v_it_.add_list_after(&temp_list);
1394 v_it_.move_to_first();
1395 // Reset this grid with reflected bounding boxes.
1396 TBOX grid_box(bleft(), tright());
1397 int tmp = grid_box.left();
1398 grid_box.set_left(-grid_box.right());
1399 grid_box.set_right(-tmp);
1400 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1401}
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

Definition at line 1368 of file tabfind.cpp.

1368 {
1369 v_it_.move_to_first();
1370 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1371 if (!v_it_.data()->IsSeparator()) {
1372 delete v_it_.extract();
1373 }
1374 }
1375 Clear();
1376}

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1323 of file tabfind.cpp.

1324 {
1325 // Rotate the horizontal and vertical vectors and swap them over.
1326 // Only the separators are kept and rotated; other tabs are used
1327 // to estimate the gutter width then thrown away.
1328 TabVector_LIST ex_verticals;
1329 TabVector_IT ex_v_it(&ex_verticals);
1330 TabVector_LIST vlines;
1331 TabVector_IT v_it(&vlines);
1332 while (!v_it_.empty()) {
1333 TabVector *v = v_it_.extract();
1334 if (v->IsSeparator()) {
1335 v->Rotate(rotate);
1336 ex_v_it.add_after_then_move(v);
1337 } else {
1338 v_it.add_after_then_move(v);
1339 }
1340 v_it_.forward();
1341 }
1342
1343 // Adjust the min gutter width for better tabbox selection
1344 // in 2nd call to FindInitialTabVectors().
1345 int median_gutter = FindMedianGutterWidth(&vlines);
1346 if (median_gutter > *min_gutter_width) {
1347 *min_gutter_width = median_gutter;
1348 }
1349
1350 TabVector_IT h_it(horizontal_lines);
1351 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1352 TabVector *h = h_it.data();
1353 h->Rotate(rotate);
1354 }
1355 v_it_.add_list_after(horizontal_lines);
1356 v_it_.move_to_first();
1357 h_it.set_to_list(horizontal_lines);
1358 h_it.add_list_after(&ex_verticals);
1359
1360 // Rebuild the grid to the new size.
1361 TBOX grid_box(bleft(), tright());
1362 grid_box.rotate_large(rotate);
1363 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1364}

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 279 of file tabfind.cpp.

279 {
280 TabVector *v = RightTabForBox(box, crossing, extended);
281 return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
282}
ICOORD tright_
Definition: bbgrid.h:91
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:302

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return nullptr. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 302 of file tabfind.cpp.

302 {
303 if (v_it_.empty()) {
304 return nullptr;
305 }
306 int top_y = box.top();
307 int bottom_y = box.bottom();
308 int mid_y = (top_y + bottom_y) / 2;
309 int right = crossing ? (box.left() + box.right()) / 2 : box.right();
310 int min_key, max_key;
311 SetupTabSearch(right, mid_y, &min_key, &max_key);
312 // Position the iterator at the first TabVector with sort_key >= min_key.
313 while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key) {
314 v_it_.backward();
315 }
316 while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key) {
317 v_it_.forward();
318 }
319 // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320 TabVector *best_v = nullptr;
321 int best_x = -1;
322 int key_limit = -1;
323 do {
324 TabVector *v = v_it_.data();
325 int x = v->XAtY(mid_y);
326 if (x >= right && (v->VOverlap(top_y, bottom_y) > 0 ||
327 (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
328 if (best_v == nullptr || x < best_x) {
329 best_v = v;
330 best_x = x;
331 // We can guarantee that no better vector can be found if the
332 // sort key exceeds that of the best by max_key - min_key.
333 key_limit = v->sort_key() + max_key - min_key;
334 }
335 }
336 // Break when the search is done to avoid wrapping the iterator and
337 // thereby potentially slowing the next search.
338 if (v_it_.at_last() || (best_v != nullptr && v->sort_key() > key_limit)) {
339 break; // Prevent restarting list for next call.
340 }
341 v_it_.forward();
342 } while (!v_it_.at_first());
343 return best_v;
344}

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD &  rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1278 of file tabfind.cpp.

1278 {
1279 BLOBNBOX_IT it(blobs);
1280 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1281 it.data()->rotate_box(rotation);
1282 }
1283}

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 137 of file tabfind.cpp.

137 {
138 BLOBNBOX_IT blob_it(blobs);
139 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
140 BLOBNBOX *blob = blob_it.data();
141 TBOX box = blob->bounding_box();
142 blob->set_left_rule(LeftEdgeForBox(box, false, false));
143 blob->set_right_rule(RightEdgeForBox(box, false, false));
144 blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
145 blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
146 }
147}

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK *  block)

Definition at line 128 of file tabfind.cpp.

128 {
129 SetBlobRuleEdges(&block->blobs);
130 SetBlobRuleEdges(&block->small_blobs);
131 SetBlobRuleEdges(&block->noise_blobs);
132 SetBlobRuleEdges(&block->large_blobs);
133}
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:137

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 486 of file tabfind.cpp.

486 {
487 int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
488 int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
489 *min_key = std::min(key1, key2);
490 *max_key = std::max(key1, key2);
491}
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK *  block)
protected

Definition at line 462 of file tabfind.cpp.

462 {
463 BLOBNBOX_IT large_it = &block->large_blobs;
464 BLOBNBOX_IT blob_it = &block->blobs;
465 int b_count = 0;
466 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
467 BLOBNBOX *large_blob = large_it.data();
468 if (large_blob->owner() != nullptr) {
469 blob_it.add_to_end(large_it.extract());
470 ++b_count;
471 }
472 }
473 if (textord_debug_tabfind) {
474 tprintf("Moved %d large blobs to normal list\n", b_count);
475#ifndef GRAPHICS_DISABLED
476 ScrollView *rej_win = MakeWindow(500, 300, "Image blobs");
477 block->plot_graded_blobs(rej_win);
478 block->plot_noise_blobs(rej_win);
479 rej_win->Update();
480#endif // !GRAPHICS_DISABLED
481 }
482 block->DeleteUnownedNoise();
483}

◆ vectors()

TabVector_LIST * tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 167 of file tabfind.h.

167 {
168 return &vectors_;
169 }

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 413 of file tabfind.cpp.

413 {
414 return size1 > size2 * 5 || size2 > size1 * 5;
415}

◆ WidthCB()

WidthCallback tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 152 of file tabfind.h.

152 {
153 return width_cb_;
154 }

Member Data Documentation

◆ resolution_

int tesseract::TabFind::resolution_
protected

Of source image in pixels per inch.

Definition at line 346 of file tabfind.h.

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

Estimate of true vertical in this image.

Definition at line 345 of file tabfind.h.


The documentation for this class was generated from the following files: