All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
virtual ~TabFind ()
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~AlignedBlob ()
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BlobGrid ()
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 
static void IncrementDebugPix ()
 
static const STRING & textord_debug_pix ()
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 
int resolution_
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines and tabstop boundaries (when available), so, as the holder of the list of TabVectors, this class provides the search functions.

Definition at line 53 of file tabfind.h.

Constructor & Destructor Documentation

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 86 of file tabfind.cpp.

89  : AlignedBlob(gridsize, bleft, tright),
90  resolution_(resolution),
91  image_origin_(0, tright.y() - 1) {
92  width_cb_ = NULL;
93  v_it_.set_to_list(&vectors_);
94  v_it_.add_list_after(vlines);
95  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
96  width_cb_ = NewPermanentTessCallback(this, &TabFind::CommonWidth);
97 }
bool CommonWidth(int width)
Definition: tabfind.cpp:416
inT16 y() const
access_function
Definition: points.h:56
int gridsize() const
Definition: bbgrid.h:63
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
#define NULL
Definition: host.h:144
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
tesseract::TabFind::~TabFind ( )
virtual

Definition at line 99 of file tabfind.cpp.

99  {
100  if (width_cb_ != NULL)
101  delete width_cb_;
102 }
#define NULL
Definition: host.h:144

Member Function Documentation

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 416 of file tabfind.cpp.

416  {
417  width /= kColumnWidthFactor;
418  ICOORDELT_IT it(&column_widths_);
419  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
420  ICOORDELT* w = it.data();
421  if (w->x() - 1 <= width && width <= w->y() + 1)
422  return true;
423  }
424  return false;
425 }
inT16 y() const
access_function
Definition: points.h:56
inT16 x() const
access function
Definition: points.h:52
const int kColumnWidthFactor
Definition: tabfind.h:42
TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inlineprotected

Definition at line 176 of file tabfind.h.

176  {
177  return &dead_vectors_;
178  }
bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 429 of file tabfind.cpp.

429  {
430  return size1 > size2 * 2 || size2 > size1 * 2;
431 }
ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 524 of file tabfind.cpp.

524  {
525 #ifndef GRAPHICS_DISABLED
526  // For every vector, display it.
527  TabVector_IT it(&vectors_);
528  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
529  TabVector* vector = it.data();
530  vector->Display(tab_win);
531  }
532  tab_win->Update();
533 #endif
534  return tab_win;
535 }
static void Update()
Definition: scrollview.cpp:715
void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Definition at line 479 of file tabfind.cpp.

480  {
481  InsertBlobsToGrid(false, false, image_blobs, this);
482  InsertBlobsToGrid(true, false, &block->blobs, this);
483  deskew->set_x(1.0f);
484  deskew->set_y(0.0f);
485  reskew->set_x(1.0f);
486  reskew->set_y(0.0f);
487 }
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:113
void set_x(float xin)
rewrite function
Definition: points.h:216
void set_y(float yin)
rewrite function
Definition: points.h:220
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK *  block 
)
protected

Definition at line 541 of file tabfind.cpp.

544  {
545  if (textord_tabfind_show_initialtabs) {
546  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
547  line_win = DisplayTabVectors(line_win);
548  }
549  // Prepare the grid.
550  if (image_blobs != NULL)
551  InsertBlobsToGrid(true, false, image_blobs, this);
552  InsertBlobsToGrid(true, false, &block->blobs, this);
553  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
554  tabfind_aligned_gap_fraction);
555  FindAllTabVectors(min_gutter_width);
556 
557  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
558  SortVectors();
559  EvaluateTabs();
560  if (textord_tabfind_show_initialtabs && initial_win != NULL)
561  initial_win = DisplayTabVectors(initial_win);
562  MarkVerticalText();
563  return initial_win;
564 }
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:113
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:524
ScrollView * MakeWindow(int x, int y, const char *window_name)
bool textord_tabfind_show_initialtabs
Definition: tabfind.cpp:83
#define NULL
Definition: host.h:144
ICOORD vertical_skew_
Definition: tabfind.h:367
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. The tabfind_aligned_gap_fraction argument should be the value of the parameter textord_tabfind_aligned_gap_fraction.

Definition at line 444 of file tabfind.cpp.

449  {
450  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
451  tabfind_aligned_gap_fraction,
452  block);
453  ComputeColumnWidths(tab_win, part_grid);
454  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
455  SortVectors();
456  CleanupTabs();
457  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
458  return false; // Skew angle is too large.
459  part_grid->Deskew(*deskew);
460  ApplyTabConstraints();
461  #ifndef GRAPHICS_DISABLED
462  if (textord_tabfind_show_finaltabs) {
463  tab_win = MakeWindow(640, 50, "FinalTabs");
464  if (textord_debug_images) {
465  tab_win->Image(AlignedBlob::textord_debug_pix().string(),
466  image_origin_.x(), image_origin_.y());
467  } else {
468  DisplayBoxes(tab_win);
469  DisplayTabs("FinalTabs", tab_win);
470  }
471  tab_win = DisplayTabVectors(tab_win);
472  }
473  #endif // GRAPHICS_DISABLED
474  return true;
475 }
bool textord_debug_images
Definition: alignedblob.cpp:33
static const STRING & textord_debug_pix()
Definition: alignedblob.h:112
void Image(struct Pix *image, int x_pos, int y_pos)
Definition: scrollview.cpp:773
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
inT16 y() const
access_function
Definition: points.h:56
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:524
ScrollView * MakeWindow(int x, int y, const char *window_name)
inT16 x() const
access function
Definition: points.h:52
ICOORD vertical_skew_
Definition: tabfind.h:367
bool textord_tabfind_show_finaltabs
Definition: tabfind.cpp:84
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:541
int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 183 of file tabfind.cpp.

185  {
186  bool right_to_left = v.IsLeftTab();
187  int bottom_x = v.XAtY(bottom_y);
188  int top_x = v.XAtY(top_y);
189  int start_x = right_to_left ? MAX(top_x, bottom_x) : MIN(top_x, bottom_x);
190  BlobGridSearch sidesearch(this);
191  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
192  int min_gap = max_gutter_width;
193  *required_shift = 0;
194  BLOBNBOX* blob = NULL;
195  while ((blob = sidesearch.NextSideSearch(right_to_left)) != NULL) {
196  const TBOX& box = blob->bounding_box();
197  if (box.bottom() >= top_y || box.top() <= bottom_y)
198  continue; // Doesn't overlap enough.
199  if (box.height() >= gridsize() * 2 &&
200  box.height() > box.width() * kLineFragmentAspectRatio) {
201  // Skip likely separator line residue.
202  continue;
203  }
204  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
205  continue; // Skip non-text if required.
206  int mid_y = (box.bottom() + box.top()) / 2;
207  // We use the x at the mid-y so that the required_shift guarantees
208  // to clear all the blobs on the tab-stop. If we use the min/max
209  // of x at top/bottom of the blob, then exactness would be required,
210  // which is not a good thing.
211  int tab_x = v.XAtY(mid_y);
212  int gap;
213  if (right_to_left) {
214  gap = tab_x - box.right();
215  if (gap < 0 && box.left() - tab_x < *required_shift)
216  *required_shift = box.left() - tab_x;
217  } else {
218  gap = box.left() - tab_x;
219  if (gap < 0 && box.right() - tab_x > *required_shift)
220  *required_shift = box.right() - tab_x;
221  }
222  if (gap > 0 && gap < min_gap)
223  min_gap = gap;
224  }
225  // Result may be negative, in which case, this is a really bad tabstop.
226  return min_gap - abs(*required_shift);
227 }
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
#define MAX(x, y)
Definition: ndminx.h:24
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:415
#define MIN(x, y)
Definition: ndminx.h:28
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
int gridsize() const
Definition: bbgrid.h:63
BlobRegionType region_type() const
Definition: blobbox.h:268
inT16 bottom() const
Definition: rect.h:61
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:56
inT16 top() const
Definition: rect.h:54
void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 230 of file tabfind.cpp.

233  {
234  const TBOX& box = bbox->bounding_box();
235  // The gutter and internal sides of the box.
236  int gutter_x = left ? box.left() : box.right();
237  int internal_x = left ? box.right() : box.left();
238  // On ragged edges, the gutter side of the box is away from the tabstop.
239  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
240  *gutter_width = max_gutter;
241  // If the box is away from the tabstop, we need to increase
242  // the allowed gutter width.
243  if (tab_gap > 0)
244  *gutter_width += tab_gap;
245  bool debug = WithinTestRegion(2, box.left(), box.bottom());
246  if (debug)
247  tprintf("Looking in gutter\n");
248  // Find the nearest blob on the outside of the column.
249  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
250  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
251  *gutter_width, box.top(), box.bottom());
252  if (gutter_bbox != NULL) {
253  TBOX gutter_box = gutter_bbox->bounding_box();
254  *gutter_width = left ? tab_x - gutter_box.right()
255  : gutter_box.left() - tab_x;
256  }
257  if (*gutter_width >= max_gutter) {
258  // If there is no box because a tab was in the way, get the tab coord.
259  TBOX gutter_box(box);
260  if (left) {
261  gutter_box.set_left(tab_x - max_gutter - 1);
262  gutter_box.set_right(tab_x - max_gutter);
263  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
264  if (tab_gutter < tab_x - 1)
265  *gutter_width = tab_x - tab_gutter;
266  } else {
267  gutter_box.set_left(tab_x + max_gutter);
268  gutter_box.set_right(tab_x + max_gutter + 1);
269  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
270  if (tab_gutter > tab_x + 1)
271  *gutter_width = tab_gutter - tab_x;
272  }
273  }
274  if (*gutter_width > max_gutter)
275  *gutter_width = max_gutter;
276  // Now look for a neighbour on the inside.
277  if (debug)
278  tprintf("Looking for neighbour\n");
279  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
280  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
281  *gutter_width, box.top(), box.bottom());
282  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
283  : LeftEdgeForBox(box, true, false);
284  if (neighbour != NULL) {
285  TBOX n_box = neighbour->bounding_box();
286  if (debug) {
287  tprintf("Found neighbour:");
288  n_box.print();
289  }
290  if (left && n_box.left() < neighbour_edge)
291  neighbour_edge = n_box.left();
292  else if (!left && n_box.right() > neighbour_edge)
293  neighbour_edge = n_box.right();
294  }
295  *neighbour_gap = left ? neighbour_edge - internal_x
296  : internal_x - neighbour_edge;
297 }
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:303
static bool WithinTestRegion(int detail_level, int x, int y)
#define tprintf(...)
Definition: tprintf.h:31
void set_right(int x)
Definition: rect.h:78
void print() const
Definition: rect.h:270
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:308
inT16 right() const
Definition: rect.h:75
void set_left(int x)
Definition: rect.h:71
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
BlobTextFlowType flow() const
Definition: blobbox.h:280
const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 165 of file tabfind.h.

165  {
166  return image_origin_;
167  }
bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 140 of file tabfind.cpp.

142  {
143  TBOX box = blob->bounding_box();
144  blob->set_left_rule(LeftEdgeForBox(box, false, false));
145  blob->set_right_rule(RightEdgeForBox(box, false, false));
146  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
147  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
148  if (blob->joined_to_prev())
149  return false;
150  grid->InsertBBox(h_spread, v_spread, blob);
151  return true;
152 }
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:303
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:319
bool joined_to_prev() const
Definition: blobbox.h:241
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:308
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:489
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:313
void set_right_rule(int new_right)
Definition: blobbox.h:307
Definition: rect.h:30
void set_left_rule(int new_left)
Definition: blobbox.h:301
const TBOX & bounding_box() const
Definition: blobbox.h:215
void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 113 of file tabfind.cpp.

116  {
117  BLOBNBOX_IT blob_it(blobs);
118  int b_count = 0;
119  int reject_count = 0;
120  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
121  BLOBNBOX* blob = blob_it.data();
122 // if (InsertBlob(true, true, blob, grid)) {
123  if (InsertBlob(h_spread, v_spread, blob, grid)) {
124  ++b_count;
125  } else {
126  ++reject_count;
127  }
128  }
129  if (textord_debug_tabfind) {
130  tprintf("Inserted %d blobs into grid, %d rejected.\n",
131  b_count, reject_count);
132  }
133 }
#define tprintf(...)
Definition: tprintf.h:31
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:140
int textord_debug_tabfind
Definition: alignedblob.cpp:27
int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 308 of file tabfind.cpp.

308  {
309  TabVector* v = LeftTabForBox(box, crossing, extended);
310  return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
311 }
inT16 bottom() const
Definition: rect.h:61
inT16 x() const
access function
Definition: points.h:52
#define NULL
Definition: host.h:144
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:370
inT16 top() const
Definition: rect.h:54
TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 370 of file tabfind.cpp.

371  {
372  if (v_it_.empty())
373  return NULL;
374  int top_y = box.top();
375  int bottom_y = box.bottom();
376  int mid_y = (top_y + bottom_y) / 2;
377  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
378  int min_key, max_key;
379  SetupTabSearch(left, mid_y, &min_key, &max_key);
380  // Position the iterator at the last TabVector with sort_key <= max_key.
381  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
382  v_it_.forward();
383  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
384  v_it_.backward();
385  }
386  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
387  TabVector* best_v = NULL;
388  int best_x = -1;
389  int key_limit = -1;
390  do {
391  TabVector* v = v_it_.data();
392  int x = v->XAtY(mid_y);
393  if (x <= left &&
394  (v->VOverlap(top_y, bottom_y) > 0 ||
395  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
396  if (best_v == NULL || x > best_x) {
397  best_v = v;
398  best_x = x;
399  // We can guarantee that no better vector can be found if the
400  // sort key is less than that of the best by max_key - min_key.
401  key_limit = v->sort_key() - (max_key - min_key);
402  }
403  }
404  // Break when the search is done to avoid wrapping the iterator and
405  // thereby potentially slowing the next search.
406  if (v_it_.at_first() ||
407  (best_v != NULL && v->sort_key() < key_limit))
408  break; // Prevent restarting list for next call.
409  v_it_.backward();
410  } while (!v_it_.at_last());
411  return best_v;
412 }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:517
inT16 bottom() const
Definition: rect.h:61
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:54
void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1409 of file tabfind.cpp.

1409  {
1410  TabVector_LIST temp_list;
1411  TabVector_IT temp_it(&temp_list);
1412  v_it_.move_to_first();
1413  // The TabVector list only contains vertical lines, but they need to be
1414  // reflected and the list needs to be reversed, so they are still in
1415  // sort_key order.
1416  while (!v_it_.empty()) {
1417  TabVector* v = v_it_.extract();
1418  v_it_.forward();
1419  v->ReflectInYAxis();
1420  temp_it.add_before_then_move(v);
1421  }
1422  v_it_.add_list_after(&temp_list);
1423  v_it_.move_to_first();
1424  // Reset this grid with reflected bounding boxes.
1425  TBOX grid_box(bleft(), tright());
1426  int tmp = grid_box.left();
1427  grid_box.set_left(-grid_box.right());
1428  grid_box.set_right(-tmp);
1429  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1430 }
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
Definition: rect.h:30
const ICOORD & tright() const
Definition: bbgrid.h:75
void tesseract::TabFind::Reset ( )
protected

Definition at line 1398 of file tabfind.cpp.

1398  {
1399  v_it_.move_to_first();
1400  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1401  if (!v_it_.data()->IsSeparator())
1402  delete v_it_.extract();
1403  }
1404  Clear();
1405 }
void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1353 of file tabfind.cpp.

1355  {
1356  // Rotate the horizontal and vertical vectors and swap them over.
1357  // Only the separators are kept and rotated; other tabs are used
1358  // to estimate the gutter width then thrown away.
1359  TabVector_LIST ex_verticals;
1360  TabVector_IT ex_v_it(&ex_verticals);
1361  TabVector_LIST vlines;
1362  TabVector_IT v_it(&vlines);
1363  while (!v_it_.empty()) {
1364  TabVector* v = v_it_.extract();
1365  if (v->IsSeparator()) {
1366  v->Rotate(rotate);
1367  ex_v_it.add_after_then_move(v);
1368  } else {
1369  v_it.add_after_then_move(v);
1370  }
1371  v_it_.forward();
1372  }
1373 
1374  // Adjust the min gutter width for better tabbox selection
1375  // in 2nd call to FindInitialTabVectors().
1376  int median_gutter = FindMedianGutterWidth(&vlines);
1377  if (median_gutter > *min_gutter_width)
1378  *min_gutter_width = median_gutter;
1379 
1380  TabVector_IT h_it(horizontal_lines);
1381  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1382  TabVector* h = h_it.data();
1383  h->Rotate(rotate);
1384  }
1385  v_it_.add_list_after(horizontal_lines);
1386  v_it_.move_to_first();
1387  h_it.set_to_list(horizontal_lines);
1388  h_it.add_list_after(&ex_verticals);
1389 
1390  // Rebuild the grid to the new size.
1391  TBOX grid_box(bleft(), tright());
1392  grid_box.rotate_large(rotate);
1393  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1394 }
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
Definition: rect.h:30
const ICOORD & tright() const
Definition: bbgrid.h:75
int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 303 of file tabfind.cpp.

303  {
304  TabVector* v = RightTabForBox(box, crossing, extended);
305  return v == NULL ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
306 }
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:326
inT16 bottom() const
Definition: rect.h:61
inT16 x() const
access function
Definition: points.h:52
#define NULL
Definition: host.h:144
ICOORD tright_
Definition: bbgrid.h:91
inT16 top() const
Definition: rect.h:54
TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return NULL. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 326 of file tabfind.cpp.

327  {
328  if (v_it_.empty())
329  return NULL;
330  int top_y = box.top();
331  int bottom_y = box.bottom();
332  int mid_y = (top_y + bottom_y) / 2;
333  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
334  int min_key, max_key;
335  SetupTabSearch(right, mid_y, &min_key, &max_key);
336  // Position the iterator at the first TabVector with sort_key >= min_key.
337  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
338  v_it_.backward();
339  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
340  v_it_.forward();
341  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
342  TabVector* best_v = NULL;
343  int best_x = -1;
344  int key_limit = -1;
345  do {
346  TabVector* v = v_it_.data();
347  int x = v->XAtY(mid_y);
348  if (x >= right &&
349  (v->VOverlap(top_y, bottom_y) > 0 ||
350  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
351  if (best_v == NULL || x < best_x) {
352  best_v = v;
353  best_x = x;
354  // We can guarantee that no better vector can be found if the
355  // sort key exceeds that of the best by max_key - min_key.
356  key_limit = v->sort_key() + max_key - min_key;
357  }
358  }
359  // Break when the search is done to avoid wrapping the iterator and
360  // thereby potentially slowing the next search.
361  if (v_it_.at_last() ||
362  (best_v != NULL && v->sort_key() > key_limit))
363  break; // Prevent restarting list for next call.
364  v_it_.forward();
365  } while (!v_it_.at_first());
366  return best_v;
367 }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:517
inT16 bottom() const
Definition: rect.h:61
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:54
void tesseract::TabFind::RotateBlobList ( const FCOORD & rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1283 of file tabfind.cpp.

1283  {
1284  BLOBNBOX_IT it(blobs);
1285  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1286  it.data()->rotate_box(rotation);
1287  }
1288 }
void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 164 of file tabfind.cpp.

164  {
165  BLOBNBOX_IT blob_it(blobs);
166  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
167  BLOBNBOX* blob = blob_it.data();
168  TBOX box = blob->bounding_box();
169  blob->set_left_rule(LeftEdgeForBox(box, false, false));
170  blob->set_right_rule(RightEdgeForBox(box, false, false));
171  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
172  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
173  }
174 }
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:303
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:319
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:308
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:313
void set_right_rule(int new_right)
Definition: blobbox.h:307
Definition: rect.h:30
void set_left_rule(int new_left)
Definition: blobbox.h:301
const TBOX & bounding_box() const
Definition: blobbox.h:215
void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK * block)

Definition at line 155 of file tabfind.cpp.

155  {
156  SetBlobRuleEdges(&block->blobs);
157  SetBlobRuleEdges(&block->small_blobs);
158  SetBlobRuleEdges(&block->noise_blobs);
159  SetBlobRuleEdges(&block->large_blobs);
160 }
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:164
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 517 of file tabfind.cpp.

517  {
518  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
519  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
520  *min_key = MIN(key1, key2);
521  *max_key = MAX(key1, key2);
522 }
#define MAX(x, y)
Definition: ndminx.h:24
#define MIN(x, y)
Definition: ndminx.h:28
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
inT16 y() const
access_function
Definition: points.h:56
ICOORD tright_
Definition: bbgrid.h:91
ICOORD vertical_skew_
Definition: tabfind.h:367
void tesseract::TabFind::TidyBlobs ( TO_BLOCK * block)
protected

Definition at line 492 of file tabfind.cpp.

492  {
493  BLOBNBOX_IT large_it = &block->large_blobs;
494  BLOBNBOX_IT blob_it = &block->blobs;
495  int b_count = 0;
496  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
497  BLOBNBOX* large_blob = large_it.data();
498  if (large_blob->owner() != NULL) {
499  blob_it.add_to_end(large_it.extract());
500  ++b_count;
501  }
502  }
503  if (textord_debug_tabfind) {
504  tprintf("Moved %d large blobs to normal list\n",
505  b_count);
506  #ifndef GRAPHICS_DISABLED
507  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
508  block->plot_graded_blobs(rej_win);
509  block->plot_noise_blobs(rej_win);
510  rej_win->Update();
511  #endif // GRAPHICS_DISABLED
512  }
513  block->DeleteUnownedNoise();
514 }
static void Update()
Definition: scrollview.cpp:715
#define tprintf(...)
Definition: tprintf.h:31
void DeleteUnownedNoise()
Definition: blobbox.cpp:1031
int textord_debug_tabfind
Definition: alignedblob.cpp:27
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1065
ScrollView * MakeWindow(int x, int y, const char *window_name)
#define NULL
Definition: host.h:144
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1057
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 173 of file tabfind.h.

173  {
174  return &vectors_;
175  }
bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes differ by more than a factor of 5.

Definition at line 435 of file tabfind.cpp.

435  {
436  return size1 > size2 * 5 || size2 > size1 * 5;
437 }
WidthCallback* tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 158 of file tabfind.h.

158  {
159  return width_cb_;
160  }

Member Data Documentation

int tesseract::TabFind::resolution_
protected

Definition at line 368 of file tabfind.h.

ICOORD tesseract::TabFind::vertical_skew_
protected

Definition at line 367 of file tabfind.h.


The documentation for this class was generated from the following files: