tesseract v5.3.3.20231005
tesseract::StrokeWidth Class Reference

#include <strokewidth.h>

Inheritance diagram for tesseract::StrokeWidth:
tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase

Public Member Functions

 StrokeWidth (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~StrokeWidth () override
 
void SetNeighboursOnMediumBlobs (TO_BLOCK *block)
 
void FindTextlineDirectionAndFixBrokenCJK (PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
 
bool TestVerticalTextDirection (double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
 
void CorrectForRotation (const FCOORD &rerotation, ColPartitionGrid *part_grid)
 
void FindLeaderPartitions (TO_BLOCK *block, ColPartitionGrid *part_grid)
 
void RemoveLineResidue (ColPartition_LIST *big_part_list)
 
void GradeBlobsIntoPartitions (PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
 
void HandleClick (int x, int y) override
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Image pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The StrokeWidth class holds all the normal and large blobs. It is used to find good large blobs and move them to the normal blobs by virtue of having a reasonable strokewidth compatible neighbour.

Definition at line 53 of file strokewidth.h.

Constructor & Destructor Documentation

◆ StrokeWidth()

tesseract::StrokeWidth::StrokeWidth ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright 
)

Definition at line 114 of file strokewidth.cpp.

116 , nontext_map_(nullptr)
117 , projection_(nullptr)
118 , denorm_(nullptr)
119 , grid_box_(bleft, tright)
120 , rerotation_(1.0f, 0.0f) {
121}
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: blobgrid.cpp:24

◆ ~StrokeWidth()

tesseract::StrokeWidth::~StrokeWidth ( )
override

Definition at line 123 of file strokewidth.cpp.

123 {
124#ifndef GRAPHICS_DISABLED
125 if (widths_win_ != nullptr) {
126 widths_win_->AwaitEvent(SVET_DESTROY);
127 if (textord_tabfind_only_strokewidths) {
128 exit(0);
129 }
130 delete widths_win_;
131 }
132 delete leaders_win_;
133 delete initial_widths_win_;
134 delete chains_win_;
135 delete textlines_win_;
136 delete smoothed_win_;
137 delete diacritics_win_;
138#endif
139}
@ SVET_DESTROY
Definition: scrollview.h:54
std::unique_ptr< SVEvent > AwaitEvent(SVEventType type)
Definition: scrollview.cpp:432

Member Function Documentation

◆ CorrectForRotation()

void tesseract::StrokeWidth::CorrectForRotation ( const FCOORD & rerotation,
ColPartitionGrid * part_grid 
)

Definition at line 252 of file strokewidth.cpp.

252 {
253 Init(part_grid->gridsize(), part_grid->bleft(), part_grid->tright());
254 grid_box_ = TBOX(bleft(), tright());
255 rerotation_.set_x(rotation.x());
256 rerotation_.set_y(-rotation.y());
257}
@ TBOX
void set_y(float yin)
rewrite function
Definition: points.h:217
void set_x(float xin)
rewrite function
Definition: points.h:213
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488

◆ FindLeaderPartitions()

void tesseract::StrokeWidth::FindLeaderPartitions ( TO_BLOCK * block,
ColPartitionGrid * part_grid 
)

Definition at line 260 of file strokewidth.cpp.

260 {
261 Clear();
262 // Find and isolate leaders in the noise list.
263 ColPartition_LIST leader_parts;
264 FindLeadersAndMarkNoise(block, &leader_parts);
265 // Setup the strokewidth grid with the block's remaining (non-noise) blobs.
266 InsertBlobList(&block->blobs);
267 // Mark blobs that have leader neighbours.
268 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
269 ColPartition *part = it.extract();
270 part->ClaimBoxes();
271 MarkLeaderNeighbours(part, LR_LEFT);
272 MarkLeaderNeighbours(part, LR_RIGHT);
273 part_grid->InsertBBox(true, true, part);
274 }
275}
void InsertBlobList(BLOBNBOX_LIST *blobs)
Definition: blobgrid.cpp:35

◆ FindTextlineDirectionAndFixBrokenCJK()

void tesseract::StrokeWidth::FindTextlineDirectionAndFixBrokenCJK ( PageSegMode  pageseg_mode,
bool  cjk_merge,
TO_BLOCK * input_block 
)

Definition at line 158 of file strokewidth.cpp.

159 {
160 // Setup the grid with the remaining (non-noise) blobs.
161 InsertBlobs(input_block);
162 // Repair broken CJK characters if needed.
163 while (cjk_merge && FixBrokenCJK(input_block)) {
164 }
165 // Grade blobs by inspection of neighbours.
166 FindTextlineFlowDirection(pageseg_mode, false);
167 // Clear the grid ready for rotation or leader finding.
168 Clear();
169}

◆ GradeBlobsIntoPartitions()

void tesseract::StrokeWidth::GradeBlobsIntoPartitions ( PageSegMode  pageseg_mode,
const FCOORD & rerotation,
TO_BLOCK * block,
Image  nontext_pix,
const DENORM * denorm,
bool  cjk_script,
TextlineProjection * projection,
BLOBNBOX_LIST *  diacritic_blobs,
ColPartitionGrid * part_grid,
ColPartition_LIST *  big_parts 
)

Definition at line 344 of file strokewidth.cpp.

349 {
350 nontext_map_ = nontext_pix;
351 projection_ = projection;
352 denorm_ = denorm;
353 // Clear and re Insert to take advantage of the tab stops in the blobs.
354 Clear();
355 // Setup the strokewidth grid with the remaining non-noise, non-leader blobs.
356 InsertBlobs(block);
357
358 // Run FixBrokenCJK() again if the page is CJK.
359 if (cjk_script) {
360 FixBrokenCJK(block);
361 }
362 FindTextlineFlowDirection(pageseg_mode, false);
363 projection_->ConstructProjection(block, rerotation, nontext_map_);
364#ifndef GRAPHICS_DISABLED
365 if (textord_tabfind_show_strokewidths) {
366 ScrollView *line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs");
367 projection_->PlotGradedBlobs(&block->blobs, line_blobs_win);
368 projection_->PlotGradedBlobs(&block->small_blobs, line_blobs_win);
369 }
370#endif
371 projection_->MoveNonTextlineBlobs(&block->blobs, &block->noise_blobs);
372 projection_->MoveNonTextlineBlobs(&block->small_blobs, &block->noise_blobs);
373 // Clear and re Insert to take advantage of the removed diacritics.
374 Clear();
375 InsertBlobs(block);
376 FCOORD skew;
377 FindTextlineFlowDirection(pageseg_mode, true);
378 PartitionFindResult r = FindInitialPartitions(pageseg_mode, rerotation, true, block,
379 diacritic_blobs, part_grid, big_parts, &skew);
380 if (r == PFR_NOISE) {
381 tprintf("Detected %d diacritics\n", diacritic_blobs->length());
382 // Noise was found, and removed.
383 Clear();
384 InsertBlobs(block);
385 FindTextlineFlowDirection(pageseg_mode, true);
386 r = FindInitialPartitions(pageseg_mode, rerotation, false, block, diacritic_blobs, part_grid,
387 big_parts, &skew);
388 }
389 nontext_map_ = nullptr;
390 projection_ = nullptr;
391 denorm_ = nullptr;
392}
PartitionFindResult
Definition: strokewidth.h:42
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:633
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map)
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)

◆ HandleClick()

void tesseract::StrokeWidth::HandleClick ( int  x,
int  y 
)
override virtual

Handles a click event in a display window.

Reimplemented from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >.

Definition at line 403 of file strokewidth.cpp.

403 {
405 // Run a radial search for blobs that overlap.
406 BlobGridSearch radsearch(this);
407 radsearch.StartRadSearch(x, y, 1);
408 BLOBNBOX *neighbour;
409 FCOORD click(static_cast<float>(x), static_cast<float>(y));
410 while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
411 TBOX nbox = neighbour->bounding_box();
412 if (nbox.contains(click) && neighbour->cblob() != nullptr) {
413 PrintBoxWidths(neighbour);
414 if (neighbour->neighbour(BND_LEFT) != nullptr) {
415 PrintBoxWidths(neighbour->neighbour(BND_LEFT));
416 }
417 if (neighbour->neighbour(BND_RIGHT) != nullptr) {
418 PrintBoxWidths(neighbour->neighbour(BND_RIGHT));
419 }
420 if (neighbour->neighbour(BND_ABOVE) != nullptr) {
421 PrintBoxWidths(neighbour->neighbour(BND_ABOVE));
422 }
423 if (neighbour->neighbour(BND_BELOW) != nullptr) {
424 PrintBoxWidths(neighbour->neighbour(BND_BELOW));
425 }
426 int gaps[BND_COUNT];
427 neighbour->NeighbourGaps(gaps);
428 tprintf(
429 "Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
430 "Good= %d %d %d %d\n",
431 gaps[BND_LEFT], gaps[BND_RIGHT], gaps[BND_ABOVE], gaps[BND_BELOW],
432 neighbour->horz_possible(), neighbour->vert_possible(),
433 neighbour->good_stroke_neighbour(BND_LEFT), neighbour->good_stroke_neighbour(BND_RIGHT),
434 neighbour->good_stroke_neighbour(BND_ABOVE), neighbour->good_stroke_neighbour(BND_BELOW));
435 break;
436 }
437 }
438}
const double y
@ BND_LEFT
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:89
@ BND_BELOW
Definition: blobbox.h:89
@ BND_ABOVE
Definition: blobbox.h:89
@ BND_COUNT
Definition: blobbox.h:89
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:30
virtual void HandleClick(int x, int y)
Definition: bbgrid.h:691

◆ RemoveLineResidue()

void tesseract::StrokeWidth::RemoveLineResidue ( ColPartition_LIST *  big_part_list)

Definition at line 279 of file strokewidth.cpp.

279 {
280 BlobGridSearch gsearch(this);
281 BLOBNBOX *bbox;
282 // For every vertical line-like bbox in the grid, search its neighbours
283 // to find the tallest, and if the original box is taller by sufficient
284 // margin, then call it line residue and delete it.
285 gsearch.StartFullSearch();
286 while ((bbox = gsearch.NextFullSearch()) != nullptr) {
287 TBOX box = bbox->bounding_box();
288 if (box.height() < box.width() * kLineResidueAspectRatio) {
289 continue;
290 }
291 // Set up a rectangle search around the blob to find the size of its
292 // neighbours.
293 int padding = box.height() * kLineResiduePadRatio;
294 TBOX search_box = box;
295 search_box.pad(padding, padding);
296 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
297 // Find the largest object in the search box not equal to bbox.
298 BlobGridSearch rsearch(this);
299 int max_height = 0;
300 BLOBNBOX *n;
301 rsearch.StartRectSearch(search_box);
302 while ((n = rsearch.NextRectSearch()) != nullptr) {
303 if (n == bbox) {
304 continue;
305 }
306 TBOX nbox = n->bounding_box();
307 if (nbox.height() > max_height) {
308 max_height = nbox.height();
309 }
310 }
311 if (debug) {
312 tprintf("Max neighbour size=%d for candidate line box at:", max_height);
313 box.print();
314 }
315 if (max_height * kLineResidueSizeRatio < box.height()) {
316#ifndef GRAPHICS_DISABLED
317 if (leaders_win_ != nullptr) {
318 // We are debugging, so display deleted in pink blobs in the same
319 // window that we use to display leader detection.
320 leaders_win_->Pen(ScrollView::PINK);
321 leaders_win_->Rectangle(box.left(), box.bottom(), box.right(), box.top());
322 }
323#endif // !GRAPHICS_DISABLED
324 ColPartition::MakeBigPartition(bbox, big_part_list);
325 }
326 }
327}
const int kLineResiduePadRatio
const double kLineResidueAspectRatio
Definition: strokewidth.cpp:99
const double kLineResidueSizeRatio
static bool WithinTestRegion(int detail_level, int x, int y)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
void Pen(Color color)
Definition: scrollview.cpp:710
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:576

◆ SetNeighboursOnMediumBlobs()

void tesseract::StrokeWidth::SetNeighboursOnMediumBlobs ( TO_BLOCK * block)

Definition at line 144 of file strokewidth.cpp.

144 {
145 // Run a preliminary strokewidth neighbour detection on the medium blobs.
146 InsertBlobList(&block->blobs);
147 BLOBNBOX_IT blob_it(&block->blobs);
148 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
149 SetNeighbours(false, false, blob_it.data());
150 }
151 Clear();
152}

◆ TestVerticalTextDirection()

bool tesseract::StrokeWidth::TestVerticalTextDirection ( double  find_vertical_text_ratio,
TO_BLOCK * block,
BLOBNBOX_CLIST *  osd_blobs 
)

Definition at line 211 of file strokewidth.cpp.

212 {
213 int vertical_boxes = 0;
214 int horizontal_boxes = 0;
215 // Count vertical normal and large blobs.
216 BLOBNBOX_CLIST vertical_blobs;
217 BLOBNBOX_CLIST horizontal_blobs;
218 BLOBNBOX_CLIST nondescript_blobs;
219 CollectHorizVertBlobs(&block->blobs, &vertical_boxes, &horizontal_boxes, &vertical_blobs,
220 &horizontal_blobs, &nondescript_blobs);
221 CollectHorizVertBlobs(&block->large_blobs, &vertical_boxes, &horizontal_boxes, &vertical_blobs,
222 &horizontal_blobs, &nondescript_blobs);
223 if (textord_debug_tabfind) {
224 tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", horizontal_boxes,
225 vertical_boxes, horizontal_blobs.length(), vertical_blobs.length(),
226 nondescript_blobs.length());
227 }
228 if (osd_blobs != nullptr && vertical_boxes == 0 && horizontal_boxes == 0) {
229 // Only nondescript blobs available, so return those.
230 BLOBNBOX_C_IT osd_it(osd_blobs);
231 osd_it.add_list_after(&nondescript_blobs);
232 return false;
233 }
234 int min_vert_boxes =
235 static_cast<int>((vertical_boxes + horizontal_boxes) * find_vertical_text_ratio);
236 if (vertical_boxes >= min_vert_boxes) {
237 if (osd_blobs != nullptr) {
238 BLOBNBOX_C_IT osd_it(osd_blobs);
239 osd_it.add_list_after(&vertical_blobs);
240 }
241 return true;
242 } else {
243 if (osd_blobs != nullptr) {
244 BLOBNBOX_C_IT osd_it(osd_blobs);
245 osd_it.add_list_after(&horizontal_blobs);
246 }
247 return false;
248 }
249}
int textord_debug_tabfind
Definition: alignedblob.cpp:29

The documentation for this class was generated from the following files: