tesseract  4.00.00dev
ocrblock.h File Reference
#include "ocrpara.h"
#include "ocrrow.h"
#include "pdblock.h"

Go to the source code of this file.

Classes

class  BLOCK
 

Functions

int decreasing_top_order (const void *row1, const void *row2)
 
void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 

Function Documentation

◆ decreasing_top_order()

int decreasing_top_order ( const void *  row1,
const void *  row2 
)

decreasing_top_order

Sort Comparator: Return <0 if row1 top > row2 top (rows with a larger top sort first, i.e. decreasing top order)

Definition at line 72 of file ocrblock.cpp.

74  {
    // row1 and row2 each point at a ROW* (hence the ROW** casts); the
    // const void* signature suggests a qsort-style callback — confirm at the
    // call site.
    // The result is row2's bounding-box top minus row1's, so it is negative
    // when row1's top is the larger of the two, zero when the tops are equal,
    // and positive otherwise: rows with a greater top value sort first.
75  return (*(ROW **) row2)->bounding_box ().top () -
76  (*(ROW **) row1)->bounding_box ().top ();
77 }
Definition: ocrrow.h:32

◆ ExtractBlobsFromSegmentation()

void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 445 of file ocrblock.cpp.

446  {
     // Gathers the blobs of every word in `blocks` into output_blob_list by
     // walking blocks -> rows -> words. Non-text blocks are not filtered out
     // here; every block in the list is visited.
447  C_BLOB_IT return_list_it(output_blob_list);
448  BLOCK_IT block_it(blocks);
449  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
450  BLOCK* block = block_it.data();
451  ROW_IT row_it(block->row_list());
452  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
453  ROW* row = row_it.data();
454  // Iterate over all werds in the row.
455  WERD_IT werd_it(row->word_list());
456  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
457  WERD* werd = werd_it.data();
     // Each word contributes its cblob_list() first and its rej_cblob_list()
     // second. The iterator is repositioned on the last element before each
     // add_list_after call so the incoming list is attached at the tail.
     // NOTE(review): add_list_after presumably transfers the elements out of
     // the word's own lists rather than copying them, which would leave the
     // words without blobs afterwards — confirm against the list iterator
     // implementation.
458  return_list_it.move_to_last();
459  return_list_it.add_list_after(werd->cblob_list());
460  return_list_it.move_to_last();
461  return_list_it.add_list_after(werd->rej_cblob_list());
462  }
463  }
464  }
465 }
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:95
Definition: ocrrow.h:32
Definition: ocrblock.h:30
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52

◆ PrintSegmentationStats()

void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 412 of file ocrblock.cpp.

412  {
     // Counts the blocks, rows, words and blobs reachable from block_list and
     // reports the four totals through tprintf. Nothing in the list is
     // modified by the code shown here; the function returns no value.
413  int num_blocks = 0;
414  int num_rows = 0;
415  int num_words = 0;
416  int num_blobs = 0;
417  BLOCK_IT block_it(block_list);
418  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
419  BLOCK* block = block_it.data();
420  ++num_blocks;
421  ROW_IT row_it(block->row_list());
422  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
423  ++num_rows;
424  ROW* row = row_it.data();
425  // Iterate over all werds in the row.
426  WERD_IT werd_it(row->word_list());
427  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
428  WERD* werd = werd_it.data();
429  ++num_words;
     // Only the word's cblob_list() is counted; blobs held in
     // rej_cblob_list() do not contribute to the "Blobs" total.
430  num_blobs += werd->cblob_list()->length();
431  }
432  }
433  }
434  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
435  num_blocks, num_rows, num_words, num_blobs);
436 }
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: ocrrow.h:32
Definition: ocrblock.h:30
#define tprintf(...)
Definition: tprintf.h:31
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52

◆ RefreshWordBlobsFromNewBlobs()

void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 480 of file ocrblock.cpp.

482  {
     // Rebuilds the word list of every row in every text block of block_list.
     // Each existing word is asked to construct a replacement from new_blobs
     // via ConstructWerdWithNewBlobs (not_found_blobs is passed through to
     // that call); the replacement is used when one is returned, otherwise
     // the original word is kept.
483  // Now iterate over all the blobs in the segmentation_block_list_, and just
484  // replace the corresponding c-blobs inside the werds.
485  BLOCK_IT block_it(block_list);
486  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
487  BLOCK* block = block_it.data();
     // A block with no poly_block() (NULL) is processed like a text block;
     // only blocks whose poly_block() reports !IsText() are skipped.
488  if (block->poly_block() != NULL && !block->poly_block()->IsText())
489  continue; // Don't touch non-text blocks.
490  // Iterate over all rows in the block.
491  ROW_IT row_it(block->row_list());
492  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
493  ROW* row = row_it.data();
494  // Iterate over all werds in the row.
495  WERD_IT werd_it(row->word_list());
     // Replacement words are accumulated in a temporary list, in the same
     // order as the words they were derived from, and spliced into the row
     // once the whole row has been visited.
496  WERD_LIST new_words;
497  WERD_IT new_words_it(&new_words);
498  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
     // NOTE(review): extract() presumably unlinks the word from the row's
     // list and hands ownership to this function, which is why the word is
     // either deleted or re-added below — confirm against the list iterator
     // implementation.
499  WERD* werd = werd_it.extract();
500  WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
501  not_found_blobs);
502  if (new_werd) {
503  // Insert this new werd into the actual row's werd-list. Remove the
504  // existing one.
505  new_words_it.add_after_then_move(new_werd);
506  delete werd;
507  } else {
508  // Reinsert the older word back, for lack of better options.
509  // This is critical since dropping the words messes up segmentation:
510  // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
511  new_words_it.add_after_then_move(werd);
512  }
513  }
514  // Get rid of the old word list & replace it with the new one.
     // NOTE(review): every word was already extracted in the loop above, so
     // the list is presumably empty by now and clear() acts as a safeguard —
     // confirm.
515  row->word_list()->clear();
516  werd_it.move_to_first();
517  werd_it.add_list_after(&new_words);
518  }
519  }
520 }
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: ocrrow.h:32
Definition: ocrblock.h:30
bool IsText() const
Definition: polyblk.h:52
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:412
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52