All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char *const BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char *const RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void InitNonPointers ()
 
void InitPointers ()
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings ()
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word
 
tesseract::BoxWord * bln_boxes
 
ROW * blob_row
 
DENORM denorm
 
const UNICHARSET * uch_set
 
TWERD * chopped_word
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
MATRIX * ratings
 
WERD_CHOICE * best_choice
 
WERD_CHOICE * raw_choice
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle
 
TWERD * rebuild_word
 
tesseract::BoxWord * box_word
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
tesseract::Tesseract * tesseract
 
WERD_CHOICE * ep_choice
 
REJMAP reject_map
 
BOOL8 tess_failed
 
BOOL8 tess_accepted
 
BOOL8 tess_would_adapt
 
BOOL8 done
 
bool small_caps
 
bool odd_size
 
inT8 italic
 
inT8 bold
 
const FontInfo * fontinfo
 
const FontInfo * fontinfo2
 
inT8 fontinfo_id_count
 
inT8 fontinfo_id2_count
 
BOOL8 guessed_x_ht
 
BOOL8 guessed_caps_ht
 
CRUNCH_MODE unlv_crunch_mode
 
float x_height
 
float caps_height
 
float baseline_shift
 
BOOL8 combination
 
BOOL8 part_of_combo
 
BOOL8 reject_spaces
 

Detailed Description

Definition at line 155 of file pageres.h.

Constructor & Destructor Documentation

WERD_RES::WERD_RES ( )
inline

Definition at line 319 of file pageres.h.

319  {
320  InitNonPointers();
321  InitPointers();
322  }
void InitPointers()
Definition: pageres.cpp:1115
void InitNonPointers()
Definition: pageres.cpp:1088
WERD_RES::WERD_RES ( WERD * the_word)
inline

Definition at line 323 of file pageres.h.

323  {
324  InitNonPointers();
325  InitPointers();
326  word = the_word;
327  }
void InitPointers()
Definition: pageres.cpp:1115
void InitNonPointers()
Definition: pageres.cpp:1088
WERD * word
Definition: pageres.h:175
WERD_RES::WERD_RES ( const WERD_RES & source)
inline

Definition at line 330 of file pageres.h.

330  {
331  InitPointers();
332  *this = source; // see operator=
333  }
void InitPointers()
Definition: pageres.cpp:1115
WERD_RES::~WERD_RES ( )

Definition at line 1084 of file pageres.cpp.

1084  {
1085  Clear();
1086 }
void Clear()
Definition: pageres.cpp:1130

Member Function Documentation

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 430 of file pageres.cpp.

430  {
431  // The choices are not changed by this iteration.
432  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
433  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
434  WERD_CHOICE* choice = wc_it.data();
435  if (choice->adjust_factor() <= threshold)
436  return false;
437  }
438  return true;
439 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
float adjust_factor() const
Definition: ratngs.h:303
bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 389 of file pageres.h.

389  {
390  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
391  return false;
392  for (int id = 0; id < best_choice->length(); id++) {
393  int unichar_id = best_choice->unichar_id(id);
394  if (unichar_id < 0 || unichar_id >= uch_set->size())
395  continue; // Ignore illegal chars.
396  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
397  if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
398  return true;
399  }
400  return false;
401  }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const UNICHARSET * uch_set
Definition: pageres.h:192
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:638
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 372 of file pageres.h.

372  {
373  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
374  return false;
375  for (int id = 0; id < best_choice->length(); id++) {
376  int unichar_id = best_choice->unichar_id(id);
377  if (unichar_id < 0 || unichar_id >= uch_set->size())
378  continue; // Ignore illegal chars.
379  UNICHARSET::Direction dir =
380  uch_set->get_direction(unichar_id);
381  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
384  return true;
385  }
386  return false;
387  }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const UNICHARSET * uch_set
Definition: pageres.h:192
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:638
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 917 of file pageres.cpp.

917  {
920  for (int i = 0; i < best_choice->length(); ++i) {
921  UNICHAR_ID choice_id = best_choice->unichar_id(i);
922  const char* blob_choice = uch_set->id_to_unichar(choice_id);
923  correct_text.push_back(STRING(blob_choice));
924  }
925 }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
int push_back(T object)
GenericVector< STRING > correct_text
Definition: pageres.h:259
#define ASSERT_HOST(x)
Definition: errcode.h:84
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
const UNICHARSET * uch_set
Definition: pageres.h:192
int UNICHAR_ID
Definition: unichar.h:33
Definition: strngs.h:44
#define NULL
Definition: host.h:144
const char* const WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 342 of file pageres.h.

342  {
343  if (blob_index < 0 || best_choice == NULL ||
344  blob_index >= best_choice->length())
345  return NULL;
346  UNICHAR_ID id = best_choice->unichar_id(blob_index);
347  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
348  return NULL;
349  UNICHAR_ID mirrored = uch_set->get_mirror(id);
350  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
351  id = mirrored;
352  return uch_set->id_to_unichar_ext(id);
353  }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:645
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const UNICHARSET * uch_set
Definition: pageres.h:192
int UNICHAR_ID
Definition: unichar.h:33
#define NULL
Definition: host.h:144
const char *const id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:274
int size() const
Definition: unicharset.h:297
UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1024 of file pageres.cpp.

1024  {
1025  const char *ch = uch_set->id_to_unichar(id1);
1026  const char *next_ch = uch_set->id_to_unichar(id2);
1027  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1028  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
1029  return uch_set->unichar_to_id("-");
1030  return INVALID_UNICHAR_ID;
1031 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1002 of file pageres.cpp.

1002  {
1003  const char *ch = uch_set->id_to_unichar(id1);
1004  const char *next_ch = uch_set->id_to_unichar(id2);
1005  if (is_simple_quote(ch, strlen(ch)) &&
1006  is_simple_quote(next_ch, strlen(next_ch)))
1007  return uch_set->unichar_to_id("\"");
1008  return INVALID_UNICHAR_ID;
1009 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1053 of file pageres.cpp.

1053  {
1054  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1055  return id1;
1056  else
1057  return INVALID_UNICHAR_ID;
1058 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192
void WERD_RES::Clear ( )

Definition at line 1130 of file pageres.cpp.

1130  {
1131  if (word != NULL && combination) {
1132  delete word;
1133  }
1134  word = NULL;
1135  delete blamer_bundle;
1136  blamer_bundle = NULL;
1137  ClearResults();
1138 }
void ClearResults()
Definition: pageres.cpp:1140
BOOL8 combination
Definition: pageres.h:315
WERD * word
Definition: pageres.h:175
#define NULL
Definition: host.h:144
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void WERD_RES::ClearRatings ( )

Definition at line 1185 of file pageres.cpp.

1185  {
1186  if (ratings != NULL) {
1188  delete ratings;
1189  ratings = NULL;
1190  }
1191 }
MATRIX * ratings
Definition: pageres.h:215
void delete_matrix_pointers()
Definition: matrix.h:191
#define NULL
Definition: host.h:144
void WERD_RES::ClearResults ( )

Definition at line 1140 of file pageres.cpp.

1140  {
1141  done = false;
1142  fontinfo = NULL;
1143  fontinfo2 = NULL;
1144  fontinfo_id_count = 0;
1145  fontinfo_id2_count = 0;
1146  if (bln_boxes != NULL) {
1147  delete bln_boxes;
1148  bln_boxes = NULL;
1149  }
1150  blob_row = NULL;
1151  if (chopped_word != NULL) {
1152  delete chopped_word;
1153  chopped_word = NULL;
1154  }
1155  if (rebuild_word != NULL) {
1156  delete rebuild_word;
1157  rebuild_word = NULL;
1158  }
1159  if (box_word != NULL) {
1160  delete box_word;
1161  box_word = NULL;
1162  }
1163  best_state.clear();
1164  correct_text.clear();
1166  seam_array.clear();
1167  blob_widths.clear();
1168  blob_gaps.clear();
1169  ClearRatings();
1170  ClearWordChoices();
1172 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
void ClearWordChoices()
Definition: pageres.cpp:1173
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< STRING > correct_text
Definition: pageres.h:259
const FontInfo * fontinfo
Definition: pageres.h:288
inT8 fontinfo_id_count
Definition: pageres.h:290
TWERD * rebuild_word
Definition: pageres.h:244
void delete_data_pointers()
GenericVector< int > blob_gaps
Definition: pageres.h:208
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
const FontInfo * fontinfo2
Definition: pageres.h:289
BOOL8 done
Definition: pageres.h:282
inT8 fontinfo_id2_count
Definition: pageres.h:291
ROW * blob_row
Definition: pageres.h:186
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
GenericVector< int > blob_widths
Definition: pageres.h:205
void ClearResults()
Definition: blamer.h:173
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void ClearRatings()
Definition: pageres.cpp:1185
void WERD_RES::ClearWordChoices ( )

Definition at line 1173 of file pageres.cpp.

1173  {
1174  best_choice = NULL;
1175  if (raw_choice != NULL) {
1176  delete raw_choice;
1177  raw_choice = NULL;
1178  }
1179  best_choices.clear();
1180  if (ep_choice != NULL) {
1181  delete ep_choice;
1182  ep_choice = NULL;
1183  }
1184 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * best_choice
Definition: pageres.h:219
WERD_CHOICE * raw_choice
Definition: pageres.h:224
WERD_CHOICE * ep_choice
Definition: pageres.h:270
#define NULL
Definition: host.h:144
void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 828 of file pageres.cpp.

828  {
829  if (rebuild_word != NULL)
830  delete rebuild_word;
832  SetupBoxWord();
833  int word_len = box_word->length();
834  best_state.reserve(word_len);
835  correct_text.reserve(word_len);
836  for (int i = 0; i < word_len; ++i) {
839  }
840 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
int push_back(T object)
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< STRING > correct_text
Definition: pageres.h:259
TWERD * rebuild_word
Definition: pageres.h:244
const int length() const
Definition: boxword.h:85
void reserve(int size)
void SetupBoxWord()
Definition: pageres.cpp:843
Definition: strngs.h:44
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
Definition: blobs.h:395
void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 553 of file pageres.cpp.

557  {
558  int chunk = 0;
559  int end_chunk = best_choice->state(0);
560  int end_raw_chunk = raw_choice->state(0);
561  int raw_blob = 0;
562  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
563  float avg_rating = 0.0f;
564  int num_error_chunks = 0;
565 
566  // For each chunk in best choice blob i, count non-matching raw results.
567  while (chunk < end_chunk) {
568  if (chunk >= end_raw_chunk) {
569  ++raw_blob;
570  end_raw_chunk += raw_choice->state(raw_blob);
571  }
572  if (best_choice->unichar_id(i) !=
573  raw_choice->unichar_id(raw_blob)) {
574  avg_rating += raw_choice->certainty(raw_blob);
575  ++num_error_chunks;
576  }
577  ++chunk;
578  }
579 
580  if (num_error_chunks > 0) {
581  avg_rating /= num_error_chunks;
582  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
583  } else {
584  *thresholds = max_rating;
585  }
586 
587  if (*thresholds > max_rating)
588  *thresholds = max_rating;
589  if (*thresholds < min_rating)
590  *thresholds = min_rating;
591  }
592 }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
int state(int index) const
Definition: ratngs.h:316
float certainty() const
Definition: ratngs.h:327
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
WERD_CHOICE * raw_choice
Definition: pageres.h:224
bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX & > *  box_cb 
)

Definition at line 932 of file pageres.cpp.

934  {
935  ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
936  bool modified = false;
937  for (int i = 0; i + 1 < best_choice->length(); ++i) {
938  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
939  best_choice->unichar_id(i+1));
940  if (new_id != INVALID_UNICHAR_ID &&
941  (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
942  box_word->BlobBox(i + 1)))) {
943  // Raw choice should not be fixed.
944  best_choice->set_unichar_id(new_id, i);
945  modified = true;
947  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
948  if (!coord.Valid(*ratings)) {
949  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
950  }
951  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
952  if (FindMatchingChoice(new_id, blob_choices) == NULL) {
953  // Insert a fake result.
954  BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
955  blob_choice->set_unichar_id(new_id);
956  BLOB_CHOICE_IT bc_it(blob_choices);
957  bc_it.add_before_then_move(blob_choice);
958  }
959  }
960  }
961  delete class_cb;
962  delete box_cb;
963  return modified;
964 }
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:356
virtual R Run(A1, A2)=0
tesseract::BoxWord * box_word
Definition: pageres.h:250
MATRIX * ratings
Definition: pageres.h:215
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
#define ASSERT_HOST(x)
Definition: errcode.h:84
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:280
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
bool Valid(const MATRIX &m) const
Definition: matrix.h:327
int UNICHAR_ID
Definition: unichar.h:33
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:968
#define NULL
Definition: host.h:144
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 757 of file pageres.cpp.

757  {
758  denorm = word->denorm;
759  blob_row = word->blob_row;
760  MovePointerData(&chopped_word, &word->chopped_word);
761  MovePointerData(&rebuild_word, &word->rebuild_word);
762  MovePointerData(&box_word, &word->box_word);
764  seam_array = word->seam_array;
765  word->seam_array.clear();
766  best_state.move(&word->best_state);
768  blob_widths.move(&word->blob_widths);
769  blob_gaps.move(&word->blob_gaps);
771  MovePointerData(&ratings, &word->ratings);
772  best_choice = word->best_choice;
773  MovePointerData(&raw_choice, &word->raw_choice);
774  best_choices.clear();
775  WERD_CHOICE_IT wc_it(&best_choices);
776  wc_it.add_list_after(&word->best_choices);
777  reject_map = word->reject_map;
778  if (word->blamer_bundle != NULL) {
779  assert(blamer_bundle != NULL);
781  }
782  CopySimpleFields(*word);
783 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
tesseract::BoxWord * box_word
Definition: pageres.h:250
MATRIX * ratings
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< STRING > correct_text
Definition: pageres.h:259
void delete_matrix_pointers()
Definition: matrix.h:191
TWERD * rebuild_word
Definition: pageres.h:244
void delete_data_pointers()
DENORM denorm
Definition: pageres.h:190
GenericVector< int > blob_gaps
Definition: pageres.h:208
WERD_CHOICE * raw_choice
Definition: pageres.h:224
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
ROW * blob_row
Definition: pageres.h:186
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:194
void move(GenericVector< T > *from)
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
GenericVector< int > blob_widths
Definition: pageres.h:205
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
void WERD_RES::copy_on ( WERD_RES * word_res)
inline

Definition at line 641 of file pageres.h.

641  { //from this word
642  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
643  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
644  word->copy_on(word_res->word);
645  }
Definition: werd.h:35
Definition: werd.h:36
WERD * word
Definition: pageres.h:175
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
void copy_on(WERD *other)
Definition: werd.cpp:234
void WERD_RES::CopySimpleFields ( const WERD_RES & source)

Definition at line 241 of file pageres.cpp.

241  {
242  tess_failed = source.tess_failed;
243  tess_accepted = source.tess_accepted;
245  done = source.done;
247  small_caps = source.small_caps;
248  odd_size = source.odd_size;
249  italic = source.italic;
250  bold = source.bold;
251  fontinfo = source.fontinfo;
252  fontinfo2 = source.fontinfo2;
255  x_height = source.x_height;
256  caps_height = source.caps_height;
258  guessed_x_ht = source.guessed_x_ht;
260  reject_spaces = source.reject_spaces;
261  uch_set = source.uch_set;
262  tesseract = source.tesseract;
263 }
BOOL8 tess_accepted
Definition: pageres.h:280
inT8 bold
Definition: pageres.h:286
BOOL8 reject_spaces
Definition: pageres.h:317
float caps_height
Definition: pageres.h:296
const FontInfo * fontinfo
Definition: pageres.h:288
bool small_caps
Definition: pageres.h:283
float x_height
Definition: pageres.h:295
BOOL8 tess_would_adapt
Definition: pageres.h:281
inT8 fontinfo_id_count
Definition: pageres.h:290
const UNICHARSET * uch_set
Definition: pageres.h:192
bool odd_size
Definition: pageres.h:284
tesseract::Tesseract * tesseract
Definition: pageres.h:266
float baseline_shift
Definition: pageres.h:297
BOOL8 guessed_x_ht
Definition: pageres.h:292
const FontInfo * fontinfo2
Definition: pageres.h:289
BOOL8 done
Definition: pageres.h:282
inT8 fontinfo_id2_count
Definition: pageres.h:291
BOOL8 tess_failed
Definition: pageres.h:272
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
inT8 italic
Definition: pageres.h:285
BOOL8 guessed_caps_ht
Definition: pageres.h:293
void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 490 of file pageres.cpp.

490  {
491  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
493  if (best_choice == NULL)
494  tprintf("<Null choice>\n");
495  else
496  best_choice->print(msg);
497 }
BOOL8 tess_accepted
Definition: pageres.h:280
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 done
Definition: pageres.h:282
void print() const
Definition: ratngs.h:563
#define NULL
Definition: host.h:144
void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 471 of file pageres.cpp.

471  {
472  if (debug ||
473  (word_to_debug != NULL && *word_to_debug != '\0' && best_choice != NULL &&
474  best_choice->unichar_string() == STRING(word_to_debug))) {
475  if (raw_choice != NULL)
476  raw_choice->print("\nBest Raw Choice");
477 
478  WERD_CHOICE_IT it(&best_choices);
479  int index = 0;
480  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
481  WERD_CHOICE* choice = it.data();
482  STRING label;
483  label.add_str_int("\nCooked Choice #", index);
484  choice->print(label.string());
485  }
486  }
487 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * best_choice
Definition: pageres.h:219
const STRING & unichar_string() const
Definition: ratngs.h:524
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
void print() const
Definition: ratngs.h:563
Definition: strngs.h:44
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src)
inline static

Definition at line 630 of file pageres.h.

630  {
631  WERD_RES* result = new WERD_RES(*src);
632  // That didn't copy the ratings, but we want a copy if there is one to
633  // begin with.
634  if (src->ratings != NULL)
635  result->ratings = src->ratings->DeepCopy();
636  return result;
637  }
MATRIX * ratings
Definition: pageres.h:215
MATRIX * DeepCopy() const
Definition: matrix.cpp:94
WERD_RES()
Definition: pageres.h:319
#define NULL
Definition: host.h:144
void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 872 of file pageres.cpp.

872  {
873  // Setup the WERD_RES.
875  ASSERT_HOST(blob_count == box_word->length());
877  ClearRatings();
878  ratings = new MATRIX(blob_count, 1);
879  for (int c = 0; c < blob_count; ++c) {
880  BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
881  BLOB_CHOICE_IT choice_it(choice_list);
882  choice_it.add_after_then_move(choices[c]);
883  ratings->put(c, c, choice_list);
884  }
886  reject_map.initialise(blob_count);
887  done = true;
888 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
void ClearWordChoices()
Definition: pageres.cpp:1173
MATRIX * ratings
Definition: pageres.h:215
REJMAP reject_map
Definition: pageres.h:271
void put(int column, int row, const T &thing)
Definition: matrix.h:166
#define ASSERT_HOST(x)
Definition: errcode.h:84
BOOL8 done
Definition: pageres.h:282
const int length() const
Definition: boxword.h:85
Definition: matrix.h:289
void initialise(inT16 length)
Definition: rejctmap.cpp:318
#define NULL
Definition: host.h:144
void FakeWordFromRatings()
Definition: pageres.cpp:892
void ClearRatings()
Definition: pageres.cpp:1185
void WERD_RES::FakeWordFromRatings ( )

Definition at line 892 of file pageres.cpp.

892  {
893  int num_blobs = ratings->dimension();
894  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
895  word_choice->set_permuter(TOP_CHOICE_PERM);
896  for (int b = 0; b < num_blobs; ++b) {
897  UNICHAR_ID unichar_id = UNICHAR_SPACE;
898  float rating = MAX_INT32;
899  float certainty = -MAX_INT32;
900  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
901  if (choices != NULL && !choices->empty()) {
902  BLOB_CHOICE_IT bc_it(choices);
903  BLOB_CHOICE* choice = bc_it.data();
904  unichar_id = choice->unichar_id();
905  rating = choice->rating();
906  certainty = choice->certainty();
907  }
908  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
909  certainty);
910  }
911  LogNewRawChoice(word_choice);
912  // Ownership of word_choice taken by word here.
913  LogNewCookedChoice(1, false, word_choice);
914 }
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:596
MATRIX * ratings
Definition: pageres.h:215
T get(int column, int row) const
Definition: matrix.h:171
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:612
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:449
void set_permuter(uinT8 perm)
Definition: ratngs.h:372
int dimension() const
Definition: matrix.h:247
float rating() const
Definition: ratngs.h:79
const UNICHARSET * uch_set
Definition: pageres.h:192
int UNICHAR_ID
Definition: unichar.h:33
#define MAX_INT32
Definition: host.h:120
#define NULL
Definition: host.h:144
float certainty() const
Definition: ratngs.h:82
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 504 of file pageres.cpp.

504  {
505  if (best_choice == NULL || best_choices.singleton())
506  return;
507 
508  if (debug_level >= 2)
509  best_choice->print("\nFiltering against best choice");
510  WERD_CHOICE_IT it(&best_choices);
511  int index = 0;
512  for (it.forward(); !it.at_first(); it.forward(), ++index) {
513  WERD_CHOICE* choice = it.data();
514  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
515  choice->adjust_factor());
516  // i, j index the blob choice in choice, best_choice.
517  // chunk is an index into the chopped_word blobs (AKA chunks).
518  // Since the two words may use different segmentations of the chunks, we
519  // iterate over the chunks to find out whether a comparable blob
520  // classification is much worse than the best result.
521  int i = 0, j = 0, chunk = 0;
522  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
523  // and best_chunk are the indices of the first chunk in the NEXT blob,
524  // i.e. we don't have to increment i, j while chunk < choice_chunk and
525  // best_chunk respectively.
526  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
527  while (i < choice->length() && j < best_choice->length()) {
528  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
529  choice->certainty(i) - best_choice->certainty(j) < threshold) {
530  if (debug_level >= 2) {
531  STRING label;
532  label.add_str_int("\nDiscarding bad choice #", index);
533  choice->print(label.string());
534  tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
535  " BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
536  i, j, chunk, choice->certainty(i),
537  best_choice->certainty(j), threshold);
538  }
539  delete it.extract();
540  break;
541  }
542  ++chunk;
543  // If needed, advance choice_chunk to keep up with chunk.
544  while (choice_chunk < chunk && ++i < choice->length())
545  choice_chunk += choice->state(i);
546  // If needed, advance best_chunk to keep up with chunk.
547  while (best_chunk < chunk && ++j < best_choice->length())
548  best_chunk += best_choice->state(j);
549  }
550  }
551 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
int state(int index) const
Definition: ratngs.h:316
float certainty() const
Definition: ratngs.h:327
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
float adjust_factor() const
Definition: ratngs.h:303
void print() const
Definition: ratngs.h:563
Definition: strngs.h:44
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
void WERD_RES::fix_hyphens ( )

Definition at line 1041 of file pageres.cpp.

1041  {
1042  if (!uch_set->contains_unichar("-") ||
1044  return; // Don't create it if it is disallowed.
1045 
1049 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1024
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:1035
const UNICHARSET * uch_set
Definition: pageres.h:192
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
Definition: pageres.cpp:932
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
void WERD_RES::fix_quotes ( )

Definition at line 1012 of file pageres.cpp.

1012  {
1013  if (!uch_set->contains_unichar("\"") ||
1015  return; // Don't create it if it is disallowed.
1016 
1019  NULL);
1020 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
Definition: pageres.cpp:932
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
#define NULL
Definition: host.h:144
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1002
BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 742 of file pageres.cpp.

742  {
743  if (index < 0 || index >= best_choice->length()) return NULL;
744  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
745  return FindMatchingChoice(best_choice->unichar_id(index), choices);
746 }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
#define NULL
Definition: host.h:144
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751
BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 751 of file pageres.cpp.

751  {
752  return best_choice->blob_choices(index, ratings);
753 }
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:268
MATRIX * ratings
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:219
int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 732 of file pageres.cpp.

732  {
733  if (blob_index < 0 || blob_index >= blob_gaps.size())
734  return 0;
735  return blob_gaps[blob_index];
736 }
int size() const
Definition: genericvector.h:72
GenericVector< int > blob_gaps
Definition: pageres.h:208
int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 722 of file pageres.cpp.

722  {
723  int result = 0;
724  for (int b = start_blob; b <= last_blob; ++b) {
725  result += blob_widths[b];
726  if (b < last_blob)
727  result += blob_gaps[b];
728  }
729  return result;
730 }
GenericVector< int > blob_gaps
Definition: pageres.h:208
GenericVector< int > blob_widths
Definition: pageres.h:205
bool WERD_RES::HyphenBoxesOverlap ( const TBOX &  box1,
const TBOX &  box2 
)

Definition at line 1035 of file pageres.cpp.

1035  {
1036  return box1.right() >= box2.left();
1037 }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
void WERD_RES::InitForRetryRecognition ( const WERD_RES &  source)

Definition at line 269 of file pageres.cpp.

269  {
270  word = source.word;
271  CopySimpleFields(source);
272  if (source.blamer_bundle != NULL) {
273  blamer_bundle = new BlamerBundle();
275  }
276 }
WERD * word
Definition: pageres.h:175
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:187
#define NULL
Definition: host.h:144
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
void WERD_RES::InitNonPointers ( )

Definition at line 1088 of file pageres.cpp.

1088  {
1089  tess_failed = FALSE;
1090  tess_accepted = FALSE;
1092  done = FALSE;
1094  small_caps = false;
1095  odd_size = false;
1096  italic = FALSE;
1097  bold = FALSE;
1098  // The fontinfos and tesseract count as non-pointers as they point to
1099  // data owned elsewhere.
1100  fontinfo = NULL;
1101  fontinfo2 = NULL;
1102  tesseract = NULL;
1103  fontinfo_id_count = 0;
1104  fontinfo_id2_count = 0;
1105  x_height = 0.0;
1106  caps_height = 0.0;
1107  baseline_shift = 0.0f;
1108  guessed_x_ht = TRUE;
1110  combination = FALSE;
1111  part_of_combo = FALSE;
1112  reject_spaces = FALSE;
1113 }
BOOL8 tess_accepted
Definition: pageres.h:280
inT8 bold
Definition: pageres.h:286
BOOL8 reject_spaces
Definition: pageres.h:317
float caps_height
Definition: pageres.h:296
const FontInfo * fontinfo
Definition: pageres.h:288
bool small_caps
Definition: pageres.h:283
float x_height
Definition: pageres.h:295
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 part_of_combo
Definition: pageres.h:316
BOOL8 combination
Definition: pageres.h:315
inT8 fontinfo_id_count
Definition: pageres.h:290
bool odd_size
Definition: pageres.h:284
float baseline_shift
Definition: pageres.h:297
BOOL8 guessed_x_ht
Definition: pageres.h:292
const FontInfo * fontinfo2
Definition: pageres.h:289
BOOL8 done
Definition: pageres.h:282
inT8 fontinfo_id2_count
Definition: pageres.h:291
#define FALSE
Definition: capi.h:29
BOOL8 tess_failed
Definition: pageres.h:272
#define TRUE
Definition: capi.h:28
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
#define NULL
Definition: host.h:144
inT8 italic
Definition: pageres.h:285
BOOL8 guessed_caps_ht
Definition: pageres.h:293
void WERD_RES::InitPointers ( )

Definition at line 1115 of file pageres.cpp.

1115  {
1116  word = NULL;
1117  bln_boxes = NULL;
1118  blob_row = NULL;
1119  uch_set = NULL;
1120  chopped_word = NULL;
1121  rebuild_word = NULL;
1122  box_word = NULL;
1123  ratings = NULL;
1124  best_choice = NULL;
1125  raw_choice = NULL;
1126  ep_choice = NULL;
1127  blamer_bundle = NULL;
1128 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
MATRIX * ratings
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:219
TWERD * chopped_word
Definition: pageres.h:201
TWERD * rebuild_word
Definition: pageres.h:244
const UNICHARSET * uch_set
Definition: pageres.h:192
WERD_CHOICE * raw_choice
Definition: pageres.h:224
WERD * word
Definition: pageres.h:175
ROW * blob_row
Definition: pageres.h:186
WERD_CHOICE * ep_choice
Definition: pageres.h:270
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
#define NULL
Definition: host.h:144
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void WERD_RES::InsertSeam ( int  blob_number,
SEAM *  seam 
)

Definition at line 409 of file pageres.cpp.

409  {
410  // Insert the seam into the SEAMS array.
411  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
412  seam_array.insert(seam, blob_number);
413  if (ratings != NULL) {
414  // Expand the ratings matrix.
415  ratings = ratings->ConsumeAndMakeBigger(blob_number);
416  // Fix all the segmentation states.
417  if (raw_choice != NULL)
418  raw_choice->UpdateStateForSplit(blob_number);
419  WERD_CHOICE_IT wc_it(&best_choices);
420  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
421  WERD_CHOICE* choice = wc_it.data();
422  choice->UpdateStateForSplit(blob_number);
423  }
425  }
426 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
MATRIX * ratings
Definition: pageres.h:215
TWERD * chopped_word
Definition: pageres.h:201
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:685
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:82
void insert(T t, int index)
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:391
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:58
#define NULL
Definition: host.h:144
bool WERD_RES::IsAmbiguous ( )

Definition at line 443 of file pageres.cpp.

443  {
444  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
445 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool dangerous_ambig_found() const
Definition: ratngs.h:360
bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE *  word_choice 
)

Definition at line 612 of file pageres.cpp.

613  {
614  if (best_choice != NULL) {
615  // Throw out obviously bad choices to save some work.
616  // TODO(rays) Get rid of this! This piece of code produces different
617  // results according to the order in which words are found, which is an
618  // undesirable behavior. It would be better to keep all the choices and
619  // prune them later when more information is available.
620  float max_certainty_delta =
621  StopperAmbigThreshold(best_choice->adjust_factor(),
622  word_choice->adjust_factor());
623  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
624  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
625  if (word_choice->certainty() - best_choice->certainty() <
626  max_certainty_delta) {
627  if (debug) {
628  STRING bad_string;
629  word_choice->string_and_lengths(&bad_string, NULL);
630  tprintf("Discarding choice \"%s\" with an overly low certainty"
631  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
632  bad_string.string(), word_choice->certainty(),
634  max_certainty_delta + best_choice->certainty());
635  }
636  delete word_choice;
637  return false;
638  }
639  }
640 
641  // Insert in the list in order of increasing rating, but knock out worse
642  // string duplicates.
643  WERD_CHOICE_IT it(&best_choices);
644  const STRING& new_str = word_choice->unichar_string();
645  bool inserted = false;
646  int num_choices = 0;
647  if (!it.empty()) {
648  do {
649  WERD_CHOICE* choice = it.data();
650  if (choice->rating() > word_choice->rating() && !inserted) {
651  // Time to insert.
652  it.add_before_stay_put(word_choice);
653  inserted = true;
654  if (num_choices == 0)
655  best_choice = word_choice; // This is the new best.
656  ++num_choices;
657  }
658  if (choice->unichar_string() == new_str) {
659  if (inserted) {
660  // New is better.
661  delete it.extract();
662  } else {
663  // Old is better.
664  if (debug) {
665  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
666  new_str.string(), word_choice->rating(), choice->rating());
667  }
668  delete word_choice;
669  return false;
670  }
671  } else {
672  ++num_choices;
673  if (num_choices > max_num_choices)
674  delete it.extract();
675  }
676  it.forward();
677  } while (!it.at_first());
678  }
679  if (!inserted && num_choices < max_num_choices) {
680  it.add_to_end(word_choice);
681  inserted = true;
682  if (num_choices == 0)
683  best_choice = word_choice; // This is the new best.
684  }
685  if (debug) {
686  if (inserted)
687  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
688  else
689  tprintf("Poor");
690  word_choice->print(" Word Choice");
691  }
692  if (!inserted) {
693  delete word_choice;
694  return false;
695  }
696  return true;
697 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
float rating() const
Definition: ratngs.h:324
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
const STRING & unichar_string() const
Definition: ratngs.h:524
float certainty() const
Definition: ratngs.h:327
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:427
float adjust_factor() const
Definition: ratngs.h:303
void print() const
Definition: ratngs.h:563
Definition: strngs.h:44
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
bool WERD_RES::LogNewRawChoice ( WERD_CHOICE *  word_choice)

Definition at line 596 of file pageres.cpp.

596  {
597  if (raw_choice == NULL || word_choice->rating() < raw_choice->rating()) {
598  delete raw_choice;
599  raw_choice = new WERD_CHOICE(*word_choice);
601  return true;
602  }
603  return false;
604 }
float rating() const
Definition: ratngs.h:324
void set_permuter(uinT8 perm)
Definition: ratngs.h:372
WERD_CHOICE * raw_choice
Definition: pageres.h:224
#define NULL
Definition: host.h:144
void WERD_RES::merge_tess_fails ( )

Definition at line 1061 of file pageres.cpp.

1061  {
1064  int len = best_choice->length();
1065  ASSERT_HOST(reject_map.length() == len);
1066  ASSERT_HOST(box_word->length() == len);
1067  }
1068 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
inT32 length() const
Definition: rejctmap.h:237
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
#define ASSERT_HOST(x)
Definition: errcode.h:84
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1053
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
const int length() const
Definition: boxword.h:85
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
Definition: pageres.cpp:932
#define NULL
Definition: host.h:144
void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 968 of file pageres.cpp.

968  {
969  if (reject_map.length() == best_choice->length())
970  reject_map.remove_pos(index);
971  best_choice->remove_unichar_id(index + 1);
972  rebuild_word->MergeBlobs(index, index + 2);
973  box_word->MergeBoxes(index, index + 2);
974  if (index + 1 < best_state.length()) {
975  best_state[index] += best_state[index + 1];
976  best_state.remove(index + 1);
977  }
978 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
void remove_unichar_id(int index)
Definition: ratngs.h:481
inT32 length() const
Definition: rejctmap.h:237
int length() const
Definition: genericvector.h:79
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
void MergeBoxes(int start, int end)
Definition: boxword.cpp:134
void MergeBlobs(int start, int end)
Definition: blobs.cpp:892
TWERD * rebuild_word
Definition: pageres.h:244
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:365
void remove(int index)
GenericVector< int > best_state
Definition: pageres.h:255
WERD_RES & WERD_RES::operator= ( const WERD_RES &  source)

Definition at line 178 of file pageres.cpp.

178  {
179  this->ELIST_LINK::operator=(source);
180  Clear();
181  if (source.combination) {
182  word = new WERD;
183  *word = *(source.word); // deep copy
184  } else {
185  word = source.word; // pt to same word
186  }
187  if (source.bln_boxes != NULL)
188  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
189  if (source.chopped_word != NULL)
190  chopped_word = new TWERD(*source.chopped_word);
191  if (source.rebuild_word != NULL)
192  rebuild_word = new TWERD(*source.rebuild_word);
193  // TODO(rays) Do we ever need to copy the seam_array?
194  blob_row = source.blob_row;
195  denorm = source.denorm;
196  if (source.box_word != NULL)
197  box_word = new tesseract::BoxWord(*source.box_word);
198  best_state = source.best_state;
199  correct_text = source.correct_text;
200  blob_widths = source.blob_widths;
201  blob_gaps = source.blob_gaps;
202  // None of the uses of operator= require the ratings matrix to be copied,
203  // so don't as it would be really slow.
204 
205  // Copy the cooked choices.
206  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
207  WERD_CHOICE_IT wc_dest_it(&best_choices);
208  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
209  const WERD_CHOICE *choice = wc_it.data();
210  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
211  }
212  if (!wc_dest_it.empty()) {
213  wc_dest_it.move_to_first();
214  best_choice = wc_dest_it.data();
215  } else {
216  best_choice = NULL;
217  }
218 
219  if (source.raw_choice != NULL) {
220  raw_choice = new WERD_CHOICE(*source.raw_choice);
221  } else {
222  raw_choice = NULL;
223  }
224  if (source.ep_choice != NULL) {
225  ep_choice = new WERD_CHOICE(*source.ep_choice);
226  } else {
227  ep_choice = NULL;
228  }
229  reject_map = source.reject_map;
230  combination = source.combination;
231  part_of_combo = source.part_of_combo;
232  CopySimpleFields(source);
233  if (source.blamer_bundle != NULL) {
234  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
235  }
236  return *this;
237 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
tesseract::BoxWord * box_word
Definition: pageres.h:250
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< STRING > correct_text
Definition: pageres.h:259
void operator=(const ELIST_LINK &)
Definition: elst.h:101
BOOL8 part_of_combo
Definition: pageres.h:316
BOOL8 combination
Definition: pageres.h:315
TWERD * rebuild_word
Definition: pageres.h:244
DENORM denorm
Definition: pageres.h:190
GenericVector< int > blob_gaps
Definition: pageres.h:208
WERD_CHOICE * raw_choice
Definition: pageres.h:224
Definition: werd.h:60
WERD * word
Definition: pageres.h:175
ROW * blob_row
Definition: pageres.h:186
WERD_CHOICE * ep_choice
Definition: pageres.h:270
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
GenericVector< int > blob_widths
Definition: pageres.h:205
Definition: blobs.h:395
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void Clear()
Definition: pageres.cpp:1130
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1072 of file pageres.cpp.

1072  {
1073  // all seams must have no splits.
1074  for (int index = start; index < start + count - 1; ++index) {
1075  if (index >= 0 && index < seam_array.size()) {
1076  SEAM* seam = seam_array[index];
1077  if (seam != NULL && seam->HasAnySplits()) return false;
1078  }
1079  }
1080  return true;
1081 }
int size() const
Definition: genericvector.h:72
bool HasAnySplits() const
Definition: seam.h:67
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
int count(LIST var_list)
Definition: oldlist.cpp:108
#define NULL
Definition: host.h:144
Definition: seam.h:44
void WERD_RES::PrintBestChoices ( ) const

Definition at line 709 of file pageres.cpp.

709  {
710  STRING alternates_str;
711  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
712  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
713  if (!it.at_first()) alternates_str += "\", \"";
714  alternates_str += it.data()->unichar_string();
715  }
716  tprintf("Alternates for \"%s\": {\"%s\"}\n",
717  best_choice->unichar_string().string(), alternates_str.string());
718 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
const STRING & unichar_string() const
Definition: ratngs.h:524
Definition: strngs.h:44
const char * string() const
Definition: strngs.cpp:193
const char* const WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 355 of file pageres.h.

355  {
356  if (blob_index < 0 || blob_index >= raw_choice->length())
357  return NULL;
358  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
359  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
360  return NULL;
361  return uch_set->id_to_unichar(id);
362  }
int length() const
Definition: ratngs.h:300
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
const UNICHARSET * uch_set
Definition: pageres.h:192
WERD_CHOICE * raw_choice
Definition: pageres.h:224
int UNICHAR_ID
Definition: unichar.h:33
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
void WERD_RES::RebuildBestState ( )

Definition at line 800 of file pageres.cpp.

800  {
802  if (rebuild_word != NULL)
803  delete rebuild_word;
804  rebuild_word = new TWERD;
805  if (seam_array.empty())
807  best_state.truncate(0);
808  int start = 0;
809  for (int i = 0; i < best_choice->length(); ++i) {
810  int length = best_choice->state(i);
811  best_state.push_back(length);
812  if (length > 1) {
814  start + length - 1);
815  }
816  TBLOB* blob = chopped_word->blobs[start];
817  rebuild_word->blobs.push_back(new TBLOB(*blob));
818  if (length > 1) {
820  start + length - 1);
821  }
822  start += length;
823  }
824 }
Definition: blobs.h:261
void truncate(int size)
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
int push_back(T object)
TWERD * chopped_word
Definition: pageres.h:201
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
Definition: seam.cpp:269
#define ASSERT_HOST(x)
Definition: errcode.h:84
int state(int index) const
Definition: ratngs.h:316
TWERD * rebuild_word
Definition: pageres.h:244
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:216
bool empty() const
Definition: genericvector.h:84
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
Definition: blobs.h:395
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:194
void WERD_RES::ReplaceBestChoice ( WERD_CHOICE *  choice)

Definition at line 787 of file pageres.cpp.

787  {
788  best_choice = choice;
790  SetupBoxWord();
791  // Make up a fake reject map of the right length to keep the
792  // rejection pass happy.
796 }
BOOL8 tess_accepted
Definition: pageres.h:280
void SetScriptPositions()
Definition: pageres.cpp:853
void RebuildBestState()
Definition: pageres.cpp:800
int length() const
Definition: genericvector.h:79
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 done
Definition: pageres.h:282
void SetupBoxWord()
Definition: pageres.cpp:843
void initialise(inT16 length)
Definition: rejctmap.cpp:318
GenericVector< int > best_state
Definition: pageres.h:255
void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 860 of file pageres.cpp.

860  {
862  WERD_CHOICE_IT wc_it(&best_choices);
863  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
864  wc_it.data()->SetAllScriptPositions(position);
865 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:609
void WERD_RES::SetScriptPositions ( )

Definition at line 853 of file pageres.cpp.

853  {
855 }
void SetScriptPositions(bool small_caps, TWERD *word)
Definition: ratngs.cpp:528
WERD_CHOICE * best_choice
Definition: pageres.h:219
TWERD * chopped_word
Definition: pageres.h:201
bool small_caps
Definition: pageres.h:283
void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET &  unicharset_in)

Definition at line 334 of file pageres.cpp.

334  {
339 }
void ClearWordChoices()
Definition: pageres.cpp:1173
TWERD * chopped_word
Definition: pageres.h:201
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
Definition: seam.cpp:269
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:391
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59
void WERD_RES::SetupBlamerBundle ( )

Definition at line 384 of file pageres.cpp.

384  {
385  if (blamer_bundle != NULL) {
387  }
388 }
DENORM denorm
Definition: pageres.h:190
#define NULL
Definition: host.h:144
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:145
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 391 of file pageres.cpp.

391  {
393  blob_gaps.truncate(0);
394  int num_blobs = chopped_word->NumBlobs();
395  for (int b = 0; b < num_blobs; ++b) {
396  TBLOB *blob = chopped_word->blobs[b];
397  TBOX box = blob->bounding_box();
398  blob_widths.push_back(box.width());
399  if (b + 1 < num_blobs) {
401  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
402  }
403  }
404 }
Definition: blobs.h:261
void truncate(int size)
int push_back(T object)
TWERD * chopped_word
Definition: pageres.h:201
inT16 right() const
Definition: rect.h:75
int NumBlobs() const
Definition: blobs.h:425
GenericVector< int > blob_gaps
Definition: pageres.h:208
inT16 width() const
Definition: rect.h:111
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
Definition: rect.h:30
GenericVector< int > blob_widths
Definition: pageres.h:205
TBOX bounding_box() const
Definition: blobs.cpp:482
void WERD_RES::SetupBoxWord ( )

Definition at line 843 of file pageres.cpp.

843  {
844  if (box_word != NULL)
845  delete box_word;
849 }
tesseract::BoxWord * box_word
Definition: pageres.h:250
void ComputeBoundingBoxes()
Definition: blobs.cpp:875
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:95
TWERD * rebuild_word
Definition: pageres.h:244
DENORM denorm
Definition: pageres.h:190
WERD * word
Definition: pageres.h:175
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59
const BLOCK * block() const
Definition: normalis.h:275
#define NULL
Definition: host.h:144
void WERD_RES::SetupFake ( const UNICHARSET &  uch)

Definition at line 343 of file pageres.cpp.

343  {
344  ClearResults();
345  SetupWordScript(unicharset_in);
346  chopped_word = new TWERD;
347  rebuild_word = new TWERD;
350  int blob_count = word->cblob_list()->length();
351  if (blob_count > 0) {
352  BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
353  // For non-text blocks, just pass any blobs through to the box_word
354  // and call the word failed with a fake classification.
355  C_BLOB_IT b_it(word->cblob_list());
356  int blob_id = 0;
357  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
358  TBOX box = b_it.data()->bounding_box();
359  box_word->InsertBox(box_word->length(), box);
360  fake_choices[blob_id++] = new BLOB_CHOICE;
361  }
362  FakeClassifyWord(blob_count, fake_choices);
363  delete [] fake_choices;
364  } else {
365  WERD_CHOICE* word = new WERD_CHOICE(&unicharset_in);
366  word->make_bad();
367  LogNewRawChoice(word);
368  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
369  LogNewCookedChoice(1, false, word);
370  }
371  tess_failed = true;
372  done = true;
373 }
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:596
tesseract::BoxWord * box_word
Definition: pageres.h:250
void ClearResults()
Definition: pageres.cpp:1140
TWERD * chopped_word
Definition: pageres.h:201
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:612
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:375
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:872
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:440
TWERD * rebuild_word
Definition: pageres.h:244
BOOL8 done
Definition: pageres.h:282
WERD * word
Definition: pageres.h:175
const int length() const
Definition: boxword.h:85
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
BOOL8 tess_failed
Definition: pageres.h:272
Definition: rect.h:30
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
Definition: blobs.h:395
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
bool WERD_RES::SetupForRecognition ( const UNICHARSET &  unicharset_in,
tesseract::Tesseract *  tesseract,
Pix *  pix,
int  norm_mode,
const TBOX *  norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW *  row,
const BLOCK *  block 
)

Definition at line 294 of file pageres.cpp.

301  {
302  tesseract::OcrEngineMode norm_mode_hint =
303  static_cast<tesseract::OcrEngineMode>(norm_mode);
304  tesseract = tess;
305  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
306  if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
307  word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
308  // Empty words occur when all the blobs have been moved to the rej_blobs
309  // list, which seems to occur frequently in junk.
310  SetupFake(unicharset_in);
311  word->set_flag(W_REP_CHAR, false);
312  return false;
313  }
314  ClearResults();
315  SetupWordScript(unicharset_in);
316  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
317  float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
318  ? row->body_size() : x_height;
319  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
320  word_xheight, baseline_shift, numeric_mode,
321  norm_mode_hint, norm_box, &denorm);
322  blob_row = row;
323  SetupBasicsFromChoppedWord(unicharset_in);
325  int num_blobs = chopped_word->NumBlobs();
326  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
327  tess_failed = false;
328  return true;
329 }
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:343
void ClearResults()
Definition: pageres.cpp:1140
MATRIX * ratings
Definition: pageres.h:215
TWERD * chopped_word
Definition: pageres.h:201
void SetupBlamerBundle()
Definition: pageres.cpp:384
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:334
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:375
bool IsText() const
Definition: polyblk.h:52
float x_height
Definition: pageres.h:295
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:793
int NumBlobs() const
Definition: blobs.h:425
DENORM denorm
Definition: pageres.h:190
float baseline_shift
Definition: pageres.h:297
Definition: werd.h:44
WERD * word
Definition: pageres.h:175
ROW * blob_row
Definition: pageres.h:186
BOOL8 tess_failed
Definition: pageres.h:272
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:41
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
Definition: matrix.h:289
#define NULL
Definition: host.h:144
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:807
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
float body_size() const
Definition: ocrrow.h:70
void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 375 of file pageres.cpp.

375  {
376  uch_set = &uch;
377  int script = uch.default_sid();
378  word->set_script_id(script);
379  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
380  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
381 }
const UNICHARSET * uch_set
Definition: pageres.h:192
void set_script_id(int id)
Definition: werd.h:113
WERD * word
Definition: pageres.h:175
int default_sid() const
Definition: unicharset.h:839
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
bool WERD_RES::StatesAllValid ( )

Definition at line 449 of file pageres.cpp.

449  {
450  int ratings_dim = ratings->dimension();
451  if (raw_choice->TotalOfStates() != ratings_dim) {
452  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
453  raw_choice->TotalOfStates(), ratings_dim);
454  return false;
455  }
456  WERD_CHOICE_IT it(&best_choices);
457  int index = 0;
458  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
459  WERD_CHOICE* choice = it.data();
460  if (choice->TotalOfStates() != ratings_dim) {
461  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
462  index, choice->TotalOfStates(), ratings_dim);
463  return false;
464  }
465  }
466  return true;
467 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
MATRIX * ratings
Definition: pageres.h:215
#define tprintf(...)
Definition: tprintf.h:31
int dimension() const
Definition: matrix.h:247
WERD_CHOICE * raw_choice
Definition: pageres.h:224
int TotalOfStates() const
Definition: ratngs.cpp:697
UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 364 of file pageres.h.

364  {
365  if (best_choice == NULL ||
366  blob_index >= best_choice->length() ||
367  blob_index < 0)
368  return UNICHARSET::U_OTHER_NEUTRAL;
369  return uch_set->get_direction(best_choice->unichar_id(blob_index));
370  }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const UNICHARSET * uch_set
Definition: pageres.h:192
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:638
#define NULL
Definition: host.h:144
bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 406 of file pageres.h.

406  {
407  return best_choice->unichars_in_script_order();
408  }
bool unichars_in_script_order() const
Definition: ratngs.h:518
WERD_CHOICE * best_choice
Definition: pageres.h:219

Member Data Documentation

float WERD_RES::baseline_shift

Definition at line 297 of file pageres.h.

WERD_CHOICE* WERD_RES::best_choice

Definition at line 219 of file pageres.h.

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 227 of file pageres.h.

GenericVector<int> WERD_RES::best_state

Definition at line 255 of file pageres.h.

BlamerBundle* WERD_RES::blamer_bundle

Definition at line 230 of file pageres.h.

tesseract::BoxWord* WERD_RES::bln_boxes

Definition at line 184 of file pageres.h.

GenericVector<int> WERD_RES::blob_gaps

Definition at line 208 of file pageres.h.

ROW* WERD_RES::blob_row

Definition at line 186 of file pageres.h.

GenericVector<int> WERD_RES::blob_widths

Definition at line 205 of file pageres.h.

inT8 WERD_RES::bold

Definition at line 286 of file pageres.h.

tesseract::BoxWord* WERD_RES::box_word

Definition at line 250 of file pageres.h.

float WERD_RES::caps_height

Definition at line 296 of file pageres.h.

TWERD* WERD_RES::chopped_word

Definition at line 201 of file pageres.h.

BOOL8 WERD_RES::combination

Definition at line 315 of file pageres.h.

GenericVector<STRING> WERD_RES::correct_text

Definition at line 259 of file pageres.h.

DENORM WERD_RES::denorm

Definition at line 190 of file pageres.h.

BOOL8 WERD_RES::done

Definition at line 282 of file pageres.h.

WERD_CHOICE* WERD_RES::ep_choice

Definition at line 270 of file pageres.h.

const FontInfo* WERD_RES::fontinfo

Definition at line 288 of file pageres.h.

const FontInfo* WERD_RES::fontinfo2

Definition at line 289 of file pageres.h.

inT8 WERD_RES::fontinfo_id2_count

Definition at line 291 of file pageres.h.

inT8 WERD_RES::fontinfo_id_count

Definition at line 290 of file pageres.h.

BOOL8 WERD_RES::guessed_caps_ht

Definition at line 293 of file pageres.h.

BOOL8 WERD_RES::guessed_x_ht

Definition at line 292 of file pageres.h.

inT8 WERD_RES::italic

Definition at line 285 of file pageres.h.

bool WERD_RES::odd_size

Definition at line 284 of file pageres.h.

BOOL8 WERD_RES::part_of_combo

Definition at line 316 of file pageres.h.

MATRIX* WERD_RES::ratings

Definition at line 215 of file pageres.h.

WERD_CHOICE* WERD_RES::raw_choice

Definition at line 224 of file pageres.h.

TWERD* WERD_RES::rebuild_word

Definition at line 244 of file pageres.h.

REJMAP WERD_RES::reject_map

Definition at line 271 of file pageres.h.

BOOL8 WERD_RES::reject_spaces

Definition at line 317 of file pageres.h.

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 203 of file pageres.h.

bool WERD_RES::small_caps

Definition at line 283 of file pageres.h.

BOOL8 WERD_RES::tess_accepted

Definition at line 280 of file pageres.h.

BOOL8 WERD_RES::tess_failed

Definition at line 272 of file pageres.h.

BOOL8 WERD_RES::tess_would_adapt

Definition at line 281 of file pageres.h.

tesseract::Tesseract* WERD_RES::tesseract

Definition at line 266 of file pageres.h.

const UNICHARSET* WERD_RES::uch_set

Definition at line 192 of file pageres.h.

CRUNCH_MODE WERD_RES::unlv_crunch_mode

Definition at line 294 of file pageres.h.

WERD* WERD_RES::word

Definition at line 175 of file pageres.h.

float WERD_RES::x_height

Definition at line 295 of file pageres.h.


The documentation for this class was generated from the following files: