tesseract  4.00.00dev
ratngs.h File Reference
#include <assert.h>
#include "clst.h"
#include "elst.h"
#include "fontinfo.h"
#include "genericvector.h"
#include "matrix.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"

Go to the source code of this file.

Classes

class  BLOB_CHOICE
 
class  WERD_CHOICE
 

Namespaces

 tesseract
 

Typedefs

typedef GenericVector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
 

Enumerations

enum  BlobChoiceClassifier {
  BCC_STATIC_CLASSIFIER, BCC_ADAPTED_CLASSIFIER, BCC_SPECKLE_CLASSIFIER, BCC_AMBIG,
  BCC_FAKE
}
 
enum  PermuterType {
  NO_PERM, PUNC_PERM, TOP_CHOICE_PERM, LOWER_CASE_PERM,
  UPPER_CASE_PERM, NGRAM_PERM, NUMBER_PERM, USER_PATTERN_PERM,
  SYSTEM_DAWG_PERM, DOC_DAWG_PERM, USER_DAWG_PERM, FREQ_DAWG_PERM,
  COMPOUND_PERM, NUM_PERMUTER_TYPES
}
 
enum  tesseract::ScriptPos { tesseract::SP_NORMAL, tesseract::SP_SUBSCRIPT, tesseract::SP_SUPERSCRIPT, tesseract::SP_DROPCAP }
 

Functions

BLOB_CHOICE * FindMatchingChoice (UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
 
const char * tesseract::ScriptPosToString (enum ScriptPos script_pos)
 
bool EqualIgnoringCaseAndTerminalPunct (const WERD_CHOICE &word1, const WERD_CHOICE &word2)
 
void print_ratings_list (const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
 

Typedef Documentation

◆ BLOB_CHOICE_LIST_VECTOR

typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR

Definition at line 640 of file ratngs.h.

Enumeration Type Documentation

◆ BlobChoiceClassifier

Enumerator
BCC_STATIC_CLASSIFIER 
BCC_ADAPTED_CLASSIFIER 
BCC_SPECKLE_CLASSIFIER 
BCC_AMBIG 
BCC_FAKE 

Definition at line 40 of file ratngs.h.

40  {
41  BCC_STATIC_CLASSIFIER, // From the char_norm classifier.
42  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
43  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
44  BCC_AMBIG, // Generated by ambiguity detection.
45  BCC_FAKE, // From some other process.
46 };

◆ PermuterType

Enumerator
NO_PERM 
PUNC_PERM 
TOP_CHOICE_PERM 
LOWER_CASE_PERM 
UPPER_CASE_PERM 
NGRAM_PERM 
NUMBER_PERM 
USER_PATTERN_PERM 
SYSTEM_DAWG_PERM 
DOC_DAWG_PERM 
USER_DAWG_PERM 
FREQ_DAWG_PERM 
COMPOUND_PERM 
NUM_PERMUTER_TYPES 

Definition at line 238 of file ratngs.h.

238  {
239  NO_PERM, // 0
240  PUNC_PERM, // 1
241  TOP_CHOICE_PERM, // 2
242  LOWER_CASE_PERM, // 3
243  UPPER_CASE_PERM, // 4
244  NGRAM_PERM, // 5
245  NUMBER_PERM, // 6
246  USER_PATTERN_PERM, // 7
247  SYSTEM_DAWG_PERM, // 8
248  DOC_DAWG_PERM, // 9
249  USER_DAWG_PERM, // 10
250  FREQ_DAWG_PERM, // 11
251  COMPOUND_PERM, // 12
252 
253  NUM_PERMUTER_TYPES
254 };

Function Documentation

◆ EqualIgnoringCaseAndTerminalPunct()

bool EqualIgnoringCaseAndTerminalPunct ( const WERD_CHOICE &  word1,
const WERD_CHOICE &  word2 
)

Definition at line 794 of file ratngs.cpp.

795  {
796  const UNICHARSET *uchset = word1.unicharset();
797  if (word2.unicharset() != uchset) return false;
798  int w1start, w1end;
799  word1.punct_stripped(&w1start, &w1end);
800  int w2start, w2end;
801  word2.punct_stripped(&w2start, &w2end);
802  if (w1end - w1start != w2end - w2start) return false;
803  for (int i = 0; i < w1end - w1start; i++) {
804  if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
805  uchset->to_lower(word2.unichar_id(w2start + i))) {
806  return false;
807  }
808  }
809  return true;
810 }
const UNICHARSET * unicharset() const
Definition: ratngs.h:296
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:311
UNICHAR_ID to_lower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:703
void punct_stripped(int *start_core, int *end_core) const
Definition: ratngs.cpp:364

◆ FindMatchingChoice()

BLOB_CHOICE* FindMatchingChoice ( UNICHAR_ID  char_id,
BLOB_CHOICE_LIST *  bc_list 
)

Definition at line 161 of file ratngs.cpp.

162  {
163  // Find the corresponding best BLOB_CHOICE.
164  BLOB_CHOICE_IT choice_it(bc_list);
165  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
166  choice_it.forward()) {
167  BLOB_CHOICE* choice = choice_it.data();
168  if (choice->unichar_id() == char_id) {
169  return choice;
170  }
171  }
172  return NULL;
173 }
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76

◆ print_ratings_list()

void print_ratings_list ( const char *  msg,
BLOB_CHOICE_LIST *  ratings,
const UNICHARSET &  current_unicharset 
)

print_ratings_list

Send all the ratings out to the logfile.

Parameters
msg: intro message
ratings: list of ratings
current_unicharset: unicharset that can be used for id-to-unichar conversion

Definition at line 822 of file ratngs.cpp.

824  {
825  if (ratings->length() == 0) {
826  tprintf("%s:<none>\n", msg);
827  return;
828  }
829  if (*msg != '\0') {
830  tprintf("%s\n", msg);
831  }
832  BLOB_CHOICE_IT c_it;
833  c_it.set_to_list(ratings);
834  for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
835  c_it.data()->print(&current_unicharset);
836  if (!c_it.at_last()) tprintf("\n");
837  }
838  tprintf("\n");
839  fflush(stdout);
840 }
#define tprintf(...)
Definition: tprintf.h:31