tesseract v5.3.3.20231005
tesseract::LMConsistencyInfo Struct Reference

#include <lm_consistency.h>

Public Types

enum ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER }
 

Public Member Functions

 LMConsistencyInfo (const LMConsistencyInfo *parent_info)
 
int NumInconsistentPunc () const
 
int NumInconsistentCase () const
 
int NumInconsistentChartype () const
 
bool Consistent () const
 
int NumInconsistentSpaces () const
 
int InconsistentXHeight () const
 
void ComputeXheightConsistency (const BLOB_CHOICE *b, bool is_punc)
 
float BodyMinXHeight () const
 
float BodyMaxXHeight () const
 

Public Attributes

EDGE_REF punc_ref
 
int num_alphas
 
int num_digits
 
int num_punc
 
int num_other
 
ChartypeEnum chartype
 
XHeightConsistencyEnum xht_decision
 
int num_non_first_upper
 
int num_lower
 
int script_id
 
int num_inconsistent_spaces
 
float xht_lo [kNumPos]
 
float xht_hi [kNumPos]
 
int16_t xht_count [kNumPos]
 
int16_t xht_count_punc [kNumPos]
 
int16_t xht_sp
 
int16_t xpos_entropy
 
bool invalid_punc
 
bool inconsistent_script
 
bool inconsistent_font
 

Static Public Attributes

static const int kShiftThresh = 1
 
static const int kMaxEntropy = 1
 
static const int kSUB = 0
 
static const int kNORM = 1
 
static const int kSUP = 2
 
static const int kNumPos = 3
 

Detailed Description

Definition at line 38 of file lm_consistency.h.

Member Enumeration Documentation

◆ ChartypeEnum

Constructor & Destructor Documentation

◆ LMConsistencyInfo()

tesseract::LMConsistencyInfo::LMConsistencyInfo ( const LMConsistencyInfo *parent_info)
inline explicit

Definition at line 53 of file lm_consistency.h.

53 {
54 if (parent_info == nullptr) {
55 // Initialize from scratch.
56 num_alphas = 0;
57 num_digits = 0;
58 num_punc = 0;
59 num_other = 0;
61 punc_ref = NO_EDGE;
62 invalid_punc = false;
64 num_lower = 0;
65 script_id = 0;
66 inconsistent_script = false;
68 inconsistent_font = false;
69 // Initialize XHeight stats.
70 for (int i = 0; i < kNumPos; i++) {
71 xht_count[i] = 0;
72 xht_count_punc[i] = 0;
73 xht_lo[i] = 0;
74 xht_hi[i] = 256; // kBlnCellHeight
75 }
76 xht_sp = -1; // This invalid value indicates that there was no parent.
77 xpos_entropy = 0;
79 } else {
80 // Copy parent info
81 *this = *parent_info;
82 }
83 }
@ XH_GOOD
Definition: dict.h:81
int16_t xht_count_punc[kNumPos]
XHeightConsistencyEnum xht_decision

Member Function Documentation

◆ BodyMaxXHeight()

float tesseract::LMConsistencyInfo::BodyMaxXHeight ( ) const
inline

Definition at line 112 of file lm_consistency.h.

112 {
113 if (InconsistentXHeight()) {
114 return static_cast<float>(INT16_MAX);
115 }
116 return xht_hi[kNORM];
117 }

◆ BodyMinXHeight()

float tesseract::LMConsistencyInfo::BodyMinXHeight ( ) const
inline

Definition at line 106 of file lm_consistency.h.

106 {
107 if (InconsistentXHeight()) {
108 return 0.0f;
109 }
110 return xht_lo[kNORM];
111 }

◆ ComputeXheightConsistency()

void tesseract::LMConsistencyInfo::ComputeXheightConsistency ( const BLOB_CHOICE *b,
bool  is_punc 
)

Definition at line 29 of file lm_consistency.cpp.

29 {
31 return; // It isn't going to get any better.
32 }
33
34 // Compute xheight consistency.
35 bool parent_null = xht_sp < 0;
36 int parent_sp = xht_sp;
37 // Debug strings.
38 if (b->yshift() > LMConsistencyInfo::kShiftThresh) {
40 } else if (b->yshift() < -LMConsistencyInfo::kShiftThresh) {
42 } else {
44 }
46 if (is_punc) {
48 }
49 if (!parent_null) {
50 xpos_entropy += abs(parent_sp - xht_sp);
51 }
52 // TODO(eger): Figure out a better way to account for small caps.
53 // For the first character not y-shifted, we only care if it is too small.
54 // Too large is common in drop caps and small caps.
55 // int16_t small_xht = b->min_xheight();
56 // if (parent_vse == nullptr && sp == LanguageModelConsistencyInfo::kNORM) {
57 // small_xht = 0;
58 // }
59 IntersectRange(b->min_xheight(), b->max_xheight(), &(xht_lo[xht_sp]), &(xht_hi[xht_sp]));
60
61 // Compute xheight inconsistency kinds.
62 if (parent_null) {
63 if (xht_count[kNORM] == 1) {
65 } else {
67 }
68 return;
69 }
70
71 // When we intersect the ranges of xheights in pixels for all characters in
72 // each position (subscript, normal, superscript),
73 // How much range must be left? 0? [exactly one pixel height for xheight] 1?
74 // TODO(eger): Extend this code to take a prior for the rest of the line.
75 const int kMinIntersectedXHeightRange = 0;
76 for (int i = 0; i < kNumPos; i++) {
77 if (xht_lo[i] > xht_hi[i] - kMinIntersectedXHeightRange) {
79 return;
80 }
81 }
82
83 // Reject as improbable anything where there's much punctuation in subscript
84 // or superscript regions.
85 if (xht_count_punc[kSUB] > xht_count[kSUB] * 0.4 ||
88 return;
89 }
90
91 // Now check that the subscript and superscript aren't too small relative to
92 // the mainline.
93 auto mainline_xht = static_cast<double>(xht_lo[kNORM]);
94 double kMinSizeRatio = 0.4;
95 if (mainline_xht > 0.0 && (static_cast<double>(xht_hi[kSUB]) / mainline_xht < kMinSizeRatio ||
96 static_cast<double>(xht_hi[kSUP]) / mainline_xht < kMinSizeRatio)) {
98 return;
99 }
100 // TODO(eger): Check into inconsistency of super/subscript y offsets.
103 return;
104 }
105 if (xht_count[kSUB] == 0 && xht_count[kSUP] == 0) {
107 return;
108 }
110}
@ XH_SUBNORMAL
Definition: dict.h:81
@ XH_INCONSISTENT
Definition: dict.h:81
void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2)
Definition: helpers.h:141
static const int kShiftThresh
static const int kMaxEntropy

◆ Consistent()

bool tesseract::LMConsistencyInfo::Consistent ( ) const
inline

Definition at line 94 of file lm_consistency.h.

◆ InconsistentXHeight()

int tesseract::LMConsistencyInfo::InconsistentXHeight ( ) const
inline

Definition at line 102 of file lm_consistency.h.

102 {
104 }

◆ NumInconsistentCase()

int tesseract::LMConsistencyInfo::NumInconsistentCase ( ) const
inline

Definition at line 87 of file lm_consistency.h.

◆ NumInconsistentChartype()

int tesseract::LMConsistencyInfo::NumInconsistentChartype ( ) const
inline

Definition at line 90 of file lm_consistency.h.

90 {
91 return (NumInconsistentPunc() + num_other +
93 }

◆ NumInconsistentPunc()

int tesseract::LMConsistencyInfo::NumInconsistentPunc ( ) const
inline

Definition at line 84 of file lm_consistency.h.

84 {
85 return invalid_punc ? num_punc : 0;
86 }

◆ NumInconsistentSpaces()

int tesseract::LMConsistencyInfo::NumInconsistentSpaces ( ) const
inline

Definition at line 99 of file lm_consistency.h.

99 {
101 }

Member Data Documentation

◆ chartype

ChartypeEnum tesseract::LMConsistencyInfo::chartype

Definition at line 124 of file lm_consistency.h.

◆ inconsistent_font

bool tesseract::LMConsistencyInfo::inconsistent_font

Definition at line 139 of file lm_consistency.h.

◆ inconsistent_script

bool tesseract::LMConsistencyInfo::inconsistent_script

Definition at line 138 of file lm_consistency.h.

◆ invalid_punc

bool tesseract::LMConsistencyInfo::invalid_punc

Definition at line 137 of file lm_consistency.h.

◆ kMaxEntropy

const int tesseract::LMConsistencyInfo::kMaxEntropy = 1
static

Definition at line 47 of file lm_consistency.h.

◆ kNORM

const int tesseract::LMConsistencyInfo::kNORM = 1
static

Definition at line 50 of file lm_consistency.h.

◆ kNumPos

const int tesseract::LMConsistencyInfo::kNumPos = 3
static

Definition at line 51 of file lm_consistency.h.

◆ kShiftThresh

const int tesseract::LMConsistencyInfo::kShiftThresh = 1
static

Definition at line 43 of file lm_consistency.h.

◆ kSUB

const int tesseract::LMConsistencyInfo::kSUB = 0
static

Definition at line 50 of file lm_consistency.h.

◆ kSUP

const int tesseract::LMConsistencyInfo::kSUP = 2
static

Definition at line 50 of file lm_consistency.h.

◆ num_alphas

int tesseract::LMConsistencyInfo::num_alphas

Definition at line 120 of file lm_consistency.h.

◆ num_digits

int tesseract::LMConsistencyInfo::num_digits

Definition at line 121 of file lm_consistency.h.

◆ num_inconsistent_spaces

int tesseract::LMConsistencyInfo::num_inconsistent_spaces

Definition at line 129 of file lm_consistency.h.

◆ num_lower

int tesseract::LMConsistencyInfo::num_lower

Definition at line 127 of file lm_consistency.h.

◆ num_non_first_upper

int tesseract::LMConsistencyInfo::num_non_first_upper

Definition at line 126 of file lm_consistency.h.

◆ num_other

int tesseract::LMConsistencyInfo::num_other

Definition at line 123 of file lm_consistency.h.

◆ num_punc

int tesseract::LMConsistencyInfo::num_punc

Definition at line 122 of file lm_consistency.h.

◆ punc_ref

EDGE_REF tesseract::LMConsistencyInfo::punc_ref

Definition at line 119 of file lm_consistency.h.

◆ script_id

int tesseract::LMConsistencyInfo::script_id

Definition at line 128 of file lm_consistency.h.

◆ xht_count

int16_t tesseract::LMConsistencyInfo::xht_count[kNumPos]

Definition at line 133 of file lm_consistency.h.

◆ xht_count_punc

int16_t tesseract::LMConsistencyInfo::xht_count_punc[kNumPos]

Definition at line 134 of file lm_consistency.h.

◆ xht_decision

XHeightConsistencyEnum tesseract::LMConsistencyInfo::xht_decision

Definition at line 125 of file lm_consistency.h.

◆ xht_hi

float tesseract::LMConsistencyInfo::xht_hi[kNumPos]

Definition at line 132 of file lm_consistency.h.

◆ xht_lo

float tesseract::LMConsistencyInfo::xht_lo[kNumPos]

Definition at line 131 of file lm_consistency.h.

◆ xht_sp

int16_t tesseract::LMConsistencyInfo::xht_sp

Definition at line 135 of file lm_consistency.h.

◆ xpos_entropy

int16_t tesseract::LMConsistencyInfo::xpos_entropy

Definition at line 136 of file lm_consistency.h.


The documentation for this struct was generated from the following files: