tesseract v5.3.3.20231005
tesseract::AssociateUtils Class Reference

#include <associate.h>

Static Public Member Functions

static float ComputeOutlineLength (float rating_cert_scale, const BLOB_CHOICE &b)
 
static float ComputeRating (float rating_cert_scale, float cert, int width)
 
static void ComputeStats (int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
 
static float FixedPitchWidthCost (float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio)
 
static float FixedPitchGapCost (float norm_gap, bool end_pos)
 

Static Public Attributes

static const float kMaxFixedPitchCharAspectRatio = 2.0f
 
static const float kMinGap = 0.03f
 

Detailed Description

Definition at line 71 of file associate.h.

Member Function Documentation

◆ ComputeOutlineLength()

static float tesseract::AssociateUtils::ComputeOutlineLength ( float  rating_cert_scale,
const BLOB_CHOICE &  b 
)
inline static

Definition at line 84 of file associate.h.

84 {
85 return rating_cert_scale * b.rating() / b.certainty();
86 }

◆ ComputeRating()

static float tesseract::AssociateUtils::ComputeRating ( float  rating_cert_scale,
float  cert,
int  width 
)
inline static

Definition at line 87 of file associate.h.

87 {
88 return static_cast<float>(width) * cert / rating_cert_scale;
89 }

◆ ComputeStats()

void tesseract::AssociateUtils::ComputeStats ( int  col,
int  row,
const AssociateStats *  parent_stats,
int  parent_path_length,
bool  fixed_pitch,
float  max_char_wh_ratio,
WERD_RES *  word_res,
bool  debug,
AssociateStats *  stats 
)
static

Definition at line 33 of file associate.cpp.

35 {
36 stats->Clear();
37
38 ASSERT_HOST(word_res != nullptr);
39 if (word_res->blob_widths.empty()) {
40 return;
41 }
42 if (debug) {
43 tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", col, row,
44 fixed_pitch ? " (fixed pitch)" : "");
45 }
46 float normalizing_height = kBlnXHeight;
47 ROW *blob_row = word_res->blob_row;
48 // TODO(rays/daria) Can unicharset.script_has_xheight be useful here?
49 if (fixed_pitch && blob_row != nullptr) {
50 // For fixed pitch language like CJK, we use the full text height
51 // as the normalizing factor so we are not dependent on xheight
52 // calculation.
53 if (blob_row->body_size() > 0.0f) {
54 normalizing_height = word_res->denorm.y_scale() * blob_row->body_size();
55 } else {
56 normalizing_height =
57 word_res->denorm.y_scale() * (blob_row->x_height() + blob_row->ascenders());
58 }
59 if (debug) {
60 tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", normalizing_height,
61 word_res->denorm.y_scale(), blob_row->x_height(), blob_row->ascenders());
62 }
63 }
64 float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height;
65 if (wh_ratio > max_char_wh_ratio) {
66 stats->bad_shape = true;
67 }
68 // Compute the gap sum for this shape. If there are only negative or only
69 // positive gaps, record their sum in stats->gap_sum. However, if there is
70 // a mixture, record only the sum of the positive gaps.
71 // TODO(antonova): explain fragment.
72 int negative_gap_sum = 0;
73 for (int c = col; c < row; ++c) {
74 int gap = word_res->GetBlobsGap(c);
75 (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap;
76 }
77 if (stats->gap_sum == 0) {
78 stats->gap_sum = negative_gap_sum;
79 }
80 if (debug) {
81 tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n", wh_ratio, max_char_wh_ratio,
82 stats->gap_sum, stats->bad_shape ? "bad_shape" : "");
83 }
84 // Compute shape_cost (for fixed pitch mode).
85 if (fixed_pitch) {
86 bool end_row = (row == (word_res->ratings->dimension() - 1));
87
88 // Ensure that the blob has gaps on the left and the right sides
89 // (except for beginning and ending punctuation) and that there is
90 // no cutting through ink at the blob boundaries.
91 if (col > 0) {
92 float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height;
93 SEAM *left_seam = word_res->seam_array[col - 1];
94 if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) {
95 stats->bad_shape = true;
96 }
97 if (debug) {
98 tprintf("left_gap %g, left_seam %g %s\n", left_gap, left_seam->priority(),
99 stats->bad_shape ? "bad_shape" : "");
100 }
101 }
102 float right_gap = 0.0f;
103 if (!end_row) {
104 right_gap = word_res->GetBlobsGap(row) / normalizing_height;
105 SEAM *right_seam = word_res->seam_array[row];
106 if (right_gap < kMinGap || right_seam->priority() > 0.0f) {
107 stats->bad_shape = true;
108 if (right_gap < kMinGap) {
109 stats->bad_fixed_pitch_right_gap = true;
110 }
111 }
112 if (debug) {
113 tprintf("right_gap %g right_seam %g %s\n", right_gap, right_seam->priority(),
114 stats->bad_shape ? "bad_shape" : "");
115 }
116 }
117
118 // Impose additional segmentation penalties if blob widths or gaps
119 // distribution don't fit a fixed-pitch model.
120 // Since we only know the widths and gaps of the path explored so far,
121 // the means and variances are computed for the path so far (not
122 // considering characters to the right of the last character on the path).
123 stats->full_wh_ratio = wh_ratio + right_gap;
124 if (parent_stats != nullptr) {
125 stats->full_wh_ratio_total = (parent_stats->full_wh_ratio_total + stats->full_wh_ratio);
126 float mean = stats->full_wh_ratio_total / static_cast<float>(parent_path_length + 1);
127 stats->full_wh_ratio_var =
128 parent_stats->full_wh_ratio_var + pow(mean - stats->full_wh_ratio, 2);
129 } else {
130 stats->full_wh_ratio_total = stats->full_wh_ratio;
131 }
132 if (debug) {
133 tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n",
134 stats->full_wh_ratio, stats->full_wh_ratio_total, stats->full_wh_ratio_var);
135 }
136
137 stats->shape_cost = FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio);
138
139 // For some reason Tesseract prefers to treat the whole CJ words
140 // as one blob when the initial segmentation is particularly bad.
141 // This hack is to avoid favoring such states.
142 if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) {
143 stats->shape_cost += 10;
144 }
145 stats->shape_cost += stats->full_wh_ratio_var;
146 if (debug) {
147 tprintf("shape_cost %g\n", stats->shape_cost);
148 }
149 }
150 }
#define ASSERT_HOST(x)
Definition: errcode.h:54
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const int kBlnXHeight
Definition: normalis.h:33
static const float kMinGap
Definition: associate.h:74
static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio)
Definition: associate.cpp:152

◆ FixedPitchGapCost()

static float tesseract::AssociateUtils::FixedPitchGapCost ( float  norm_gap,
bool  end_pos 
)
inline static

Definition at line 113 of file associate.h.

113 {
114 return (norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f;
115 }

◆ FixedPitchWidthCost()

float tesseract::AssociateUtils::FixedPitchWidthCost ( float  norm_width,
float  right_gap,
bool  end_pos,
float  max_char_wh_ratio 
)
static

Definition at line 152 of file associate.cpp.

153 {
154 float cost = 0.0f;
155 if (norm_width > max_char_wh_ratio) {
156 cost += norm_width;
157 }
158 if (norm_width > kMaxFixedPitchCharAspectRatio) {
159 cost += norm_width * norm_width; // extra penalty for merging CJK chars
160 }
161 // Penalize skinny blobs, except for punctuation in the last position.
162 if (norm_width + right_gap < 0.5f && !end_pos) {
163 cost += 1.0f - (norm_width + right_gap);
164 }
165 return cost;
166 }
static const float kMaxFixedPitchCharAspectRatio
Definition: associate.h:73

Member Data Documentation

◆ kMaxFixedPitchCharAspectRatio

const float tesseract::AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f
static

Definition at line 73 of file associate.h.

◆ kMinGap

const float tesseract::AssociateUtils::kMinGap = 0.03f
static

Definition at line 74 of file associate.h.


The documentation for this class was generated from the following files: