tesseract  4.00.00dev
wordseg.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: wordseg.cpp (Formerly wspace.c)
3  * Description: Code to segment the blobs into words.
4  * Author: Ray Smith
5  * Created: Fri Oct 16 11:32:28 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifdef __UNIX__
21 #include <assert.h>
22 #endif
23 #include "stderr.h"
24 #include "blobbox.h"
25 #include "statistc.h"
26 #include "drawtord.h"
27 #include "makerow.h"
28 #include "pitsync1.h"
29 #include "tovars.h"
30 #include "topitch.h"
31 #include "cjkpitch.h"
32 #include "textord.h"
33 #include "fpchop.h"
34 #include "wordseg.h"
35 
36 // Include automatically generated configuration file if running autoconf.
37 #ifdef HAVE_CONFIG_H
38 #include "config_auto.h"
39 #endif
40 
// EXTERN is defined to nothing here, so the EXTERN-prefixed parameter
// declarations that follow carry no extra keyword in this file.
41 #define EXTERN
42 
// Boolean parameter textord_fp_chopping, default TRUE.
43 EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping");
// NOTE(review): listing lines 44 and 46 are absent from this extract, so the
// two lines below are only the trailing description strings of two further
// declarations (presumably textord_force_make_prop_words and
// textord_chopper_test, which the rest of this file uses) - confirm against
// the original source.
45  "Force proportional word segmentation on all rows");
47  "Chopper is being tested.");
48 
// FIXED_WIDTH_MULTIPLE is not referenced in the visible part of this file.
49 #define FIXED_WIDTH_MULTIPLE 5
// Cluster limit used by row_words2: sizes its gaps[] array, sizes
// cluster_stats[] (plus one extra slot), is passed to STATS::cluster() and
// bounds its re-clustering loop.
50 #define BLOCK_STATS_CLUSTERS 10
51 
52 
60 void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
61  TO_ROW_IT to_row_it(rows);
62  ROW_IT row_it(real_rows);
63  for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
64  to_row_it.forward()) {
65  TO_ROW* row = to_row_it.data();
66  // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
67  // to create the word.
68  C_BLOB_LIST cblobs;
69  C_BLOB_IT cblob_it(&cblobs);
70  BLOBNBOX_IT box_it(row->blob_list());
71  for (;!box_it.empty(); box_it.forward()) {
72  BLOBNBOX* bblob= box_it.extract();
73  if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
74  if (bblob->cblob() != NULL) {
75  C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
76  cout_it.move_to_last();
77  cout_it.add_list_after(bblob->cblob()->out_list());
78  delete bblob->cblob();
79  }
80  } else {
81  if (bblob->cblob() != NULL)
82  cblob_it.add_after_then_move(bblob->cblob());
83  }
84  delete bblob;
85  }
86  // Convert the TO_ROW to a ROW.
87  ROW* real_row = new ROW(row, static_cast<inT16>(row->kern_size),
88  static_cast<inT16>(row->space_size));
89  WERD_IT word_it(real_row->word_list());
90  WERD* word = new WERD(&cblobs, 0, NULL);
91  word->set_flag(W_BOL, TRUE);
92  word->set_flag(W_EOL, TRUE);
93  word->set_flag(W_DONT_CHOP, one_blob);
94  word_it.add_after_then_move(word);
95  row_it.add_after_then_move(real_row);
96  }
97 }
98 
// make_words: computes pitch and spacing for all blocks in port_blocks and
// then builds the real words of every block.
//  - textord:     supplies use_cjk_fp_model() and to_spacing(), and is passed
//                 on to make_real_words().
//  - page_tr:     top right of the page, forwarded to the pitch and spacing
//                 routines.
//  - gradient:    page skew, forwarded to compute_fixed_pitch().
//  - blocks:      not referenced in the visible body.
//  - port_blocks: the TO_BLOCK list that is processed.
// NOTE(review): the first line of this definition (listing line 104: return
// type, name and the textord parameter) is missing from this extract, and so
// is listing line 116 (the tail of the compute_fixed_pitch() call).
105  ICOORD page_tr, // top right
106  float gradient, // page skew
107  BLOCK_LIST *blocks, // block list
108  TO_BLOCK_LIST *port_blocks) { // output list
109  TO_BLOCK_IT block_it; // iterator
110  TO_BLOCK *block; // current block
111 
// Pick the fixed-pitch analysis: the CJK model when textord asks for it,
// otherwise the general routine.
112  if (textord->use_cjk_fp_model()) {
113  compute_fixed_pitch_cjk(page_tr, port_blocks);
114  } else {
115  compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
117  }
118  textord->to_spacing(page_tr, port_blocks);
// Build the words of each block in turn, always with rotation (1, 0).
119  block_it.set_to_list(port_blocks);
120  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
121  block = block_it.data();
122  make_real_words(textord, block, FCOORD(1.0f, 0.0f));
123  }
124 }
125 
126 
// set_row_spaces: gives every row of the block whose fixed_pitch is 0 default
// spacing values derived from the row's own pr_space / pr_nonsp estimates:
// min_space, max_nonspace, space_threshold, space_size and kern_size.
// Rows with a non-zero fixed_pitch are left unchanged by the visible code.
//  - block:      block whose rows are updated.
//  - rotation:   not referenced in the visible lines.
//  - testing_on: enables the debug print (together with
//                textord_show_initial_words).
// NOTE(review): listing lines 150, 154 and 165 are absent from this extract,
// so the two spacing expressions and the body of the graphics branch are
// truncated below.
134 void set_row_spaces( //find space sizes
135  TO_BLOCK *block, //block to do
136  FCOORD rotation, //for drawing
137  BOOL8 testing_on //correct orientation
138  ) {
139  TO_ROW *row; //current row
140  TO_ROW_IT row_it = block->get_rows ();
141 
142  if (row_it.empty ())
143  return; //empty block
144  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
145  row = row_it.data ();
146  if (row->fixed_pitch == 0) {
// min_space is rounded up and max_nonspace rounded down; the remainder of
// both expressions is on the missing listing lines.
147  row->min_space =
148  (inT32) ceil (row->pr_space -
149  (row->pr_space -
151  row->max_nonspace =
152  (inT32) floor (row->pr_nonsp +
153  (row->pr_space -
155  if (testing_on && textord_show_initial_words) {
156  tprintf ("Assigning defaults %d non, %d space to row at %g\n",
157  row->max_nonspace, row->min_space, row->intercept ());
158  }
// The threshold is the integer midpoint of the two limits.
159  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
160  row->space_size = row->pr_space;
161  row->kern_size = row->pr_nonsp;
162  }
163 #ifndef GRAPHICS_DISABLED
164  if (textord_show_initial_words && testing_on) {
166  }
167 #endif
168  }
169 }
170 
171 
// row_words: estimates the space and non-space gap sizes of one row from a
// histogram of the horizontal gaps between its blobs, and stores the result
// in row->min_space, row->max_nonspace, row->space_threshold, row->space_size
// and row->kern_size.
//  - block:      supplies xheight (smoothing, minimum space size) and the
//                block-level min_space / max_nonspace used for the
//                consistency diagnostic.
//  - row:        row to analyse and update.
//  - maxwidth:   upper limit of the histogram; gaps of maxwidth or more are
//                not counted.
//  - rotation:   not referenced in the visible body.
//  - testing_on: enables debug output (with textord_show_initial_words).
// Returns cluster_stats[2].get_total() on success, or 0 with row->min_space
// and row->max_nonspace set to 0 when there is not enough evidence.
// NOTE(review): listing lines 201, 237 and 244 are absent from this extract:
// testpt is never visibly assigned and both cluster() calls lack their middle
// arguments.
178 inT32 row_words( //compute space size
179  TO_BLOCK *block, //block it came from
180  TO_ROW *row, //row to operate on
181  inT32 maxwidth, //max expected space size
182  FCOORD rotation, //for drawing
183  BOOL8 testing_on //for debug
184  ) {
185  BOOL8 testing_row; //contains testpt
186  BOOL8 prev_valid; //if decent size
187  inT32 prev_x; //end of prev blob
188  inT32 cluster_count; //no of clusters
189  inT32 gap_index; //which cluster
190  inT32 smooth_factor; //for smoothing stats
191  BLOBNBOX *blob; //current blob
192  float lower, upper; //clustering parameters
193  float gaps[3]; //gap clusters
194  ICOORD testpt;
195  TBOX blob_box; //bounding box
196  //iterator
197  BLOBNBOX_IT blob_it = row->blob_list ();
198  STATS gap_stats (0, maxwidth);
199  STATS cluster_stats[4]; //clusters
200 
202  smooth_factor =
203  (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
204  // if (testing_on)
205  // tprintf("Row smooth factor=%d\n",smooth_factor);
206  prev_valid = FALSE;
207  prev_x = -MAX_INT32;
208  testing_row = FALSE;
// First pass: the blob widths added here are discarded by the clear() that
// follows, so the only lasting effect of this loop is setting testing_row.
209  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
210  blob = blob_it.data ();
211  blob_box = blob->bounding_box ();
212  if (blob_box.contains (testpt))
213  testing_row = TRUE;
214  gap_stats.add (blob_box.width (), 1);
215  }
216  gap_stats.clear ();
// Second pass: histogram the gap between each blob that is not joined to its
// predecessor and the previous such blob, ignoring gaps of maxwidth or more.
217  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
218  blob = blob_it.data ();
219  if (!blob->joined_to_prev ()) {
220  blob_box = blob->bounding_box ();
221  if (prev_valid && blob_box.left () - prev_x < maxwidth) {
222  gap_stats.add (blob_box.left () - prev_x, 1);
223  }
224  prev_valid = TRUE;
225  prev_x = blob_box.right ();
226  }
227  }
228  if (gap_stats.get_total () == 0) {
229  row->min_space = 0; //no evidence
230  row->max_nonspace = 0;
231  return 0;
232  }
233  gap_stats.smooth (smooth_factor);
// Initial clustering bounds are scaled by the row's xheight (the smoothing
// factor above uses the block's xheight).
234  lower = row->xheight * textord_words_initial_lower;
235  upper = row->xheight * textord_words_initial_upper;
236  cluster_count = gap_stats.cluster (lower, upper,
238  cluster_stats);
// While fewer than two clusters are found, pull the bounds towards each other
// and retry. Note that the new lower is computed from the already updated
// upper.
239  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
240  //shrink gap
241  upper = (upper * 3 + lower) / 4;
242  lower = (lower * 3 + upper) / 4;
243  cluster_count = gap_stats.cluster (lower, upper,
245  cluster_stats);
246  }
247  if (cluster_count < 2) {
248  row->min_space = 0; //no evidence
249  row->max_nonspace = 0;
250  return 0;
251  }
// Cluster results are read from index 1 upwards; gaps[i] is the median of
// cluster_stats[i + 1].
252  for (gap_index = 0; gap_index < cluster_count; gap_index++)
253  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
254  //get medians
// With three clusters, choose which pair of medians acts as the non-space
// (lower) and space (upper) sizes; an upper candidate below
// block->xheight * textord_words_min_minspace is rejected where possible.
255  if (cluster_count > 2) {
256  if (testing_on && textord_show_initial_words) {
257  tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
258  row->intercept (),
259  cluster_stats[1].ile (0.5),
260  cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
261  }
262  lower = gaps[0];
263  if (gaps[1] > lower) {
264  upper = gaps[1]; //prefer most frequent
265  if (upper < block->xheight * textord_words_min_minspace
266  && gaps[2] > gaps[1]) {
267  upper = gaps[2];
268  }
269  }
270  else if (gaps[2] > lower
271  && gaps[2] >= block->xheight * textord_words_min_minspace)
272  upper = gaps[2];
273  else if (lower >= block->xheight * textord_words_min_minspace) {
274  upper = lower; //not nice
275  lower = gaps[1];
276  if (testing_on && textord_show_initial_words) {
277  tprintf ("Had to switch most common from lower to upper!!\n");
278  gap_stats.print();
279  }
280  }
281  else {
282  row->min_space = 0; //no evidence
283  row->max_nonspace = 0;
284  return 0;
285  }
286  }
// With exactly two clusters, the smaller median is the non-space size and the
// larger one the space size.
287  else {
288  if (gaps[1] < gaps[0]) {
289  if (testing_on && textord_show_initial_words) {
290  tprintf ("Had to switch most common from lower to upper!!\n");
291  gap_stats.print();
292  }
293  lower = gaps[1];
294  upper = gaps[0];
295  }
296  else {
297  upper = gaps[1];
298  lower = gaps[0];
299  }
300  }
301  if (upper < block->xheight * textord_words_min_minspace) {
302  row->min_space = 0; //no evidence
303  row->max_nonspace = 0;
304  return 0;
305  }
// Diagnostic only: this comparison against the block-level estimates prints a
// message but does not change the result.
306  if (upper * 3 < block->min_space * 2 + block->max_nonspace
307  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
308  if (testing_on && textord_show_initial_words) {
309  tprintf ("Disagreement between block and row at %g!!\n",
310  row->intercept ());
311  tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
312  gap_stats.print();
313  }
314  }
// Move each limit inwards from its median by the fraction
// textord_words_definite_spread of the distance between the medians; the
// threshold is the integer midpoint of the two limits.
315  row->min_space =
316  (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread);
317  row->max_nonspace =
318  (inT32) floor (lower + (upper - lower) * textord_words_definite_spread);
319  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
320  row->space_size = upper;
321  row->kern_size = lower;
322  if (testing_on && textord_show_initial_words) {
323  if (testing_row) {
324  tprintf ("GAP STATS\n");
325  gap_stats.print();
326  tprintf ("SPACE stats\n");
327  cluster_stats[2].print_summary();
328  tprintf ("NONSPACE stats\n");
329  cluster_stats[1].print_summary();
330  }
331  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
332  row->intercept (), row->min_space, upper,
333  row->max_nonspace, lower);
334  }
335  return cluster_stats[2].get_total ();
336 }
337 
338 
// row_words2: alternative estimate of a row's space and non-space gap sizes.
// Gaps are clustered into up to BLOCK_STATS_CLUSTERS groups and the group
// medians are compared with block->max_nonspace to pick the two sizes; the
// block defaults pr_nonsp / pr_space are used when no cluster falls on the
// required side. Writes row->min_space, row->max_nonspace,
// row->space_threshold, row->space_size and row->kern_size.
//  - block:      supplies xheight, pr_space, pr_nonsp and max_nonspace.
//  - row:        row to analyse and update.
//  - maxwidth:   upper limit of the histogram; gaps of maxwidth or more are
//                not counted.
//  - rotation:   not referenced in the visible body.
//  - testing_on: enables debug output.
// Returns 1 on success, or 0 with row->min_space and row->max_nonspace set to
// 0 when there is no evidence.
// NOTE(review): listing lines 375 and 429 are absent from this extract;
// testpt is never visibly assigned and the cluster() call lacks its middle
// argument.
345 inT32 row_words2( //compute space size
346  TO_BLOCK *block, //block it came from
347  TO_ROW *row, //row to operate on
348  inT32 maxwidth, //max expected space size
349  FCOORD rotation, //for drawing
350  BOOL8 testing_on //for debug
351  ) {
352  BOOL8 testing_row; //contains testpt
353  BOOL8 prev_valid; //if decent size
354  BOOL8 this_valid; //current blob big enough
355  inT32 prev_x; //end of prev blob
356  inT32 min_width; //min interesting width
357  inT32 valid_count; //good gaps
358  inT32 total_count; //total gaps
359  inT32 cluster_count; //no of clusters
360  inT32 prev_count; //previous cluster_count
361  inT32 gap_index; //which cluster
362  inT32 smooth_factor; //for smoothing stats
363  BLOBNBOX *blob; //current blob
364  float lower, upper; //clustering parameters
365  ICOORD testpt;
366  TBOX blob_box; //bounding box
367  //iterator
368  BLOBNBOX_IT blob_it = row->blob_list ();
369  STATS gap_stats (0, maxwidth);
370  //gap sizes
371  float gaps[BLOCK_STATS_CLUSTERS];
372  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
373  //clusters
374 
376  smooth_factor =
377  (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
378  // if (testing_on)
379  // tprintf("Row smooth factor=%d\n",smooth_factor);
380  prev_valid = FALSE;
381  prev_x = -MAX_INT16;
// testing_row is not set to TRUE anywhere in the visible body, so the
// testing_row debug section near the end is not entered by the visible code.
382  testing_row = FALSE;
383  //min blob size
384  min_width = (inT32) block->pr_space;
385  total_count = 0;
// First pass: only count a gap when the blobs on both sides of it are at
// least min_width wide; total_count counts every blob that is not joined to
// its predecessor.
386  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
387  blob = blob_it.data ();
388  if (!blob->joined_to_prev ()) {
389  blob_box = blob->bounding_box ();
390  this_valid = blob_box.width () >= min_width;
391  if (this_valid && prev_valid
392  && blob_box.left () - prev_x < maxwidth) {
393  gap_stats.add (blob_box.left () - prev_x, 1);
394  }
395  total_count++; //count possibles
396  prev_x = blob_box.right ();
397  prev_valid = this_valid;
398  }
399  }
400  valid_count = gap_stats.get_total ();
// Too few gaps between wide blobs: rebuild the histogram from all gaps,
// regardless of blob width.
// NOTE(review): this pass has no prev_valid test, so the first blob is
// measured against the initial prev_x of -MAX_INT16 - confirm that is
// intended.
401  if (valid_count < total_count * textord_words_minlarge) {
402  gap_stats.clear ();
403  prev_x = -MAX_INT16;
404  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
405  blob_it.forward ()) {
406  blob = blob_it.data ();
407  if (!blob->joined_to_prev ()) {
408  blob_box = blob->bounding_box ();
409  if (blob_box.left () - prev_x < maxwidth) {
410  gap_stats.add (blob_box.left () - prev_x, 1);
411  }
412  prev_x = blob_box.right ();
413  }
414  }
415  }
416  if (gap_stats.get_total () == 0) {
417  row->min_space = 0; //no evidence
418  row->max_nonspace = 0;
419  return 0;
420  }
421 
422  cluster_count = 0;
423  lower = block->xheight * words_initial_lower;
424  upper = block->xheight * words_initial_upper;
425  gap_stats.smooth (smooth_factor);
// Re-run the clustering for as long as it yields more clusters than the
// previous run and the BLOCK_STATS_CLUSTERS limit has not been reached.
426  do {
427  prev_count = cluster_count;
428  cluster_count = gap_stats.cluster (lower, upper,
430  BLOCK_STATS_CLUSTERS, cluster_stats);
431  }
432  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
433  if (cluster_count < 1) {
434  row->min_space = 0;
435  row->max_nonspace = 0;
436  return 0;
437  }
// Cluster results are read from index 1 upwards; gaps[i] is the median of
// cluster_stats[i + 1].
438  for (gap_index = 0; gap_index < cluster_count; gap_index++)
439  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
440  //get medians
441  if (testing_on) {
442  tprintf ("cluster_count=%d:", cluster_count);
443  for (gap_index = 0; gap_index < cluster_count; gap_index++)
444  tprintf (" %g(%d)", gaps[gap_index],
445  cluster_stats[gap_index + 1].get_total ());
446  tprintf ("\n");
447  }
448 
449  //Try to find proportional non-space and space for row.
// lower becomes the first median (in cluster order) that is not above
// block->max_nonspace, or block->pr_nonsp if there is none.
450  for (gap_index = 0; gap_index < cluster_count
451  && gaps[gap_index] > block->max_nonspace; gap_index++);
452  if (gap_index < cluster_count)
453  lower = gaps[gap_index]; //most frequent below
454  else {
455  if (testing_on)
456  tprintf ("No cluster below block threshold!, using default=%g\n",
457  block->pr_nonsp);
458  lower = block->pr_nonsp;
459  }
// upper becomes the first median (in cluster order) that is above
// block->max_nonspace, or block->pr_space if there is none.
460  for (gap_index = 0; gap_index < cluster_count
461  && gaps[gap_index] <= block->max_nonspace; gap_index++);
462  if (gap_index < cluster_count)
463  upper = gaps[gap_index]; //most frequent above
464  else {
465  if (testing_on)
466  tprintf ("No cluster above block threshold!, using default=%g\n",
467  block->pr_space);
468  upper = block->pr_space;
469  }
// Move each limit inwards from its size by the fraction
// textord_words_definite_spread of the distance between them; the threshold
// is the integer midpoint of the two limits.
470  row->min_space =
471  (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread);
472  row->max_nonspace =
473  (inT32) floor (lower + (upper - lower) * textord_words_definite_spread);
474  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
475  row->space_size = upper;
476  row->kern_size = lower;
477  if (testing_on) {
478  if (testing_row) {
479  tprintf ("GAP STATS\n");
480  gap_stats.print();
481  tprintf ("SPACE stats\n");
482  cluster_stats[2].print_summary();
483  tprintf ("NONSPACE stats\n");
484  cluster_stats[1].print_summary();
485  }
486  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
487  row->intercept (), row->min_space, upper,
488  row->max_nonspace, lower);
489  }
490  return 1;
491 }
492 
493 
// make_real_words: converts each TO_ROW of the block into a ROW of words,
// appends the ROWs to block->block->row_list(), then records the block's
// pitch statistics with set_stats() and calls check_pitch().
//  - textord:  provides make_blob_words() and make_prop_words().
//  - block:    block whose rows are converted.
//  - rotation: forwarded to the word-making routines.
// NOTE(review): the first line of this definition (listing line 500: return
// type and name) is missing from this extract, as are listing lines 528, 531
// and 534 (the tails of two conditions and the body of the final else).
501  tesseract::Textord *textord,
502  TO_BLOCK *block, //block to do
503  FCOORD rotation //for drawing
504  ) {
505  TO_ROW *row; //current row
506  TO_ROW_IT row_it = block->get_rows ();
507  ROW *real_row = NULL; //output row
508  ROW_IT real_row_it = block->block->row_list ();
509 
510  if (row_it.empty ())
511  return; //empty block
512  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
513  row = row_it.data ();
// A row with no blobs but with rep_words is built from those words alone.
// NOTE(review): real_row is not reset at the top of each iteration, so a row
// with neither blobs nor rep_words leaves the previous value in place and the
// add below would run again with the same ROW - confirm such rows cannot
// reach this point.
514  if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
515  real_row = make_rep_words (row, block);
516  } else if (!row->blob_list()->empty()) {
517  // In a fixed pitch document, some lines may be detected as fixed pitch
518  // while others don't, and will go through different path.
519  // For non-space delimited language like CJK, fixed pitch chop always
520  // leave the entire line as one word. We can force consistent chopping
521  // with force_make_prop_words flag.
522  POLY_BLOCK* pb = block->block->poly_block();
// Order of precedence: chopper test mode, then proportional segmentation
// (forced, non-text poly block, or a proportional pitch decision), then
// fixed-pitch segmentation.
523  if (textord_chopper_test) {
524  real_row = textord->make_blob_words (row, rotation);
525  } else if (textord_force_make_prop_words ||
526  (pb != NULL && !pb->IsText()) ||
527  row->pitch_decision == PITCH_DEF_PROP ||
529  real_row = textord->make_prop_words (row, rotation);
530  } else if (row->pitch_decision == PITCH_DEF_FIXED ||
532  real_row = fixed_pitch_words (row, rotation);
533  } else {
535  }
536  }
537  if (real_row != NULL) {
538  //put row in block
539  real_row_it.add_after_then_move (real_row);
540  }
541  }
// The first argument is true when the block has no fixed pitch.
542  block->block->set_stats (block->fixed_pitch == 0, (inT16) block->kern_size,
543  (inT16) block->space_size,
544  (inT16) block->fixed_pitch);
545  block->block->check_pitch ();
546 }
547 
548 
556 ROW *make_rep_words( //make a row
557  TO_ROW *row, //row to convert
558  TO_BLOCK *block //block it lives in
559  ) {
560  ROW *real_row; //output row
561  TBOX word_box; //bounding box
562  //iterator
563  WERD_IT word_it = &row->rep_words;
564 
565  if (word_it.empty ())
566  return NULL;
567  word_box = word_it.data ()->bounding_box ();
568  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
569  word_box += word_it.data ()->bounding_box ();
570  row->xheight = block->xheight;
571  real_row = new ROW(row,
572  (inT16) block->kern_size, (inT16) block->space_size);
573  word_it.set_to_list (real_row->word_list ());
574  //put words in row
575  word_it.add_list_after (&row->rep_words);
576  real_row->recalc_bounding_box ();
577  return real_row;
578 }
579 
580 
588 WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator
589  inT32 blobcount, //no of blobs to use
590  BOOL8 bol, //start of line
591  uinT8 blanks //no of blanks
592  ) {
593  C_OUTLINE_IT cout_it;
594  C_BLOB_LIST cblobs;
595  C_BLOB_IT cblob_it = &cblobs;
596  WERD *word; // new word
597  BLOBNBOX *bblob; // current blob
598  inT32 blobindex; // in row
599 
600  for (blobindex = 0; blobindex < blobcount; blobindex++) {
601  bblob = box_it->extract();
602  if (bblob->joined_to_prev()) {
603  if (bblob->cblob() != NULL) {
604  cout_it.set_to_list(cblob_it.data()->out_list());
605  cout_it.move_to_last();
606  cout_it.add_list_after(bblob->cblob()->out_list());
607  delete bblob->cblob();
608  }
609  }
610  else {
611  if (bblob->cblob() != NULL)
612  cblob_it.add_after_then_move(bblob->cblob());
613  }
614  delete bblob;
615  box_it->forward(); // next one
616  }
617 
618  if (blanks < 1)
619  blanks = 1;
620 
621  word = new WERD(&cblobs, blanks, NULL);
622 
623  if (bol)
624  word->set_flag(W_BOL, TRUE);
625  if (box_it->at_first())
626  word->set_flag(W_EOL, TRUE); // at end of line
627 
628  return word;
629 }
Definition: points.h:189
float space_size
Definition: blobbox.h:787
void add(inT32 value, inT32 count)
Definition: statistc.cpp:99
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:60
float space_size
Definition: blobbox.h:663
bool use_cjk_fp_model() const
Definition: textord.h:92
BLOCK * block
Definition: blobbox.h:773
#define TRUE
Definition: capi.h:45
void clear()
Definition: statistc.cpp:81
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:1183
float xheight
Definition: blobbox.h:784
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: werd.h:36
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
EXTERN bool textord_chopper_test
Definition: wordseg.cpp:47
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
Definition: drawtord.cpp:250
EXTERN double textord_words_minlarge
Definition: tovars.cpp:57
float pr_nonsp
Definition: blobbox.h:793
#define MAX_INT32
Definition: host.h:62
float pr_space
Definition: blobbox.h:792
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
int16_t inT16
Definition: host.h:36
EXTERN double textord_words_definite_spread
Definition: tovars.cpp:76
Definition: ocrrow.h:32
float kern_size
Definition: blobbox.h:786
void recalc_bounding_box()
Definition: ocrrow.cpp:101
void set_row_spaces(TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: wordseg.cpp:134
bool textord_test_landscape
Definition: makerow.cpp:50
Definition: werd.h:35
#define MAX_INT16
Definition: host.h:61
bool IsText() const
Definition: polyblk.h:52
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
void set_stats(BOOL8 prop, inT16 kern, inT16 space, inT16 ch_pitch)
Definition: ocrblock.h:62
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
#define tprintf(...)
Definition: tprintf.h:31
void smooth(inT32 factor)
Definition: statistc.cpp:287
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
const TBOX & bounding_box() const
Definition: blobbox.h:215
uint8_t uinT8
Definition: host.h:35
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
Definition: fpchop.cpp:51
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
Definition: statistc.cpp:318
WERD_LIST rep_words
Definition: blobbox.h:664
C_BLOB * cblob() const
Definition: blobbox.h:253
int textord_test_x
Definition: makerow.cpp:62
WERD * make_real_word(BLOBNBOX_IT *box_it, inT32 blobcount, BOOL8 bol, uinT8 blanks)
Definition: wordseg.cpp:588
#define FALSE
Definition: capi.h:46
void check_pitch()
check proportional
Definition: ocrblock.cpp:171
inT32 max_nonspace
Definition: blobbox.h:660
EXTERN double textord_words_initial_lower
Definition: tovars.cpp:53
float xheight
Definition: blobbox.h:653
EXTERN bool textord_fp_chopping
Definition: wordseg.cpp:43
float pr_space
Definition: blobbox.h:650
int32_t inT32
Definition: host.h:38
EXTERN double words_initial_upper
Definition: tovars.cpp:71
Definition: rect.h:30
bool joined_to_prev() const
Definition: blobbox.h:241
inT16 left() const
Definition: rect.h:68
bool contains(const FCOORD pt) const
Definition: rect.h:323
#define ASSERT_HOST(x)
Definition: errcode.h:84
float fixed_pitch
Definition: blobbox.h:785
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
#define EXTERN
Definition: wordseg.cpp:41
EXTERN double textord_spacesize_ratioprop
Definition: tovars.cpp:80
EXTERN double textord_words_min_minspace
Definition: tovars.cpp:49
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1057
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, BOOL8 testing_on)
Definition: topitch.cpp:73
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
Definition: wordseg.cpp:556
inT32 get_total() const
Definition: statistc.h:86
inT32 row_words2(TO_BLOCK *block, TO_ROW *row, inT32 maxwidth, FCOORD rotation, BOOL8 testing_on)
Definition: wordseg.cpp:345
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
Definition: wordseg.cpp:500
Definition: statistc.h:33
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
inT32 space_threshold
Definition: blobbox.h:661
int textord_test_y
Definition: makerow.cpp:63
unsigned char BOOL8
Definition: host.h:44
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:104
inT32 min_space
Definition: blobbox.h:788
float intercept() const
Definition: blobbox.h:584
EXTERN double words_initial_lower
Definition: tovars.cpp:70
float fixed_pitch
Definition: blobbox.h:647
EXTERN bool textord_force_make_prop_words
Definition: wordseg.cpp:45
void print_summary() const
Definition: statistc.cpp:558
inT32 row_words(TO_BLOCK *block, TO_ROW *row, inT32 maxwidth, FCOORD rotation, BOOL8 testing_on)
Definition: wordseg.cpp:178
Definition: werd.h:60
inT32 min_space
Definition: blobbox.h:659
double ile(double frac) const
Definition: statistc.cpp:172
#define BLOCK_STATS_CLUSTERS
Definition: wordseg.cpp:50
inT16 right() const
Definition: rect.h:75
integer coordinate
Definition: points.h:30
WERD_LIST * word_list()
Definition: ocrrow.h:52
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:890
float pr_nonsp
Definition: blobbox.h:651
EXTERN double textord_wordstats_smooth_factor
Definition: tovars.cpp:39
inT32 max_nonspace
Definition: blobbox.h:789
EXTERN double textord_words_initial_upper
Definition: tovars.cpp:55
void print() const
Definition: statistc.cpp:532
float kern_size
Definition: blobbox.h:662
inT16 width() const
Definition: rect.h:111
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:42