tesseract-ocr.github.io/3.x/a00709_source.html

 /******************************************************************

  * File:        output.cpp  (Formerly output.c)

  * Description: Output pass

  * Author:                                      Phil Cheatle

  * Created:                                     Thu Aug  4 10:56:08 BST 1994

  *

  * (C) Copyright 1994, Hewlett-Packard Ltd.

  ** Licensed under the Apache License, Version 2.0 (the "License");

  ** you may not use this file except in compliance with the License.

  ** You may obtain a copy of the License at

  ** http://www.apache.org/licenses/LICENSE-2.0

  ** Unless required by applicable law or agreed to in writing, software

  ** distributed under the License is distributed on an "AS IS" BASIS,

  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  ** See the License for the specific language governing permissions and

  ** limitations under the License.

  *

  **********************************************************************/


 #ifdef _MSC_VER

 #pragma warning(disable:4244)  // Conversion warnings

 #endif


 #include <string.h>

 #include <ctype.h>

 #ifdef __UNIX__

 #include          <assert.h>

 #include          <unistd.h>

 #include          <errno.h>

 #endif

 #include "helpers.h"

 #include "tessvars.h"

 #include "control.h"

 #include "reject.h"

 #include "docqual.h"

 #include "output.h"

 #include "globals.h"

 #include "tesseractclass.h"


 // File-name extension macro. Not referenced anywhere in the visible part of
 // this file.
 #define EPAPER_EXT      ".ep"

 // NOTE(review): 3508 is presumably the height of an A4 page in pixels at
 // 300 dpi - confirm. Not referenced in the visible part of this file.
 #define PAGE_YSIZE      3508

 // ASCII control characters used as output markers. Only CTRL_NEWLINE and
 // CTRL_HARDLINE are used in the visible code (by determine_newline_type()).
 #define CTRL_INSET      '\024'   // DC4 (octal 024): text inset

 #define CTRL_FONT       '\016'   // SO (octal 016): font change

 #define CTRL_DEFAULT      '\017' // SI (octal 017): default font

 #define CTRL_SHIFT      '\022'   // DC2 (octal 022): x shift

 #define CTRL_TAB        '\011'   // HT (octal 011): tab

 #define CTRL_NEWLINE      '\012' // LF (octal 012): newline

 #define CTRL_HARDLINE   '\015'   // CR (octal 015): hard line break


 /**********************************************************************
  * pixels_to_pts
  *
  * Convert an integer number of pixels to the nearest integer
  * number of points.
  *
  * pixels   length in pixels (may be negative)
  * pix_res  resolution; presumably pixels per inch, given the factor of
  *          72 points per inch used below
  *
  * Returns pixels * 72 / pix_res rounded to the nearest integer (halves
  * round away from zero), or 0 when pix_res is 0.
  **********************************************************************/

 inT32 pixels_to_pts(               //convert coords
                     inT32 pixels,
                     inT32 pix_res  //resolution
                    ) {
   if (pix_res == 0)
     return 0;                    //no conversion possible, avoid x/0

   // Keep the intermediate value in double: a float cannot hold every inT32
   // exactly, so large lengths used to be rounded once before the real
   // rounding step below.
   const double pts = pixels * 72.0 / pix_res;

   // Adding 0.5 and truncating only rounds to nearest for non-negative
   // values; mirror the offset so negative lengths round correctly too.
   return static_cast<inT32>(pts >= 0.0 ? pts + 0.5 : pts - 0.5);
 }


 namespace tesseract {

 /**********************************************************************
  * output_pass
  *
  * Visit every word of the page in iterator order and pass it to
  * write_results() together with the newline type computed by
  * determine_newline_type().
  *
  * page_res_it      page iterator; restarted here and left with no current
  *                  word on return
  * target_word_box  optional filter: when non-NULL, words whose bounding-box
  *                  centre lies outside this box are skipped
  **********************************************************************/
 void Tesseract::output_pass(  //Tess output pass //send to api
                             PAGE_RES_IT &page_res_it,
                             const TBOX *target_word_box) {
   page_res_it.restart_page();
   while (page_res_it.word() != NULL) {
     check_debug_pt(page_res_it.word(), 120);

     if (target_word_box) {
       TBOX current_word_box = page_res_it.word()->word->bounding_box();
       // The centre is computed with integer arithmetic before it is handed
       // to FCOORD, so the halves are truncated.
       FCOORD center_pt(
           (current_word_box.right() + current_word_box.left()) / 2,
           (current_word_box.bottom() + current_word_box.top()) / 2);
       if (!target_word_box->contains(center_pt)) {
         page_res_it.forward();
         continue;
       }
     }

     // An end of line is forced after the last word of the page and, when
     // block separators are enabled, after the last word of each block.
     BOOL8 force_eol = (tessedit_write_block_separators &&
                        (page_res_it.block() != page_res_it.next_block())) ||
                       (page_res_it.next_word() == NULL);

     // Look ahead one word / one block; either may be absent.
     WERD *nextword = NULL;
     if (page_res_it.next_word() != NULL)
       nextword = page_res_it.next_word()->word;

     BLOCK *nextblock = NULL;
     if (page_res_it.next_block() != NULL)
       nextblock = page_res_it.next_block()->block;

     write_results(page_res_it,
                   determine_newline_type(page_res_it.word()->word,
                                          page_res_it.block()->block,
                                          nextword, nextblock),
                   force_eol);
     page_res_it.forward();
   }
 }


 /*************************************************************************
  * write_results()
  *
  * All recognition and rejection has now been done. This function does not
  * write any file itself; for the current word it
  *   - updates the tilde (reject character) / newline bookkeeping in stats_,
  *   - calls MergeAdjacentBlobs(0) when the word would otherwise start with
  *     a reject directly after a reject that ended the previous word, and
  *   - applies the final un-rejection rules to the word's reject map
  *     (set_unlv_suspects(), tessedit_zero_rejection and
  *     tessedit_minimal_rejection).
  *
  * page_res_it   iterator positioned on the word to process
  * newline_type  value from determine_newline_type(); non-zero at a line end
  * force_eol     TRUE when an end of line must be recorded after this word
  *************************************************************************/
 void Tesseract::write_results(PAGE_RES_IT &page_res_it,
                               char newline_type,  // type of newline
                               BOOL8 force_eol) {  // override tilde crunch?
   WERD_RES *word = page_res_it.word();
   const UNICHARSET &uchset = *word->uch_set;
   // A space in best_choice is compared against throughout as the marker of
   // a rejected ("tilde") character.
   UNICHAR_ID space = uchset.unichar_to_id(" ");

   /* CRUNCHED OR EMPTY WORDS: only the bookkeeping in stats_ is updated */
   if ((word->unlv_crunch_mode != CR_NONE ||
        word->best_choice->length() == 0) &&
       !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
     BOOL8 need_reject = FALSE;

     if ((word->unlv_crunch_mode != CR_DELETE) &&
         (!stats_.tilde_crunch_written ||
          ((word->unlv_crunch_mode == CR_KEEP_SPACE) &&
           (word->word->space() > 0) &&
           !word->word->flag(W_FUZZY_NON) &&
           !word->word->flag(W_FUZZY_SP)))) {
       // A definite space before a word that is not at the start of a line
       // separates it from any previous tilde.
       if (!word->word->flag(W_BOL) &&
           (word->word->space() > 0) &&
           !word->word->flag(W_FUZZY_NON) &&
           !word->word->flag(W_FUZZY_SP)) {
         stats_.last_char_was_tilde = false;
       }
       need_reject = TRUE;
     }

     if ((need_reject && !stats_.last_char_was_tilde) ||
         (force_eol && stats_.write_results_empty_block)) {
       /* Bookkeeping for a reject char at this position; nothing is
          written by this function */
       stats_.last_char_was_tilde = true;
       stats_.tilde_crunch_written = true;
       stats_.last_char_was_newline = false;
       stats_.write_results_empty_block = false;
     }

     if ((word->word->flag(W_EOL) && !stats_.last_char_was_newline) ||
         force_eol) {
       stats_.tilde_crunch_written = false;
       stats_.last_char_was_newline = true;
       stats_.last_char_was_tilde = false;
     }

     if (force_eol)
       stats_.write_results_empty_block = true;
     return;
   }

   /* NORMAL PROCESSING of non tilde crunched words */

   stats_.tilde_crunch_written = false;
   stats_.last_char_was_newline = (newline_type != 0);
   stats_.write_results_empty_block = force_eol;  // about to write a real word

   // best_choice can be empty here when tessedit_zero_kelvin_rejection or
   // tessedit_word_for_word bypassed the branch above, so check the length
   // before looking at the first character.
   if (unlv_tilde_crunching &&
       stats_.last_char_was_tilde &&
       (word->word->space() == 0) &&
       !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
       (word->best_choice->length() > 0) &&
       (word->best_choice->unichar_id(0) == space)) {
     /* Prevent adjacent tilde across words - we know that adjacent tildes within
        words have been removed */
     word->MergeAdjacentBlobs(0);
   }

   // Everything below indexes best_choice and reject_map with the same
   // positions, so the lengths must agree before the first indexed access.
   ASSERT_HOST(word->best_choice->length() == word->reject_map.length());

   if (newline_type ||
       (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes)) {
     stats_.last_char_was_tilde = false;
   } else {
     if (word->reject_map.length() > 0) {
       // Remember whether this word ends in a reject marker.
       stats_.last_char_was_tilde =
           (word->best_choice->unichar_id(word->reject_map.length() - 1) ==
            space);
     } else if (word->word->space() > 0) {
       stats_.last_char_was_tilde = false;
     }
     /* else it is unchanged as there are no output chars */
   }

   set_unlv_suspects(word);
   check_debug_pt(word, 120);
   if (tessedit_rejection_debug) {
     tprintf ("Dict word: \"%s\": %d\n",
              word->best_choice->debug_string().string(),
              dict_word(*(word->best_choice)));
   }

   if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
     if (tessedit_zero_rejection) {
       /* OVERRIDE ALL REJECTION MECHANISMS - accept every rejected char */
       for (int i = 0; i < word->best_choice->length(); ++i) {
         if (word->reject_map[i].rejected())
           word->reject_map[i].setrej_minimal_rej_accept();
       }
     }
     if (tessedit_minimal_rejection) {
       /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES
          (characters whose choice is the space marker stay rejected) */
       for (int i = 0; i < word->best_choice->length(); ++i) {
         if ((word->best_choice->unichar_id(i) != space) &&
             word->reject_map[i].rejected())
           word->reject_map[i].setrej_minimal_rej_accept();
       }
     }
   }
 }

 }  // namespace tesseract


 /**********************************************************************
  * determine_newline_type
  *
  * Classify the line break that follows a word.
  * Returns FALSE when the word is not flagged W_EOL; otherwise returns
  * CTRL_NEWLINE or CTRL_HARDLINE.
  **********************************************************************/

 char determine_newline_type(                   //test line ends
                             WERD *word,        //word to do
                             BLOCK *block,      //current block
                             WERD *next_word,   //next word
                             BLOCK *next_block  //block of next word
                            ) {
   if (!word->flag(W_EOL))
     return FALSE;                //not end of line

   // No following word in the same block: plain newline.
   const bool last_in_block =
       next_word == NULL || next_block == NULL || block != next_block;
   if (last_in_block)
     return CTRL_NEWLINE;

   if (next_word->space() > 0)
     return CTRL_HARDLINE;        //it is tabbed

   TBOX this_box = word->bounding_box();
   TBOX following_box = next_word->bounding_box();
   TBOX enclosing_box = block->bounding_box();

   // Room left between this word and the right edge of the block, less the
   // block's space value.
   inT16 room_left = enclosing_box.right() - this_box.right();
   room_left -= (inT32) block->space();

   inT16 following_width = following_box.right() - following_box.left();

   // If the next word would have fitted on this line, the break is a hard
   // one.
   if (room_left > following_width)
     return CTRL_HARDLINE;
   return CTRL_NEWLINE;
 }


 /*************************************************************************

  * get_rep_char()

  * Return the first accepted character from the repetition string. This is the

  * character which is repeated - as determined earlier by fix_rep_char()

  *************************************************************************/

 namespace tesseract {

 UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) {  // what char is repeated?
   // Step over the leading run of rejected characters.
   int first_accepted = 0;
   while (first_accepted < word->reject_map.length() &&
          word->reject_map[first_accepted].rejected()) {
     ++first_accepted;
   }

   // Every character was rejected: fall back to the id of unrecognised_char.
   if (first_accepted >= word->reject_map.length())
     return word->uch_set->unichar_to_id(unrecognised_char.string());

   return word->best_choice->unichar_id(first_accepted);
 }


 /*************************************************************************
  * SUSPECT LEVELS
  *
  * 0 - dont reject ANYTHING
  * 1,2 - partial rejection
  * 3 - BEST
  *
  * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and
  * tessedit_minimal_rejection.
  *
  * set_unlv_suspects() relaxes the reject map of word_res in place:
  *   level 0     every rejected character is accepted;
  *   level >= 3  the map is left untouched;
  *   levels 1,2  rejected alphas of safe dictionary words with more than
  *               suspect_short_words alphas are accepted; then, unless the
  *               rating per character is suspect_rating_per_ch or worse,
  *               Tess-accepted / well-rated words have all rejects except
  *               " " accepted, and document/block/row rejects are accepted;
  *   level 1     additionally relaxes 1Il / MM rejects and the rejects of
  *               acceptable word or number strings.
  *************************************************************************/
 void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
   const int len = word_res->reject_map.length();
   const WERD_CHOICE &word = *(word_res->best_choice);
   const UNICHARSET &uchset = *word.unicharset();
   int i;

   if (suspect_level == 0) {
     for (i = 0; i < len; i++) {
       if (word_res->reject_map[i].rejected())
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
     return;
   }

   if (suspect_level >= 3)
     return;                      //Use defaults

   /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/

   if (safe_dict_word(word_res) &&
       (count_alphas(word) > suspect_short_words)) {
     /* Unreject alphas in dictionary words */
     for (i = 0; i < len; ++i) {
       if (word_res->reject_map[i].rejected() &&
           uchset.get_isalpha(word.unichar_id(i)))
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
   }

   // With an empty reject map every remaining loop is a no-op, and dividing
   // the rating by a zero length would yield an infinite or NaN rating.
   if (len == 0)
     return;

   const float rating_per_ch = word.rating() / len;

   if (rating_per_ch >= suspect_rating_per_ch)
     return;                      //Dont touch bad ratings

   if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
     /* Unreject any Tess Acceptable word - but NOT tess reject chs*/
     for (i = 0; i < len; ++i) {
       if (word_res->reject_map[i].rejected() &&
           (!uchset.eq(word.unichar_id(i), " ")))
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
   }

   /* Unreject document, block and row level rejects */
   for (i = 0; i < len; i++) {
     if (word_res->reject_map[i].rejected() &&
         (word_res->reject_map[i].flag(R_DOC_REJ) ||
          word_res->reject_map[i].flag(R_BLOCK_REJ) ||
          word_res->reject_map[i].flag(R_ROW_REJ)))
       word_res->reject_map[i].setrej_minimal_rej_accept();
   }

   if (suspect_level == 2)
     return;

   /* Remaining relaxations are skipped at level 2 */

   if (!suspect_constrain_1Il || (len <= suspect_short_words)) {
     for (i = 0; i < len; i++) {
       // 1Il conflicts are always relaxed here; MM rejects only when 1Il
       // constraining is switched off.
       if (word_res->reject_map[i].rejected() &&
           (word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
            word_res->reject_map[i].flag(R_POSTNN_1IL) ||
            (!suspect_constrain_1Il &&
             word_res->reject_map[i].flag(R_MM_REJECT))))
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
   }

   if (acceptable_word_string(*word_res->uch_set,
                              word.unichar_string().string(),
                              word.unichar_lengths().string()) !=
                                  AC_UNACCEPTABLE ||
       acceptable_number_string(word.unichar_string().string(),
                                word.unichar_lengths().string())) {
     if (len > suspect_short_words) {
       for (i = 0; i < len; i++) {
         if (word_res->reject_map[i].rejected() &&
             (!word_res->reject_map[i].perm_rejected() ||
              word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
              word_res->reject_map[i].flag(R_POSTNN_1IL) ||
              word_res->reject_map[i].flag(R_MM_REJECT))) {
           word_res->reject_map[i].setrej_minimal_rej_accept();
         }
       }
     }
   }
 }


 // Number of characters in word that its unicharset reports as alphabetic.
 inT16 Tesseract::count_alphas(const WERD_CHOICE &word) {
   const UNICHARSET *charset = word.unicharset();
   const int num_chars = word.length();
   int alpha_total = 0;
   for (int pos = 0; pos < num_chars; ++pos) {
     if (charset->get_isalpha(word.unichar_id(pos)))
       ++alpha_total;
   }
   return alpha_total;
 }


 // Number of characters in word that its unicharset reports as alphabetic
 // or as a digit.
 inT16 Tesseract::count_alphanums(const WERD_CHOICE &word) {
   const UNICHARSET *charset = word.unicharset();
   const int num_chars = word.length();
   int alnum_total = 0;
   for (int pos = 0; pos < num_chars; ++pos) {
     const UNICHAR_ID id = word.unichar_id(pos);
     if (charset->get_isalpha(id) || charset->get_isdigit(id))
       ++alnum_total;
   }
   return alnum_total;
 }


 /*************************************************************************
  * acceptable_number_string()
  *
  * Decide whether a word looks like a number.
  *
  * s        the word's characters, NUL terminated
  * lengths  one entry per character of s giving its length in bytes,
  *          NUL terminated
  *
  * Accepted form: an optional '(' , an optional one of '$' '.' '+' '-',
  * then digits in which a single '.', ',' or '-' may follow a digit; the
  * string may end, directly after a digit, with '%', ')' or "%)".
  * Returns TRUE when the whole string fits (this includes the empty
  * string and strings ending on a separator), FALSE at the first
  * character that does not.
  *************************************************************************/
 BOOL8 Tesseract::acceptable_number_string(const char *s,
                                           const char *lengths) {
   BOOL8 prev_digit = FALSE;

   // Optional opening bracket. lengths is stepped together with s so that
   // *lengths always describes the character s points at; otherwise every
   // later length lookup is shifted and multi-byte digits after the prefix
   // are tested with the wrong byte count.
   if (*lengths == 1 && *s == '(') {
     s++;
     lengths++;
   }

   // Optional currency symbol, decimal point or sign.
   if (*lengths == 1 &&
       ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))) {
     s++;
     lengths++;
   }

   for (; *s != '\0'; s += *(lengths++)) {
     if (unicharset.get_isdigit(s, *lengths))
       prev_digit = TRUE;
     else if (prev_digit &&
              (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
       prev_digit = FALSE;        // separator: must be followed by a digit
     else if (prev_digit && *lengths == 1 &&
              (*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')')))
       return TRUE;               // trailing '%' or ')'
     else if (prev_digit &&
              *lengths == 1 && (*s == '%') &&
              (*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
              (*(s + *lengths + *(lengths + 1)) == '\0'))
       return TRUE;               // trailing "%)"
     else
       return FALSE;
   }
   return TRUE;
 }

 }  // namespace tesseract

WERD_RES::tess_accepted
BOOL8 tess_accepted
Definition: pageres.h:280

CTRL_NEWLINE
#define CTRL_NEWLINE
Definition: output.cpp:47

globals.h

R_ROW_REJ
Definition: rejctmap.h:82

tesseract::Tesseract::count_alphas
inT16 count_alphas(const WERD_CHOICE &word)
Definition: output.cpp:400

tesseract::TesseractStats::last_char_was_tilde
bool last_char_was_tilde
Definition: tesseractclass.h:139

output.h

WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:324

UNICHARSET::unichar_to_id
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

determine_newline_type
char determine_newline_type(WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)
Definition: output.cpp:247

WERD_RES
Definition: pageres.h:155

REJMAP::length
inT32 length() const
Definition: rejctmap.h:237

pixels_to_pts
inT32 pixels_to_pts(inT32 pixels, inT32 pix_res)
Definition: output.cpp:57

R_MM_REJECT
Definition: rejctmap.h:60

control.h

tesseract::Tesseract::suspect_short_words
int suspect_short_words
Definition: tesseractclass.h:1013

CR_KEEP_SPACE
Definition: pageres.h:148

R_BLOCK_REJ
Definition: rejctmap.h:81

R_POSTNN_1IL
Definition: rejctmap.h:58

UNICHARSET::eq
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656

WERD_CHOICE::length
int length() const
Definition: ratngs.h:300

WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:219

WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:271

PAGE_RES_IT
Definition: pageres.h:656

tesseract::Tesseract::tessedit_zero_kelvin_rejection
bool tessedit_zero_kelvin_rejection
Definition: tesseractclass.h:1022

tprintf
#define tprintf(...)
Definition: tprintf.h:31

tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:72

tesseract::Tesseract::output_pass
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
Definition: output.cpp:68

W_FUZZY_NON
Definition: werd.h:43

WERD_CHOICE::unichar_lengths
const STRING & unichar_lengths() const
Definition: ratngs.h:531

BOOL8
unsigned char BOOL8
Definition: host.h:113

WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:160

WERD_CHOICE
Definition: ratngs.h:271

TBOX::right
inT16 right() const
Definition: rect.h:75

tesseract::Tesseract::tessedit_word_for_word
bool tessedit_word_for_word
Definition: tesseractclass.h:1020

tesseract::Wordrec::dict_word
int dict_word(const WERD_CHOICE &word)
Definition: tface.cpp:124

R_1IL_CONFLICT
Definition: rejctmap.h:57

tesseract::Tesseract::tessedit_rejection_debug
bool tessedit_rejection_debug
Definition: tesseractclass.h:1025

tesseract::Tesseract::safe_dict_word
inT16 safe_dict_word(const WERD_RES *werd_res)
Definition: reject.cpp:607

BLOCK_RES::block
BLOCK * block
Definition: pageres.h:99

tesseract::Tesseract::write_results
void write_results(PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol)
Definition: output.cpp:132

ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:84

tesseract::Tesseract::suspect_accept_rating
double suspect_accept_rating
Definition: tesseractclass.h:1016

tesseract::Tesseract::count_alphanums
inT16 count_alphanums(const WERD_CHOICE &word)
Definition: output.cpp:410

WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:524

W_BOL
Definition: werd.h:35

PAGE_RES_IT::block
BLOCK_RES * block() const
Definition: pageres.h:739

WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:297

PAGE_RES_IT::forward
WERD_RES * forward()
Definition: pageres.h:713

UNICHARSET::get_isdigit
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:470

tessvars.h

tesseract::Tesseract::tessedit_zero_rejection
bool tessedit_zero_rejection
Definition: tesseractclass.h:1018

PAGE_RES_IT::restart_page
WERD_RES * restart_page()
Definition: pageres.h:680

tesseract::Tesseract::acceptable_number_string
BOOL8 acceptable_number_string(const char *s, const char *lengths)
Definition: output.cpp:421

W_EOL
Definition: werd.h:36

tesseract::Tesseract::check_debug_pt
BOOL8 check_debug_pt(WERD_RES *word, int location)
Definition: control.cpp:1767

TBOX::left
inT16 left() const
Definition: rect.h:68

WERD_CHOICE::unichar_id
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312

BLOCK
Definition: ocrblock.h:30

WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:192

W_REP_CHAR
Definition: werd.h:41

W_FUZZY_SP
Definition: werd.h:42

R_DOC_REJ
Definition: rejctmap.h:80

tesseract::Tesseract::unlv_tilde_crunching
bool unlv_tilde_crunching
Definition: tesseractclass.h:933

WERD_CHOICE::debug_string
const STRING debug_string() const
Definition: ratngs.h:502

PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67

tesseract::Tesseract::get_rep_char
UNICHAR_ID get_rep_char(WERD_RES *word)
Definition: output.cpp:285

tesseract::TesseractStats::write_results_empty_block
bool write_results_empty_block
Definition: tesseractclass.h:140

tesseract::Tesseract::acceptable_word_string
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
Definition: control.cpp:1663

UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:33

PAGE_RES_IT::next_block
BLOCK_RES * next_block() const
Definition: pageres.h:748

WERD
Definition: werd.h:60

reject.h

TBOX::bottom
inT16 bottom() const
Definition: rect.h:61

tesseract::Tesseract::set_unlv_suspects
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:307

helpers.h

WERD_RES::word
WERD * word
Definition: pageres.h:175

docqual.h

AC_UNACCEPTABLE
Unacceptable word.
Definition: control.h:36

BLOCK_RES
Definition: pageres.h:97

CTRL_HARDLINE
#define CTRL_HARDLINE
Definition: output.cpp:48

FALSE
#define FALSE
Definition: capi.h:29

tesseract
Definition: baseapi.cpp:83

count
int count(LIST var_list)
Definition: oldlist.cpp:108

UNICHARSET::get_isalpha
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449

tesseract::TesseractStats::tilde_crunch_written
bool tilde_crunch_written
Definition: tesseractclass.h:137

tesseract::Tesseract::suspect_rating_per_ch
double suspect_rating_per_ch
Definition: tesseractclass.h:1015

TBOX
Definition: rect.h:30

tesseract::Tesseract::suspect_constrain_1Il
bool suspect_constrain_1Il
Definition: tesseractclass.h:1014

CR_NONE
Definition: pageres.h:147

TRUE
#define TRUE
Definition: capi.h:28

UNICHARSET
Definition: unicharset.h:139

WERD::space
uinT8 space()
Definition: werd.h:104

PAGE_RES_IT::next_word
WERD_RES * next_word() const
Definition: pageres.h:742

WERD_RES::MergeAdjacentBlobs
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:968

WERD::flag
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128

tesseract::Tesseract::tessedit_write_rep_codes
bool tessedit_write_rep_codes
Definition: tesseractclass.h:1002

TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:323

tesseract::Tesseract::suspect_level
int suspect_level
Definition: tesseractclass.h:1009

WERD_RES::unlv_crunch_mode
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294

NULL
#define NULL
Definition: host.h:144

CR_DELETE
Definition: pageres.h:150

tesseract::Tesseract::tessedit_write_block_separators
bool tessedit_write_block_separators
Definition: tesseractclass.h:1000

tesseract::TesseractStats::last_char_was_newline
bool last_char_was_newline
Definition: tesseractclass.h:138

tesseract::Tesseract::tessedit_minimal_rejection
bool tessedit_minimal_rejection
Definition: tesseractclass.h:1017

BLOCK::space
inT16 space() const
return spacing
Definition: ocrblock.h:102

STRING::string
const char * string() const
Definition: strngs.cpp:193

TBOX::top
inT16 top() const
Definition: rect.h:54

tesseract::Tesseract::unrecognised_char
char * unrecognised_char
Definition: tesseractclass.h:1008

FCOORD
Definition: points.h:189

PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:733

inT16
short inT16
Definition: host.h:100

inT32
int inT32
Definition: host.h:102

tesseractclass.h