tesseract-ocr.github.io/5.3.3/a00143_source.html

/******************************************************************

 * File:        superscript.cpp

 * Description: Correction pass to fix superscripts and subscripts.

 * Author:      David Eger

 *

 * (C) Copyright 2012, Google, Inc.

 ** Licensed under the Apache License, Version 2.0 (the "License");

 ** you may not use this file except in compliance with the License.

 ** You may obtain a copy of the License at

 ** http://www.apache.org/licenses/LICENSE-2.0

 ** Unless required by applicable law or agreed to in writing, software

 ** distributed under the License is distributed on an "AS IS" BASIS,

 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 ** See the License for the specific language governing permissions and

 ** limitations under the License.

 *

 **********************************************************************/


#include "normalis.h"

#include "tesseractclass.h"


namespace tesseract {


// Converts a count of leading unichars into a count of chopped blobs by
// summing the first num_unichars entries of word->best_state (each entry is
// the number of chopped pieces belonging to one unichar).
//
// Returns 0 when num_unichars <= 0. No bounds checking is done: the caller
// must ensure num_unichars does not exceed the size of best_state.
static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
  int chopped_total = 0;
  int unichar_idx = 0;
  while (unichar_idx < num_unichars) {
    chopped_total += word->best_state[unichar_idx];
    ++unichar_idx;
  }
  return chopped_total;
}


// Converts a count of trailing unichars into a count of chopped blobs by
// summing the last num_unichars entries of word->best_state, walking
// backwards from the final entry.
//
// Returns 0 when num_unichars <= 0. No bounds checking is done: the caller
// must ensure num_unichars does not exceed the size of best_state.
static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
  int chopped_total = 0;
  // Index of the final best_state entry; only dereferenced inside the loop.
  const auto last_idx = word->best_state.size() - 1;
  for (int taken = 0; taken < num_unichars; ++taken) {
    chopped_total += word->best_state[last_idx - taken];
  }
  return chopped_total;
}


// Examines the chopped pieces that make up one rebuilt blob and reports how
// many consecutive pieces at its leading and trailing edge sit in a
// superscript or subscript position.
//
// A piece is a superscript when the bottom of its bounding box is at or above
// super_y_bottom, a subscript when the top of its box is at or below
// sub_y_top, and normal otherwise.
//
//   word                   Word owning best_state and chopped_word.
//   rebuilt_blob_index     Index (into best_state) of the blob to inspect.
//   super_y_bottom         Minimum box bottom for a superscript piece.
//   sub_y_top              Maximum box top for a subscript piece.
//   leading_pos            Out: position of the leading outlier run.
//   num_leading_outliers   Out: length of the leading outlier run.
//   trailing_pos           Out: position of the last piece inspected.
//   num_trailing_outliers  Out: length of the trailing outlier run.
//
// Any of the four output pointers may be null, in which case that result is
// discarded.
static void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_bottom,
                           int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers,
                           ScriptPos *trailing_pos, int *num_trailing_outliers) {
  // Scratch targets that stand in for outputs the caller did not ask for.
  ScriptPos discard_lead_pos, discard_trail_pos;
  int discard_lead_count, discard_trail_count;
  ScriptPos &lead_pos_out = leading_pos ? *leading_pos : discard_lead_pos;
  int &lead_count_out = num_leading_outliers ? *num_leading_outliers : discard_lead_count;
  ScriptPos &trail_pos_out = trailing_pos ? *trailing_pos : discard_trail_pos;
  int &trail_count_out = num_trailing_outliers ? *num_trailing_outliers : discard_trail_count;

  trail_count_out = 0;
  lead_count_out = 0;
  trail_pos_out = SP_NORMAL;
  lead_pos_out = SP_NORMAL;

  // Range of chopped pieces belonging to the requested rebuilt blob.
  const int first_piece = LeadingUnicharsToChopped(word, rebuilt_blob_index);
  const int piece_count = word->best_state[rebuilt_blob_index];

  ScriptPos prev_pos = SP_NORMAL;
  int run_length = 0; // Length of the current run of same-position outliers.
  for (int piece = 0; piece < piece_count; ++piece) {
    TBOX box = word->chopped_word->blobs[first_piece + piece]->bounding_box();
    ScriptPos cur_pos;
    if (box.bottom() >= super_y_bottom) {
      cur_pos = SP_SUPERSCRIPT;
    } else if (box.top() <= sub_y_top) {
      cur_pos = SP_SUBSCRIPT;
    } else {
      cur_pos = SP_NORMAL;
    }

    if (cur_pos != SP_NORMAL) {
      // Extend the run if the position is unchanged, otherwise restart it.
      run_length = (cur_pos == prev_pos) ? run_length + 1 : 1;
    } else {
      // A run covering every piece seen so far is the leading outlier run.
      if (run_length == piece) {
        lead_count_out = run_length;
        lead_pos_out = prev_pos;
      }
      run_length = 0;
    }
    prev_pos = cur_pos;
  }

  // Whatever run is still open at the end is the trailing outlier run.
  trail_count_out = run_length;
  trail_pos_out = prev_pos;
}


// Tries to improve a recognized word by treating low-certainty, vertically
// displaced material at its leading and/or trailing edge as a superscript or
// subscript and re-recognizing those pieces separately.
//
// Steps:
//   1. GetSubAndSuperscriptCandidates() counts whole suspicious characters at
//      each edge.
//   2. YOutlierPieces() looks for partial characters (chopped pieces) next to
//      those edges that are also out of position.
//   3. TrySuperscriptSplits() splits the word, re-recognizes the pieces and
//      judges the result; if only part of the split was believable, one retry
//      is made with the believable part.
//
// Returns true if word was replaced (via ConsumeWordResults) by an accepted
// re-recognition, false if the word was left as it was.
bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
  if (word->tess_failed || word->word->flag(W_REP_CHAR) || !word->best_choice) {
    return false;
  }
  int num_leading, num_trailing;
  ScriptPos sp_leading, sp_trailing;
  float leading_certainty, trailing_certainty;
  float avg_certainty, unlikely_threshold;

  // Calculate the number of whole suspicious characters at the edges.
  GetSubAndSuperscriptCandidates(word, &num_leading, &sp_leading, &leading_certainty, &num_trailing,
                                 &sp_trailing, &trailing_certainty, &avg_certainty,
                                 &unlikely_threshold);

  // Labels used only for debug output below.
  const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super";
  const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super";

  int num_blobs = word->best_choice->length();

  // Calculate the remainder (partial characters) at the edges.
  // This accounts for us having classified the best version of
  // a word as [speaker?'] when it was instead [speaker.^{21}]
  // (that is we accidentally thought the 2 was attached to the period).
  int num_remainder_leading = 0, num_remainder_trailing = 0;
  if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) {
    int super_y_bottom = kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
    int sub_y_top = kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
    int last_word_char = num_blobs - 1 - num_trailing;
    float last_char_certainty = word->best_choice->certainty(last_word_char);
    if (word->best_choice->unichar_id(last_word_char) != 0 &&
        last_char_certainty <= unlikely_threshold) {
      ScriptPos rpos;
      YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, nullptr, nullptr, &rpos,
                     &num_remainder_trailing);
      // A remainder only counts if it agrees with the whole trailing chars.
      if (num_trailing > 0 && rpos != sp_trailing) {
        num_remainder_trailing = 0;
      }
      if (num_remainder_trailing > 0 && last_char_certainty < trailing_certainty) {
        trailing_certainty = last_char_certainty;
      }
    }
    // Do not let the leading remainder claim the same blob the trailing
    // remainder has already taken.
    bool another_blob_available =
        (num_remainder_trailing == 0) || num_leading + num_trailing + 1 < num_blobs;
    // Certainties are floats; keeping this as float (it used to be int)
    // avoids truncating toward zero before the threshold comparison and
    // before it is stored into leading_certainty.
    float first_char_certainty = word->best_choice->certainty(num_leading);
    if (another_blob_available && word->best_choice->unichar_id(num_leading) != 0 &&
        first_char_certainty <= unlikely_threshold) {
      ScriptPos lpos;
      YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, &lpos, &num_remainder_leading,
                     nullptr, nullptr);
      // A remainder only counts if it agrees with the whole leading chars.
      if (num_leading > 0 && lpos != sp_leading) {
        num_remainder_leading = 0;
      }
      if (num_remainder_leading > 0 && first_char_certainty < leading_certainty) {
        leading_certainty = first_char_certainty;
      }
    }
  }

  // If nothing to do, bail now.
  if (num_leading + num_trailing + num_remainder_leading + num_remainder_trailing == 0) {
    return false;
  }

  if (superscript_debug >= 1) {
    tprintf("Candidate for superscript detection: %s (",
            word->best_choice->unichar_string().c_str());
    if (num_leading || num_remainder_leading) {
      tprintf("%d.%d %s-leading ", num_leading, num_remainder_leading, leading_pos);
    }
    if (num_trailing || num_remainder_trailing) {
      tprintf("%d.%d %s-trailing ", num_trailing, num_remainder_trailing, trailing_pos);
    }
    tprintf(")\n");
  }
  if (superscript_debug >= 3) {
    word->best_choice->print();
  }
  if (superscript_debug >= 2) {
    tprintf(" Certainties -- Average: %.2f  Unlikely thresh: %.2f  ", avg_certainty,
            unlikely_threshold);
    if (num_leading) {
      tprintf("Orig. leading (min): %.2f  ", leading_certainty);
    }
    if (num_trailing) {
      tprintf("Orig. trailing (min): %.2f  ", trailing_certainty);
    }
    tprintf("\n");
  }

  // We've now calculated the number of rebuilt blobs we want to carve off.
  // However, split_word() works from TBLOBs in chopped_word, so we need to
  // convert to those.
  int num_chopped_leading = LeadingUnicharsToChopped(word, num_leading) + num_remainder_leading;
  int num_chopped_trailing = TrailingUnicharsToChopped(word, num_trailing) + num_remainder_trailing;

  int retry_leading = 0;
  int retry_trailing = 0;
  bool is_good = false;
  WERD_RES *revised = TrySuperscriptSplits(num_chopped_leading, leading_certainty, sp_leading,
                                           num_chopped_trailing, trailing_certainty, sp_trailing,
                                           word, &is_good, &retry_leading, &retry_trailing);
  if (is_good) {
    word->ConsumeWordResults(revised);
  } else if (retry_leading || retry_trailing) {
    // Only part of the split was believable: retry once on the revised word
    // using just the believable run at each edge.
    int retry_chopped_leading = LeadingUnicharsToChopped(revised, retry_leading);
    int retry_chopped_trailing = TrailingUnicharsToChopped(revised, retry_trailing);
    WERD_RES *revised2 = TrySuperscriptSplits(
        retry_chopped_leading, leading_certainty, sp_leading, retry_chopped_trailing,
        trailing_certainty, sp_trailing, revised, &is_good, &retry_leading, &retry_trailing);
    if (is_good) {
      word->ConsumeWordResults(revised2);
    }
    delete revised2;
  }
  delete revised;
  return is_good;
}


void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading,

                                               ScriptPos *leading_pos, float *leading_certainty,

                                               int *num_rebuilt_trailing, ScriptPos *trailing_pos,

                                               float *trailing_certainty, float *avg_certainty,

                                               float *unlikely_threshold) {

  *avg_certainty = *unlikely_threshold = 0.0f;

  *num_rebuilt_leading = *num_rebuilt_trailing = 0;

  *leading_certainty = *trailing_certainty = 0.0f;


  int super_y_bottom = kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;

  int sub_y_top = kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;


  // Step one: Get an average certainty for "normally placed" characters.


  // Counts here are of blobs in the rebuild_word / unichars in best_choice.

  *leading_pos = *trailing_pos = SP_NORMAL;

  int leading_outliers = 0;

  int trailing_outliers = 0;

  int num_normal = 0;

  float normal_certainty_total = 0.0f;

  float worst_normal_certainty = 0.0f;

  ScriptPos last_pos = SP_NORMAL;

  int num_blobs = word->rebuild_word->NumBlobs();

  for (int b = 0; b < num_blobs; ++b) {

    TBOX box = word->rebuild_word->blobs[b]->bounding_box();

    ScriptPos pos = SP_NORMAL;

    if (box.bottom() >= super_y_bottom) {

      pos = SP_SUPERSCRIPT;

    } else if (box.top() <= sub_y_top) {

      pos = SP_SUBSCRIPT;

    }

    if (pos == SP_NORMAL) {

      if (word->best_choice->unichar_id(b) != 0) {

        float char_certainty = word->best_choice->certainty(b);

        if (char_certainty < worst_normal_certainty) {

          worst_normal_certainty = char_certainty;

        }

        num_normal++;

        normal_certainty_total += char_certainty;

      }

      if (trailing_outliers == b) {

        leading_outliers = trailing_outliers;

        *leading_pos = last_pos;

      }

      trailing_outliers = 0;

    } else {

      if (last_pos == pos) {

        trailing_outliers++;

      } else {

        trailing_outliers = 1;

      }

    }

    last_pos = pos;

  }

  *trailing_pos = last_pos;

  if (num_normal >= 3) { // throw out the worst as an outlier.

    num_normal--;

    normal_certainty_total -= worst_normal_certainty;

  }

  if (num_normal > 0) {

    *avg_certainty = normal_certainty_total / num_normal;

    *unlikely_threshold = superscript_worse_certainty * (*avg_certainty);

  }

  if (num_normal == 0 || (leading_outliers == 0 && trailing_outliers == 0)) {

    return;

  }


  // Step two: Try to split off bits of the word that are both outliers

  //           and have much lower certainty than average

  // Calculate num_leading and leading_certainty.

  for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0; *num_rebuilt_leading < leading_outliers;

       (*num_rebuilt_leading)++) {

    float char_certainty = word->best_choice->certainty(*num_rebuilt_leading);

    if (char_certainty > *unlikely_threshold) {

      break;

    }

    if (char_certainty < *leading_certainty) {

      *leading_certainty = char_certainty;

    }

  }


  // Calculate num_trailing and trailing_certainty.

  for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0;

       *num_rebuilt_trailing < trailing_outliers; (*num_rebuilt_trailing)++) {

    int blob_idx = num_blobs - 1 - *num_rebuilt_trailing;

    float char_certainty = word->best_choice->certainty(blob_idx);

    if (char_certainty > *unlikely_threshold) {

      break;

    }

    if (char_certainty < *trailing_certainty) {

      *trailing_certainty = char_certainty;

    }

  }

}


// Splits a word into up to three pieces (prefix, core, suffix), re-recognizes
// the prefix and suffix with the y-position penalties switched off, and
// decides whether the result is believable as superscript/subscript text.
//
//   num_chopped_leading     Number of chopped blobs to split off the front.
//   leading_certainty       Certainty the re-recognized prefix is compared
//                           against (scaled by superscript_bettered_certainty).
//   leading_pos             Script position to assign to the prefix.
//   num_chopped_trailing    Number of chopped blobs to split off the back.
//   trailing_certainty      Certainty the re-recognized suffix is compared
//                           against (scaled by superscript_bettered_certainty).
//   trailing_pos            Script position to assign to the suffix.
//   word                    Word to split; it is copied, not modified.
//   is_good                 Out: true if every split piece was believable.
//   retry_rebuild_leading   Out: believable run length at the start of the
//                           prefix when the prefix as a whole was rejected.
//   retry_rebuild_trailing  Out: believable run length at the end of the
//                           suffix when the suffix as a whole was rejected.
//
// Returns a newly allocated word with the pieces joined back together, which
// the caller must delete, or nullptr if nothing was believable and there is
// nothing worth retrying.
WERD_RES *Tesseract::TrySuperscriptSplits(int num_chopped_leading, float leading_certainty,
                                          ScriptPos leading_pos, int num_chopped_trailing,
                                          float trailing_certainty, ScriptPos trailing_pos,
                                          WERD_RES *word, bool *is_good, int *retry_rebuild_leading,
                                          int *retry_rebuild_trailing) {
  int num_chopped = word->chopped_word->NumBlobs();

  *retry_rebuild_leading = *retry_rebuild_trailing = 0;

  // Chop apart the word into up to three pieces.

  // Blamer bundles handed back by split_word(); join_words() takes them over
  // when the pieces are pasted together again.
  BlamerBundle *bb0 = nullptr;
  BlamerBundle *bb1 = nullptr;
  WERD_RES *prefix = nullptr;
  WERD_RES *core = nullptr;
  WERD_RES *suffix = nullptr;
  if (num_chopped_leading > 0) {
    prefix = new WERD_RES(*word);
    split_word(prefix, num_chopped_leading, &core, &bb0);
  } else {
    core = new WERD_RES(*word);
  }

  if (num_chopped_trailing > 0) {
    // core no longer contains the leading blobs, so the split point is
    // relative to what is left.
    int split_pt = num_chopped - num_chopped_trailing - num_chopped_leading;
    split_word(core, split_pt, &suffix, &bb1);
  }

  //  Recognize the pieces in turn.
  int saved_cp_multiplier = classify_class_pruner_multiplier;
  int saved_im_multiplier = classify_integer_matcher_multiplier;
  if (prefix) {
    // Turn off Tesseract's y-position penalties for the leading superscript.
    classify_class_pruner_multiplier.set_value(0);
    classify_integer_matcher_multiplier.set_value(0);

    if (superscript_debug >= 3) {
      tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading);
    }
    recog_word_recursive(prefix);
    if (superscript_debug >= 2) {
      tprintf(" The leading bits look like %s %s\n", ScriptPosToString(leading_pos),
              prefix->best_choice->unichar_string().c_str());
    }

    // Restore the normal y-position penalties.
    classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
    classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
  }

  if (superscript_debug >= 3) {
    tprintf(" recognizing middle %d chopped blobs\n",
            num_chopped - num_chopped_leading - num_chopped_trailing);
  }

  if (suffix) {
    // Turn off Tesseract's y-position penalties for the trailing superscript.
    classify_class_pruner_multiplier.set_value(0);
    classify_integer_matcher_multiplier.set_value(0);

    if (superscript_debug >= 3) {
      tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing);
    }
    recog_word_recursive(suffix);
    if (superscript_debug >= 2) {
      tprintf(" The trailing bits look like %s %s\n", ScriptPosToString(trailing_pos),
              suffix->best_choice->unichar_string().c_str());
    }

    // Restore the normal y-position penalties.
    classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
    classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
  }

  // Evaluate whether we think the results are believably better
  // than what we already had.
  bool good_prefix =
      !prefix || BelievableSuperscript(superscript_debug >= 1, *prefix,
                                       superscript_bettered_certainty * leading_certainty,
                                       retry_rebuild_leading, nullptr);
  bool good_suffix =
      !suffix || BelievableSuperscript(superscript_debug >= 1, *suffix,
                                       superscript_bettered_certainty * trailing_certainty, nullptr,
                                       retry_rebuild_trailing);

  *is_good = good_prefix && good_suffix;
  if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) {
    // None of it is any good. Quit now.
    delete core;
    delete prefix;
    delete suffix;
    // Neither bundle reaches join_words() on this path, so release both
    // (bb0 was previously leaked here).
    delete bb0;
    delete bb1;
    return nullptr;
  }
  recog_word_recursive(core);

  // Now paste the results together into core.
  if (suffix) {
    suffix->SetAllScriptPositions(trailing_pos);
    join_words(core, suffix, bb1);
  }
  if (prefix) {
    prefix->SetAllScriptPositions(leading_pos);
    join_words(prefix, core, bb0);
    core = prefix;
    prefix = nullptr;
  }

  if (superscript_debug >= 1) {
    tprintf("%s superscript fix: %s\n", *is_good ? "ACCEPT" : "REJECT",
            core->best_choice->unichar_string().c_str());
  }
  return core;
}


bool Tesseract::BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold,

                                      int *left_ok, int *right_ok) const {

  unsigned initial_ok_run_count = 0;

  unsigned ok_run_count = 0;

  float worst_certainty = 0.0f;

  const WERD_CHOICE &wc = *word.best_choice;


  const UnicityTable<FontInfo> &fontinfo_table = get_fontinfo_table();

  for (unsigned i = 0; i < wc.length(); i++) {

    TBLOB *blob = word.rebuild_word->blobs[i];

    UNICHAR_ID unichar_id = wc.unichar_id(i);

    float char_certainty = wc.certainty(i);

    bool bad_certainty = char_certainty < certainty_threshold;

    bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id);

    bool is_italic = word.fontinfo && word.fontinfo->is_italic();

    BLOB_CHOICE *choice = word.GetBlobChoice(i);

    if (choice && fontinfo_table.size() > 0) {

      // Get better information from the specific choice, if available.

      int font_id1 = choice->fontinfo_id();

      bool font1_is_italic = font_id1 >= 0 ? fontinfo_table.at(font_id1).is_italic() : false;

      int font_id2 = choice->fontinfo_id2();

      is_italic = font1_is_italic && (font_id2 < 0 || fontinfo_table.at(font_id2).is_italic());

    }


    float height_fraction = 1.0f;

    float char_height = blob->bounding_box().height();

    float normal_height = char_height;

    if (wc.unicharset()->top_bottom_useful()) {

      int min_bot, max_bot, min_top, max_top;

      wc.unicharset()->get_top_bottom(unichar_id, &min_bot, &max_bot, &min_top, &max_top);

      float hi_height = max_top - max_bot;

      float lo_height = min_top - min_bot;

      normal_height = (hi_height + lo_height) / 2;

      if (normal_height >= kBlnXHeight) {

        // Only ding characters that we have decent information for because

        // they're supposed to be normal sized, not tiny specks or dashes.

        height_fraction = char_height / normal_height;

      }

    }

    bool bad_height = height_fraction < superscript_scaledown_ratio;


    if (debug) {

      if (is_italic) {

        tprintf(" Rejecting: superscript is italic.\n");

      }

      if (is_punc) {

        tprintf(" Rejecting: punctuation present.\n");

      }

      const char *char_str = wc.unicharset()->id_to_unichar(unichar_id);

      if (bad_certainty) {

        tprintf(

            " Rejecting: don't believe character %s with certainty %.2f "

            "which is less than threshold %.2f\n",

            char_str, char_certainty, certainty_threshold);

      }

      if (bad_height) {

        tprintf(

            " Rejecting: character %s seems too small @ %.2f versus "

            "expected %.2f\n",

            char_str, char_height, normal_height);

      }

    }

    if (bad_certainty || bad_height || is_punc || is_italic) {

      if (ok_run_count == i) {

        initial_ok_run_count = ok_run_count;

      }

      ok_run_count = 0;

    } else {

      ok_run_count++;

    }

    if (char_certainty < worst_certainty) {

      worst_certainty = char_certainty;

    }

  }

  bool all_ok = ok_run_count == wc.length();

  if (all_ok && debug) {

    tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);

  }

  if (!all_ok) {

    if (left_ok) {

      *left_ok = initial_ok_run_count;

    }

    if (right_ok) {

      *right_ok = ok_run_count;

    }

  }

  return all_ok;

}


} // namespace tesseract

tesseractclass.h

normalis.h

TBOX
@ TBOX
Definition: cleanapi_test.cc:19

i
int i
Definition: gmock-matchers_test.cc:718

tesseract
Definition: baseapi.h:39

tesseract::W_REP_CHAR
@ W_REP_CHAR
repeated character
Definition: werd.h:40

tesseract::tprintf
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

tesseract::ScriptPos
ScriptPos
Definition: ratngs.h:254

tesseract::SP_SUBSCRIPT
@ SP_SUBSCRIPT
Definition: ratngs.h:254

tesseract::SP_NORMAL
@ SP_NORMAL
Definition: ratngs.h:254

tesseract::SP_SUPERSCRIPT
@ SP_SUPERSCRIPT
Definition: ratngs.h:254

tesseract::kBlnXHeight
const int kBlnXHeight
Definition: normalis.h:33

tesseract::UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:34

tesseract::ScriptPosToString
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:193

tesseract::kBlnBaselineOffset
const int kBlnBaselineOffset
Definition: normalis.h:34

tesseract::Tesseract::GetSubAndSuperscriptCandidates
void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold)
Definition: superscript.cpp:250

tesseract::Tesseract::split_word
void split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const
Definition: tfacepp.cpp:163

tesseract::Tesseract::SubAndSuperscriptFix
bool SubAndSuperscriptFix(WERD_RES *word_res)
Definition: superscript.cpp:108

tesseract::Tesseract::recog_word_recursive
void recog_word_recursive(WERD_RES *word)
Definition: tfacepp.cpp:94

tesseract::Tesseract::BelievableSuperscript
bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const
Definition: superscript.cpp:503

tesseract::Tesseract::TrySuperscriptSplits
WERD_RES * TrySuperscriptSplits(int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing)
Definition: superscript.cpp:369

tesseract::Tesseract::join_words
void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const
Definition: tfacepp.cpp:216

tesseract::BlamerBundle
Definition: blamer.h:107

tesseract::TBLOB
Definition: blobs.h:291

tesseract::TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466

tesseract::TWERD::blobs
std::vector< TBLOB * > blobs
Definition: blobs.h:462

tesseract::TWERD::NumBlobs
unsigned NumBlobs() const
Definition: blobs.h:449

tesseract::UnicityTable
Definition: unicity_table.h:37

tesseract::UnicityTable::size
int size() const
Return the size used.
Definition: unicity_table.h:51

tesseract::UnicityTable::at
const T & at(int id) const
Return the object from an id.
Definition: unicity_table.h:56

tesseract::FontInfo::is_italic
bool is_italic() const
Definition: fontinfo.h:118

tesseract::WERD_RES
Definition: pageres.h:164

tesseract::WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:239

tesseract::WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:210

tesseract::WERD_RES::ConsumeWordResults
void ConsumeWordResults(WERD_RES *word)
Definition: pageres.cpp:785

tesseract::WERD_RES::fontinfo
const FontInfo * fontinfo
Definition: pageres.h:307

tesseract::WERD_RES::word
WERD * word
Definition: pageres.h:184

tesseract::WERD_RES::SetAllScriptPositions
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: pageres.cpp:895

tesseract::WERD_RES::tess_failed
bool tess_failed
Definition: pageres.h:293

tesseract::WERD_RES::GetBlobChoice
BLOB_CHOICE * GetBlobChoice(unsigned index) const
Definition: pageres.cpp:768

tesseract::WERD_RES::rebuild_word
TWERD * rebuild_word
Definition: pageres.h:264

tesseract::BLOB_CHOICE
Definition: ratngs.h:56

tesseract::BLOB_CHOICE::fontinfo_id2
int16_t fontinfo_id2() const
Definition: ratngs.h:93

tesseract::BLOB_CHOICE::fontinfo_id
int16_t fontinfo_id() const
Definition: ratngs.h:90

tesseract::WERD_CHOICE
Definition: ratngs.h:258

tesseract::WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:315

tesseract::WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:299

tesseract::WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:281

tesseract::WERD_CHOICE::length
unsigned length() const
Definition: ratngs.h:287

tesseract::WERD_CHOICE::print
void print() const
Definition: ratngs.h:561

tesseract::WERD_CHOICE::unichar_string
std::string & unichar_string()
Definition: ratngs.h:519

tesseract::TBOX
Definition: rect.h:37

tesseract::TBOX::height
TDimension height() const
Definition: rect.h:118

tesseract::TBOX::top
TDimension top() const
Definition: rect.h:68

tesseract::TBOX::bottom
TDimension bottom() const
Definition: rect.h:75

tesseract::WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:128

tesseract::UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279

tesseract::UNICHARSET::get_top_bottom
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:586

tesseract::UNICHARSET::top_bottom_useful
bool top_bottom_useful() const
Definition: unicharset.h:555

tesseract::UNICHARSET::get_ispunctuation
bool get_ispunctuation(UNICHAR_ID unichar_id) const
Definition: unicharset.h:533

tesseract::Classify::get_fontinfo_table
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324