tesseract-ocr.github.io/5.3.3/a00842_source.html

// Copyright 2011 Google Inc. All Rights Reserved.

// Author: rays@google.com (Ray Smith)

//

// Licensed under the Apache License, Version 2.0 (the "License");

// you may not use this file except in compliance with the License.

// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software

// distributed under the License is distributed on an "AS IS" BASIS,

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

// See the License for the specific language governing permissions and

// limitations under the License.

//


#ifdef HAVE_CONFIG_H

#  include "config_auto.h"

#endif


#include "errorcounter.h"


#include "fontinfo.h"

#include "sampleiterator.h"

#include "shapeclassifier.h"

#include "shapetable.h"

#include "trainingsample.h"

#include "trainingsampleset.h"

#include "unicity_table.h"


#include <algorithm>

#include <ctime>


namespace tesseract {


// Difference in result rating to be thought of as an "equal" choice.

const double kRatingEpsilon = 1.0 / 32;


// Tests a classifier, computing its error rate.

// See errorcounter.h for description of arguments.

// Iterates over the samples, calling the classifier in normal/silent mode.

// If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate

// report_level is set (4 or greater), it will then call the classifier again

// with a debug flag and a keep_this argument to find out what is going on.

double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level,

                                      CountTypes boosting_mode, const FontInfoTable &fontinfo_table,

                                      const std::vector<Image > &page_images, SampleIterator *it,

                                      double *unichar_error, double *scaled_error,

                                      std::string *fonts_report) {

  const int fontsize = it->sample_set()->NumFonts();

  ErrorCounter counter(classifier->GetUnicharset(), fontsize);

  std::vector<UnicharRating> results;


  clock_t start = clock();

  unsigned total_samples = 0;

  double unscaled_error = 0.0;

  // Set a number of samples on which to run the classify debug mode.

  int error_samples = report_level > 3 ? report_level * report_level : 0;

  // Iterate over all the samples, accumulating errors.

  for (it->Begin(); !it->AtEnd(); it->Next()) {

    TrainingSample *mutable_sample = it->MutableSample();

    int page_index = mutable_sample->page_num();

    Image page_pix =

        0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;

    // No debug, no keep this.

    classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);

    bool debug_it = false;

    int correct_id = mutable_sample->class_id();

    if (counter.unicharset_.has_special_codes() &&

        (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||

         correct_id == UNICHAR_BROKEN)) {

      // This is junk so use the special counter.

      debug_it = counter.AccumulateJunk(report_level > 3, results, mutable_sample);

    } else {

      debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, fontinfo_table, results,

                                          mutable_sample);

    }

    if (debug_it && error_samples > 0) {

      // Running debug, keep the correct answer, and debug the classifier.

      tprintf("Error on sample %d: %s Classifier debug output:\n", it->GlobalSampleIndex(),

              it->sample_set()->SampleToString(*mutable_sample).c_str());

#ifndef GRAPHICS_DISABLED

      classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);

#endif

      --error_samples;

    }

    ++total_samples;

  }

  const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;

  // Create the appropriate error report.

  unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,

                                        unichar_error, fonts_report);

  if (scaled_error != nullptr) {

    *scaled_error = counter.scaled_error_;

  }

  if (report_level > 1 && total_samples > 0) {

    // It is useful to know the time in microseconds/char.

    tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time,

            1000000.0 * total_time / total_samples);

  }

  return unscaled_error;

}


// Tests a pair of classifiers, debugging errors of the new against the old.

// See errorcounter.h for description of arguments.

// Iterates over the samples, calling the classifiers in normal/silent mode.

// If the new_classifier makes a boosting_mode error that the old_classifier

// does not, it will then call the new_classifier again with a debug flag

// and a keep_this argument to find out what is going on.

void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,

                                  CountTypes boosting_mode, const FontInfoTable &fontinfo_table,

                                  const std::vector<Image > &page_images, SampleIterator *it) {

  int fontsize = it->sample_set()->NumFonts();

  ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);

  ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);

  std::vector<UnicharRating> results;


  int total_samples = 0;

  int error_samples = 25;

  int total_new_errors = 0;

  // Iterate over all the samples, accumulating errors.

  for (it->Begin(); !it->AtEnd(); it->Next()) {

    TrainingSample *mutable_sample = it->MutableSample();

    int page_index = mutable_sample->page_num();

    Image page_pix =

        0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;

    // No debug, no keep this.

    old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,

                                          &results);

    int correct_id = mutable_sample->class_id();

    if (correct_id != 0 && !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,

                                                         results, mutable_sample)) {

      // old classifier was correct, check the new one.

      new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,

                                            &results);

      if (correct_id != 0 && new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,

                                                          results, mutable_sample)) {

        tprintf("New Error on sample %d: Classifier debug output:\n", it->GlobalSampleIndex());

        ++total_new_errors;

        new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1, correct_id, &results);

        if (results.size() > 0 && error_samples > 0) {

#ifndef GRAPHICS_DISABLED

          new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);

#endif

          --error_samples;

        }

      }

    }

    ++total_samples;

  }

  tprintf("Total new errors = %d\n", total_new_errors);

}


// Constructor is private. Only anticipated use of ErrorCounter is via

// the static ComputeErrorRate.

ErrorCounter::ErrorCounter(const UNICHARSET &unicharset, int fontsize)

    : scaled_error_(0.0)

    , rating_epsilon_(kRatingEpsilon)

    , unichar_counts_(unicharset.size(), unicharset.size(), 0)

    , ok_score_hist_(0, 101)

    , bad_score_hist_(0, 101)

    , unicharset_(unicharset) {

  Counts empty_counts;

  font_counts_.clear();

  font_counts_.resize(fontsize, empty_counts);

  multi_unichar_counts_.clear();

  multi_unichar_counts_.resize(unicharset.size(), 0);

}


// Accumulates the errors from the classifier results on a single sample.

// Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred.

// boosting_mode selects the type of error to be used for boosting and the

// is_error_ member of sample is set according to whether the required type

// of error occurred. The font_table provides access to font properties

// for error counting and shape_table is used to understand the relationship

// between unichar_ids and shape_ids in the results

bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,

                                    const FontInfoTable &font_table,

                                    const std::vector<UnicharRating> &results,

                                    TrainingSample *sample) {

  int num_results = results.size();

  int answer_actual_rank = -1;

  int font_id = sample->font_id();

  int unichar_id = sample->class_id();

  sample->set_is_error(false);

  if (num_results == 0) {

    // Reject. We count rejects as a separate category, but still mark the

    // sample as an error in case any training module wants to use that to

    // improve the classifier.

    sample->set_is_error(true);

    ++font_counts_[font_id].n[CT_REJECT];

  } else {

    // Find rank of correct unichar answer, using rating_epsilon_ to allow

    // different answers to score as equal. (Ignoring the font.)

    int epsilon_rank = 0;

    int answer_epsilon_rank = -1;

    int num_top_answers = 0;

    double prev_rating = results[0].rating;

    bool joined = false;

    bool broken = false;

    int res_index = 0;

    while (res_index < num_results) {

      if (results[res_index].rating < prev_rating - rating_epsilon_) {

        ++epsilon_rank;

        prev_rating = results[res_index].rating;

      }

      if (results[res_index].unichar_id == unichar_id && answer_epsilon_rank < 0) {

        answer_epsilon_rank = epsilon_rank;

        answer_actual_rank = res_index;

      }

      if (results[res_index].unichar_id == UNICHAR_JOINED && unicharset_.has_special_codes()) {

        joined = true;

      } else if (results[res_index].unichar_id == UNICHAR_BROKEN &&

                 unicharset_.has_special_codes()) {

        broken = true;

      } else if (epsilon_rank == 0) {

        ++num_top_answers;

      }

      ++res_index;

    }

    if (answer_actual_rank != 0) {

      // Correct result is not absolute top.

      ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];

      if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) {

        sample->set_is_error(true);

      }

    }

    if (answer_epsilon_rank == 0) {

      ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];

      // Unichar OK, but count if multiple unichars.

      if (num_top_answers > 1) {

        ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];

        ++multi_unichar_counts_[unichar_id];

      }

      // Check to see if any font in the top choice has attributes that match.

      // TODO(rays) It is easy to add counters for individual font attributes

      // here if we want them.

      if (font_table.SetContainsFontProperties(font_id, results[answer_actual_rank].fonts)) {

        // Font attributes were matched.

        // Check for multiple properties.

        if (font_table.SetContainsMultipleFontProperties(results[answer_actual_rank].fonts)) {

          ++font_counts_[font_id].n[CT_OK_MULTI_FONT];

        }

      } else {

        // Font attributes weren't matched.

        ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];

      }

    } else {

      // This is a top unichar error.

      ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];

      if (boosting_mode == CT_UNICHAR_TOP1_ERR) {

        sample->set_is_error(true);

      }

      // Count maps from unichar id to wrong unichar id.

      ++unichar_counts_(unichar_id, results[0].unichar_id);

      if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {

        // It is also a 2nd choice unichar error.

        ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];

        if (boosting_mode == CT_UNICHAR_TOP2_ERR) {

          sample->set_is_error(true);

        }

      }

      if (answer_epsilon_rank < 0) {

        // It is also a top-n choice unichar error.

        ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];

        if (boosting_mode == CT_UNICHAR_TOPN_ERR) {

          sample->set_is_error(true);

        }

        answer_epsilon_rank = epsilon_rank;

      }

    }

    // Compute mean number of return values and mean rank of correct answer.

    font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;

    font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;

    if (joined) {

      ++font_counts_[font_id].n[CT_OK_JOINED];

    }

    if (broken) {

      ++font_counts_[font_id].n[CT_OK_BROKEN];

    }

  }

  // If it was an error for boosting then sum the weight.

  if (sample->is_error()) {

    scaled_error_ += sample->weight();

    if (debug) {

      tprintf("%d results for char %s font %d :", num_results,

              unicharset_.id_to_unichar(unichar_id), font_id);

      for (int i = 0; i < num_results; ++i) {

        tprintf(" %.3f : %s\n", results[i].rating,

                unicharset_.id_to_unichar(results[i].unichar_id));

      }

      return true;

    }

    int percent = 0;

    if (num_results > 0) {

      percent = IntCastRounded(results[0].rating * 100);

    }

    bad_score_hist_.add(percent, 1);

  } else {

    int percent = 0;

    if (answer_actual_rank >= 0) {

      percent = IntCastRounded(results[answer_actual_rank].rating * 100);

    }

    ok_score_hist_.add(percent, 1);

  }

  return false;

}


// Accumulates counts for junk. Counts only whether the junk was correctly

// rejected or not.

bool ErrorCounter::AccumulateJunk(bool debug, const std::vector<UnicharRating> &results,

                                  TrainingSample *sample) {

  // For junk we accept no answer, or an explicit shape answer matching the

  // class id of the sample.

  const int num_results = results.size();

  const int font_id = sample->font_id();

  const int unichar_id = sample->class_id();

  int percent = 0;

  if (num_results > 0) {

    percent = IntCastRounded(results[0].rating * 100);

  }

  if (num_results > 0 && results[0].unichar_id != unichar_id) {

    // This is a junk error.

    ++font_counts_[font_id].n[CT_ACCEPTED_JUNK];

    sample->set_is_error(true);

    // It counts as an error for boosting too so sum the weight.

    scaled_error_ += sample->weight();

    bad_score_hist_.add(percent, 1);

    return debug;

  } else {

    // Correctly rejected.

    ++font_counts_[font_id].n[CT_REJECTED_JUNK];

    sample->set_is_error(false);

    ok_score_hist_.add(percent, 1);

  }

  return false;

}


// Creates a report of the error rate. The report_level controls the detail

// that is reported to stderr via tprintf:

// 0   -> no output.

// >=1 -> bottom-line error rate.

// >=3 -> font-level error rate.

// boosting_mode determines the return value. It selects which (un-weighted)

// error rate to return.

// The fontinfo_table from MasterTrainer provides the names of fonts.

// The it determines the current subset of the training samples.

// If not nullptr, the top-choice unichar error rate is saved in unichar_error.

// If not nullptr, the report string is saved in fonts_report.

// (Ignoring report_level).

double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,

                                  const FontInfoTable &fontinfo_table, const SampleIterator &it,

                                  double *unichar_error, std::string *fonts_report) {

  // Compute totals over all the fonts and report individual font results

  // when required.

  Counts totals;

  int fontsize = font_counts_.size();

  for (int f = 0; f < fontsize; ++f) {

    // Accumulate counts over fonts.

    totals += font_counts_[f];

    std::string font_report;

    if (ReportString(false, font_counts_[f], font_report)) {

      if (fonts_report != nullptr) {

        *fonts_report += fontinfo_table.at(f).name;

        *fonts_report += ": ";

        *fonts_report += font_report;

        *fonts_report += "\n";

      }

      if (report_level > 2) {

        // Report individual font error rates.

        tprintf("%s: %s\n", fontinfo_table.at(f).name, font_report.c_str());

      }

    }

  }

  // Report the totals.

  std::string total_report;

  bool any_results = ReportString(true, totals, total_report);

  if (fonts_report != nullptr && fonts_report->empty()) {

    // Make sure we return something even if there were no samples.

    *fonts_report = "NoSamplesFound: ";

    *fonts_report += total_report;

    *fonts_report += "\n";

  }

  if (report_level > 0) {

    // Report the totals.

    std::string total_report;

    if (any_results) {

      tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, total_report.c_str());

    }

    // Report the worst substitution error only for now.

    if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {

      int charsetsize = unicharset_.size();

      int worst_uni_id = 0;

      int worst_result_id = 0;

      int worst_err = 0;

      for (int u = 0; u < charsetsize; ++u) {

        for (int v = 0; v < charsetsize; ++v) {

          if (unichar_counts_(u, v) > worst_err) {

            worst_err = unichar_counts_(u, v);

            worst_uni_id = u;

            worst_result_id = v;

          }

        }

      }

      if (worst_err > 0) {

        tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", worst_uni_id,

                unicharset_.id_to_unichar(worst_uni_id), unicharset_.id_to_unichar(worst_result_id),

                worst_err, totals.n[CT_UNICHAR_TOP1_ERR],

                100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);

      }

    }

    tprintf("Multi-unichar shape use:\n");

    for (int u = 0; u < multi_unichar_counts_.size(); ++u) {

      if (multi_unichar_counts_[u] > 0) {

        tprintf("%d multiple answers for unichar: %s\n", multi_unichar_counts_[u],

                unicharset_.id_to_unichar(u));

      }

    }

    tprintf("OK Score histogram:\n");

    ok_score_hist_.print();

    tprintf("ERROR Score histogram:\n");

    bad_score_hist_.print();

  }


  double rates[CT_SIZE];

  if (!ComputeRates(totals, rates)) {

    return 0.0;

  }

  // Set output values if asked for.

  if (unichar_error != nullptr) {

    *unichar_error = rates[CT_UNICHAR_TOP1_ERR];

  }

  return rates[boosting_mode];

}


// Sets the report string to a combined human and machine-readable report

// string of the error rates.

// Returns false if there is no data, leaving report unchanged, unless

// even_if_empty is true.

bool ErrorCounter::ReportString(bool even_if_empty, const Counts &counts, std::string &report) {

  // Compute the error rates.

  double rates[CT_SIZE];

  if (!ComputeRates(counts, rates) && !even_if_empty) {

    return false;

  }

  // Using %.4g%%, the length of the output string should exactly match the

  // length of the format string, but in case of overflow, allow for +eddd

  // on each number.

  const int kMaxExtraLength = 5; // Length of +eddd.

  // Keep this format string and the snprintf in sync with the CountTypes enum.

  const char format_str[] =

      "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] "

      "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, "

      "FontAttr=%.4g%%, Multi=%.4g%%, "

      "Answers=%.3g, Rank=%.3g, "

      "OKjunk=%.4g%%, Badjunk=%.4g%%";

  constexpr size_t max_str_len = sizeof(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1;

  char formatted_str[max_str_len];

  snprintf(formatted_str, max_str_len, format_str, rates[CT_UNICHAR_TOP1_ERR] * 100.0,

           rates[CT_UNICHAR_TOP2_ERR] * 100.0, rates[CT_UNICHAR_TOPN_ERR] * 100.0,

           rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, rates[CT_OK_MULTI_UNICHAR] * 100.0,

           rates[CT_OK_JOINED] * 100.0, rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0,

           rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_OK_MULTI_FONT] * 100.0, rates[CT_NUM_RESULTS],

           rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], 100.0 * rates[CT_ACCEPTED_JUNK]);

  report = formatted_str;

  // Now append each field of counts with a tab in front so the result can

  // be loaded into a spreadsheet.

  for (int ct : counts.n) {

    report += "\t" + std::to_string(ct);

  }

  return true;

}


// Computes the error rates and returns in rates which is an array of size

// CT_SIZE. Returns false if there is no data, leaving rates unchanged.

bool ErrorCounter::ComputeRates(const Counts &counts, double rates[CT_SIZE]) {

  const int ok_samples =

      counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] + counts.n[CT_REJECT];

  const int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK];

  // Compute rates for normal chars.

  double denominator = static_cast<double>(std::max(ok_samples, 1));

  for (int ct = 0; ct <= CT_RANK; ++ct) {

    rates[ct] = counts.n[ct] / denominator;

  }

  // Compute rates for junk.

  denominator = static_cast<double>(std::max(junk_samples, 1));

  for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct) {

    rates[ct] = counts.n[ct] / denominator;

  }

  return ok_samples != 0 || junk_samples != 0;

}


ErrorCounter::Counts::Counts() {

  memset(n, 0, sizeof(n[0]) * CT_SIZE);

}

// Adds other into this for computing totals.

void ErrorCounter::Counts::operator+=(const Counts &other) {

  for (int ct = 0; ct < CT_SIZE; ++ct) {

    n[ct] += other.n[ct];

  }

}


} // namespace tesseract.

unicity_table.h

fontinfo.h

sampleiterator.h

errorcounter.h

trainingsampleset.h

shapetable.h

trainingsample.h

shapeclassifier.h

i
int i
Definition: gmock-matchers_test.cc:718

tesseract
Definition: baseapi.h:39

tesseract::operator+=
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:372

tesseract::kRatingEpsilon
const double kRatingEpsilon
Definition: errorcounter.cpp:36

tesseract::tprintf
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

tesseract::IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:170

tesseract::UNICHAR_SPACE
@ UNICHAR_SPACE
Definition: unicharset.h:36

tesseract::UNICHAR_BROKEN
@ UNICHAR_BROKEN
Definition: unicharset.h:38

tesseract::UNICHAR_JOINED
@ UNICHAR_JOINED
Definition: unicharset.h:37

tesseract::CountTypes
CountTypes
Definition: errorcounter.h:69

tesseract::CT_UNICHAR_TOPN_ERR
@ CT_UNICHAR_TOPN_ERR
Definition: errorcounter.h:76

tesseract::CT_UNICHAR_TOP_OK
@ CT_UNICHAR_TOP_OK
Definition: errorcounter.h:70

tesseract::CT_UNICHAR_TOP1_ERR
@ CT_UNICHAR_TOP1_ERR
Definition: errorcounter.h:74

tesseract::CT_UNICHAR_TOP2_ERR
@ CT_UNICHAR_TOP2_ERR
Definition: errorcounter.h:75

tesseract::CT_OK_MULTI_FONT
@ CT_OK_MULTI_FONT
Definition: errorcounter.h:83

tesseract::CT_OK_JOINED
@ CT_OK_JOINED
Definition: errorcounter.h:79

tesseract::CT_REJECT
@ CT_REJECT
Definition: errorcounter.h:81

tesseract::CT_REJECTED_JUNK
@ CT_REJECTED_JUNK
Definition: errorcounter.h:86

tesseract::CT_UNICHAR_TOPTOP_ERR
@ CT_UNICHAR_TOPTOP_ERR
Definition: errorcounter.h:77

tesseract::CT_FONT_ATTR_ERR
@ CT_FONT_ATTR_ERR
Definition: errorcounter.h:82

tesseract::CT_ACCEPTED_JUNK
@ CT_ACCEPTED_JUNK
Definition: errorcounter.h:87

tesseract::CT_OK_MULTI_UNICHAR
@ CT_OK_MULTI_UNICHAR
Definition: errorcounter.h:78

tesseract::CT_OK_BROKEN
@ CT_OK_BROKEN
Definition: errorcounter.h:80

tesseract::CT_SIZE
@ CT_SIZE
Definition: errorcounter.h:89

tesseract::CT_RANK
@ CT_RANK
Definition: errorcounter.h:85

tesseract::CT_NUM_RESULTS
@ CT_NUM_RESULTS
Definition: errorcounter.h:84

tesseract::FontInfoTable
Definition: fontinfo.h:160

tesseract::Image
Definition: image.h:25

tesseract::STATS::print
void print() const
Definition: statistc.cpp:547

tesseract::STATS::add
void add(int32_t value, int32_t count)
Definition: statistc.cpp:99

tesseract::UNICHARSET
Definition: unicharset.h:164

tesseract::UNICHARSET::has_special_codes
bool has_special_codes() const
Definition: unicharset.h:756

tesseract::UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279

tesseract::UNICHARSET::size
size_t size() const
Definition: unicharset.h:355

tesseract::ShapeClassifier
Definition: shapeclassifier.h:43

tesseract::ShapeClassifier::UnicharClassifySample
virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector< UnicharRating > *results)
Definition: shapeclassifier.cpp:41

tesseract::ShapeClassifier::GetUnicharset
virtual const UNICHARSET & GetUnicharset() const
Definition: shapeclassifier.cpp:86

tesseract::ShapeClassifier::DebugDisplay
void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id)
Definition: shapeclassifier.cpp:97

tesseract::TrainingSample
Definition: trainingsample.h:54

tesseract::TrainingSample::page_num
int page_num() const
Definition: trainingsample.h:131

tesseract::TrainingSample::class_id
UNICHAR_ID class_id() const
Definition: trainingsample.h:119

tesseract::ErrorCounter
Definition: errorcounter.h:94

tesseract::ErrorCounter::DebugNewErrors
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const std::vector< Image > &page_images, SampleIterator *it)
Definition: errorcounter.cpp:109

tesseract::ErrorCounter::ComputeErrorRate
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const std::vector< Image > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, std::string *fonts_report)
Definition: errorcounter.cpp:44

tesseract::SampleIterator
Definition: sampleiterator.h:91

tesseract::SampleIterator::sample_set
const TrainingSampleSet * sample_set() const
Definition: sampleiterator.h:141

tesseract::SampleIterator::GlobalSampleIndex
int GlobalSampleIndex() const
Definition: sampleiterator.cpp:126

tesseract::SampleIterator::AtEnd
bool AtEnd() const
Definition: sampleiterator.cpp:98

tesseract::SampleIterator::Begin
void Begin()
Definition: sampleiterator.cpp:86

tesseract::SampleIterator::MutableSample
TrainingSample * MutableSample() const
Definition: sampleiterator.cpp:113

tesseract::SampleIterator::Next
void Next()
Definition: sampleiterator.cpp:154

tesseract::TrainingSampleSet::NumFonts
int NumFonts() const
Definition: trainingsampleset.h:59

tesseract::TrainingSampleSet::SampleToString
std::string SampleToString(const TrainingSample &sample) const
Definition: trainingsampleset.cpp:247