tesseract-ocr.github.io/4.0.0-beta.1/a01061_source.html

 // File:        lstmrecognizer.cpp
 // Description: Top-level line recognizer class for LSTM-based networks.
 // Author:      Ray Smith
 // Created:     Thu May 02 10:59:06 PST 2013
 //
 // (C) Copyright 2013, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Include automatically generated configuration file if running autoconf.
 #ifdef HAVE_CONFIG_H
 #include "config_auto.h"
 #endif

 #include "lstmrecognizer.h"

 #include "allheaders.h"
 #include "callcpp.h"
 #include "dict.h"
 #include "genericheap.h"
 #include "helpers.h"
 #include "imagedata.h"
 #include "input.h"
 #include "lstm.h"
 #include "normalis.h"
 #include "pageres.h"
 #include "ratngs.h"
 #include "recodebeam.h"
 #include "scrollview.h"
 #include "shapetable.h"
 #include "statistc.h"
 #include "tprintf.h"

 namespace tesseract {

 // Max number of blob choices to return in any given position.
 // NOTE(review): not referenced anywhere in the visible part of this file.
 const int kMaxChoices = 4;
 // Default ratio between dict and non-dict words. Passed as the dict_ratio
 // argument of RecodeBeamSearch::Decode by the word-producing RecognizeLine.
 const double kDictRatio = 2.25;
 // Default certainty offset to give the dictionary a chance. Passed as the
 // cert_offset argument of RecodeBeamSearch::Decode alongside kDictRatio.
 const double kCertOffset = -0.085;

 // Builds an empty recognizer: no network, dictionary, beam search or debug
 // window yet, all training counters and hyper-parameters zeroed, and the
 // null character set to UNICHAR_BROKEN until a model is deserialized.
 LSTMRecognizer::LSTMRecognizer()
     : network_(nullptr),
       training_flags_(0),
       training_iteration_(0),
       sample_iteration_(0),
       null_char_(UNICHAR_BROKEN),
       learning_rate_(0.0f),
       momentum_(0.0f),
       adam_beta_(0.0f),
       dict_(nullptr),
       search_(nullptr),
       debug_win_(nullptr) {
 }

 // Releases the objects this recognizer allocates itself: the network created
 // by DeSerialize, the dictionary created by LoadDictionary and the lazily
 // built beam search.
 // NOTE(review): debug_win_ is not deleted here; confirm the debug window is
 // owned/cleaned up elsewhere.
 LSTMRecognizer::~LSTMRecognizer() {
   delete network_;
   delete dict_;
   delete search_;
 }

 // Loads a model from mgr, including the dictionary only if lang is not null.
 bool LSTMRecognizer::Load(const char* lang, TessdataManager* mgr) {
   TFile fp;
   if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false;
   if (!DeSerialize(mgr, &fp)) return false;
   if (lang == nullptr) return true;
   // Allow it to run without a dictionary.
   LoadDictionary(lang, mgr);
   return true;
 }

 // Writes the recognizer to fp. Returns false in case of error.
 // Written in order: network, [unicharset], network_str_, training_flags_,
 // training_iteration_, sample_iteration_, null_char_, adam_beta_,
 // learning_rate_, momentum_, [recoder]. The bracketed charset parts are
 // written inline only when mgr is null or lacks either the
 // TESSDATA_LSTM_RECODER or the TESSDATA_LSTM_UNICHARSET component, and the
 // recoder additionally only when IsRecoding() is true.
 bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const {
   const bool mgr_has_charsets =
       mgr != nullptr && mgr->IsComponentAvailable(TESSDATA_LSTM_RECODER) &&
       mgr->IsComponentAvailable(TESSDATA_LSTM_UNICHARSET);
   const bool embed_charsets = !mgr_has_charsets;

   if (!network_->Serialize(fp)) return false;
   if (embed_charsets && !GetUnicharset().save_to_file(fp)) return false;
   if (!network_str_.Serialize(fp)) return false;

   // Fixed-size scalars. The order here has to match the reads in DeSerialize.
   const bool scalars_written =
       fp->FWrite(&training_flags_, sizeof(training_flags_), 1) == 1 &&
       fp->FWrite(&training_iteration_, sizeof(training_iteration_), 1) == 1 &&
       fp->FWrite(&sample_iteration_, sizeof(sample_iteration_), 1) == 1 &&
       fp->FWrite(&null_char_, sizeof(null_char_), 1) == 1 &&
       fp->FWrite(&adam_beta_, sizeof(adam_beta_), 1) == 1 &&
       fp->FWrite(&learning_rate_, sizeof(learning_rate_), 1) == 1 &&
       fp->FWrite(&momentum_, sizeof(momentum_), 1) == 1;
   if (!scalars_written) return false;

   // The recoder is the final record, present only for embedded charsets.
   if (embed_charsets && IsRecoding()) return recoder_.Serialize(fp);
   return true;
 }

 // Reads the recognizer from fp. Returns false in case of error.
 // Any previously held network is deleted first. The unicharset and recoder
 // are read inline from fp when mgr is null or lacks either the
 // TESSDATA_LSTM_RECODER or the TESSDATA_LSTM_UNICHARSET component; otherwise
 // they are fetched from mgr via LoadCharsets. On success the network is
 // given this recognizer's randomizer and its x-scale factor is cached.
 bool LSTMRecognizer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
   delete network_;
   network_ = Network::CreateFromFile(fp);
   if (network_ == nullptr) return false;

   const bool mgr_has_charsets =
       mgr != nullptr && mgr->IsComponentAvailable(TESSDATA_LSTM_RECODER) &&
       mgr->IsComponentAvailable(TESSDATA_LSTM_UNICHARSET);
   const bool embedded_charsets = !mgr_has_charsets;

   if (embedded_charsets && !ccutil_.unicharset.load_from_file(fp, false))
     return false;
   if (!network_str_.DeSerialize(fp)) return false;

   // Fixed-size scalars, in the same order that Serialize writes them.
   const bool scalars_read =
       fp->FReadEndian(&training_flags_, sizeof(training_flags_), 1) == 1 &&
       fp->FReadEndian(&training_iteration_, sizeof(training_iteration_), 1) ==
           1 &&
       fp->FReadEndian(&sample_iteration_, sizeof(sample_iteration_), 1) == 1 &&
       fp->FReadEndian(&null_char_, sizeof(null_char_), 1) == 1 &&
       fp->FReadEndian(&adam_beta_, sizeof(adam_beta_), 1) == 1 &&
       fp->FReadEndian(&learning_rate_, sizeof(learning_rate_), 1) == 1 &&
       fp->FReadEndian(&momentum_, sizeof(momentum_), 1) == 1;
   if (!scalars_read) return false;

   // Either the recoder follows inline, or both charsets come from mgr.
   const bool charsets_ready =
       embedded_charsets ? LoadRecoder(fp) : LoadCharsets(mgr);
   if (!charsets_ready) return false;

   network_->SetRandomizer(&randomizer_);
   network_->CacheXScaleFactor(network_->XScaleFactor());
   return true;
 }

 // Loads the charsets from mgr.
 bool LSTMRecognizer::LoadCharsets(const TessdataManager* mgr) {
   TFile fp;
   if (!mgr->GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false;
   if (!ccutil_.unicharset.load_from_file(&fp, false)) return false;
   if (!mgr->GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false;
   if (!LoadRecoder(&fp)) return false;
   return true;
 }

 // Loads the recoder from fp when the model uses recoding; otherwise sets up
 // a pass-through recoder over the current unicharset and sets the
 // TF_COMPRESS_UNICHARSET training flag.
 // Returns false if the recoder cannot be read, or if the loaded recoder does
 // not encode UNICHAR_SPACE with UNICHAR_SPACE as its first code.
 bool LSTMRecognizer::LoadRecoder(TFile* fp) {
   if (!IsRecoding()) {
     recoder_.SetupPassThrough(GetUnicharset());
     training_flags_ |= TF_COMPRESS_UNICHARSET;
     return true;
   }
   if (!recoder_.DeSerialize(fp)) return false;
   // Sanity check: space has to survive the recoding unchanged.
   RecodedCharID space_code;
   recoder_.EncodeUnichar(UNICHAR_SPACE, &space_code);
   if (space_code(0) == UNICHAR_SPACE) return true;
   tprintf("Space was garbled in recoding!!\n");
   return false;
 }

 // Loads the dictionary if possible from the traineddata file.
 // Prints a warning message, and returns false but otherwise fails silently
 // and continues to work without it if loading fails.
 // Note that dictionary load is independent from DeSerialize, but dependent
 // on the unicharset matching. This enables training to deserialize a model
 // from checkpoint or restore without having to go back and reload the
 // dictionary.
 bool LSTMRecognizer::LoadDictionary(const char* lang, TessdataManager* mgr) {
   delete dict_;
   dict_ = new Dict(&ccutil_);
   dict_->SetupForLoad(Dict::GlobalDawgCache());
   dict_->LoadLSTM(lang, mgr);
   if (dict_->FinishLoad()) return true;  // Success.
   tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n",
           lang);
   delete dict_;
   dict_ = NULL;
   return false;
 }

 // Recognizes the line image, contained within image_data, returning the
 // ratings matrix and matching box_word for each WERD_RES in the output.
 void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
                                    bool debug, double worst_dict_cert,
                                    const TBOX& line_box,
                                    PointerVector<WERD_RES>* words) {
   NetworkIO outputs;
   float scale_factor;
   NetworkIO inputs;
   if (!RecognizeLine(image_data, invert, debug, false, false, &scale_factor,
                      &inputs, &outputs))
     return;
   if (search_ == NULL) {
     search_ =
         new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_);
   }
   search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert, NULL);
   search_->ExtractBestPathAsWords(line_box, scale_factor, debug,
                                   &GetUnicharset(), words);
 }

 // Helper computes the min, mean and standard deviation of the winning
 // output at every timestep whose winner is not the null character.
 // Outputs are quantized to buckets in [0, MAX_INT8] for the statistics and
 // scaled back to [0, 1] on return.
 void LSTMRecognizer::OutputStats(const NetworkIO& outputs, float* min_output,
                                  float* mean_output, float* sd) {
   const int kOutputScale = MAX_INT8;
   STATS stats(0, kOutputScale + 1);
   const int num_steps = outputs.Width();
   for (int t = 0; t < num_steps; ++t) {
     const int winner = outputs.BestLabel(t, NULL);
     if (winner == null_char_) continue;
     const float activation = outputs.f(t)[winner];
     stats.add(static_cast<int>(kOutputScale * activation), 1);
   }
   if (stats.get_total() == 0) {
     // All nulls: the photometric interpretation may be wrong, so report bad
     // statistics to let the other polarity win, even if it is not great.
     *min_output = 0.0f;
     *mean_output = 0.0f;
     *sd = 1.0f;
     return;
   }
   *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
   *mean_output = stats.mean() / kOutputScale;
   *sd = stats.sd() / kOutputScale;
 }

 // Runs the network forward over the line image in image_data.
 // On success returns true with *inputs holding the network input that was
 // finally kept, *outputs the corresponding network output, and *scale_factor
 // the reduction factor from image to coords (the network's x-scale factor
 // divided by the image scale reported by Input::PrepareLSTMInputs).
 // If invert is true and the minimum stat from OutputStats for the first pass
 // is below 0.5, the image is also tried inverted; the inverted result is kept
 // only if its min and mean are higher and its sd is lower. If the inverted
 // pass is rejected and re_invert is true, the forward pass is repeated on the
 // original inputs. If upside_down is true the image is rotated by 180
 // degrees before the network input is built. If debug is true, the result is
 // displayed and the activation path is printed.
 // Returns false if no image could be prepared, or if the network is in
 // training mode and the image is wider than kMaxImageWidth.
 bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
                                    bool debug, bool re_invert, bool upside_down,
                                    float* scale_factor, NetworkIO* inputs,
                                    NetworkIO* outputs) {
   // Maximum width of image to train on.
   const int kMaxImageWidth = 2560;
   // This ensures consistent recognition results.
   SetRandomSeed();
   int min_width = network_->XScaleFactor();
   Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width,
                                       &randomizer_, scale_factor);
   if (pix == NULL) {
     tprintf("Line cannot be recognized!!\n");
     return false;
   }
   // The width limit is only enforced while the network is training.
   if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
     tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
             pixGetHeight(pix));
     // pix is owned by this function, so release it on every exit path.
     pixDestroy(&pix);
     return false;
   }
   // In-place rotation: source and destination are the same pix.
   if (upside_down) pixRotate180(pix, pix);
   // Reduction factor from image to coords.
   *scale_factor = min_width / *scale_factor;
   inputs->set_int_mode(IsIntMode());
   // Re-seed before each input preparation so every pass sees the same
   // random sequence.
   SetRandomSeed();
   Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, inputs);
   network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs);
   // Check for auto inversion.
   float pos_min, pos_mean, pos_sd;
   OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
   if (invert && pos_min < 0.5) {
     // Run again inverted and see if it is any better.
     NetworkIO inv_inputs, inv_outputs;
     inv_inputs.set_int_mode(IsIntMode());
     SetRandomSeed();
     // In-place inversion of the same pix that produced *inputs.
     pixInvert(pix, pix);
     Input::PreparePixInput(network_->InputShape(), pix, &randomizer_,
                            &inv_inputs);
     network_->Forward(debug, inv_inputs, NULL, &scratch_space_, &inv_outputs);
     float inv_min, inv_mean, inv_sd;
     OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
     // The inverted pass has to win on all three statistics to be accepted.
     if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
       // Inverted did better. Use inverted data.
       if (debug) {
         tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
                 pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
       }
       *outputs = inv_outputs;
       *inputs = inv_inputs;
     } else if (re_invert) {
       // Inverting was not an improvement, so undo and run again, so the
       // outputs match the best forward result.
       // NOTE(review): *outputs was not overwritten by the inverted pass, so
       // this rerun presumably exists to restore network-internal state from
       // the original inputs - confirm.
       SetRandomSeed();
       network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs);
     }
   }
   pixDestroy(&pix);
   if (debug) {
     // Decode the kept outputs into labels purely for display and printout.
     GenericVector<int> labels, coords;
     LabelsFromOutputs(*outputs, &labels, &coords);
     DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_);
     DebugActivationPath(*outputs, labels, coords);
   }
   return true;
 }

 // Converts an array of labels to a utf-8 string, whether or not the labels
 // are augmented with character boundaries. Null labels contribute nothing;
 // every other label is decoded by DecodeLabel, which may consume several
 // labels at once.
 STRING LSTMRecognizer::DecodeLabels(const GenericVector<int>& labels) {
   STRING text;
   int pos = 0;
   while (pos < labels.size()) {
     int next = pos + 1;
     if (labels[pos] != null_char_) {
       text += DecodeLabel(labels, pos, &next, NULL);
     }
     pos = next;
   }
   return text;
 }

 // Displays the forward results in a window with the characters and
 // boundaries as determined by the labels and label_coords.
 void LSTMRecognizer::DisplayForward(const NetworkIO& inputs,
                                     const GenericVector<int>& labels,
                                     const GenericVector<int>& label_coords,
                                     const char* window_name,
                                     ScrollView** window) {
 #ifndef GRAPHICS_DISABLED  // do nothing if there's no graphics
   Pix* input_pix = inputs.ToPix();
   Network::ClearWindow(false, window_name, pixGetWidth(input_pix),
                        pixGetHeight(input_pix), window);
   int line_height = Network::DisplayImage(input_pix, *window);
   DisplayLSTMOutput(labels, label_coords, line_height, *window);
 #endif  // GRAPHICS_DISABLED
 }

 // Displays the labels and cuts at the corresponding xcoords.
 // Size of labels should match xcoords.
 // Null labels get a red cut line at their own x position; decoded characters
 // get green text and a green line at the x position of the midpoint index of
 // the labels they span. Does nothing when graphics are disabled.
 void LSTMRecognizer::DisplayLSTMOutput(const GenericVector<int>& labels,
                                        const GenericVector<int>& xcoords,
                                        int height, ScrollView* window) {
 #ifndef GRAPHICS_DISABLED  // do nothing if there's no graphics
   const int x_scale = network_->XScaleFactor();
   const int cut_bottom = height * 3 / 2;
   window->TextAttributes("Arial", height / 4, false, false, false);
   int pos = 0;
   while (pos < labels.size()) {
     int next = pos + 1;
     int cut_x = xcoords[pos] * x_scale;
     if (labels[pos] != null_char_) {
       window->Pen(ScrollView::GREEN);
       const char* text = DecodeLabel(labels, pos, &next, NULL);
       // A leading backslash is replaced by a doubled backslash for display.
       if (text[0] == '\\') text = "\\\\";
       cut_x = xcoords[(pos + next) / 2] * x_scale;
       window->Text(cut_x, height, text);
     } else {
       window->Pen(ScrollView::RED);
     }
     window->Line(cut_x, 0, cut_x, cut_bottom);
     pos = next;
   }
   window->Update();
 #endif  // GRAPHICS_DISABLED
 }

 // Prints debug output detailing the activation path that is implied by the
 // labels and xcoords: one DebugActivationRange line per label, preceded by a
 // null range if the first label does not start at x = 0.
 // xcoords must hold one more entry than labels (the end of the output).
 void LSTMRecognizer::DebugActivationPath(const NetworkIO& outputs,
                                          const GenericVector<int>& labels,
                                          const GenericVector<int>& xcoords) {
   const int first_x = xcoords[0];
   if (first_x > 0)
     DebugActivationRange(outputs, "<null>", null_char_, 0, first_x);
   int pos = 0;
   while (pos < labels.size()) {
     int next = pos + 1;
     if (labels[pos] == null_char_) {
       DebugActivationRange(outputs, "<null>", null_char_, xcoords[pos],
                            xcoords[next]);
     } else {
       // The decoded unichar id is requested but not used here.
       int unichar_id;
       const char* text = DecodeLabel(labels, pos, &next, &unichar_id);
       // The first label of the character carries the decoded text...
       DebugActivationRange(outputs, text, labels[pos], xcoords[pos],
                            xcoords[pos + 1]);
       // ...and any further labels it spans are printed individually.
       for (int part = pos + 1; part < next; ++part) {
         DebugActivationRange(outputs, DecodeSingleLabel(labels[part]),
                              labels[part], xcoords[part], xcoords[part + 1]);
       }
     }
     pos = next;
   }
 }

 // Prints debug output over the positions [x_start, x_end): for each position
 // the score of best_choice (as a percentage) and the strongest other output
 // with its score, followed by the mean and max score of best_choice.
 void LSTMRecognizer::DebugActivationRange(const NetworkIO& outputs,
                                           const char* label, int best_choice,
                                           int x_start, int x_end) {
   tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
   const int range_width = x_end - x_start;
   double peak_score = 0.0;
   double average_score = 0.0;
   for (int x = x_start; x < x_end; ++x) {
     const float* activations = outputs.f(x);
     const double score = activations[best_choice] * 100.0;
     if (score > peak_score) peak_score = score;
     average_score += score / range_width;
     // Find the strongest output other than best_choice at this position.
     int rival = 0;
     double rival_score = 0.0;
     for (int c = 0; c < outputs.NumFeatures(); ++c) {
       if (c == best_choice) continue;
       if (activations[c] > rival_score) {
         rival = c;
         rival_score = activations[c];
       }
     }
     tprintf(" %.3g(%s=%d=%.3g)", score, DecodeSingleLabel(rival), rival,
             rival_score * 100.0);
   }
   tprintf(", Mean=%g, max=%g\n", average_score, peak_score);
 }

 // Helper returns true if the null_char output at t reaches null_thr, or if
 // the best label other than null_char is space and the null_char output is
 // greater than the space output, in which case the null is used anyway.
 static bool NullIsBest(const NetworkIO& output, float null_thr,
                        int null_char, int t) {
   const float* scores = output.f(t);
   const float null_score = scores[null_char];
   if (null_score >= null_thr) return true;
   const int best_non_null = output.BestLabel(t, null_char, null_char, NULL);
   return best_non_null == UNICHAR_SPACE &&
          null_score > scores[UNICHAR_SPACE];
 }

 // Converts the network output to a sequence of labels, with the start xcoord
 // of each label and one additional final xcoord for the end of the output.
 // SimpleTextOutput() selects the conversion: the simple per-position method
 // when true, the beam-search re-encoding method otherwise.
 void LSTMRecognizer::LabelsFromOutputs(const NetworkIO& outputs,
                                        GenericVector<int>* labels,
                                        GenericVector<int>* xcoords) {
   if (!SimpleTextOutput()) {
     LabelsViaReEncode(outputs, labels, xcoords);
     return;
   }
   LabelsViaSimpleText(outputs, labels, xcoords);
 }

 // Converts the network output to labels and xcoords by running the beam
 // search and extracting its best path, which (per the original note)
 // contains only legal sequences of subcodes for CJK.
 // The search is created on first use. It is run with a dict ratio of 1.0, a
 // certainty offset of 0.0 and RecodeBeamSearch::kMinCertainty as the worst
 // dictionary certainty.
 void LSTMRecognizer::LabelsViaReEncode(const NetworkIO& output,
                                        GenericVector<int>* labels,
                                        GenericVector<int>* xcoords) {
   const double kUnitDictRatio = 1.0;
   const double kZeroCertOffset = 0.0;
   if (search_ == NULL) {
     search_ =
         new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_);
   }
   search_->Decode(output, kUnitDictRatio, kZeroCertOffset,
                   RecodeBeamSearch::kMinCertainty, NULL);
   search_->ExtractBestPathAsLabels(labels, xcoords);
 }

 // Converts the network output to a sequence of labels using the simple
 // character model: each position is a char, and the null_char_ is mainly
 // intended for tail padding. Positions whose best label is null_char_ are
 // dropped; for every other position the best label and its timestep are
 // recorded. xcoords ends with one extra entry equal to the output width.
 void LSTMRecognizer::LabelsViaSimpleText(const NetworkIO& output,
                                          GenericVector<int>* labels,
                                          GenericVector<int>* xcoords) {
   labels->truncate(0);
   xcoords->truncate(0);
   const int num_steps = output.Width();
   for (int t = 0; t < num_steps; ++t) {
     float best_score = 0.0f;
     const int winner = output.BestLabel(t, &best_score);
     if (winner == null_char_) continue;
     labels->push_back(winner);
     xcoords->push_back(t);
   }
   xcoords->push_back(num_steps);
 }

 // Returns a string corresponding to the label starting at start. Sets *end
 // to the next start and if non-null, *decoded to the unichar id.
 // Without recoding a label is a unichar id and exactly one label is
 // consumed. With recoding, labels are accumulated into a code (null labels
 // in between are skipped) until the code decodes to a valid unichar id and
 // the labels end, the code has reached its maximum length, or the next label
 // is a valid first code. Returns "<null>" for a null label, " " for space
 // and "<Undecodable>" if no valid code is found; in that last case *decoded
 // is not written and *end stays at start + 1.
 const char* LSTMRecognizer::DecodeLabel(const GenericVector<int>& labels,
                                         int start, int* end, int* decoded) {
   *end = start + 1;
   const int first_label = labels[start];
   if (!IsRecoding()) {
     // Labels are unichar ids already.
     if (decoded != NULL) *decoded = first_label;
     if (first_label == null_char_) return "<null>";
     if (first_label == UNICHAR_SPACE) return " ";
     return GetUnicharset().get_normed_unichar(first_label);
   }

   // Decode labels via recoder_.
   RecodedCharID code;
   if (first_label == null_char_) {
     if (decoded != NULL) {
       code.Set(0, null_char_);
       *decoded = recoder_.DecodeUnichar(code);
     }
     return "<null>";
   }
   const int num_labels = labels.size();
   int index = start;
   while (index < num_labels && code.length() < RecodedCharID::kMaxCodeLen) {
     const int next_code = labels[index++];
     code.Set(code.length(), next_code);
     // Skip over any nulls that follow this code.
     while (index < num_labels && labels[index] == null_char_) ++index;
     const int uni_id = recoder_.DecodeUnichar(code);
     if (uni_id == INVALID_UNICHAR_ID) continue;
     // If the next label isn't a valid first code, then we need to continue
     // extending even if we have a valid uni_id from this prefix.
     const bool can_stop_here =
         index == num_labels ||
         code.length() == RecodedCharID::kMaxCodeLen ||
         recoder_.IsValidFirstCode(labels[index]);
     if (!can_stop_here) continue;
     *end = index;
     if (decoded != NULL) *decoded = uni_id;
     if (uni_id == UNICHAR_SPACE) return " ";
     return GetUnicharset().get_normed_unichar(uni_id);
   }
   return "<Undecodable>";
 }

 // Returns a string corresponding to a given single label id, falling back to
 // a default of ".." for part of a multi-label unichar-id.
 const char* LSTMRecognizer::DecodeSingleLabel(int label) {
   if (label == null_char_) return "<null>";
   if (IsRecoding()) {
     // Decode label via recoder_.
     RecodedCharID code;
     code.Set(0, label);
     label = recoder_.DecodeUnichar(code);
     if (label == INVALID_UNICHAR_ID) return "..";  // Part of a bigger code.
   }
   if (label == UNICHAR_SPACE) return " ";
   return GetUnicharset().get_normed_unichar(label);
 }

 }  // namespace tesseract.
tesseract::LSTMRecognizer::sample_iteration_
inT32 sample_iteration_
Definition: lstmrecognizer.h:287

tesseract::LSTMRecognizer::DisplayForward
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
Definition: lstmrecognizer.cpp:313

MAX_INT8
#define MAX_INT8
Definition: host.h:60

tesseract::Dict::SetupForLoad
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:206

ratngs.h

tesseract::Network::Serialize
virtual bool Serialize(TFile *fp) const
Definition: network.cpp:153

tesseract::TFile::FWrite
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:148

tesseract::Dict
Definition: dict.h:87

STATS::add
void add(inT32 value, inT32 count)
Definition: statistc.cpp:99

scrollview.h

tesseract::TESSDATA_LSTM_UNICHARSET
Definition: tessdatamanager.h:83

callcpp.h

tesseract::Dict::FinishLoad
bool FinishLoad()
Definition: dict.cpp:328

tesseract::LSTMRecognizer::DebugActivationPath
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
Definition: lstmrecognizer.cpp:356

tesseract::TESSDATA_LSTM
Definition: tessdatamanager.h:79

ScrollView::GREEN
Definition: scrollview.h:111

tesseract::LSTMRecognizer::LoadRecoder
bool LoadRecoder(TFile *fp)
Definition: lstmrecognizer.cpp:145

tesseract::kCertOffset
const double kCertOffset
Definition: lstmrecognizer.cpp:50

tesseract::NetworkIO::NumFeatures
int NumFeatures() const
Definition: networkio.h:111

tesseract::Network::SetRandomizer
virtual void SetRandomizer(TRand *randomizer)
Definition: network.cpp:140

tesseract::LSTMRecognizer::ccutil_
CCUtil ccutil_
Definition: lstmrecognizer.h:273

tesseract::LSTMRecognizer::LabelsFromOutputs
void LabelsFromOutputs(const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:424

STATS::sd
double sd() const
Definition: statistc.cpp:149

tesseract::LSTMRecognizer::DebugActivationRange
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
Definition: lstmrecognizer.cpp:383

tesseract::RecodeBeamSearch::Decode
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
Definition: recodebeam.cpp:76

tesseract::LSTMRecognizer::debug_win_
ScrollView * debug_win_
Definition: lstmrecognizer.h:307

normalis.h

ScrollView::Update
static void Update()
Definition: scrollview.cpp:715

tesseract::LSTMRecognizer::IsRecoding
bool IsRecoding() const
Definition: lstmrecognizer.h:79

dict.h

tesseract
Definition: baseapi.cpp:84

recodebeam.h

tesseract::RecodedCharID
Definition: unicharcompress.h:34

tesseract::LSTMRecognizer::SetRandomSeed
void SetRandomSeed()
Definition: lstmrecognizer.h:225

tesseract::LSTMRecognizer::OutputStats
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
Definition: lstmrecognizer.cpp:203

tesseract::LSTMRecognizer::network_
Network * network_
Definition: lstmrecognizer.h:270

ScrollView::RED
Definition: scrollview.h:109

tesseract::Input::PreparePixInput
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
Definition: input.cpp:117

tesseract::RecodedCharID::kMaxCodeLen
static const int kMaxCodeLen
Definition: unicharcompress.h:37

GenericVector::size
int size() const
Definition: genericvector.h:72

tesseract::LSTMRecognizer::Serialize
bool Serialize(const TessdataManager *mgr, TFile *fp) const
Definition: lstmrecognizer.cpp:83

ScrollView::Line
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:538

tesseract::TESSDATA_LSTM_RECODER
Definition: tessdatamanager.h:84

tesseract::Network::XScaleFactor
virtual int XScaleFactor() const
Definition: network.h:209

tesseract::LSTMRecognizer::training_iteration_
inT32 training_iteration_
Definition: lstmrecognizer.h:285

lstmrecognizer.h

tesseract::Network::ClearWindow
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
Definition: network.cpp:309

ScrollView
Definition: scrollview.h:102

tesseract::LSTMRecognizer::search_
RecodeBeamSearch * search_
Definition: lstmrecognizer.h:303

tesseract::kMaxChoices
const int kMaxChoices
Definition: lstmrecognizer.cpp:46

tesseract::LSTMRecognizer::null_char_
inT32 null_char_
Definition: lstmrecognizer.h:290

tesseract::TessdataManager::IsComponentAvailable
bool IsComponentAvailable(TessdataType type) const
Definition: tessdatamanager.h:170

tprintf
#define tprintf(...)
Definition: tprintf.h:31

tesseract::TessdataManager::GetComponent
bool GetComponent(TessdataType type, TFile *fp)
Definition: tessdatamanager.cpp:158

UNICHARSET::save_to_file
bool save_to_file(const char *const filename) const
Definition: unicharset.h:347

tesseract::LSTMRecognizer::recoder_
UnicharCompress recoder_
Definition: lstmrecognizer.h:277

tesseract::Network::Forward
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)
Definition: network.h:262

tesseract::Network::InputShape
virtual StaticShape InputShape() const
Definition: network.h:127

UNICHAR_BROKEN
Definition: unicharset.h:37

tesseract::TessdataManager
Definition: tessdatamanager.h:131

tesseract::LSTMRecognizer::SimpleTextOutput
bool SimpleTextOutput() const
Definition: lstmrecognizer.h:76

GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:137

tesseract::Network::IsTraining
bool IsTraining() const
Definition: network.h:115

tesseract::LSTMRecognizer::dict_
Dict * dict_
Definition: lstmrecognizer.h:301

STATS::mean
double mean() const
Definition: statistc.cpp:133

tesseract::LSTMRecognizer::DecodeLabel
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
Definition: lstmrecognizer.cpp:469

tesseract::RecodeBeamSearch::ExtractBestPathAsLabels
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:100

tesseract::UnicharCompress::EncodeUnichar
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
Definition: unicharcompress.cpp:289

tesseract::NetworkIO
Definition: networkio.h:39

tesseract::RecodeBeamSearch
Definition: recodebeam.h:176

tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:68

ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:726

UNICHAR_SPACE
Definition: unicharset.h:35

GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:788

tesseract::LSTMRecognizer::network_str_
STRING network_str_
Definition: lstmrecognizer.h:280

tesseract::NetworkIO::set_int_mode
void set_int_mode(bool is_quantized)
Definition: networkio.h:130

GenericVector< int >

STATS::min_bucket
inT32 min_bucket() const
Definition: statistc.cpp:204

tesseract::Network::CacheXScaleFactor
virtual void CacheXScaleFactor(int factor)
Definition: network.h:215

tesseract::NetworkIO::f
float * f(int t)
Definition: networkio.h:115

UNICHARSET::get_normed_unichar
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:827

ScrollView::Text
void Text(int x, int y, const char *mystring)
Definition: scrollview.cpp:658

tesseract::LSTMRecognizer::RecognizeLine
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words)
Definition: lstmrecognizer.cpp:183

tesseract::Dict::GlobalDawgCache
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198

tesseract::LSTMRecognizer::adam_beta_
float adam_beta_
Definition: lstmrecognizer.h:295

tesseract::LSTMRecognizer::DecodeLabels
STRING DecodeLabels(const GenericVector< int > &labels)
Definition: lstmrecognizer.cpp:298

tesseract::LSTMRecognizer::~LSTMRecognizer
~LSTMRecognizer()
Definition: lstmrecognizer.cpp:65

tprintf.h

STRING
Definition: strngs.h:45

tesseract::LSTMRecognizer::DeSerialize
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
Definition: lstmrecognizer.cpp:105

tesseract::RecodedCharID::Set
void Set(int index, int value)
Definition: unicharcompress.h:44

tesseract::ImageData
Definition: imagedata.h:103

tesseract::LSTMRecognizer::learning_rate_
float learning_rate_
Definition: lstmrecognizer.h:292

tesseract::kDictRatio
const double kDictRatio
Definition: lstmrecognizer.cpp:48

TBOX
Definition: rect.h:30

tesseract::TFile
Definition: serialis.h:51

tesseract::LSTMRecognizer::IsIntMode
bool IsIntMode() const
Definition: lstmrecognizer.h:77

lstm.h

tesseract::RecodedCharID::length
int length() const
Definition: unicharcompress.h:57

ScrollView::TextAttributes
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
Definition: scrollview.cpp:641

statistc.h

tesseract::UnicharCompress::Serialize
bool Serialize(TFile *fp) const
Definition: unicharcompress.cpp:306

tesseract::NetworkIO::Width
int Width() const
Definition: networkio.h:107

UNICHARSET::load_from_file
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:387

STATS::get_total
inT32 get_total() const
Definition: statistc.h:86

tesseract::Network::CreateFromFile
static Network * CreateFromFile(TFile *fp)
Definition: network.cpp:203

STATS
Definition: statistc.h:33

tesseract::Network::DisplayImage
static int DisplayImage(Pix *pix, ScrollView *window)
Definition: network.cpp:332

tesseract::TFile::FReadEndian
int FReadEndian(void *buffer, int size, int count)
Definition: serialis.cpp:97

tesseract::PointerVector< WERD_RES >

STRING::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: strngs.cpp:163

tesseract::RecodeBeamSearch::kMinCertainty
static const float kMinCertainty
Definition: recodebeam.h:213

tesseract::UnicharCompress::SetupPassThrough
void SetupPassThrough(const UNICHARSET &unicharset)
Definition: unicharcompress.cpp:222

tesseract::LSTMRecognizer::LoadCharsets
bool LoadCharsets(const TessdataManager *mgr)
Definition: lstmrecognizer.cpp:135

tesseract::NetworkIO::BestLabel
int BestLabel(int t, float *score) const
Definition: networkio.h:161

helpers.h

tesseract::LSTMRecognizer::training_flags_
inT32 training_flags_
Definition: lstmrecognizer.h:283

tesseract::LSTMRecognizer::DisplayLSTMOutput
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
Definition: lstmrecognizer.cpp:329

imagedata.h

genericheap.h

tesseract::LSTMRecognizer::LoadDictionary
bool LoadDictionary(const char *lang, TessdataManager *mgr)
Definition: lstmrecognizer.cpp:168

input.h

shapetable.h

tesseract::NetworkIO::ToPix
Pix * ToPix() const
Definition: networkio.cpp:291

tesseract::Input::PrepareLSTMInputs
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
Definition: input.cpp:89

tesseract::LSTMRecognizer::null_char
int null_char() const
Definition: lstmrecognizer.h:154

tesseract::RecodeBeamSearch::ExtractBestPathAsWords
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words)
Definition: recodebeam.cpp:138

tesseract::LSTMRecognizer::DecodeSingleLabel
const char * DecodeSingleLabel(int label)
Definition: lstmrecognizer.cpp:511

tesseract::LSTMRecognizer::randomizer_
TRand randomizer_
Definition: lstmrecognizer.h:298

tesseract::LSTMRecognizer::LabelsViaReEncode
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:436

tesseract::LSTMRecognizer::GetUnicharset
const UNICHARSET & GetUnicharset() const
Definition: lstmrecognizer.h:139

pageres.h

STRING::Serialize
bool Serialize(FILE *fp) const
Definition: strngs.cpp:148

tesseract::LSTMRecognizer::LSTMRecognizer
LSTMRecognizer()
Definition: lstmrecognizer.cpp:52

null_char_
int null_char_
Definition: unicharcompress_test.cc:160

tesseract::LSTMRecognizer::Load
bool Load(const char *lang, TessdataManager *mgr)
Definition: lstmrecognizer.cpp:72

tesseract::UnicharCompress::DeSerialize
bool DeSerialize(TFile *fp)
Definition: unicharcompress.cpp:311

tesseract::UnicharCompress::IsValidFirstCode
bool IsValidFirstCode(int code) const
Definition: unicharcompress.h:176

tesseract::UnicharCompress::DecodeUnichar
int DecodeUnichar(const RecodedCharID &code) const
Definition: unicharcompress.cpp:297

tesseract::LSTMRecognizer::scratch_space_
NetworkScratch scratch_space_
Definition: lstmrecognizer.h:299

tesseract::LSTMRecognizer::momentum_
float momentum_
Definition: lstmrecognizer.h:293

tesseract::TF_COMPRESS_UNICHARSET
Definition: lstmrecognizer.h:48

tesseract::LSTMRecognizer::LabelsViaSimpleText
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:450

tesseract::Dict::LoadLSTM
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
Definition: dict.cpp:307

tesseract-c_api-demo.lang
string lang
Definition: tesseract-c_api-demo.py:28