All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
cube_reco_context.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_reco_context.cpp
3  * Description: Implementation of the Cube Recognition Context Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <string>
21 #include <limits.h>
22 
23 #include "cube_reco_context.h"
24 
25 #include "classifier_factory.h"
26 #include "cube_tuning_params.h"
27 #include "dict.h"
28 #include "feature_bmp.h"
29 #include "tessdatamanager.h"
30 #include "tesseractclass.h"
31 #include "tess_lang_model.h"
32 
33 namespace tesseract {
34 
// Constructor body. The signature line is absent from this listing; the
// cross-reference index further down shows it as
// CubeRecoContext(Tesseract *tess_obj).
// Records the caller-supplied Tesseract pointer and puts every other member
// set here into its "nothing loaded yet" state.
43  tess_obj_ = tess_obj;
44  lang_ = "";
45  loaded_ = false;
// Component pointers start out NULL; Load() is what assigns them.
46  lang_mod_ = NULL;
47  params_ = NULL;
48  char_classifier_ = NULL;
49  char_set_ = NULL;
50  word_size_model_ = NULL;
51  char_bigrams_ = NULL;
52  word_unigrams_ = NULL;
53  noisy_input_ = false;
54  size_normalization_ = false;
// NOTE(review): tess_unicharset_ is assigned in Load() but is not reset in
// this body -- confirm it is initialized elsewhere before any pre-Load() read.
55 }
56 
// Destructor body (presumably ~CubeRecoContext(); the signature line is absent
// from this listing -- TODO confirm).
// Deletes each component pointer that is non-NULL and resets it to NULL
// afterwards. tess_obj_ and tess_unicharset_ are not deleted in this body.
58  if (char_classifier_ != NULL) {
59  delete char_classifier_;
60  char_classifier_ = NULL;
61  }
62 
63  if (word_size_model_ != NULL) {
64  delete word_size_model_;
65  word_size_model_ = NULL;
66  }
67 
68  if (char_set_ != NULL) {
69  delete char_set_;
70  char_set_ = NULL;
71  }
72 
73  if (char_bigrams_ != NULL) {
74  delete char_bigrams_;
75  char_bigrams_ = NULL;
76  }
77 
78  if (word_unigrams_ != NULL) {
79  delete word_unigrams_;
80  word_unigrams_ = NULL;
81  }
82 
83  if (lang_mod_ != NULL) {
84  delete lang_mod_;
85  lang_mod_ = NULL;
86  }
87 
88  if (params_ != NULL) {
89  delete params_;
90  params_ = NULL;
91  }
92 }
93 
98 bool CubeRecoContext::GetDataFilePath(string *path) const {
99  *path = tess_obj_->datadir.string();
100  return true;
101 }
102 
115 bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
116  UNICHARSET *tess_unicharset) {
117  ASSERT_HOST(tess_obj_ != NULL);
118  tess_unicharset_ = tess_unicharset;
119  string data_file_path;
120 
121  // Get the data file path.
122  if (GetDataFilePath(&data_file_path) == false) {
123  fprintf(stderr, "Unable to get data file path\n");
124  return false;
125  }
126 
127  // Get the language from the Tesseract object.
128  lang_ = tess_obj_->lang.string();
129 
130  // Create the char set.
131  if ((char_set_ =
132  CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
133  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
134  "CharSet\n");
135  return false;
136  }
137  // Create the language model.
138  string lm_file_name = data_file_path + lang_ + ".cube.lm";
139  string lm_params;
140  if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
141  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
142  "language model params from %s\n", lm_file_name.c_str());
143  return false;
144  }
145  lang_mod_ = new TessLangModel(lm_params, data_file_path,
146  tess_obj_->getDict().load_system_dawg,
147  tessdata_manager, this);
148  if (lang_mod_ == NULL) {
149  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create "
150  "TessLangModel\n");
151  return false;
152  }
153 
154  // Create the optional char bigrams object.
155  char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
156 
157  // Create the optional word unigrams object.
158  word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
159 
160  // Create the optional size model.
161  word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
162  char_set_, Contextual());
163 
164  // Load tuning params.
165  params_ = CubeTuningParams::Create(data_file_path, lang_);
166  if (params_ == NULL) {
167  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
168  "CubeTuningParams from %s\n", data_file_path.c_str());
169  return false;
170  }
171 
172  // Create the char classifier.
173  char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
174  lang_mod_, char_set_,
175  params_);
176  if (char_classifier_ == NULL) {
177  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
178  "CharClassifierFactory object from %s\n", data_file_path.c_str());
179  return false;
180  }
181 
182  loaded_ = true;
183 
184  return true;
185 }
186 
// Tail of the static factory; the first signature line is absent from this
// listing. The cross-reference index further down gives the full signature as
//   static CubeRecoContext * Create(Tesseract *tess_obj,
//       TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
// Allocates a CubeRecoContext for tess_obj and runs Load() on it. Returns the
// new object on success. On failure it writes an error to stderr and returns
// NULL, deleting the object first when Load() was the step that failed.
189  TessdataManager *tessdata_manager,
190  UNICHARSET *tess_unicharset) {
191  // create the object
192  CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
// NOTE(review): a plain new-expression normally throws instead of returning
// NULL, so this branch is likely unreachable -- confirm against build flags.
193  if (cntxt == NULL) {
194  fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create "
195  "CubeRecoContext object\n");
196  return NULL;
197  }
198  // load the necessary components
199  if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
200  fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
201  "CubeRecoContext object\n");
// Release the partially initialized context so it is not leaked.
202  delete cntxt;
203  return NULL;
204  }
205  // success
206  return cntxt;
207 }
208 } // tesseract}
static bool ReadFileToString(const string &file_name, string *str)
Definition: cube_utils.cpp:195
static CharBigrams * Create(const string &data_file_path, const string &lang)
#define ASSERT_HOST(x)
Definition: errcode.h:84
CubeRecoContext(Tesseract *tess_obj)
STRING datadir
Definition: ccutil.h:67
static WordUnigrams * Create(const string &data_file_path, const string &lang)
static WordSizeModel * Create(const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
static CharClassifier * Create(const string &data_file_path, const string &lang, LangModel *lang_mod, CharSet *char_set, TuningParams *params)
Dict & getDict()
Definition: classify.h:65
bool load_system_dawg
Definition: dict.h:554
STRING lang
Definition: ccutil.h:69
static CubeRecoContext * Create(Tesseract *tess_obj, TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
#define NULL
Definition: host.h:144
static CharSet * Create(TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
Definition: char_set.cpp:54
const char * string() const
Definition: strngs.cpp:193
bool GetDataFilePath(string *path) const
static CubeTuningParams * Create(const string &data_file, const string &lang)