All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
conv_net_classifier.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: conv_net_classifier.cpp
3  * Description: Implementation of Convolutional-NeuralNet Character Classifier
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <algorithm>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string>
24 #include <vector>
25 #include <wctype.h>
26 
27 #include "char_set.h"
28 #include "classifier_base.h"
29 #include "const.h"
30 #include "conv_net_classifier.h"
31 #include "cube_utils.h"
32 #include "feature_base.h"
33 #include "feature_bmp.h"
34 #include "tess_lang_model.h"
35 
36 namespace tesseract {
37 
39  TuningParams *params,
40  FeatureBase *feat_extract)
41  : CharClassifier(char_set, params, feat_extract) {
42  char_net_ = NULL;
43  net_input_ = NULL;
44  net_output_ = NULL;
45 }
46 
48  if (char_net_ != NULL) {
49  delete char_net_;
50  char_net_ = NULL;
51  }
52 
53  if (net_input_ != NULL) {
54  delete []net_input_;
55  net_input_ = NULL;
56  }
57 
58  if (net_output_ != NULL) {
59  delete []net_output_;
60  net_output_ = NULL;
61  }
62 }
63 
69 bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
70  return false;
71 }
72 
78 bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
79  // TODO(ahmadab): implementation of parameter initializing.
80  return false;
81 }
82 
86 void ConvNetCharClassifier::Fold() {
87  // in case insensitive mode
88  if (case_sensitive_ == false) {
89  int class_cnt = char_set_->ClassCount();
90  // fold case
91  for (int class_id = 0; class_id < class_cnt; class_id++) {
92  // get class string
93  const char_32 *str32 = char_set_->ClassString(class_id);
94  // get the upper case form of the string
95  string_32 upper_form32 = str32;
96  for (int ch = 0; ch < upper_form32.length(); ch++) {
97  if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
98  upper_form32[ch] = towupper(upper_form32[ch]);
99  }
100  }
101 
102  // find out the upperform class-id if any
103  int upper_class_id =
104  char_set_->ClassID(reinterpret_cast<const char_32 *>(
105  upper_form32.c_str()));
106  if (upper_class_id != -1 && class_id != upper_class_id) {
107  float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
108  net_output_[class_id] = max_out;
109  net_output_[upper_class_id] = max_out;
110  }
111  }
112  }
113 
114  // The folding sets specify how groups of classes should be folded
115  // Folding involved assigning a min-activation to all the members
116  // of the folding set. The min-activation is a fraction of the max-activation
117  // of the members of the folding set
118  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
119  if (fold_set_len_[fold_set] == 0)
120  continue;
121  float max_prob = net_output_[fold_sets_[fold_set][0]];
122  for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
123  if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
124  max_prob = net_output_[fold_sets_[fold_set][ch]];
125  }
126  }
127  for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
128  net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
129  net_output_[fold_sets_[fold_set][ch]]);
130  }
131  }
132 }
133 
138 bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
139  if (char_net_ == NULL) {
140  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
141  "NeuralNet is NULL\n");
142  return false;
143  }
144  int feat_cnt = char_net_->in_cnt();
145  int class_cnt = char_set_->ClassCount();
146 
147  // allocate i/p and o/p buffers if needed
148  if (net_input_ == NULL) {
149  net_input_ = new float[feat_cnt];
150  if (net_input_ == NULL) {
151  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
152  "unable to allocate memory for input nodes\n");
153  return false;
154  }
155 
156  net_output_ = new float[class_cnt];
157  if (net_output_ == NULL) {
158  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
159  "unable to allocate memory for output nodes\n");
160  return false;
161  }
162  }
163 
164  // compute input features
165  if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
166  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
167  "unable to compute features\n");
168  return false;
169  }
170 
171  if (char_net_ != NULL) {
172  if (char_net_->FeedForward(net_input_, net_output_) == false) {
173  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
174  "unable to run feed-forward\n");
175  return false;
176  }
177  } else {
178  return false;
179  }
180  Fold();
181  return true;
182 }
183 
188  if (RunNets(char_samp) == false) {
189  return 0;
190  }
191  return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
192 }
193 
199  // run the needed nets
200  if (RunNets(char_samp) == false) {
201  return NULL;
202  }
203 
204  int class_cnt = char_set_->ClassCount();
205 
206  // create an altlist
207  CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
208  if (alt_list == NULL) {
209  fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::Classify): "
210  "returning emtpy CharAltList\n");
211  return NULL;
212  }
213 
214  for (int out = 1; out < class_cnt; out++) {
215  int cost = CubeUtils::Prob2Cost(net_output_[out]);
216  alt_list->Insert(out, cost);
217  }
218 
219  return alt_list;
220 }
221 
226  if (char_net_ != NULL) {
227  delete char_net_;
228  char_net_ = NULL;
229  }
230  char_net_ = char_net;
231 }
232 
237 bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
238  const string &lang,
239  LangModel *lang_mod) {
240  fold_set_cnt_ = 0;
241  string fold_file_name;
242  fold_file_name = data_file_path + lang;
243  fold_file_name += ".cube.fold";
244 
245  // folding sets are optional
246  FILE *fp = fopen(fold_file_name.c_str(), "rb");
247  if (fp == NULL) {
248  return true;
249  }
250  fclose(fp);
251 
252  string fold_sets_str;
253  if (!CubeUtils::ReadFileToString(fold_file_name,
254  &fold_sets_str)) {
255  return false;
256  }
257 
258  // split into lines
259  vector<string> str_vec;
260  CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
261  fold_set_cnt_ = str_vec.size();
262 
263  fold_sets_ = new int *[fold_set_cnt_];
264  if (fold_sets_ == NULL) {
265  return false;
266  }
267  fold_set_len_ = new int[fold_set_cnt_];
268  if (fold_set_len_ == NULL) {
269  fold_set_cnt_ = 0;
270  return false;
271  }
272 
273  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
274  reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
275  &str_vec[fold_set]);
276 
277  // if all or all but one character are invalid, invalidate this set
278  if (str_vec[fold_set].length() <= 1) {
279  fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
280  "invalidating folding set %d\n", fold_set);
281  fold_set_len_[fold_set] = 0;
282  fold_sets_[fold_set] = NULL;
283  continue;
284  }
285 
286  string_32 str32;
287  CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
288  fold_set_len_[fold_set] = str32.length();
289  fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
290  if (fold_sets_[fold_set] == NULL) {
291  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): "
292  "could not allocate folding set\n");
293  fold_set_cnt_ = fold_set;
294  return false;
295  }
296  for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
297  fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
298  }
299  }
300  return true;
301 }
302 
306 bool ConvNetCharClassifier::Init(const string &data_file_path,
307  const string &lang,
308  LangModel *lang_mod) {
309  if (init_) {
310  return true;
311  }
312 
313  // load the nets if any. This function will return true if the net file
314  // does not exist. But will fail if the net did not pass the sanity checks
315  if (!LoadNets(data_file_path, lang)) {
316  return false;
317  }
318 
319  // load the folding sets if any. This function will return true if the
320  // file does not exist. But will fail if the it did not pass the sanity checks
321  if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
322  return false;
323  }
324 
325  init_ = true;
326  return true;
327 }
328 
334 bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
335  const string &lang) {
336  string char_net_file;
337 
338  // add the lang identifier
339  char_net_file = data_file_path + lang;
340  char_net_file += ".cube.nn";
341 
342  // neural network is optional
343  FILE *fp = fopen(char_net_file.c_str(), "rb");
344  if (fp == NULL) {
345  return true;
346  }
347  fclose(fp);
348 
349  // load main net
350  char_net_ = tesseract::NeuralNet::FromFile(char_net_file);
351  if (char_net_ == NULL) {
352  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
353  "could not load %s\n", char_net_file.c_str());
354  return false;
355  }
356 
357  // validate net
358  if (char_net_->in_cnt()!= feat_extract_->FeatureCnt()) {
359  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
360  "could not validate net %s\n", char_net_file.c_str());
361  return false;
362  }
363 
364  // alloc net i/o buffers
365  int feat_cnt = char_net_->in_cnt();
366  int class_cnt = char_set_->ClassCount();
367 
368  if (char_net_->out_cnt() != class_cnt) {
369  fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
370  "output count (%d) and class count (%d) are not equal\n",
371  char_net_->out_cnt(), class_cnt);
372  return false;
373  }
374 
375  // allocate i/p and o/p buffers if needed
376  if (net_input_ == NULL) {
377  net_input_ = new float[feat_cnt];
378  if (net_input_ == NULL) {
379  return false;
380  }
381 
382  net_output_ = new float[class_cnt];
383  if (net_output_ == NULL) {
384  return false;
385  }
386  }
387 
388  return true;
389 }
390 } // tesseract
virtual int CharCost(CharSamp *char_samp)
#define MAX(x, y)
Definition: ndminx.h:24
virtual bool SetLearnParam(char *var_name, float val)
bool Insert(int class_id, int cost, void *tag=NULL)
static int Prob2Cost(double prob_val)
Definition: cube_utils.cpp:37
int in_cnt() const
Definition: neural_net.h:40
virtual bool ComputeFeatures(CharSamp *char_samp, float *features)=0
void SetNet(tesseract::NeuralNet *net)
basic_string< char_32 > string_32
Definition: string_32.h:41
virtual bool Train(CharSamp *char_samp, int ClassID)
static bool ReadFileToString(const string &file_name, string *str)
Definition: cube_utils.cpp:195
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
Definition: cube_utils.cpp:266
int ClassID(const char_32 *str) const
Definition: char_set.h:54
bool FeedForward(const Type *inputs, Type *outputs)
Definition: neural_net.cpp:79
int out_cnt() const
Definition: neural_net.h:41
ConvNetCharClassifier(CharSet *char_set, TuningParams *params, FeatureBase *feat_extract)
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
Definition: cube_utils.cpp:230
virtual CharAltList * Classify(CharSamp *char_samp)
signed int char_32
Definition: string_32.h:40
int ClassCount() const
Definition: char_set.h:111
#define NULL
Definition: host.h:144
virtual int FeatureCnt()=0
const char_32 * ClassString(int class_id) const
Definition: char_set.h:104
static NeuralNet * FromFile(const string file_name)
Definition: neural_net.cpp:204