All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
cube_tuning_params.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_tuning_params.cpp
3  * Description: Implementation of the CubeTuningParameters Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <string>
21 #include <vector>
22 #include "cube_tuning_params.h"
23 #include "tuning_params.h"
24 #include "cube_utils.h"
25 
26 namespace tesseract {
// NOTE(review): the function header that opens this body (listing line 27) is
// missing from this extract. The statements below assign default values to
// members and are presumably the CubeTuningParams constructor body -- confirm
// against the original file. Listing lines 32, 34 and 38-43 are also absent
// here, so further member defaults may exist that are not shown.
28  reco_wgt_ = 1.0;
29  size_wgt_ = 1.0;
30  char_bigrams_wgt_ = 1.0;
31  word_unigrams_wgt_ = 0.0;
33  beam_width_ = 32;
35  tp_feat_ = BMP;
36  conv_grid_size_ = 32;
37  hist_wind_wid_ = 0;
44  ood_wgt_ = 1.0;
45  num_wgt_ = 1.0;
46 
47 }
48 
50 }
51 
52 // Create an Object given the data file path and the language by loading
53 // the appropriate file.
// The parameter file name is built as <data_file_path><lang>.cube.params.
// Returns the loaded object on success. Returns NULL, after printing an error
// to stderr, when `obj` is null or when Load() fails on the parameter file;
// in the Load() failure case the object is deleted before returning.
// NOTE(review): listing line 56, which presumably declares and allocates
// `obj`, is missing from this extract -- confirm against the original file.
54 CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
55  const string &lang) {
57  if (!obj) {
58  fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
59  "allocate new tuning params object\n");
60  return NULL;
61  }
62 
// Build the parameter file name from the data path and the language.
63  string tuning_params_file;
64  tuning_params_file = data_file_path + lang;
65  tuning_params_file += ".cube.params";
66 
// On load failure the object is released and NULL is returned below.
67  if (!obj->Load(tuning_params_file)) {
68  fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
69  "load tuning parameters from %s\n", tuning_params_file.c_str());
70  delete obj;
71  obj = NULL;
72  }
73 
74  return obj;
75 }
76 
77 // Loads the params file.
// Reads tuning_params_file into memory, splits it into rows, and parses each
// row as a <name>=<value> pair whose value is stored in the matching member.
// Returns true when every row was parsed. Returns false, after printing an
// error to stderr, if the file cannot be read, has fewer than 8 rows, contains
// a row that does not split into exactly two tokens, has a numeric-looking
// value that sscanf cannot parse, or names an unknown parameter, classifier
// or feature type. Members assigned before a failing row keep their new
// values; nothing is rolled back.
// NOTE(review): several branch bodies below are missing from this extract
// (listing lines 138, 140, 148, 150, 152, 165, 167, 169 and 173) -- confirm
// against the original file before relying on this listing.
78 bool CubeTuningParams::Load(string tuning_params_file) {
79  // load the string into memory
80  string param_str;
81 
82  if (CubeUtils::ReadFileToString(tuning_params_file, &param_str) == false) {
83  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
84  "file %s\n", tuning_params_file.c_str());
85  return false;
86  }
87 
88  // split into lines
// Presumably each character of the delimiter string (CR and LF) acts as a
// separator -- verify against CubeUtils::SplitStringUsing.
89  vector<string> str_vec;
90  CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
// A file with fewer than 8 rows is rejected outright.
91  if (str_vec.size() < 8) {
92  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
93  "in parameter file is too low\n");
94  return false;
95  }
96 
97  // for all entries
// NOTE(review): `entry` is a signed int compared against the unsigned result
// of size(); compilers typically warn about this comparison.
98  for (int entry = 0; entry < str_vec.size(); entry++) {
99  // tokenize
100  vector<string> str_tok;
101 
102  // should be only two tokens
103  CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
104  if (str_tok.size() != 2) {
105  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
106  "line: %s.\n", str_vec[entry].c_str());
107  return false;
108  }
109 
// Only attempt a numeric parse when the value starts like a number (digit,
// sign or decimal point). Otherwise val stays 0; the Classifier and
// FeatureType branches below compare the raw value token as a string instead.
// NOTE(review): a numeric parameter given a non-numeric value is therefore
// silently stored as 0 rather than rejected.
110  double val = 0;
111  char peekchar = (str_tok[1].c_str())[0];
112  if ((peekchar >= '0' && peekchar <= '9') ||
113  peekchar == '-' || peekchar == '+' ||
114  peekchar == '.') {
115  // read the value
116  if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
117  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
118  "in line: %s.\n", str_vec[entry].c_str());
119  return false;
120  }
121  }
122 
123  // token type
// Dispatch on the parameter name (first token) and store the value.
124  if (str_tok[0] == "RecoWgt") {
125  reco_wgt_ = val;
126  } else if (str_tok[0] == "SizeWgt") {
127  size_wgt_ = val;
128  } else if (str_tok[0] == "CharBigramsWgt") {
129  char_bigrams_wgt_ = val;
130  } else if (str_tok[0] == "WordUnigramsWgt") {
131  word_unigrams_wgt_ = val;
132  } else if (str_tok[0] == "MaxSegPerChar") {
133  max_seg_per_char_ = static_cast<int>(val);
134  } else if (str_tok[0] == "BeamWidth") {
135  beam_width_ = static_cast<int>(val);
136  } else if (str_tok[0] == "Classifier") {
// The classifier is selected by name; any other name is an error.
// NOTE(review): the bodies of the NN and HYBRID_NN branches (listing lines
// 138 and 140) are missing from this extract.
137  if (str_tok[1] == "NN") {
139  } else if (str_tok[1] == "HYBRID_NN") {
141  } else {
142  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
143  "classifier type in line: %s.\n", str_vec[entry].c_str());
144  return false;
145  }
146  } else if (str_tok[0] == "FeatureType") {
// The feature type is selected by name; any other name is an error.
// NOTE(review): the bodies of the BMP, CHEBYSHEV and HYBRID branches (listing
// lines 148, 150 and 152) are missing from this extract.
147  if (str_tok[1] == "BMP") {
149  } else if (str_tok[1] == "CHEBYSHEV") {
151  } else if (str_tok[1] == "HYBRID") {
153  } else {
154  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
155  "type in line: %s.\n", str_vec[entry].c_str());
156  return false;
157  }
158  } else if (str_tok[0] == "ConvGridSize") {
159  conv_grid_size_ = static_cast<int>(val);
// NOTE(review): the next two members are assigned from a double without the
// static_cast used above, yet Save() formats both with %d -- confirm their
// declared types.
160  } else if (str_tok[0] == "HistWindWid") {
161  hist_wind_wid_ = val;
162  } else if (str_tok[0] == "MinConCompSize") {
163  min_con_comp_size_ = val;
// NOTE(review): the bodies of the next three branches (listing lines 165, 167
// and 169) are missing from this extract.
164  } else if (str_tok[0] == "MaxWordAspectRatio") {
166  } else if (str_tok[0] == "MinSpaceHeightRatio") {
168  } else if (str_tok[0] == "MaxSpaceHeightRatio") {
170  } else if (str_tok[0] == "CombinerRunThresh") {
171  combiner_run_thresh_ = val;
// NOTE(review): the body of the next branch (listing line 173) is missing
// from this extract.
172  } else if (str_tok[0] == "CombinerClassifierThresh") {
174  } else if (str_tok[0] == "OODWgt") {
175  ood_wgt_ = val;
176  } else if (str_tok[0] == "NumWgt") {
177  num_wgt_ = val;
178  } else {
// Any unrecognized parameter name aborts the load.
179  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
180  "in line: %s.\n", str_vec[entry].c_str());
181  return false;
182  }
183  }
184 
185  return true;
186 }
187 
188 // Save the parameters to a file.
// Opens file_name for binary writing and writes one <name>=<value> line per
// parameter, using 4 decimal places for the %.4f fields and %d for the others.
// Returns false, after printing an error to stderr, if the file cannot be
// opened; returns true otherwise.
// NOTE(review): the results of the fprintf and fclose calls are not checked,
// so a failed write still returns true.
// NOTE(review): no Classifier or FeatureType line is written here, although
// Load() accepts both names.
189 bool CubeTuningParams::Save(string file_name) {
190  FILE *params_file = fopen(file_name.c_str(), "wb");
191  if (params_file == NULL) {
192  fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
193  "%s for write.\n", file_name.c_str());
194  return false;
195  }
196 
197  fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
198  fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_);
199  fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_);
200  fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_);
201  fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_);
202  fprintf(params_file, "BeamWidth=%d\n", beam_width_);
203  fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_);
204  fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_);
205  fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_);
206  fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_);
207  fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_);
208  fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_);
209  fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_);
// NOTE(review): the continuation of the next call (listing line 211, which
// presumably supplies the value argument) is missing from this extract.
210  fprintf(params_file, "CombinerClassifierThresh=%.4f\n",
212  fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_);
213  fprintf(params_file, "NumWgt=%.4f\n", num_wgt_);
214 
215  fclose(params_file);
216  return true;
217 }
218 }
static bool ReadFileToString(const string &file_name, string *str)
Definition: cube_utils.cpp:195
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
Definition: cube_utils.cpp:230
bool Load(string file_name)
type_classifer tp_classifier_
#define NULL
Definition: host.h:144
static CubeTuningParams * Create(const string &data_file, const string &lang)