All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
word_altlist.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: word_altlist.cpp
3  * Description: Implementation of the Word Alternate List Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "word_altlist.h"
21 
22 namespace tesseract {
// Constructor tail: hands max_alt to the AltList base and starts with no
// word array (word_alt_ == NULL); Insert() allocates it on first use.
// NOTE(review): the signature line is missing from this listing; the
// cross-reference list at the end suggests WordAltList(int max_alt) - confirm.
24  : AltList(max_alt) {
25  word_alt_ = NULL;
26 }
27 
// Releases the word storage: every non-NULL string in word_alt_[0, alt_cnt_)
// is freed, then the pointer array itself, and word_alt_ is reset to NULL.
// alt_cost_ and alt_tag_ are not touched here.
// NOTE(review): the signature line is missing from this listing; this is
// presumably the destructor - confirm against the original file.
29  if (word_alt_ != NULL) {
30  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
31  if (word_alt_[alt_idx] != NULL) {
32  delete []word_alt_[alt_idx];
33  }
34  }
35  delete []word_alt_;
36  word_alt_ = NULL;
37  }
38 }
39 
43 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
44  if (word_alt_ == NULL || alt_cost_ == NULL) {
45  word_alt_ = new char_32*[max_alt_];
46  alt_cost_ = new int[max_alt_];
47  alt_tag_ = new void *[max_alt_];
48 
49  if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) {
50  return false;
51  }
52 
53  memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
54  } else {
55  // check if alt already exists
56  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
57  if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
58  // update the cost if we have a lower one
59  if (cost < alt_cost_[alt_idx]) {
60  alt_cost_[alt_idx] = cost;
61  alt_tag_[alt_idx] = tag;
62  }
63  return true;
64  }
65  }
66  }
67 
68  // determine length of alternate
69  int len = CubeUtils::StrLen(word_str);
70 
71  word_alt_[alt_cnt_] = new char_32[len + 1];
72  if (word_alt_[alt_cnt_] == NULL) {
73  return false;
74  }
75 
76  if (len > 0) {
77  memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
78  }
79 
80  word_alt_[alt_cnt_][len] = 0;
81  alt_cost_[alt_cnt_] = cost;
82  alt_tag_[alt_cnt_] = tag;
83 
84  alt_cnt_++;
85 
86  return true;
87 }
88 
// Orders the alternates in place by ascending alt_cost_. Each pair
// (alt_idx, alt) with alt > alt_idx is compared and exchanged when out of
// order, so the work grows quadratically with alt_cnt_.
// NOTE(review): the signature line is missing from this listing - the
// function name and return type cannot be confirmed from here.
93  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
94  for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
95  if (alt_cost_[alt_idx] > alt_cost_[alt]) {
// swap all three parallel arrays together so the word, cost and tag
// at a given index keep describing the same alternate
96  char_32 *pchTemp = word_alt_[alt_idx];
97  word_alt_[alt_idx] = word_alt_[alt];
98  word_alt_[alt] = pchTemp;
99 
100  int temp = alt_cost_[alt_idx];
101  alt_cost_[alt_idx] = alt_cost_[alt];
102  alt_cost_[alt] = temp;
103 
104  void *tag = alt_tag_[alt_idx];
105  alt_tag_[alt_idx] = alt_tag_[alt];
106  alt_tag_[alt] = tag;
107  }
108  }
109  }
110 }
111 
// Dumps every alternate to stderr, one line each: its index, the word
// converted to UTF-8, its cost, its length in char_32 units, and then each
// char_32 value printed in decimal.
// NOTE(review): the signature line is missing from this listing - the
// function name and return type cannot be confirmed from here.
113  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
114  char_32 *word_32 = word_alt_[alt_idx];
115  string word_str;
116  CubeUtils::UTF32ToUTF8(word_32, &word_str);
117  int num_unichars = CubeUtils::StrLen(word_32);
118  fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
119  word_str.c_str(), alt_cost_[alt_idx], num_unichars);
// raw code values, space separated
120  for (int i = 0; i < num_unichars; ++i)
121  fprintf(stderr, "%d ", word_32[i]);
122  fprintf(stderr, "\n");
123  }
124 }
125 } // namespace tesseract
bool Insert(char_32 *char_ptr, int cost, void *tag=NULL)
void ** alt_tag_
Definition: altlist.h:57
WordAltList(int max_alt)
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:54
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:282
static int StrCmp(const char_32 *str1, const char_32 *str2)
Definition: cube_utils.cpp:66
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144