All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tess_lang_mod_edge.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tess_lang_mod_edge.cpp
3  * Description: Implementation of the Tesseract Language Model Edge Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "tess_lang_mod_edge.h"
21 #include "const.h"
22 #include "unichar.h"
23 
24 
25 
26 namespace tesseract {
27 // OOD (out-of-dictionary) constructor: the edge has no dawg (dawg_ is NULL) and its start/end edge refs are 0
29  root_ = false;
30  cntxt_ = cntxt;
31  dawg_ = NULL;
32  start_edge_ = 0;
33  end_edge_ = 0;
34  edge_mask_ = 0;
35  class_id_ = class_id;
36  str_ = cntxt_->CharacterSet()->ClassString(class_id);
37  path_cost_ = Cost();
38 }
39 
44  const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
45  root_ = false;
46  cntxt_ = cntxt;
47  dawg_ = dawg;
48  start_edge_ = edge_idx;
49  end_edge_ = edge_idx;
50  edge_mask_ = 0;
51  class_id_ = class_id;
52  str_ = cntxt_->CharacterSet()->ClassString(class_id);
53  path_cost_ = Cost();
54 }
55 
60  EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
61  int class_id) {
62  root_ = false;
63  cntxt_ = cntxt;
64  dawg_ = dawg;
65  start_edge_ = start_edge_idx;
66  end_edge_ = end_edge_idx;
67  edge_mask_ = 0;
68  class_id_ = class_id;
69  str_ = cntxt_->CharacterSet()->ClassString(class_id);
70  path_cost_ = Cost();
71 }
72 
74  char *char_ptr = new char[256];
75  if (!char_ptr) {
76  return NULL;
77  }
78 
79  char dawg_str[256];
80  char edge_str[32];
81  if (dawg_ == (Dawg *)DAWG_OOD) {
82  strcpy(dawg_str, "OOD");
83  } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
84  strcpy(dawg_str, "NUM");
85  } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
86  strcpy(dawg_str, "Main");
87  } else if (dawg_->permuter() == USER_DAWG_PERM) {
88  strcpy(dawg_str, "User");
89  } else if (dawg_->permuter() == DOC_DAWG_PERM) {
90  strcpy(dawg_str, "Doc");
91  } else {
92  strcpy(dawg_str, "N/A");
93  }
94 
95  sprintf(edge_str, "%d", static_cast<int>(start_edge_));
96  if (IsLeadingPuncEdge(edge_mask_)) {
97  strcat(edge_str, "-LP");
98  }
99  if (IsTrailingPuncEdge(edge_mask_)) {
100  strcat(edge_str, "-TP");
101  }
102  sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
103  dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
104 
105  return char_ptr;
106 }
107 
109  const Dawg *dawg,
110  NODE_REF parent_node,
111  LangModEdge **edge_array) {
112  int edge_cnt = 0;
113  NodeChildVector vec;
114  dawg->unichar_ids_of(parent_node, &vec, false); // find all children
115  for (int i = 0; i < vec.size(); ++i) {
116  const NodeChild &child = vec[i];
117  if (child.unichar_id == INVALID_UNICHAR_ID) continue;
118  edge_array[edge_cnt] =
119  new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
120  if (edge_array[edge_cnt] != NULL) edge_cnt++;
121  }
122  return edge_cnt;
123 }
124 }
int size() const
Definition: genericvector.h:72
UNICHAR_ID unichar_id
Definition: dawg.h:61
#define IsTrailingPuncEdge(edge_mask)
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array, EDGE_REF edge, int class_id)
#define DAWG_NUMBER
#define IsLeadingPuncEdge(edge_mask)
static int CreateChildren(CubeRecoContext *cntxt, const Dawg *edges, NODE_REF edge_reg, LangModEdge **lm_edges)
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
CharSet * CharacterSet() const
inT64 EDGE_REF
Definition: dawg.h:54
#define NULL
Definition: host.h:144
inT64 NODE_REF
Definition: dawg.h:55
#define DAWG_OOD
PermuterType permuter() const
Definition: dawg.h:129
EDGE_REF edge_ref
Definition: dawg.h:62
const char_32 * ClassString(int class_id) const
Definition: char_set.h:104