All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::BeamSearch Class Reference

#include <beam_search.h>

Public Member Functions

 BeamSearch (CubeRecoContext *cntxt, bool word_mode=true)
 
 ~BeamSearch ()
 
WordAltListSearch (SearchObject *srch_obj, LangModel *lang_mod=NULL)
 
SearchNodeBestNode () const
 
char_32Alt (int alt) const
 
CharSamp ** BackTrack (SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
 
CharSamp ** BackTrack (SearchObject *srch_obj, SearchNode *node, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
 
int SizeCost (SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
 
int WordUnigramCost (char_32 *str32, WordUnigrams *word_unigrams) const
 
int ColCnt () const
 
SearchColumnColumn (int col_idx) const
 
int BestPresortedNodeIndex () const
 

Detailed Description

Definition at line 44 of file beam_search.h.

Constructor & Destructor Documentation

tesseract::BeamSearch::BeamSearch ( CubeRecoContext cntxt,
bool  word_mode = true 
)
explicit

Definition at line 27 of file beam_search.cpp.

27  {
28  cntxt_ = cntxt;
29  seg_pt_cnt_ = 0;
30  col_cnt_ = 1;
31  col_ = NULL;
32  word_mode_ = word_mode;
33 }
#define NULL
Definition: host.h:144
tesseract::BeamSearch::~BeamSearch ( )

Definition at line 47 of file beam_search.cpp.

47  {
48  Cleanup();
49 }

Member Function Documentation

char_32 * tesseract::BeamSearch::Alt ( int  alt) const

Definition at line 298 of file beam_search.cpp.

298  {
299  // get the last column of the lattice
300  if (col_cnt_ <= 0)
301  return NULL;
302 
303  SearchColumn *srch_col = col_[col_cnt_ - 1];
304  if (!srch_col)
305  return NULL;
306 
307  // point to the last node in the selected path
308  if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
309  return NULL;
310  }
311 
312  SearchNode *srch_node = srch_col->Nodes()[alt];
313  if (!srch_node)
314  return NULL;
315 
316  // get string
317  char_32 *str32 = srch_node->PathString();
318  if (!str32)
319  return NULL;
320 
321  return str32;
322 }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
int  node_index,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 328 of file beam_search.cpp.

330  {
331  // get the last column of the lattice
332  if (col_cnt_ <= 0)
333  return NULL;
334  SearchColumn *srch_col = col_[col_cnt_ - 1];
335  if (!srch_col)
336  return NULL;
337 
338  // point to the last node in the selected path
339  if (node_index >= srch_col->NodeCount() || !srch_col->Nodes())
340  return NULL;
341 
342  SearchNode *srch_node = srch_col->Nodes()[node_index];
343  if (!srch_node)
344  return NULL;
345  return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
346 }
#define NULL
Definition: host.h:144
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
SearchNode node,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 352 of file beam_search.cpp.

354  {
355  if (!srch_node)
356  return NULL;
357 
358  if (str32) {
359  if (*str32)
360  delete [](*str32); // clear existing value
361  *str32 = srch_node->PathString();
362  if (!*str32)
363  return NULL;
364  }
365 
366  if (char_boxes && *char_boxes) {
367  boxaDestroy(char_boxes); // clear existing value
368  }
369 
370  CharSamp **chars;
371  chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
372  if (!chars && str32)
373  delete []*str32;
374  return chars;
375 }
#define NULL
Definition: host.h:144
SearchNode * tesseract::BeamSearch::BestNode ( ) const

Definition at line 286 of file beam_search.cpp.

286  {
287  if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
288  return NULL;
289 
290  int node_cnt = col_[col_cnt_ - 1]->NodeCount();
291  SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
292  if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
293  return NULL;
294  return srch_nodes[0];
295 }
SearchNode ** Nodes() const
Definition: search_column.h:44
#define NULL
Definition: host.h:144
int tesseract::BeamSearch::BestPresortedNodeIndex ( ) const
inline

Definition at line 81 of file beam_search.h.

81  {
82  return best_presorted_node_idx_;
83  };
int tesseract::BeamSearch::ColCnt ( ) const
inline

Definition at line 76 of file beam_search.h.

76 { return col_cnt_; }
SearchColumn * tesseract::BeamSearch::Column ( int  col_idx) const

Definition at line 279 of file beam_search.cpp.

279  {
280  if (col < 0 || col >= col_cnt_ || !col_)
281  return NULL;
282  return col_[col];
283 }
#define NULL
Definition: host.h:144
WordAltList * tesseract::BeamSearch::Search ( SearchObject srch_obj,
LangModel lang_mod = NULL 
)

Definition at line 98 of file beam_search.cpp.

98  {
99  // verifications
100  if (!lang_mod)
101  lang_mod = cntxt_->LangMod();
102  if (!lang_mod) {
103  fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
104  "LangModel\n");
105  return NULL;
106  }
107 
108  // free existing state
109  Cleanup();
110 
111  // get seg pt count
112  seg_pt_cnt_ = srch_obj->SegPtCnt();
113  if (seg_pt_cnt_ < 0) {
114  return NULL;
115  }
116  col_cnt_ = seg_pt_cnt_ + 1;
117 
118  // disregard suspicious cases
119  if (seg_pt_cnt_ > 128) {
120  fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
121  "suspiciously high; bailing out\n");
122  return NULL;
123  }
124 
125  // alloc memory for columns
126  col_ = new SearchColumn *[col_cnt_];
127  if (!col_) {
128  fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
129  "SearchColumn array\n");
130  return NULL;
131  }
132  memset(col_, 0, col_cnt_ * sizeof(*col_));
133 
134  // for all possible segments
135  for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
136  // create a search column
137  col_[end_seg - 1] = new SearchColumn(end_seg - 1,
138  cntxt_->Params()->BeamWidth());
139  if (!col_[end_seg - 1]) {
140  fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
141  "SearchColumn for column %d\n", end_seg - 1);
142  return NULL;
143  }
144 
145  // for all possible start segments
146  int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
147  for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
148  int parent_nodes_cnt;
149  SearchNode **parent_nodes;
150 
151  // for the root segment, we do not have a parent
152  if (strt_seg == 0) {
153  parent_nodes_cnt = 1;
154  parent_nodes = NULL;
155  } else {
156  // for all the existing nodes in the starting column
157  parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
158  parent_nodes = col_[strt_seg - 1]->Nodes();
159  }
160 
161  // run the shape recognizer
162  CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
163  end_seg - 1);
164  // for all the possible parents
165  for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
166  // point to the parent node
167  SearchNode *parent_node = !parent_nodes ? NULL
168  : parent_nodes[parent_idx];
169  LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
170  : parent_node->LangModelEdge();
171 
172  // compute the cost of not having spaces within the segment range
173  int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);
174 
175  // In phrase mode, compute the cost of not having a space before
176  // this character
177  int no_space_cost = 0;
178  if (!word_mode_ && strt_seg > 0) {
179  no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
180  }
181 
182  // if the no space cost is low enough
183  if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
184  // Add the children nodes
185  CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
186  lm_parent_edge, char_alt_list,
187  contig_cost + no_space_cost);
188  }
189 
190  // In phrase mode and if not starting at the root
191  if (!word_mode_ && strt_seg > 0) { // parent_node must be non-NULL
192  // consider starting a new word for nodes that are valid EOW
193  if (parent_node->LangModelEdge()->IsEOW()) {
194  // get the space cost
195  int space_cost = srch_obj->SpaceCost(strt_seg - 1);
196  // if the space cost is low enough
197  if ((contig_cost + space_cost) < MIN_PROB_COST) {
198  // Restart the language model and add nodes as children to the
199  // space node.
200  CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
201  char_alt_list, contig_cost + space_cost);
202  }
203  }
204  }
205  } // parent
206  } // strt_seg
207 
208  // prune the column nodes
209  col_[end_seg - 1]->Prune();
210 
211  // Free the column hash table. No longer needed
212  col_[end_seg - 1]->FreeHashTable();
213  } // end_seg
214 
215  WordAltList *alt_list = CreateWordAltList(srch_obj);
216  return alt_list;
217 }
#define MAX(x, y)
Definition: ndminx.h:24
LangModel * LangMod() const
int MaxSegPerChar() const
Definition: tuning_params.h:52
#define MIN_PROB_COST
Definition: cube_const.h:26
TuningParams * Params() const
SearchNode ** Nodes() const
Definition: search_column.h:44
#define NULL
Definition: host.h:144
int tesseract::BeamSearch::SizeCost ( SearchObject srch_obj,
SearchNode node,
char_32 **  str32 = NULL 
) const

Definition at line 472 of file beam_search.cpp.

473  {
474  CharSamp **chars = NULL;
475  int char_cnt = 0;
476  if (!node)
477  return 0;
478  // Backtrack to get string and character segmentation
479  chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
480  if (!chars)
481  return WORST_COST;
482  int size_cost = (cntxt_->SizeModel() == NULL) ? 0 :
483  cntxt_->SizeModel()->Cost(chars, char_cnt);
484  delete []chars;
485  return size_cost;
486 }
#define WORST_COST
Definition: cube_const.h:30
int Cost(CharSamp **samp_array, int samp_cnt) const
WordSizeModel * SizeModel() const
#define NULL
Definition: host.h:144
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
int tesseract::BeamSearch::WordUnigramCost ( char_32 str32,
WordUnigrams word_unigrams 
) const

The documentation for this class was generated from the following files: