All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
cubeclassifier.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: cubeclassifier.cpp
5 // Description: Cube implementation of a ShapeClassifier.
6 // Author: Ray Smith
7 // Created: Wed Nov 23 10:39:45 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #include "cubeclassifier.h"
23 
24 #include "char_altlist.h"
25 #include "char_set.h"
26 #include "cube_object.h"
27 #include "cube_reco_context.h"
28 #include "tessclassifier.h"
29 #include "tesseractclass.h"
30 #include "trainingsample.h"
31 #include "unicharset.h"
32 
33 namespace tesseract {
34 
36  : cube_cntxt_(tesseract->GetCubeRecoContext()),
37  shape_table_(*tesseract->shape_table()) {
  // All setup happens in the initializer list above: cube_cntxt_ takes the
  // value returned by tesseract->GetCubeRecoContext(), and shape_table_ is
  // initialized from the dereferenced tesseract->shape_table().
  // NOTE(review): tesseract->shape_table() is dereferenced without a null
  // check - assumes it is non-null at construction time; confirm with callers.
38 }
40 }
41 
45  const TrainingSample& sample, Pix* page_pix, int debug,
46  UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
  // Classifies the character under sample.bounding_box() in page_pix using
  // cube alone. *results is cleared first and then receives one
  // UnicharRating(unichar_id, probability) per cube alternative that maps to
  // a valid (non-negative) unichar id. Returns results->size(), or 0 when
  // page_pix is NULL. The debug and keep_this arguments are not used in this
  // body.
47  results->clear();
48  if (page_pix == NULL) return 0;
49 
50  ASSERT_HOST(cube_cntxt_ != NULL);
51  const TBOX& char_box = sample.bounding_box();
  // The y argument is the image height minus the box top.
  // NOTE(review): presumably this converts a bottom-up box coordinate into a
  // top-down image row - confirm against CubeObject's constructor.
52  CubeObject* cube_obj = new tesseract::CubeObject(
53  cube_cntxt_, page_pix, char_box.left(),
54  pixGetHeight(page_pix) - char_box.top(),
55  char_box.width(), char_box.height());
  // alt_list is deleted below, so this function treats it as owned; it may
  // be NULL, in which case results stays empty.
56  CharAltList* alt_list = cube_obj->RecognizeChar();
57  if (alt_list != NULL) {
58  alt_list->Sort();
59  CharSet* char_set = cube_cntxt_->CharacterSet();
60  for (int i = 0; i < alt_list->AltCount(); ++i) {
61  // Convert the cube class id to a unichar_id via its class string.
62  int alt_id = alt_list->Alt(i);
63  int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
  // Alternatives with a negative unichar id are dropped.
64  if (unichar_id >= 0)
65  results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
66  }
67  delete alt_list;
68  }
69  delete cube_obj;
70  return results->size();
71 }
72 
  // Returns the address of shape_table_, which the constructor initialized
  // from the Tesseract instance's shape table; nothing is copied here.
75  return &shape_table_;
76 }
77 
79  : cube_cntxt_(tesseract->GetCubeRecoContext()),
80  shape_table_(*tesseract->shape_table()),
81  pruner_(new TessClassifier(true, tesseract)) {
  // All setup happens in the initializer list above. pruner_ is a
  // heap-allocated TessClassifier that this class must delete itself.
  // NOTE(review): the meaning of the `true` argument to TessClassifier is not
  // visible in this file - check tessclassifier.h.
  // NOTE(review): tesseract->shape_table() is dereferenced without a null
  // check - assumes it is non-null at construction time; confirm with callers.
82 }
  // Frees the TessClassifier that was allocated with new for pruner_ in the
  // constructor's initializer list.
84  delete pruner_;
85 }
86 
90  const TrainingSample& sample, Pix* page_pix, int debug,
91  UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
  // Two-stage classification: pruner_ fills *results with its candidates,
  // then each candidate's rating is overwritten with the best cube
  // probability found for the same unichar id. Returns the pruner's result
  // count when page_pix is NULL, otherwise results->size().
92  int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
93  keep_this, results);
  // Without a page image cube cannot run, so the pruner output is returned
  // as it is.
94  if (page_pix == NULL) return num_results;
95 
96  ASSERT_HOST(cube_cntxt_ != NULL);
97  const TBOX& char_box = sample.bounding_box();
  // The y argument is the image height minus the box top.
  // NOTE(review): presumably this converts a bottom-up box coordinate into a
  // top-down image row - confirm against CubeObject's constructor.
98  CubeObject* cube_obj = new tesseract::CubeObject(
99  cube_cntxt_, page_pix, char_box.left(),
100  pixGetHeight(page_pix) - char_box.top(),
101  char_box.width(), char_box.height());
  // alt_list is deleted below, so this function treats it as owned; if it is
  // NULL the pruner ratings are left unchanged.
102  CharAltList* alt_list = cube_obj->RecognizeChar();
103  CharSet* char_set = cube_cntxt_->CharacterSet();
104  if (alt_list != NULL) {
  // Only the first num_results entries (the count returned by the pruner)
  // are re-rated.
105  for (int r = 0; r < num_results; ++r) {
106  // Get the best cube probability of the unichar in the result.
  // best_prob stays 0.0 when no cube alternative maps to this unichar id,
  // so such results end up with a rating of 0.
107  double best_prob = 0.0;
108  for (int i = 0; i < alt_list->AltCount(); ++i) {
109  int alt_id = alt_list->Alt(i);
110  int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
111  if (unichar_id == (*results)[r].unichar_id &&
112  alt_list->AltProb(i) > best_prob) {
113  best_prob = alt_list->AltProb(i);
114  }
115  }
116  (*results)[r].rating = best_prob;
117  }
118  delete alt_list;
119  // Re-sort by rating.
  // NOTE(review): listing line 120, the statement that performs this
  // re-sort, is missing from this extract - restore it from the real file.
121  }
122  delete cube_obj;
123  return results->size();
124 }
125 
  // Returns the address of shape_table_, which the constructor initialized
  // from the Tesseract instance's shape table; nothing is copied here.
128  return &shape_table_;
129 }
130 
131 } // namespace tesseract
132 
133 
134 
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
int size() const
Definition: genericvector.h:72
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
int push_back(T object)
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
#define ASSERT_HOST(x)
Definition: errcode.h:84
virtual const ShapeTable * GetShapeTable() const
inT16 left() const
Definition: rect.h:68
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
int UnicharID(const char_32 *str) const
Definition: char_set.h:80
int UNICHAR_ID
Definition: unichar.h:33
CharAltList * RecognizeChar()
CubeClassifier(Tesseract *tesseract)
inT16 height() const
Definition: rect.h:104
CubeTessClassifier(Tesseract *tesseract)
inT16 width() const
Definition: rect.h:111
virtual const ShapeTable * GetShapeTable() const
CharSet * CharacterSet() const
Definition: cluster.h:32
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
inT16 top() const
Definition: rect.h:54
const char_32 * ClassString(int class_id) const
Definition: char_set.h:104