tesseract v5.3.3.20231005
intfeaturedist.cpp
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: intfeaturedist.cpp
5// Description: Fast set-difference-based feature distance calculator.
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "intfeaturedist.h"
20#include "intfeaturemap.h"
21
22namespace tesseract {
23
25 : size_(0)
26 , total_feature_weight_(0.0)
27 , feature_map_(nullptr)
28 , features_(nullptr)
29 , features_delta_one_(nullptr)
30 , features_delta_two_(nullptr) {}
31
33 Clear();
34}
35
36// Initialize the table to the given size of feature space.
37void IntFeatureDist::Init(const IntFeatureMap *feature_map) {
38 size_ = feature_map->sparse_size();
39 Clear();
40 feature_map_ = feature_map;
41 features_ = new bool[size_];
42 features_delta_one_ = new bool[size_];
43 features_delta_two_ = new bool[size_];
44 memset(features_, false, size_ * sizeof(features_[0]));
45 memset(features_delta_one_, false, size_ * sizeof(features_delta_one_[0]));
46 memset(features_delta_two_, false, size_ * sizeof(features_delta_two_[0]));
47 total_feature_weight_ = 0.0;
48}
49
50// Setup the map for the given indexed_features that have been indexed by
51// feature_map.
52void IntFeatureDist::Set(const std::vector<int> &indexed_features, int canonical_count,
53 bool value) {
54 total_feature_weight_ = canonical_count;
55 for (int f : indexed_features) {
56 features_[f] = value;
57 for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) {
58 if (dir == 0) {
59 continue;
60 }
61 const int mapped_f = feature_map_->OffsetFeature(f, dir);
62 if (mapped_f >= 0) {
63 features_delta_one_[mapped_f] = value;
64 for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) {
65 if (dir2 == 0) {
66 continue;
67 }
68 const int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2);
69 if (mapped_f2 >= 0) {
70 features_delta_two_[mapped_f2] = value;
71 }
72 }
73 }
74 }
75 }
76}
77
78// Compute the distance between the given feature vector and the last
79// Set feature vector.
80double IntFeatureDist::FeatureDistance(const std::vector<int> &features) const {
81 const int num_test_features = features.size();
82 const double denominator = total_feature_weight_ + num_test_features;
83 double misses = denominator;
84 for (int i = 0; i < num_test_features; ++i) {
85 const int index = features[i];
86 const double weight = 1.0;
87 if (features_[index]) {
88 // A perfect match.
89 misses -= 2.0 * weight;
90 } else if (features_delta_one_[index]) {
91 misses -= 1.5 * weight;
92 } else if (features_delta_two_[index]) {
93 // A near miss.
94 misses -= 1.0 * weight;
95 }
96 }
97 return misses / denominator;
98}
99
100// Compute the distance between the given feature vector and the last
101// Set feature vector.
102double IntFeatureDist::DebugFeatureDistance(const std::vector<int> &features) const {
103 const int num_test_features = features.size();
104 const double denominator = total_feature_weight_ + num_test_features;
105 double misses = denominator;
106 for (int i = 0; i < num_test_features; ++i) {
107 const int index = features[i];
108 const double weight = 1.0;
109 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(features[i]);
110 tprintf("Testing feature weight %g:", weight);
111 f.print();
112 if (features_[index]) {
113 // A perfect match.
114 misses -= 2.0 * weight;
115 tprintf("Perfect hit\n");
116 } else if (features_delta_one_[index]) {
117 misses -= 1.5 * weight;
118 tprintf("-1 hit\n");
119 } else if (features_delta_two_[index]) {
120 // A near miss.
121 misses -= 1.0 * weight;
122 tprintf("-2 hit\n");
123 } else {
124 tprintf("Total miss\n");
125 }
126 }
127 tprintf("Features present:");
128 for (int i = 0; i < size_; ++i) {
129 if (features_[i]) {
130 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
131 f.print();
132 }
133 }
134 tprintf("\nMinus one features:");
135 for (int i = 0; i < size_; ++i) {
136 if (features_delta_one_[i]) {
137 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
138 f.print();
139 }
140 }
141 tprintf("\nMinus two features:");
142 for (int i = 0; i < size_; ++i) {
143 if (features_delta_two_[i]) {
144 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
145 f.print();
146 }
147 }
148 tprintf("\n");
149 return misses / denominator;
150}
151
152// Clear all data.
153void IntFeatureDist::Clear() {
154 delete[] features_;
155 features_ = nullptr;
156 delete[] features_delta_one_;
157 features_delta_one_ = nullptr;
158 delete[] features_delta_two_;
159 features_delta_two_ = nullptr;
160}
161
162} // namespace tesseract
int value
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void Set(const std::vector< int > &indexed_features, int canonical_count, bool value)
double DebugFeatureDistance(const std::vector< int > &features) const
void Init(const IntFeatureMap *feature_map)
double FeatureDistance(const std::vector< int > &features) const
INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const
int OffsetFeature(int index_feature, int dir) const