tesseract v5.3.3.20231005
intfeaturemap.h
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: intfeaturemap.h
5// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
6// to provide a subspace mapping and fast feature lookup.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_
21#define TESSERACT_CLASSIFY_INTFEATUREMAP_H_
22
23#include "export.h"
24#include "indexmapbidi.h"
25#include "intfeaturespace.h"
26#include "intproto.h"
27
28namespace tesseract {
29
30class SampleIterator;
31
32// Number of positive and negative offset maps.
33static const int kNumOffsetMaps = 2;
34
35// Class to map a feature space defined by INT_FEATURE_STRUCT to a compact
36// down-sampled subspace of actually used features.
37// The IntFeatureMap copes with 2 stages of transformation:
38// The first step is down-sampling (re-quantization) and converting to a
39// single index value from the 3-D input:
40// INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and
41// the second is a feature-space compaction to map only the feature indices
42// that are actually used. This saves space in classifiers that are built
43// using the mapped feature space.
44// index (sparse) feature <-> map (compact) feature via IndexMapBiDi.
45// Although the transformations are reversible, the inverses are lossy and do
46// not return the exact input INT_FEATURE_STRUCT, due to the many->one nature
47// of both transformations.
48class TESS_COMMON_TRAINING_API IntFeatureMap {
49public:
52
53 // Accessors.
54 int sparse_size() const {
55 return feature_space_.Size();
56 }
57 int compact_size() const {
58 return compact_size_;
59 }
61 return feature_space_;
62 }
63 const IndexMapBiDi &feature_map() const {
64 return feature_map_;
65 }
66
67 // Pseudo-accessors.
68 int IndexFeature(const INT_FEATURE_STRUCT &f) const;
69 int MapFeature(const INT_FEATURE_STRUCT &f) const;
70 int MapIndexFeature(int index_feature) const;
71 INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const;
72 INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const;
73 void DeleteMapFeature(int map_feature);
74 bool IsMapFeatureDeleted(int map_feature) const;
75
76 // Copies the given feature_space and uses it as the index feature map
77 // from INT_FEATURE_STRUCT.
78 void Init(const IntFeatureSpace &feature_space);
79
80 // Helper to return an offset index feature. In this context an offset
81 // feature with a dir of +/-1 is a feature of a similar direction,
82 // but shifted perpendicular to the direction of the feature. An offset
83 // feature with a dir of +/-2 is feature at the same position, but rotated
84 // by +/- one [compact] quantum. Returns the index of the generated offset
85 // feature, or -1 if it doesn't exist. Dir should be in
86 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
87 // A dir of 0 is an identity transformation.
88 // Both input and output are from the index(sparse) feature space, not
89 // the mapped/compact feature space, but the offset feature is the minimum
90 // distance moved from the input to guarantee that it maps to the next
91 // available quantum in the mapped/compact space.
92 int OffsetFeature(int index_feature, int dir) const;
93
94 // Computes the features used by the subset of samples defined by
95 // the iterator and sets up the feature mapping.
96 // Returns the size of the compacted feature space.
98
99 // After deleting some features, finish setting up the mapping, and map
100 // all the samples. Returns the size of the compacted feature space.
101 int FinalizeMapping(SampleIterator *it);
102
103 // Indexes the given array of features to a vector of sorted indices.
104 void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
105 std::vector<int> *sorted_features) const {
106 feature_space_.IndexAndSortFeatures(features, num_features, sorted_features);
107 }
108 // Maps the given array of index/sparse features to an array of map/compact
109 // features.
110 // Assumes the input is sorted. The output indices are sorted and uniqued.
111 // Returns the number of "missed" features, being features that
112 // don't map to the compact feature space.
113 int MapIndexedFeatures(const std::vector<int> &index_features,
114 std::vector<int> *map_features) const {
115 return feature_map_.MapFeatures(index_features, map_features);
116 }
117
118 // Prints the map features from the set in human-readable form.
119 void DebugMapFeatures(const std::vector<int> &map_features) const;
120
121private:
122 void Clear();
123
124 // Helper to compute an offset index feature. In this context an offset
125 // feature with a dir of +/-1 is a feature of a similar direction,
126 // but shifted perpendicular to the direction of the feature. An offset
127 // feature with a dir of +/-2 is feature at the same position, but rotated
128 // by +/- one [compact] quantum. Returns the index of the generated offset
129 // feature, or -1 if it doesn't exist. Dir should be in
130 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
131 // A dir of 0 is an identity transformation.
132 // Both input and output are from the index(sparse) feature space, not
133 // the mapped/compact feature space, but the offset feature is the minimum
134 // distance moved from the input to guarantee that it maps to the next
135 // available quantum in the mapped/compact space.
136 int ComputeOffsetFeature(int index_feature, int dir) const;
137
138 // True if the mapping has changed since it was last finalized.
139 bool mapping_changed_;
140 // Size of the compacted feature space, after unused features are removed.
141 int compact_size_;
142 // Feature space quantization definition and indexing from INT_FEATURE_STRUCT.
143 IntFeatureSpace feature_space_;
144 // Mapping from indexed feature space to the compacted space with unused
145 // features mapping to -1.
146 IndexMapBiDi feature_map_;
147 // Index tables to map a feature index to the corresponding feature after a
148 // shift perpendicular to the feature direction, or a rotation in place.
149 // An entry of -1 indicates that there is no corresponding feature.
150 // Array of arrays of size feature_space_.Size() owned by this class.
151 int *offset_plus_[kNumOffsetMaps];
152 int *offset_minus_[kNumOffsetMaps];
153
154 // Don't use default copy and assign!
156 void operator=(const IntFeatureMap &);
157};
158
159} // namespace tesseract.
160
161#endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H_
int MapIndexedFeatures(const std::vector< int > &index_features, std::vector< int > *map_features) const
int FindNZFeatureMapping(SampleIterator *it)
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, std::vector< int > *sorted_features) const
const IntFeatureSpace & feature_space() const
Definition: intfeaturemap.h:60
const IndexMapBiDi & feature_map() const
Definition: intfeaturemap.h:63