tesseract v5.3.3.20231005
indexmapbidi.cpp
Go to the documentation of this file.
1
2// File: indexmapbidi.cpp
3// Description: Bi-directional mapping between a sparse and compact space.
4// Author: rays@google.com (Ray Smith)
5//
6// (C) Copyright 2010, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "helpers.h"
20#include "indexmapbidi.h"
21#include "serialis.h"
22
23namespace tesseract {
24
25// Destructor.
26// It is defined here, so the compiler can create a single vtable
27// instead of weak vtables in every compilation unit.
28IndexMap::~IndexMap() = default;
29
30// SparseToCompact takes a sparse index to an index in the compact space.
31// Uses a binary search to find the result. For faster speed use
32// IndexMapBiDi, but that takes more memory.
33int IndexMap::SparseToCompact(int sparse_index) const {
34 auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index);
35 if (pos > compact_map_.begin()) {
36 --pos;
37 }
38 auto result = pos - compact_map_.begin();
39 return compact_map_[result] == sparse_index ? result : -1;
40}
41
42// Copy from the input.
43void IndexMap::CopyFrom(const IndexMap &src) {
46}
50}
51
52// Writes to the given file. Returns false in case of error.
53bool IndexMap::Serialize(FILE *fp) const {
55}
56
57// Reads from the given file. Returns false in case of error.
58// If swap is true, assumes a big/little-endian swap is needed.
59bool IndexMap::DeSerialize(bool swap, FILE *fp) {
60 uint32_t sparse_size;
61 if (!tesseract::DeSerialize(fp, &sparse_size)) {
62 return false;
63 }
64 if (swap) {
65 ReverseN(&sparse_size, sizeof(sparse_size));
66 }
67 // Arbitrarily limit the number of elements to protect against bad data.
68 if (sparse_size > UINT16_MAX) {
69 return false;
70 }
71 sparse_size_ = sparse_size;
72 return tesseract::DeSerialize(swap, fp, compact_map_);
73}
74
75// Destructor.
76// It is defined here, so the compiler can create a single vtable
77// instead of weak vtables in every compilation unit.
79
80// Top-level init function in a single call to initialize a map to select
81// a single contiguous subrange [start, end) of the sparse space to be mapped
82// 1 to 1 to the compact space, with all other elements of the sparse space
83// left unmapped.
84// No need to call Setup after this.
85void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
86 Init(sparse_size, false);
87 for (int i = start; i < end; ++i) {
88 SetMap(i, true);
89 }
90 Setup();
91}
92
93// Initializes just the sparse_map_ to the given size with either all
94// forward indices mapped (all_mapped = true) or none (all_mapped = false).
95// Call Setup immediately after, or make calls to SetMap first to adjust the
96// mapping and then call Setup before using the map.
97void IndexMapBiDi::Init(int size, bool all_mapped) {
98 if (!all_mapped) {
99 sparse_map_.clear();
100 }
101 sparse_map_.resize(size, -1);
102 if (all_mapped) {
103 for (int i = 0; i < size; ++i) {
104 sparse_map_[i] = i;
105 }
106 }
107}
108
109// Sets a given index in the sparse_map_ to be mapped or not.
110void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
111 sparse_map_[sparse_index] = mapped ? 0 : -1;
112}
113
114// Sets up the sparse_map_ and compact_map_ properly after Init and
115// some calls to SetMap. Assumes an ordered 1-1 map from set indices
116// in the forward map to the compact space.
118 int compact_size = 0;
119 for (int &i : sparse_map_) {
120 if (i >= 0) {
121 i = compact_size++;
122 }
123 }
124 compact_map_.clear();
125 compact_map_.resize(compact_size, -1);
126 for (size_t i = 0; i < sparse_map_.size(); ++i) {
127 if (sparse_map_[i] >= 0) {
128 compact_map_[sparse_map_[i]] = i;
129 }
130 }
131 sparse_size_ = sparse_map_.size();
132}
133
134// Copy from the input.
136 sparse_map_ = src.sparse_map_;
138 sparse_size_ = sparse_map_.size();
139}
140
141// Merges the two compact space indices. May be called many times, but
142// the merges must be concluded by a call to CompleteMerges.
143// Returns true if a merge was actually performed.
144bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
145 // Find the current master index for index1 and index2.
146 compact_index1 = MasterCompactIndex(compact_index1);
147 compact_index2 = MasterCompactIndex(compact_index2);
148 // Be sure that index1 < index2.
149 if (compact_index1 > compact_index2) {
150 int tmp = compact_index1;
151 compact_index1 = compact_index2;
152 compact_index2 = tmp;
153 } else if (compact_index1 == compact_index2) {
154 return false;
155 }
156 // To save iterating over all sparse_map_ entries, simply make the master
157 // entry for index2 point to index1.
158 // This leaves behind a potential chain of parents that needs to be chased,
159 // as above.
160 sparse_map_[compact_map_[compact_index2]] = compact_index1;
161 if (compact_index1 >= 0) {
162 compact_map_[compact_index2] = compact_map_[compact_index1];
163 }
164 return true;
165}
166
167// Completes one or more Merge operations by further compacting the
168// compact space. Unused compact space indices are removed, and the used
169// ones above shuffled down to fill the gaps.
170// Example:
171// Input sparse_map_: (x indicates -1)
172// x x 0 x 2 x x 4 x 0 x 2 x
173// Output sparse_map_:
174// x x 0 x 1 x x 2 x 0 x 1 x
175// Output compact_map_:
176// 2 4 7.
178 // Ensure each sparse_map_entry contains a master compact_map_ index.
179 int compact_size = 0;
180 for (int &i : sparse_map_) {
181 int compact_index = MasterCompactIndex(i);
182 i = compact_index;
183 if (compact_index >= compact_size) {
184 compact_size = compact_index + 1;
185 }
186 }
187 // Re-generate the compact_map leaving holes for unused indices.
188 compact_map_.clear();
189 compact_map_.resize(compact_size, -1);
190 for (size_t i = 0; i < sparse_map_.size(); ++i) {
191 if (sparse_map_[i] >= 0) {
192 if (compact_map_[sparse_map_[i]] == -1) {
193 compact_map_[sparse_map_[i]] = i;
194 }
195 }
196 }
197 // Compact the compact_map, leaving tmp_compact_map saying where each
198 // index went to in the compacted map.
199 std::vector<int32_t> tmp_compact_map(compact_size, -1);
200 compact_size = 0;
201 for (size_t i = 0; i < compact_map_.size(); ++i) {
202 if (compact_map_[i] >= 0) {
203 tmp_compact_map[i] = compact_size;
204 compact_map_[compact_size++] = compact_map_[i];
205 }
206 }
207 compact_map_.resize(compact_size);
208 // Now modify the entries in the sparse map to point to the new locations.
209 for (int &i : sparse_map_) {
210 if (i >= 0) {
211 i = tmp_compact_map[i];
212 }
213 }
214}
215
216// Writes to the given file. Returns false in case of error.
217bool IndexMapBiDi::Serialize(FILE *fp) const {
218 if (!IndexMap::Serialize(fp)) {
219 return false;
220 }
221 // Make a vector containing the rest of the map. If the map is many-to-one
222 // then each additional sparse entry needs to be stored.
223 // Normally we store only the compact map to save space.
224 std::vector<int32_t> remaining_pairs;
225 for (unsigned i = 0; i < sparse_map_.size(); ++i) {
226 if (sparse_map_[i] >= 0 && static_cast<unsigned>(compact_map_[sparse_map_[i]]) != i) {
227 remaining_pairs.push_back(i);
228 remaining_pairs.push_back(sparse_map_[i]);
229 }
230 }
231 return tesseract::Serialize(fp, remaining_pairs);
232}
233
234// Reads from the given file. Returns false in case of error.
235// If swap is true, assumes a big/little-endian swap is needed.
236bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
237 if (!IndexMap::DeSerialize(swap, fp)) {
238 return false;
239 }
240 std::vector<int32_t> remaining_pairs;
241 if (!tesseract::DeSerialize(swap, fp, remaining_pairs)) {
242 return false;
243 }
244 sparse_map_.clear();
245 sparse_map_.resize(sparse_size_, -1);
246 for (unsigned i = 0; i < compact_map_.size(); ++i) {
247 sparse_map_[compact_map_[i]] = i;
248 }
249 for (size_t i = 0; i < remaining_pairs.size(); ++i) {
250 int sparse_index = remaining_pairs[i++];
251 sparse_map_[sparse_index] = remaining_pairs[i];
252 }
253 return true;
254}
255
256// Bulk calls to SparseToCompact.
257// Maps the given array of sparse indices to an array of compact indices.
258// Assumes the input is sorted. The output indices are sorted and uniqued.
259// Return value is the number of "missed" features, being features that
260// don't map to the compact feature space.
261int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const {
262 compact->clear();
263 int num_features = sparse.size();
264 int missed_features = 0;
265 int prev_good_feature = -1;
266 for (int f = 0; f < num_features; ++f) {
267 int feature = sparse_map_[sparse[f]];
268 if (feature >= 0) {
269 if (feature != prev_good_feature) {
270 compact->push_back(feature);
271 prev_good_feature = feature;
272 }
273 } else {
274 ++missed_features;
275 }
276 }
277 return missed_features;
278}
279
280} // namespace tesseract.
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:184
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:205
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:236
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)
void CopyFrom(const IndexMap &src)
virtual int SparseToCompact(int sparse_index) const
std::vector< int32_t > compact_map_
Definition: indexmapbidi.h:82
void Init(int size, bool all_mapped)
int MapFeatures(const std::vector< int > &sparse, std::vector< int > *compact) const
bool Merge(int compact_index1, int compact_index2)
void CopyFrom(const IndexMapBiDi &src)
void SetMap(int sparse_index, bool mapped)
int SparseSize() const override
Definition: indexmapbidi.h:144
void InitAndSetupRange(int sparse_size, int start, int end)
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)