tesseract v5.3.3.20231005
indexmapbidi.h
Go to the documentation of this file.
1
2// File: indexmapbidi.h
3// Description: Bi-directional mapping between a sparse and compact space.
4// Author: rays@google.com (Ray Smith)
5//
6// (C) Copyright 2010, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_
20#define TESSERACT_CCUTIL_INDEXMAPBIDI_H_
21
22#include <tesseract/export.h> // for TESS_API
23
24#include <cstdint> // for int32_t
25#include <cstdio>
26#include <vector>
27
28namespace tesseract {
29
30class IndexMapBiDi;
31
32// Bidirectional one-to-one mapping between a sparse and a compact discrete
33// space. Many entries in the sparse space are unmapped, but those that are
34// mapped have a 1-1 mapping to (and from) the compact space, where all
35// values are used. This is useful for forming subsets of larger collections,
36// such as subsets of character sets, or subsets of binary feature spaces.
37//
38// This base class provides basic functionality with binary search for the
39// SparseToCompact mapping to save memory.
40// For a faster inverse mapping, or to allow a many-to-one mapping, use
41// IndexMapBiDi below.
42// NOTE: there are currently no methods to setup an IndexMap on its own!
43// It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
45public:
46 virtual ~IndexMap();
47
48 // SparseToCompact takes a sparse index to an index in the compact space.
49 // Uses a binary search to find the result. For faster speed use
50 // IndexMapBiDi, but that takes more memory.
51 virtual int SparseToCompact(int sparse_index) const;
52
53 // CompactToSparse takes a compact index to the corresponding index in the
54 // sparse space.
55 int CompactToSparse(int compact_index) const {
56 return compact_map_[compact_index];
57 }
58 // The size of the sparse space.
59 virtual int SparseSize() const {
60 return sparse_size_;
61 }
62 // The size of the compact space.
63 int CompactSize() const {
64 return compact_map_.size();
65 }
66
67 // Copy from the input.
68 void CopyFrom(const IndexMap &src);
69 void CopyFrom(const IndexMapBiDi &src);
70
71 // Writes to the given file. Returns false in case of error.
72 bool Serialize(FILE *fp) const;
73 // Reads from the given file. Returns false in case of error.
74 // If swap is true, assumes a big/little-endian swap is needed.
75 bool DeSerialize(bool swap, FILE *fp);
76
77protected:
78 // The sparse space covers integers in the range [0, sparse_size_-1].
79 int32_t sparse_size_;
80 // The compact space covers integers in the range [0, compact_map_.size()-1].
81 // Each element contains the corresponding sparse index.
82 std::vector<int32_t> compact_map_;
83};
84
85// Bidirectional many-to-one mapping between a sparse and a compact discrete
86// space. As with IndexMap, many entries may be unmapped, but unlike IndexMap,
87// of those that are, many may be mapped to the same compact index.
88// If the map is many-to-one, it is not possible to directly obtain all the
89// sparse indices that map to a single compact index.
90// This map is time- rather than space-efficient. It stores the entire sparse
91// space.
92// IndexMapBiDi may be initialized in one of 3 ways:
93// 1. Init(size, true);
94// Setup();
95// Sets a complete 1:1 mapping with no unmapped elements.
96// 2. Init(size, false);
97// for ... SetMap(index, true);
98// Setup();
99// Specifies precisely which sparse indices are mapped. The mapping is 1:1.
100// 3. Either of the above, followed by:
101// for ... Merge(index1, index2);
102// CompleteMerges();
103// Allows a many-to-one mapping by merging compact space indices.
105public:
106 ~IndexMapBiDi() override;
107
108 // Top-level init function in a single call to initialize a map to select
109 // a single contiguous subrange [start, end) of the sparse space to be mapped
110 // 1 to 1 to the compact space, with all other elements of the sparse space
111 // left unmapped.
112 // No need to call Setup after this.
113 void InitAndSetupRange(int sparse_size, int start, int end);
114
115 // Initializes just the sparse_map_ to the given size with either all
116 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
117 // Call Setup immediately after, or make calls to SetMap first to adjust the
118 // mapping and then call Setup before using the map.
119 void Init(int size, bool all_mapped);
120 // Sets a given index in the sparse_map_ to be mapped or not.
121 void SetMap(int sparse_index, bool mapped);
122 // Sets up the sparse_map_ and compact_map_ properly after Init and
123 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
124 // in the sparse space to the compact space.
125 void Setup();
126
127 // Merges the two compact space indices. May be called many times, but
128 // the merges must be concluded by a call to CompleteMerges.
129 // Returns true if a merge was actually performed.
130 bool Merge(int compact_index1, int compact_index2);
131 // Returns true if the given compact index has been deleted.
132 bool IsCompactDeleted(int index) const {
133 return MasterCompactIndex(index) < 0;
134 }
135 // Completes one or more Merge operations by further compacting the
136 // compact space.
137 void CompleteMerges();
138
139 // SparseToCompact takes a sparse index to an index in the compact space.
140 int SparseToCompact(int sparse_index) const override {
141 return sparse_map_[sparse_index];
142 }
143 // The size of the sparse space.
144 int SparseSize() const override {
145 return sparse_map_.size();
146 }
147
148 // Copy from the input.
149 void CopyFrom(const IndexMapBiDi &src);
150
151 // Writes to the given file. Returns false in case of error.
152 bool Serialize(FILE *fp) const;
153 // Reads from the given file. Returns false in case of error.
154 // If swap is true, assumes a big/little-endian swap is needed.
155 bool DeSerialize(bool swap, FILE *fp);
156
157 // Bulk calls to SparseToCompact.
158 // Maps the given array of sparse indices to an array of compact indices.
159 // Assumes the input is sorted. The output indices are sorted and uniqued.
160 // Return value is the number of "missed" features, being features that
161 // don't map to the compact feature space.
162 int MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const;
163
164private:
165 // Returns the master compact index for a given compact index.
166 // During a multiple merge operation, several compact indices may be
167 // combined, so we need to be able to find the master of all.
168 int MasterCompactIndex(int compact_index) const {
169 while (compact_index >= 0 && sparse_map_[compact_map_[compact_index]] != compact_index) {
170 compact_index = sparse_map_[compact_map_[compact_index]];
171 }
172 return compact_index;
173 }
174
175 // Direct look-up of the compact index for each element in sparse space.
176 std::vector<int32_t> sparse_map_;
177};
178
179} // namespace tesseract.
180
181#endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:205
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:236
virtual int SparseSize() const
Definition: indexmapbidi.h:59
int CompactSize() const
Definition: indexmapbidi.h:63
std::vector< int32_t > compact_map_
Definition: indexmapbidi.h:82
int CompactToSparse(int compact_index) const
Definition: indexmapbidi.h:55
int SparseSize() const override
Definition: indexmapbidi.h:144
int SparseToCompact(int sparse_index) const override
Definition: indexmapbidi.h:140
bool IsCompactDeleted(int index) const
Definition: indexmapbidi.h:132
#define TESS_API
Definition: export.h:32