tesseract v5.3.3.20231005
bitvector.h
Go to the documentation of this file.
1
2// File: bitvector.h
3// Description: Class replacement for BITVECTOR.
4// Author: Ray Smith
5//
6// (C) Copyright 2011, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_CCUTIL_BITVECTOR_H_
20#define TESSERACT_CCUTIL_BITVECTOR_H_
21
22#include <tesseract/export.h>
23
24#include <cassert>
25#include <cstdint> // for uint8_t
26#include <cstdio>
27#include <vector> // for std::vector
28
29namespace tesseract {
30
31// Trivial class to encapsulate a fixed-length array of bits, with
32// Serialize/DeSerialize. Replaces the old macros.
34public:
35 // Fast lookup table to get the first least significant set bit in a byte.
36 // For zero, the table has 255, but since it is a special case, most code
37 // that uses this table will check for zero before looking up lsb_index_.
38 static const uint8_t lsb_index_[256];
39 // Fast lookup table to get the residual bits after zeroing the least
40 // significant set bit in a byte.
41 static const uint8_t lsb_eroded_[256];
42 // Fast lookup table to give the number of set bits in a byte.
43 static const int hamming_table_[256];
44
45 BitVector() = default;
46 // Initializes the array to length * false.
47 explicit BitVector(int length) : bit_size_(length), array_(WordLength()) {
48 }
49 BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) {
50 }
51 BitVector &operator=(const BitVector &src);
52 ~BitVector() = default;
53
54 // Initializes the array to length * false.
55 void Init(int length);
56
57 int empty() const {
58 return bit_size_ == 0;
59 }
60
61 // Returns the number of bits that are accessible in the vector.
62 int size() const {
63 return bit_size_;
64 }
65
66 // Writes to the given file. Returns false in case of error.
67 bool Serialize(FILE *fp) const;
68 // Reads from the given file. Returns false in case of error.
69 // If swap is true, assumes a big/little-endian swap is needed.
70 bool DeSerialize(bool swap, FILE *fp);
71
72 void SetAllFalse();
73 void SetAllTrue();
74
75 // Accessors to set/reset/get bits.
76 // The range of index is [0, size()-1].
77 // There is debug-only bounds checking.
78 void SetBit(int index) {
79 array_[WordIndex(index)] |= BitMask(index);
80 }
81 void ResetBit(int index) {
82 array_[WordIndex(index)] &= ~BitMask(index);
83 }
84 void SetValue(int index, bool value) {
85 if (value) {
86 SetBit(index);
87 } else {
88 ResetBit(index);
89 }
90 }
91 bool At(int index) const {
92 return (array_[WordIndex(index)] & BitMask(index)) != 0;
93 }
94 bool operator[](int index) const {
95 return (array_[WordIndex(index)] & BitMask(index)) != 0;
96 }
97
98 // Returns the index of the next set bit after the given index.
99 // Useful for quickly iterating through the set bits in a sparse vector.
100 int NextSetBit(int prev_bit) const;
101
102 // Returns the number of set bits in the vector.
103 int NumSetBits() const;
104
105 // Logical in-place operations on whole bit vectors. Tries to do something
106 // sensible if they aren't the same size, but they should be really.
107 void operator|=(const BitVector &other);
108 void operator&=(const BitVector &other);
109 void operator^=(const BitVector &other);
110 // Set subtraction *this = v1 - v2.
111 void SetSubtract(const BitVector &v1, const BitVector &v2);
112
113private:
114 // Allocates memory for a vector of the given length.
115 void Alloc(int length);
116
117 // Computes the index to array_ for the given index, with debug range
118 // checking.
119 int WordIndex(int index) const {
120 assert(0 <= index && index < bit_size_);
121 return index / kBitFactor;
122 }
123 // Returns a mask to select the appropriate bit for the given index.
124 uint32_t BitMask(int index) const {
125 return 1 << (index & (kBitFactor - 1));
126 }
127 // Returns the number of array elements needed to represent the current
128 // bit_size_.
129 int WordLength() const {
130 return (bit_size_ + kBitFactor - 1) / kBitFactor;
131 }
132 // Returns the number of bytes consumed by the array_.
133 int ByteLength() const {
134 return WordLength() * sizeof(array_[0]);
135 }
136
137 // Number of bits in this BitVector.
138 int32_t bit_size_ = 0;
139 // Array of words used to pack the bits.
140 // Bits are stored little-endian by uint32_t word, ie by word first and then
141 // starting with the least significant bit in each word.
142 std::vector<uint32_t> array_;
143 // Number of bits in an array_ element.
144 static const int kBitFactor = sizeof(array_[0]) * 8;
145};
146
147} // namespace tesseract.
148
149#endif // TESSERACT_CCUTIL_BITVECTOR_H_
int value
TBOX & operator&=(TBOX &op1, const TBOX &op2)
Definition: rect.cpp:242
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:205
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:236
BitVector(const BitVector &src)
Definition: bitvector.h:49
void SetValue(int index, bool value)
Definition: bitvector.h:84
bool At(int index) const
Definition: bitvector.h:91
int empty() const
Definition: bitvector.h:57
bool operator[](int index) const
Definition: bitvector.h:94
BitVector(int length)
Definition: bitvector.h:47
void SetBit(int index)
Definition: bitvector.h:78
int size() const
Definition: bitvector.h:62
void ResetBit(int index)
Definition: bitvector.h:81
#define TESS_API
Definition: export.h:32