22 #ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 23 #define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 25 #include <unordered_map> 40 memset(code_, 0,
sizeof(code_));
44 void Set(
int index,
int value) {
46 if (length_ <= index) length_ = index + 1;
50 void Set3(
int code0,
int code1,
int code2) {
// Accessor: returns the current value of length_ without modifying the object.
// The Set fragment visible in this listing raises length_ to index + 1
// whenever an index at or beyond the current length is written.
57 int length()
const {
return length_; }
62 if (fp->
FWrite(&self_normalized_,
sizeof(self_normalized_), 1) != 1)
64 if (fp->
FWrite(&length_,
sizeof(length_), 1) != 1)
return false;
65 if (fp->
FWrite(code_,
sizeof(code_[0]), length_) != length_)
return false;
71 if (fp->
FRead(&self_normalized_,
sizeof(self_normalized_), 1) != 1)
73 if (fp->
FReadEndian(&length_,
sizeof(length_), 1) != 1)
return false;
74 if (fp->
FReadEndian(code_,
sizeof(code_[0]), length_) != length_)
79 if (length_ != other.length_)
return false;
80 for (
int i = 0; i < length_; ++i) {
81 if (code_[i] != other.code_[i])
return false;
89 for (
int i = 0; i < code.length_; ++i) {
90 result ^= code(i) << (7 * i);
// Stored as an inT8. The serialization fragments visible in this listing write
// it with FWrite and read it back with plain FRead (not FReadEndian), as a
// single item of sizeof(self_normalized_) bytes.
// NOTE(review): presumably a boolean-like flag; confirm its meaning against
// the code that sets it.
99 inT8 self_normalized_;
// Constants describing the block of precomposed Hangul syllables. The values
// match those used by the Unicode Hangul syllable (de)composition algorithm:
// the block starts at U+AC00 and holds
// kLCount * kVCount * kTCount = 19 * 21 * 28 = 11172 code points.
// First code point of the block.
142 static const int kFirstHangul = 0xac00;
// Number of code points in the block (== kLCount * kVCount * kTCount).
144 static const int kNumHangul = 11172;
// Number of distinct leading, vowel and trailing components respectively.
// NOTE(review): in the Unicode algorithm TCount (28) includes the "no trailing
// consonant" case; confirm DecomposeHangul follows the same convention.
147 static const int kLCount = 19;
148 static const int kVCount = 21;
149 static const int kTCount = 28;
// Declaration only; the definition is not part of this listing.
// Takes the source unicharset by const reference, an integer null_id and a
// pointer to a STRING named radical_stroke_table, and reports a bool result.
// NOTE(review): presumably builds the encoding for unicharset and returns
// false when it cannot be constructed -- confirm against the definition,
// including whether radical_stroke_table is read, modified, or both.
155 bool ComputeEncoding(
const UNICHARSET& unicharset,
int null_id,
156 STRING* radical_stroke_table);
// Declaration only; the definition is not part of this listing.
// Takes the unicharset by const reference and returns nothing.
// NOTE(review): the name suggests it configures an identity (pass-through)
// encoding for unicharset -- confirm against the definition.
159 void SetupPassThrough(
const UNICHARSET& unicharset);
// Declaration only; the definition is not part of this listing.
// Const member: takes a unichar_id and a pointer to a RecodedCharID, and
// returns an int.
// NOTE(review): presumably writes the encoding of unichar_id into *code and
// returns the code length -- confirm the return value for invalid ids.
171 int EncodeUnichar(
int unichar_id,
RecodedCharID* code)
const;
180 auto it = next_codes_.find(code);
181 return it == next_codes_.end() ? NULL : it->second;
186 auto it = final_codes_.find(code);
187 return it == final_codes_.end() ? NULL : it->second;
208 static bool DecomposeHangul(
int unicode,
int* leading,
int* vowel,
// Declaration only; the definition is not part of this listing.
// Takes a single int, encoded_null, and returns nothing.
// NOTE(review): the name suggests it renumbers code values to remove unused
// ones, with encoded_null identifying the null code -- confirm.
213 void DefragmentCodeValues(
int encoded_null);
// Declaration only; the definition is not part of this listing.
// Takes no arguments and returns nothing, so any result must be stored in
// member state.
// NOTE(review): presumably recomputes the range of code values from the
// current encoding -- confirm which member it updates.
215 void ComputeCodeRange();
225 std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash>
231 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
236 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
245 #endif // TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
int FWrite(const void *buffer, int size, int count)
bool DeSerialize(TFile *fp)
bool operator==(const RecodedCharID &other) const
int operator()(int index) const
static const int kMaxCodeLen
size_t operator()(const RecodedCharID &code) const
void Set(int index, int value)
void Truncate(int length)
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
int FReadEndian(void *buffer, int size, int count)
void Set3(int code0, int code1, int code2)
bool Serialize(TFile *fp) const
bool IsValidFirstCode(int code) const
int FRead(void *buffer, int size, int count)