21#ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_
22#define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_
24#include <unordered_map>
38 memset(code_, 0,
sizeof(code_));
46 if (length_ <= index) {
52 void Set3(
int code0,
int code1,
int code2) {
80 if (length_ != other.length_) {
83 for (
int i = 0;
i < length_; ++
i) {
84 if (code_[
i] != other.code_[
i]) {
94 for (
int i = 0;
i < code.length_; ++
i) {
95 result ^=
static_cast<uint64_t
>(code(
i)) << (7 *
i);
// 8-bit signed integer member.
// NOTE(review): the name suggests a flag stored as an integer rather than a
// bool; its exact meaning is not shown in this excerpt -- confirm.
104 int8_t self_normalized_;
// Constants for the Hangul syllable range. The three counts below multiply
// out to the range size: 19 * 21 * 28 == 11172 == kNumHangul.
// First code point of the range (0xac00).
147 static const int kFirstHangul = 0xac00;
// Number of code points in the range.
149 static const int kNumHangul = 11172;
// NOTE(review): L/V/T presumably stand for leading, vowel and trailing, matching
// the out-parameters of DecomposeHangul -- confirm.
152 static const int kLCount = 19;
153 static const int kVCount = 21;
154 static const int kTCount = 28;
// Declaration only; the definition is not part of this excerpt.
// Takes the unicharset by const reference, an integer null_id and a pointer
// to a std::string radical_stroke_table, and returns bool.
// NOTE(review): presumably the return value reports success and null_id
// identifies the null character in the unicharset -- confirm against the
// definition.
160 bool ComputeEncoding(
const UNICHARSET &unicharset,
int null_id, std::string *radical_stroke_table);
// Declaration only; the definition is not part of this excerpt.
// Takes the unicharset by const reference and returns nothing.
// NOTE(review): the name suggests an identity (pass-through) encoding is
// installed -- confirm against the definition.
163 void SetupPassThrough(
const UNICHARSET &unicharset);
// Declaration only; the definition is not part of this excerpt.
// Takes a vector of RecodedCharID by const reference and returns nothing.
// NOTE(review): presumably installs the given codes directly as the
// encoding -- confirm against the definition.
166 void SetupDirect(
const std::vector<RecodedCharID> &codes);
// Declaration only; the definition is not part of this excerpt.
// Const member: takes an unsigned unichar_id and a pointer to a
// RecodedCharID, and returns int.
// NOTE(review): presumably *code is an output and the return value is the
// length of the encoding -- confirm against the definition.
177 int EncodeUnichar(
unsigned unichar_id,
RecodedCharID *code)
const;
183 return is_valid_start_[code];
188 auto it = next_codes_.find(code);
189 return it == next_codes_.end() ? nullptr : it->second;
194 auto it = final_codes_.find(code);
195 return it == final_codes_.end() ? nullptr : it->second;
// Declaration only; the definition is not part of this excerpt.
// Const member: takes the unicharset by const reference and returns a
// std::string by value.
// NOTE(review): the format of the returned text is not visible here.
211 std::string GetEncodingAsString(
const UNICHARSET &unicharset)
const;
// Declaration only; the definition is not part of this excerpt.
// Static helper: takes an integer unicode value plus three int pointers
// (leading, vowel, trailing) and returns bool.
// NOTE(review): presumably the three pointers are outputs and the return
// value says whether the input was a Hangul syllable -- confirm against the
// definition.
216 static bool DecomposeHangul(
int unicode,
int *leading,
int *vowel,
int *trailing);
// Declaration only; the definition is not part of this excerpt.
// Takes an integer encoded_null and returns nothing.
// NOTE(review): the name suggests gaps in the used code values are
// removed -- confirm against the definition.
220 void DefragmentCodeValues(
int encoded_null);
// Declaration only; takes no arguments and returns nothing.
// NOTE(review): which members it updates is not visible in this excerpt.
222 void ComputeCodeRange();
// Sequence of RecodedCharID values.
// NOTE(review): presumably indexed by unichar id -- confirm.
230 std::vector<RecodedCharID> encoder_;
// Hash map from a RecodedCharID to an int, hashed with
// RecodedCharID::RecodedCharIDHash.
// NOTE(review): presumably the reverse mapping of encoder_ -- confirm.
232 std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash> decoder_;
// Boolean table; a fragment elsewhere in this excerpt indexes it with a code
// value and returns the element.
234 std::vector<bool> is_valid_start_;
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
bool Serialize(FILE *fp, const std::vector< T > &data)
bool DeSerialize(std::string &data)
bool Serialize(const std::string &data)
bool DeSerialize(TFile *fp)
bool Serialize(TFile *fp) const
void Truncate(int length)
void Set(int index, int value)
static const int kMaxCodeLen
int operator()(int index) const
void Set3(int code0, int code1, int code2)
bool operator==(const RecodedCharID &other) const
uint64_t operator()(const RecodedCharID &code) const
const std::vector< int > * GetFinalCodes(const RecodedCharID &code) const
bool IsValidFirstCode(int code) const
const std::vector< int > * GetNextCodes(const RecodedCharID &code) const