21 #include <sys/types.h> 39 if (lang.empty())
return true;
40 string dirname = output_dir +
"/" +
lang;
44 _mkdir(dirname.c_str());
46 mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
48 string filename = dirname +
"/" + lang + suffix;
49 if (writer ==
nullptr)
52 return (*writer)(data, filename.c_str());
58 if (filename.empty())
return STRING();
61 if (reader ==
nullptr)
64 read_result = (*reader)(filename.c_str(), &data);
65 if (read_result)
return STRING(&data[0], data.
size());
66 tprintf(
"Failed to read data from: %s\n", filename.c_str());
79 unicharset_data.
size());
80 return WriteFile(output_dir, lang,
".unicharset", unicharset_data, writer);
86 const string& output_dir,
const string&
lang,
103 tprintf(
"Null char=%d\n", null_char);
104 if (!recoder.
ComputeEncoding(unicharset, null_char, radical_table_data)) {
105 tprintf(
"Creation of encoded unicharset failed!!\n");
112 if (!recoder.
Serialize(&fp))
return false;
114 recoder_data.
size());
117 memcpy(&recoder_data[0], &encoding[0], encoding.
length());
121 return WriteFile(output_dir, lang, suffix.string(), recoder_data, writer);
133 tprintf(
"Reducing Trie to SquishedDawg\n");
134 std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg());
135 if (dawg ==
nullptr || dawg->NumEdges() == 0)
return false;
139 if (!dawg->write_squished_dawg(&fp))
return false;
153 tprintf(
"Must have non-empty puncs list to use language models!!\n");
162 if (!words.
empty() &&
175 if (!numbers.
empty() &&
186 const string& version_str,
const string& output_dir,
187 const string&
lang,
bool pass_through_recoder,
194 if (!version_str.empty()) {
199 if (!
WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
200 tprintf(
"Error writing unicharset!!\n");
203 tprintf(
"Config file is optional, continuing...\n");
206 string config_filename = script_dir +
"/" + lang +
"/" + lang +
".config";
208 if (config_file.
length() > 0) {
212 string radical_filename = script_dir +
"/radical-stroke.txt";
214 if (radical_data.
length() == 0) {
215 tprintf(
"Error reading radical code table %s\n", radical_filename.c_str());
218 if (!
WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer,
219 &radical_data, &traineddata)) {
220 tprintf(
"Error writing recoder!!\n");
223 if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset,
225 tprintf(
"Error during conversion of wordlists to DAWGs!!\n");
232 traineddata.
Serialize(&traineddata_data);
233 if (!
WriteFile(output_dir, lang,
".traineddata", traineddata_data, writer)) {
234 tprintf(
"Error writing output traineddata file!!\n");
STRING GetEncodingAsString(const UNICHARSET &unicharset) const
int CombineLangModel(const UNICHARSET &unicharset, const string &script_dir, const string &version_str, const string &output_dir, const string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
bool add_word_list(const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy)
void OpenWrite(GenericVector< char > *data)
void add_str_int(const char *str, int number)
bool WriteUnicharset(const UNICHARSET &unicharset, const string &output_dir, const string &lang, FileWriter writer, TessdataManager *traineddata)
bool WriteRecoder(const UNICHARSET &unicharset, bool pass_through, const string &output_dir, const string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)
bool save_to_file(const char *const filename) const
bool ComputeEncoding(const UNICHARSET &unicharset, int null_id, STRING *radical_stroke_table)
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
string VersionString() const
bool has_special_codes() const
void Serialize(GenericVector< char > *data) const
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
bool Serialize(TFile *fp) const
void SetupPassThrough(const UNICHARSET &unicharset)
bool WriteFile(const string &output_dir, const string &lang, const string &suffix, const GenericVector< char > &data, FileWriter writer)
STRING ReadFile(const string &filename, FileReader reader)
void OverwriteEntry(TessdataType type, const char *data, int size)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
void init_to_size(int size, T t)
void SetVersionString(const string &v_str)