24 "Unicharset to complete and use in encoding");
26 "Directory name for input script unicharsets");
28 "File listing words to use for the system dictionary");
35 "True if lang being processed is written right-to-left");
37 "If true, the recoder is a simple pass-through of the" 38 " unicharset. Otherwise, potentially a compression of it");
40 int main(
int argc,
char** argv) {
44 if (FLAGS_input_unicharset.empty() || FLAGS_script_dir.empty() ||
45 FLAGS_output_dir.empty() || FLAGS_lang.empty()) {
46 tprintf(
"Usage: %s --input_unicharset filename --script_dir dirname\n",
48 tprintf(
" --output_dir rootdir --lang lang [--lang_is_rtl]\n");
49 tprintf(
" [--words file --puncs file --numbers file]\n");
50 tprintf(
"Sets properties on the input unicharset file, and writes:\n");
51 tprintf(
"rootdir/lang/lang.charset_size=ddd.txt\n");
52 tprintf(
"rootdir/lang/lang.traineddata\n");
53 tprintf(
"rootdir/lang/lang.unicharset\n");
54 tprintf(
"If the 3 word lists are provided, the dawgs are also added to");
55 tprintf(
" the traineddata file.\n");
56 tprintf(
"The output unicharset and charset_size files are just for human");
67 if (!unicharset.
load_from_file(FLAGS_input_unicharset.c_str(),
false)) {
68 tprintf(
"Failed to load unicharset from %s\n",
69 FLAGS_input_unicharset.c_str());
72 tprintf(
"Loaded unicharset of size %d from file %s\n", unicharset.
size(),
73 FLAGS_input_unicharset.c_str());
76 tprintf(
"Setting unichar properties\n");
79 tprintf(
"Setting script properties\n");
83 unicharset, FLAGS_script_dir.c_str(), FLAGS_version_str.c_str(),
84 FLAGS_output_dir.c_str(), FLAGS_lang.c_str(), FLAGS_pass_through_recoder,
85 words, puncs, numbers, FLAGS_lang_is_rtl,
nullptr,
int CombineLangModel(const UNICHARSET &unicharset, const string &script_dir, const string &version_str, const string &output_dir, const string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
void SetScriptProperties(const string &script_dir, UNICHARSET *unicharset)
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
STRING_PARAM_FLAG(input_unicharset, "", "Unicharset to complete and use in encoding")
BOOL_PARAM_FLAG(lang_is_rtl, false, "True if lang being processed is written right-to-left")
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
bool load_from_file(const char *const filename, bool skip_fragments)
STRING ReadFile(const string &filename, FileReader reader)
int main(int argc, char **argv)