67 int main(
int argc,
char **argv) {
71 printf(
"Combining tessdata files\n");
73 char*
last = &argv[1][strlen(argv[1])-1];
77 output_file += kTrainedDataSuffix;
79 printf(
"Error combining tessdata files into %s\n",
82 printf(
"Output %s created successfully.\n", output_file.
string());
84 }
else if (argc >= 4 && (strcmp(argv[1],
"-e") == 0 ||
85 strcmp(argv[1],
"-u") == 0)) {
87 if (!tm.
Init(argv[2])) {
88 tprintf(
"Failed to read %s\n", argv[2]);
91 printf(
"Extracting tessdata components from %s\n", argv[2]);
92 if (strcmp(argv[1],
"-e") == 0) {
93 for (i = 3; i < argc; ++i) {
95 printf(
"Wrote %s\n", argv[i]);
97 printf(
"Not extracting %s, since this component" 98 " is not present\n", argv[i]);
104 char*
last = &argv[3][strlen(argv[3])-1];
107 filename += tesseract::kTessdataFileSuffixes[i];
109 printf(
"Wrote %s\n", filename.
string());
113 }
else if (argc >= 4 && strcmp(argv[1],
"-o") == 0) {
115 const char *new_traineddata_filename = argv[2];
116 STRING traineddata_filename = new_traineddata_filename;
117 traineddata_filename +=
".__tmp__";
118 if (rename(new_traineddata_filename, traineddata_filename.
string()) != 0) {
119 tprintf(
"Failed to create a temporary file %s\n",
120 traineddata_filename.
string());
129 }
else if (argc == 3 && strcmp(argv[1],
"-c") == 0) {
130 if (!tm.
Init(argv[2])) {
131 tprintf(
"Failed to read %s\n", argv[2]);
136 tprintf(
"No LSTM Component found in %s!\n", argv[2]);
141 tprintf(
"Failed to deserialize LSTM in %s!\n", argv[2]);
150 if (!tm.
SaveFile(argv[2],
nullptr)) {
151 tprintf(
"Failed to write modified traineddata:%s!\n", argv[2]);
154 }
else if (argc == 3 && strcmp(argv[1],
"-d") == 0) {
158 printf(
"Usage for combining tessdata components:\n" 159 " %s language_data_path_prefix\n" 160 " (e.g. %s tessdata/eng.)\n\n", argv[0], argv[0]);
161 printf(
"Usage for extracting tessdata components:\n" 162 " %s -e traineddata_file [output_component_file...]\n" 163 " (e.g. %s -e eng.traineddata eng.unicharset)\n\n",
165 printf(
"Usage for overwriting tessdata components:\n" 166 " %s -o traineddata_file [input_component_file...]\n" 167 " (e.g. %s -o eng.traineddata eng.unicharset)\n\n",
169 printf(
"Usage for unpacking all tessdata components:\n" 170 " %s -u traineddata_file output_path_prefix\n" 171 " (e.g. %s -u eng.traineddata tmp/eng.)\n", argv[0], argv[0]);
173 "Usage for listing directory of components:\n" 174 " %s -d traineddata_file\n",
177 "Usage for compacting LSTM component to int:\n" 178 " %s -c traineddata_file\n",
bool SaveFile(const STRING &filename, FileWriter writer) const
void OpenWrite(GenericVector< char > *data)
bool ExtractToFile(const char *filename)
bool Serialize(const TessdataManager *mgr, TFile *fp) const
bool GetComponent(TessdataType type, TFile *fp)
bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
const char * string() const
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
void OverwriteEntry(TessdataType type, const char *data, int size)
bool Init(const char *data_file_name)
int main(int argc, char **argv)