117 {
118 tesseract::CheckSharedLibraryVersion();
119
122 if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) {
124 return EXIT_SUCCESS;
125 } else if (argc == 2) {
126 printf("Combining tessdata files\n");
127 std::string lang = argv[1];
128 char *
last = &argv[1][strlen(argv[1]) - 1];
130 lang += '.';
131 }
132 std::string output_file = lang;
133 output_file += kTrainedDataSuffix;
135 printf("Error combining tessdata files into %s\n", output_file.c_str());
136 } else {
137 printf("Output %s created successfully.\n", output_file.c_str());
138 }
139 } else if (argc >= 4 &&
140 (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) {
141
142 if (!tm.
Init(argv[2])) {
143 tprintf(
"Failed to read %s\n", argv[2]);
144 return EXIT_FAILURE;
145 }
146 printf("Extracting tessdata components from %s\n", argv[2]);
147 if (strcmp(argv[1], "-e") == 0) {
148 for (
i = 3;
i < argc; ++
i) {
149 errno = 0;
151 printf(
"Wrote %s\n", argv[
i]);
152 } else if (errno == 0) {
153 printf(
154 "Not extracting %s, since this component"
155 " is not present\n",
157 return EXIT_FAILURE;
158 } else {
159 printf(
"Error, could not extract %s: %s\n", argv[
i], strerror(errno));
160 return EXIT_FAILURE;
161 }
162 }
163 } else {
165 std::string filename = argv[3];
166 char *
last = &argv[3][strlen(argv[3]) - 1];
168 filename += '.';
169 }
170 filename += tesseract::kTessdataFileSuffixes[
i];
171 errno = 0;
173 printf("Wrote %s\n", filename.c_str());
174 } else if (errno != 0) {
175 printf("Error, could not extract %s: %s\n", filename.c_str(),
176 strerror(errno));
177 return EXIT_FAILURE;
178 }
179 }
180 }
181 } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) {
182
183 const char *new_traineddata_filename = argv[2];
184 std::string traineddata_filename = new_traineddata_filename;
185 traineddata_filename += ".__tmp__";
186 if (rename(new_traineddata_filename, traineddata_filename.c_str()) != 0) {
187 tprintf(
"Failed to create a temporary file %s\n",
188 traineddata_filename.c_str());
189 return EXIT_FAILURE;
190 }
191
192
193 tm.
Init(traineddata_filename.c_str());
194
195
197 } else if (argc == 3 && strcmp(argv[1], "-c") == 0) {
198 if (!tm.
Init(argv[2])) {
199 tprintf(
"Failed to read %s\n", argv[2]);
200 return EXIT_FAILURE;
201 }
204 tprintf(
"No LSTM Component found in %s!\n", argv[2]);
205 return EXIT_FAILURE;
206 }
209 tprintf(
"Failed to deserialize LSTM in %s!\n", argv[2]);
210 return EXIT_FAILURE;
211 }
213 std::vector<char> lstm_data;
217 lstm_data.size());
218 if (!tm.
SaveFile(argv[2],
nullptr)) {
219 tprintf(
"Failed to write modified traineddata:%s!\n", argv[2]);
220 return EXIT_FAILURE;
221 }
222 } else if (argc == 3 && strcmp(argv[1], "-d") == 0) {
223 return list_components(tm, argv[2]);
224 } else if (argc == 3 && strcmp(argv[1], "-l") == 0) {
225 return list_network(tm, argv[2]);
226 } else if (argc == 3 && strcmp(argv[1], "-dl") == 0) {
227 int result = list_components(tm, argv[2]);
228 if (result == EXIT_SUCCESS) {
229 result = list_network(tm, nullptr);
230 }
231 return result;
232 } else if (argc == 3 && strcmp(argv[1], "-ld") == 0) {
233 int result = list_network(tm, argv[2]);
234 if (result == EXIT_SUCCESS) {
235 result = list_components(tm, nullptr);
236 }
237 return result;
238 } else {
239 printf(
240 "Usage for combining tessdata components:\n"
241 " %s language_data_path_prefix\n"
242 " (e.g. %s tessdata/eng.)\n\n",
243 argv[0], argv[0]);
244 printf(
245 "Usage for extracting tessdata components:\n"
246 " %s -e traineddata_file [output_component_file...]\n"
247 " (e.g. %s -e eng.traineddata eng.unicharset)\n\n",
248 argv[0], argv[0]);
249 printf(
250 "Usage for overwriting tessdata components:\n"
251 " %s -o traineddata_file [input_component_file...]\n"
252 " (e.g. %s -o eng.traineddata eng.unicharset)\n\n",
253 argv[0], argv[0]);
254 printf(
255 "Usage for unpacking all tessdata components:\n"
256 " %s -u traineddata_file output_path_prefix\n"
257 " (e.g. %s -u eng.traineddata tmp/eng.)\n\n",
258 argv[0], argv[0]);
259 printf(
260 "Usage for listing the network information\n"
261 " %s -l traineddata_file\n"
262 " (e.g. %s -l eng.traineddata)\n\n",
263 argv[0], argv[0]);
264 printf(
265 "Usage for listing directory of components:\n"
266 " %s -d traineddata_file\n\n",
267 argv[0]);
268 printf(
269 "Usage for compacting LSTM component to int:\n"
270 " %s -c traineddata_file\n",
271 argv[0]);
272 return EXIT_FAILURE;
273 }
275 return EXIT_SUCCESS;
276}
void tprintf(const char *format,...)
static const char * Version()
void OpenWrite(std::vector< char > *data)
void OverwriteEntry(TessdataType type, const char *data, int size)
bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
bool GetComponent(TessdataType type, TFile *fp)
bool SaveFile(const char *filename, FileWriter writer) const
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
bool ExtractToFile(const char *filename)
bool Init(const char *data_file_name)
bool Serialize(const TessdataManager *mgr, TFile *fp) const
bool DeSerialize(const TessdataManager *mgr, TFile *fp)