31 if (filename !=
nullptr && !tm.
Init(filename)) {
32 tprintf(
"Failed to read %s\n", filename);
40 if (filename !=
nullptr && !tm.
Init(filename)) {
41 tprintf(
"Failed to read %s\n", filename);
48 tprintf(
"Failed to deserialize LSTM in %s!\n", filename);
51 std::cout <<
"LSTM: network=" << recognizer.
GetNetwork()
52 <<
", int_mode=" << recognizer.
IsIntMode()
56 <<
", null_char=" << recognizer.
null_char()
59 <<
", adam_beta=" << recognizer.
GetAdamBeta() <<
'\n';
61 std::cout <<
"Layer Learning Rates: ";
63 for (
const auto &
id : layers) {
64 auto layer = recognizer.
GetLayer(
id);
65 std::cout <<
id <<
"(" << layer->
name() <<
")"
67 << (layers[layers.size() - 1] !=
id ?
", " :
"");
117int main(
int argc,
char **argv) {
118 tesseract::CheckSharedLibraryVersion();
122 if (argc > 1 && (!strcmp(argv[1],
"-v") || !strcmp(argv[1],
"--version"))) {
125 }
else if (argc == 2) {
126 printf(
"Combining tessdata files\n");
127 std::string lang = argv[1];
128 char *
last = &argv[1][strlen(argv[1]) - 1];
132 std::string output_file = lang;
133 output_file += kTrainedDataSuffix;
135 printf(
"Error combining tessdata files into %s\n", output_file.c_str());
137 printf(
"Output %s created successfully.\n", output_file.c_str());
139 }
else if (argc >= 4 &&
140 (strcmp(argv[1],
"-e") == 0 || strcmp(argv[1],
"-u") == 0)) {
142 if (!tm.
Init(argv[2])) {
143 tprintf(
"Failed to read %s\n", argv[2]);
146 printf(
"Extracting tessdata components from %s\n", argv[2]);
147 if (strcmp(argv[1],
"-e") == 0) {
148 for (
i = 3;
i < argc; ++
i) {
151 printf(
"Wrote %s\n", argv[
i]);
152 }
else if (errno == 0) {
154 "Not extracting %s, since this component"
159 printf(
"Error, could not extract %s: %s\n", argv[
i], strerror(errno));
165 std::string filename = argv[3];
166 char *
last = &argv[3][strlen(argv[3]) - 1];
170 filename += tesseract::kTessdataFileSuffixes[
i];
173 printf(
"Wrote %s\n", filename.c_str());
174 }
else if (errno != 0) {
175 printf(
"Error, could not extract %s: %s\n", filename.c_str(),
181 }
else if (argc >= 4 && strcmp(argv[1],
"-o") == 0) {
183 const char *new_traineddata_filename = argv[2];
184 std::string traineddata_filename = new_traineddata_filename;
185 traineddata_filename +=
".__tmp__";
186 if (rename(new_traineddata_filename, traineddata_filename.c_str()) != 0) {
187 tprintf(
"Failed to create a temporary file %s\n",
188 traineddata_filename.c_str());
193 tm.
Init(traineddata_filename.c_str());
197 }
else if (argc == 3 && strcmp(argv[1],
"-c") == 0) {
198 if (!tm.
Init(argv[2])) {
199 tprintf(
"Failed to read %s\n", argv[2]);
204 tprintf(
"No LSTM Component found in %s!\n", argv[2]);
209 tprintf(
"Failed to deserialize LSTM in %s!\n", argv[2]);
213 std::vector<char> lstm_data;
218 if (!tm.
SaveFile(argv[2],
nullptr)) {
219 tprintf(
"Failed to write modified traineddata:%s!\n", argv[2]);
222 }
else if (argc == 3 && strcmp(argv[1],
"-d") == 0) {
223 return list_components(tm, argv[2]);
224 }
else if (argc == 3 && strcmp(argv[1],
"-l") == 0) {
225 return list_network(tm, argv[2]);
226 }
else if (argc == 3 && strcmp(argv[1],
"-dl") == 0) {
227 int result = list_components(tm, argv[2]);
228 if (result == EXIT_SUCCESS) {
229 result = list_network(tm,
nullptr);
232 }
else if (argc == 3 && strcmp(argv[1],
"-ld") == 0) {
233 int result = list_network(tm, argv[2]);
234 if (result == EXIT_SUCCESS) {
235 result = list_components(tm,
nullptr);
240 "Usage for combining tessdata components:\n"
241 " %s language_data_path_prefix\n"
242 " (e.g. %s tessdata/eng.)\n\n",
245 "Usage for extracting tessdata components:\n"
246 " %s -e traineddata_file [output_component_file...]\n"
247 " (e.g. %s -e eng.traineddata eng.unicharset)\n\n",
250 "Usage for overwriting tessdata components:\n"
251 " %s -o traineddata_file [input_component_file...]\n"
252 " (e.g. %s -o eng.traineddata eng.unicharset)\n\n",
255 "Usage for unpacking all tessdata components:\n"
256 " %s -u traineddata_file output_path_prefix\n"
257 " (e.g. %s -u eng.traineddata tmp/eng.)\n\n",
260 "Usage for listing the network information\n"
261 " %s -l traineddata_file\n"
262 " (e.g. %s -l eng.traineddata)\n\n",
265 "Usage for listing directory of components:\n"
266 " %s -d traineddata_file\n\n",
269 "Usage for compacting LSTM component to int:\n"
270 " %s -c traineddata_file\n",
int main(int argc, char **argv)
void tprintf(const char *format,...)
static const char * Version()
void OpenWrite(std::vector< char > *data)
void OverwriteEntry(TessdataType type, const char *data, int size)
bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
bool GetComponent(TessdataType type, TFile *fp)
bool SaveFile(const char *filename, FileWriter writer) const
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
bool ExtractToFile(const char *filename)
bool Init(const char *data_file_name)
float GetAdamBeta() const
float GetMomentum() const
float learning_rate() const
int training_iteration() const
int sample_iteration() const
std::vector< std::string > EnumerateLayers() const
float GetLayerLearningRate(const std::string &id) const
const char * GetNetwork() const
Network * GetLayer(const std::string &id) const
bool Serialize(const TessdataManager *mgr, TFile *fp) const
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
const std::string & name() const