tesseract  4.00.00dev
tesseractmain.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tesseractmain.cpp (Formerly tessedit.c)
3  * Description: Main program for merge of tess and editor.
4  * Author: Ray Smith
5  * Created: Tue Jan 07 15:21:46 GMT 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include <iostream>
26 
27 #include "allheaders.h"
28 #include "baseapi.h"
29 #include "basedir.h"
30 #include "dict.h"
31 #include "openclwrapper.h"
32 #include "osdetect.h"
33 #include "renderer.h"
34 #include "simddetect.h"
35 #include "strngs.h"
36 #include "tprintf.h"
37 
38 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
39 
40 #include <tiffio.h>
41 
// libtiff warning callback used on Windows builds.
// Writes "<module>: Warning, <formatted message>.\n" to stderr; the
// "<module>: " prefix is omitted when module is NULL.
//   module - name of the reporting module, may be NULL.
//   fmt    - printf-style format string for the warning text.
//   ap     - arguments matching fmt.
static void Win32WarningHandler(const char* module, const char* fmt,
                                va_list ap) {
  if (module) {
    fprintf(stderr, "%s: ", module);
  }
  fputs("Warning, ", stderr);
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
51 
52 #endif /* HAVE_TIFFIO_H && _WIN32 */
53 
54 static void PrintVersionInfo() {
55  char* versionStrP;
56 
57  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
58 
59  versionStrP = getLeptonicaVersion();
60  printf(" %s\n", versionStrP);
61  lept_free(versionStrP);
62 
63  versionStrP = getImagelibVersions();
64  printf(" %s\n", versionStrP);
65  lept_free(versionStrP);
66 
67 #ifdef USE_OPENCL
68  cl_platform_id platform[4];
69  cl_uint num_platforms;
70 
71  printf(" OpenCL info:\n");
72  if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
73  printf(" Found %u platform(s).\n", num_platforms);
74  for (unsigned n = 0; n < num_platforms; n++) {
75  char info[256];
76  if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
77  CL_SUCCESS) {
78  printf(" Platform %u name: %s.\n", n + 1, info);
79  }
80  if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
81  CL_SUCCESS) {
82  printf(" Version: %s.\n", info);
83  }
84  cl_device_id devices[2];
85  cl_uint num_devices;
86  if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
87  &num_devices) == CL_SUCCESS) {
88  printf(" Found %u device(s).\n", num_devices);
89  for (unsigned i = 0; i < num_devices; ++i) {
90  if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
91  CL_SUCCESS) {
92  printf(" Device %u name: %s.\n", i + 1, info);
93  }
94  }
95  }
96  }
97  }
98 #endif
99  if (SIMDDetect::IsAVX512BWAvailable()) printf(" Found AVX512BW\n");
100  if (SIMDDetect::IsAVX512FAvailable()) printf(" Found AVX512F\n");
101  if (SIMDDetect::IsAVX2Available()) printf(" Found AVX2\n");
102  if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n");
103  if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
104 }
105 
// Writes the list of page segmentation modes (shown for --help-psm and as
// part of the extended help) to stdout.
static void PrintHelpForPSM() {
  static const char kPsmHelp[] =
      "Page segmentation modes:\n"
      " 0 Orientation and script detection (OSD) only.\n"
      " 1 Automatic page segmentation with OSD.\n"
      " 2 Automatic page segmentation, but no OSD, or OCR.\n"
      " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4 Assume a single column of text of variable sizes.\n"
      " 5 Assume a single uniform block of vertically aligned text.\n"
      " 6 Assume a single uniform block of text.\n"
      " 7 Treat the image as a single text line.\n"
      " 8 Treat the image as a single word.\n"
      " 9 Treat the image as a single word in a circle.\n"
      " 10 Treat the image as a single character.\n"
      " 11 Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12 Sparse text with OSD.\n"
      " 13 Raw line. Treat the image as a single text line,\n"
      " bypassing hacks that are Tesseract-specific.\n";

  fputs(kPsmHelp, stdout);
}
128 
// Writes the list of OCR engine modes (shown for --help-oem and as part of
// the extended help) to stdout.
static void PrintHelpForOEM() {
  static const char kOemHelp[] =
      "OCR Engine modes:\n"
      " 0 Legacy engine only.\n"
      " 1 Neural nets LSTM engine only.\n"
      " 2 Legacy + LSTM engines.\n"
      " 3 Default, based on what is available.\n";

  fputs(kOemHelp, stdout);
}
139 
140 static void PrintHelpExtra(const char* program) {
141  printf(
142  "Usage:\n"
143  " %s --help | --help-extra | --help-psm | --help-oem | --version\n"
144  " %s --list-langs [--tessdata-dir PATH]\n"
145  " %s --print-parameters [options...] [configfile...]\n"
146  " %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
147  "\n"
148  "OCR options:\n"
149  " --tessdata-dir PATH Specify the location of tessdata path.\n"
150  " --user-words PATH Specify the location of user words file.\n"
151  " --user-patterns PATH Specify the location of user patterns file.\n"
152  " -l LANG[+LANG] Specify language(s) used for OCR.\n"
153  " -c VAR=VALUE Set value for config variables.\n"
154  " Multiple -c arguments are allowed.\n"
155  " --psm NUM Specify page segmentation mode.\n"
156  " --oem NUM Specify OCR Engine mode.\n"
157  "NOTE: These options must occur before any configfile.\n"
158  "\n",
159  program, program, program, program
160  );
161 
162  PrintHelpForPSM();
163  printf("\n");
164  PrintHelpForOEM();
165 
166  printf(
167  "\n"
168  "Single options:\n"
169  " -h, --help Show minimal help message.\n"
170  " --help-extra Show extra help for advanced users.\n"
171  " --help-psm Show page segmentation modes.\n"
172  " --help-oem Show OCR Engine modes.\n"
173  " -v, --version Show version information.\n"
174  " --list-langs List available languages for tesseract engine.\n"
175  " --print-parameters Print tesseract parameters.\n"
176  );
177 }
178 
// Writes the short help text (--help, or no arguments at all) to stdout.
//   program - program name substituted into the usage lines.
static void PrintHelpMessage(const char* program) {
  // Usage synopsis; the only part that needs the program name.
  printf(
      "Usage:\n"
      " %s --help | --help-extra | --version\n"
      " %s --list-langs\n"
      " %s imagename outputbase [options...] [configfile...]\n"
      "\n",
      program, program, program);

  fputs(
      "OCR options:\n"
      " -l LANG[+LANG] Specify language(s) used for OCR.\n"
      "NOTE: These options must occur before any configfile.\n"
      "\n"
      "Single options:\n"
      " --help Show this help message.\n"
      " --help-extra Show extra help for advanced users.\n"
      " --version Show version information.\n"
      " --list-langs List available languages for tesseract engine.\n",
      stdout);
}
198 
199 static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
200  char** argv) {
201  char opt1[256], opt2[255];
202  for (int i = 0; i < argc; i++) {
203  if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
204  strncpy(opt1, argv[i + 1], 255);
205  opt1[255] = '\0';
206  char* p = strchr(opt1, '=');
207  if (!p) {
208  fprintf(stderr, "Missing = in configvar assignment\n");
209  exit(1);
210  }
211  *p = 0;
212  strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
213  opt2[254] = 0;
214  ++i;
215 
216  if (!api->SetVariable(opt1, opt2)) {
217  fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
218  }
219  }
220  }
221 }
222 
223 static void PrintLangsList(tesseract::TessBaseAPI* api) {
224  GenericVector<STRING> languages;
225  api->GetAvailableLanguagesAsVector(&languages);
226  printf("List of available languages (%d):\n", languages.size());
227  for (int index = 0; index < languages.size(); ++index) {
228  STRING& string = languages[index];
229  printf("%s\n", string.string());
230  }
231  api->End();
232 }
233 
// Prints the program banner ("Tesseract Open Source OCR Engine v... with
// Leptonica") through tprintf.
// NOTE(review): listing line 236, which carries the argument for the %s
// placeholder and closes the tprintf call, is missing from this excerpt.
// Presumably it passes the Tesseract version string - confirm against the
// original file before editing.
234 static void PrintBanner() {
235  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
237 }
238 
// Hands the page segmentation mode chosen on the command line to the API
// via SetPageSegMode().
//   api         - API object to configure.
//   pagesegmode - mode to apply.
// NOTE(review): listing line 255 (between the signature and the
// SetPageSegMode call) is missing from this excerpt; it may hold a condition
// guarding the call - confirm against the original file.
253 static void FixPageSegMode(tesseract::TessBaseAPI* api,
254  tesseract::PageSegMode pagesegmode) {
256  api->SetPageSegMode(pagesegmode);
257 }
258 
// Validates a numeric command-line value against the range [0, count).
//   arg   - value given on the command line.
//   mode  - option name used in the error message (e.g. "PSM", "OEM").
//   count - number of valid values; the largest accepted value is count - 1.
// Returns normally when arg is in range. Otherwise reports the problem on
// stderr and terminates the program with a failure status, so that scripts
// can detect the bad argument (an invalid value used to exit with status 0).
static void checkArgValues(int arg, const char* mode, int count) {
  if (arg >= 0 && arg < count) {
    return;
  }
  fprintf(stderr, "Invalid %s value, please enter a number between 0-%d\n",
          mode, count - 1);
  exit(EXIT_FAILURE);
}
265 
266 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
267 static void ParseArgs(const int argc, char** argv, const char** lang,
268  const char** image, const char** outputbase,
269  const char** datapath,
270  bool* list_langs, bool* print_parameters,
271  GenericVector<STRING>* vars_vec,
272  GenericVector<STRING>* vars_values, int* arg_i,
273  tesseract::PageSegMode* pagesegmode,
274  tesseract::OcrEngineMode* enginemode) {
275  if (argc == 1) {
276  PrintHelpMessage(argv[0]);
277  exit(0);
278  }
279 
280  if (argc == 2) {
281  if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
282  PrintHelpMessage(argv[0]);
283  exit(0);
284  }
285  if (strcmp(argv[1], "--help-extra") == 0) {
286  PrintHelpExtra(argv[0]);
287  exit(0);
288  }
289  if ((strcmp(argv[1], "--help-psm") == 0)) {
290  PrintHelpForPSM();
291  exit(0);
292  }
293  if ((strcmp(argv[1], "--help-oem") == 0)) {
294  PrintHelpForOEM();
295  exit(0);
296  }
297  if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
298  PrintVersionInfo();
299  exit(0);
300  }
301  }
302 
303  bool noocr = false;
304  int i = 1;
305  while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
306  if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
307  *lang = argv[i + 1];
308  ++i;
309  } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
310  *datapath = argv[i + 1];
311  ++i;
312  } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
313  vars_vec->push_back("user_words_file");
314  vars_values->push_back(argv[i + 1]);
315  ++i;
316  } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
317  vars_vec->push_back("user_patterns_file");
318  vars_values->push_back(argv[i + 1]);
319  ++i;
320  } else if (strcmp(argv[i], "--list-langs") == 0) {
321  noocr = true;
322  *list_langs = true;
323  } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
324  // The parameter -psm is deprecated and was replaced by --psm.
325  // It is still supported for compatibility reasons.
326  checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
327  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
328  ++i;
329  } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
330  checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
331  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
332  ++i;
333  } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
334  int oem = atoi(argv[i + 1]);
335  checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
336  *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
337  ++i;
338  } else if (strcmp(argv[i], "--print-parameters") == 0) {
339  noocr = true;
340  *print_parameters = true;
341  } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
342  // handled properly after api init
343  ++i;
344  } else if (*image == NULL) {
345  *image = argv[i];
346  } else if (*outputbase == NULL) {
347  *outputbase = argv[i];
348  }
349  ++i;
350  }
351 
352  *arg_i = i;
353 
354  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
355  *list_langs = true;
356  noocr = true;
357  }
358 
359  if (*outputbase == NULL && noocr == false) {
360  PrintHelpMessage(argv[0]);
361  exit(1);
362  }
363 }
364 
// Fills the renderer list according to the page segmentation mode and the
// boolean output variables of the API, then chains every renderer after the
// first one onto the first via insert().
// NOTE(review): listing lines 366-367, which declare the leading parameters,
// are missing from this excerpt. The body and the call in main() use `api`
// and `renderers` (in that order) ahead of pagesegmode and outputbase;
// confirm their exact types against the original file.
//   pagesegmode - PSM_OSD_ONLY selects the OSD renderer alone.
//   outputbase  - passed to the constructor of every renderer.
365 static void PreloadRenderers(
368  tesseract::PageSegMode pagesegmode, const char* outputbase) {
369  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
370  renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
371  } else {
// NOTE(review): b and font_info are not initialized and the return value of
// GetBoolVariable is ignored; if a lookup can fail, the tests below read an
// indeterminate value - confirm.
372  bool b;
373  api->GetBoolVariable("tessedit_create_hocr", &b);
374  if (b) {
375  bool font_info;
376  api->GetBoolVariable("hocr_font_info", &font_info);
377  renderers->push_back(
378  new tesseract::TessHOcrRenderer(outputbase, font_info));
379  }
380 
381  api->GetBoolVariable("tessedit_create_tsv", &b);
382  if (b) {
// The TSV renderer is configured from the same "hocr_font_info" variable.
383  bool font_info;
384  api->GetBoolVariable("hocr_font_info", &font_info);
385  renderers->push_back(
386  new tesseract::TessTsvRenderer(outputbase, font_info));
387  }
388 
389  api->GetBoolVariable("tessedit_create_pdf", &b);
390  if (b) {
391  bool textonly;
392  api->GetBoolVariable("textonly_pdf", &textonly);
393  renderers->push_back(new tesseract::TessPDFRenderer(
394  outputbase, api->GetDatapath(), textonly));
395  }
396 
397  api->GetBoolVariable("tessedit_write_unlv", &b);
398  if (b) {
399  renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
400  }
401 
402  api->GetBoolVariable("tessedit_create_boxfile", &b);
403  if (b) {
404  renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
405  }
406 
// Plain text is also the fallback when no other renderer was selected.
407  api->GetBoolVariable("tessedit_create_txt", &b);
408  if (b || renderers->empty()) {
409  renderers->push_back(new tesseract::TessTextRenderer(outputbase));
410  }
411  }
412 
413  if (!renderers->empty()) {
414  // Since the PointerVector auto-deletes, null-out the renderers that are
415  // added to the root, and leave the root in the vector.
416  for (int r = 1; r < renderers->size(); ++r) {
417  (*renderers)[0]->insert((*renderers)[r]);
418  (*renderers)[r] = NULL;
419  }
420  }
421 }
422 
423 /**********************************************************************
424  * main()
425  *
426  **********************************************************************/
427 
// Program entry point. Parses the command line, initializes the API, then
// either lists the languages, prints the parameters, reports the layout
// analysis result (PSM_AUTO_ONLY) or processes the image through the
// renderers.
// Returns EXIT_SUCCESS on success, EXIT_FAILURE when initialization, layout
// analysis or page processing fails, and 2 when the image cannot be read in
// the PSM_AUTO_ONLY branch.
// NOTE(review): several listing lines are missing from this excerpt
// (436-437, 469, 472, 513-514, 517, 543, 560). The affected spots are marked
// below; confirm them against the original file before changing this code.
428 int main(int argc, char** argv) {
429  const char* lang = "eng";
430  const char* image = NULL;
431  const char* outputbase = NULL;
432  const char* datapath = NULL;
433  bool list_langs = false;
434  bool print_parameters = false;
435  int arg_i = 1;
// NOTE(review): listing lines 436-437 are missing; presumably they declare
// pagesegmode and enginemode, which are used below - confirm.
438  /* main() calls functions like ParseArgs which call exit().
439  * This results in memory leaks if vars_vec and vars_values are
440  * declared as auto variables (destructor is not called then). */
441  static GenericVector<STRING> vars_vec;
442  static GenericVector<STRING> vars_values;
443 
444 #if !defined(DEBUG)
445  // Disable debugging and informational messages from Leptonica.
446  setMsgSeverity(L_SEVERITY_ERROR);
447 #endif
448 
449 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
450  /* Show libtiff warnings on console (not in GUI). */
451  TIFFSetWarningHandler(Win32WarningHandler);
452 #endif /* HAVE_TIFFIO_H && _WIN32 */
453 
454  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
455  &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
456  &enginemode);
457 
// The banner is requested only when an output base was given and it is
// neither "-" nor "stdout".
458  bool banner = false;
459  if (outputbase != NULL && strcmp(outputbase, "-") &&
460  strcmp(outputbase, "stdout")) {
461  banner = true;
462  }
463 
464  PERF_COUNT_START("Tesseract:main")
465 
466  // Call GlobalDawgCache here to create the global DawgCache object before
467  // the TessBaseAPI object. This fixes the order of destructor calls:
468  // first TessBaseAPI must be destructed, DawgCache must be the last object.
// NOTE(review): listing line 469 (the call described above) is missing.
470 
471  // Avoid memory leak caused by auto variable when return is called.
// NOTE(review): listing line 472 is missing; presumably it declares `api`,
// which is used from here on - confirm.
473 
474  api.SetOutputName(outputbase);
475 
// Arguments from arg_i onwards are handed to Init as its configs list.
476  int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
477  argc - arg_i, &vars_vec, &vars_values, false);
478 
479  SetVariablesFromCLArgs(&api, argc, argv);
480 
// Language listing is handled before init_failed is checked, so it is
// attempted even when Init reported a failure.
481  if (list_langs) {
482  PrintLangsList(&api);
483  return EXIT_SUCCESS;
484  }
485 
486  if (init_failed) {
487  fprintf(stderr, "Could not initialize tesseract.\n");
488  return EXIT_FAILURE;
489  }
490 
491  if (print_parameters) {
492  FILE* fout = stdout;
493  fprintf(stdout, "Tesseract parameters:\n");
494  api.PrintVariables(fout);
495  api.End();
496  return EXIT_SUCCESS;
497  }
498 
499  FixPageSegMode(&api, pagesegmode);
500 
// PSM_AUTO_ONLY: report orientation, writing direction, textline order and
// deskew angle, then return without running the renderers.
501  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
502  int ret_val = EXIT_SUCCESS;
503 
504  Pix* pixs = pixRead(image);
505  if (!pixs) {
506  fprintf(stderr, "Cannot open input file: %s\n", image);
507  return 2;
508  }
509 
510  api.SetImage(pixs);
511 
512  tesseract::Orientation orientation;
// NOTE(review): listing lines 513-514 are missing; presumably they declare
// `direction` and `order`, which are used below - confirm.
515  float deskew_angle;
516 
// NOTE(review): listing line 517 is missing; presumably it defines `it`,
// which is tested, used and deleted below - confirm.
518  if (it) {
519  it->Orientation(&orientation, &direction, &order, &deskew_angle);
520  tprintf(
521  "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
522  "Deskew angle: %.4f\n",
523  orientation, direction, order, deskew_angle);
524  } else {
525  ret_val = EXIT_FAILURE;
526  }
527 
528  delete it;
529 
530  pixDestroy(&pixs);
531  return ret_val;
532  }
533 
534  // set in_training_mode to true when using one of these configs:
535  // ambigs.train, box.train, box.train.stderr, linebox, rebox
536  bool b = false;
537  bool in_training_mode =
538  (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
539  (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
540  (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
541 
542  // Avoid memory leak caused by auto variable when exit() is called.
// NOTE(review): listing line 543 is missing; presumably it declares
// `renderers`, which is used below - confirm.
544 
// In training mode a single NULL entry is stored, so ProcessPages below is
// called with a NULL renderer.
545  if (in_training_mode) {
546  renderers.push_back(NULL);
547  } else {
548  PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
549  }
550 
551  if (!renderers.empty()) {
552  if (banner) PrintBanner();
553  bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
554  if (!succeed) {
555  fprintf(stderr, "Error during processing.\n");
556  return EXIT_FAILURE;
557  }
558  }
559 
// NOTE(review): listing line 560 is missing from this excerpt.
561 
562  return EXIT_SUCCESS;
563 }
bool empty() const
Definition: genericvector.h:91
static bool IsSSEAvailable()
Definition: simddetect.h:38
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:445
static bool IsAVX512FAvailable()
Definition: simddetect.h:30
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:172
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
Number of enum entries.
Definition: publictypes.h:182
#define PERF_COUNT_END
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:332
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:320
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:498
const char * GetDatapath()
Definition: baseapi.cpp:946
int size() const
Definition: genericvector.h:72
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
#define tprintf(...)
Definition: tprintf.h:31
int count(LIST var_list)
Definition: oldlist.cpp:103
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:505
int push_back(T object)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:561
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198
int main(int argc, char **argv)
#define PERF_COUNT_START(FUNCT_NAME)
static bool IsAVX512BWAvailable()
Definition: simddetect.h:34
Definition: strngs.h:45
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:167
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1052
void SetOutputName(const char *name)
Definition: baseapi.cpp:265
CMD_EVENTS mode
Definition: pgedit.cpp:116
static const char * Version()
Definition: baseapi.cpp:198
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:292
static bool IsAVX2Available()
Definition: simddetect.h:28
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:803
static bool IsAVXAvailable()
Definition: simddetect.h:26
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:272
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:168
Orientation and script detection only.
Definition: publictypes.h:164