tesseract v5.3.3.20231005
tesseract.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: tesseract.cpp
3 * Description: Main program for merge of tess and editor.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19// Include automatically generated configuration file if running autoconf
20#ifdef HAVE_CONFIG_H
21# include "config_auto.h"
22#endif
23
24#include <cerrno> // for errno
25#if defined(__USE_GNU)
26# include <cfenv> // for feenableexcept
27#endif
28#include <climits> // for INT_MIN, INT_MAX
29#include <cstdlib> // for std::getenv
30#include <iostream>
31#include <map> // for std::map
32#include <memory> // std::unique_ptr
33
34#include <allheaders.h>
35#include <tesseract/baseapi.h>
36#include "dict.h"
37#if defined(USE_OPENCL)
38# include "openclwrapper.h" // for OpenclDevice
39#endif
40#include <tesseract/renderer.h>
41#include "simddetect.h"
42#include "tesseractclass.h" // for AnyTessLang
43#include "tprintf.h" // for tprintf
44
45#ifdef _OPENMP
46# include <omp.h>
47#endif
48
49#if defined(HAVE_LIBARCHIVE)
50# include <archive.h>
51#endif
52#if defined(HAVE_LIBCURL)
53# include <curl/curl.h>
54#endif
55
56#if defined(_WIN32)
57# include <fcntl.h>
58# include <io.h>
59# if defined(HAVE_TIFFIO_H)
60
61# include <tiffio.h>
62
// libtiff error callback: prints "<module>: <message>.\n" on stderr.
// The "<module>: " prefix is left out when no module name is supplied.
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
  if (module) {
    fprintf(stderr, "%s: ", module);
  }
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
70
// libtiff warning callback: prints "<module>: Warning, <message>.\n" on
// stderr. The "<module>: " prefix is left out when no module name is supplied.
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
  if (module) {
    fprintf(stderr, "%s: ", module);
  }
  fputs("Warning, ", stderr);
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
79
80# endif /* HAVE_TIFFIO_H */
81
82class AutoWin32ConsoleOutputCP {
83public:
84 explicit AutoWin32ConsoleOutputCP(UINT codeCP) {
85 oldCP_ = GetConsoleOutputCP();
86 SetConsoleOutputCP(codeCP);
87 }
88 ~AutoWin32ConsoleOutputCP() {
89 SetConsoleOutputCP(oldCP_);
90 }
91
92private:
93 UINT oldCP_;
94};
95
96static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
97
98#endif // _WIN32
99
100using namespace tesseract;
101
102static void PrintVersionInfo() {
103 char *versionStrP;
104
105 printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
106
107 versionStrP = getLeptonicaVersion();
108 printf(" %s\n", versionStrP);
109 lept_free(versionStrP);
110
111 versionStrP = getImagelibVersions();
112 printf(" %s\n", versionStrP);
113 lept_free(versionStrP);
114
115#ifdef USE_OPENCL
116 cl_platform_id platform[4];
117 cl_uint num_platforms;
118
119 printf(" OpenCL info:\n");
120 if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
121 printf(" Found %u platform(s).\n", num_platforms);
122 for (unsigned n = 0; n < num_platforms; n++) {
123 char info[256];
124 if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
125 printf(" Platform %u name: %s.\n", n + 1, info);
126 }
127 if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
128 printf(" Version: %s.\n", info);
129 }
130 cl_device_id devices[2];
131 cl_uint num_devices;
132 if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
133 printf(" Found %u device(s).\n", num_devices);
134 for (unsigned i = 0; i < num_devices; ++i) {
135 if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
136 printf(" Device %u name: %s.\n", i + 1, info);
137 }
138 }
139 }
140 }
141 }
142#endif
143#if defined(HAVE_NEON) || defined(__aarch64__)
145 printf(" Found NEON\n");
146#else
148 printf(" Found AVX512BW\n");
149 }
151 printf(" Found AVX512F\n");
152 }
154 printf(" Found AVX512VNNI\n");
155 }
157 printf(" Found AVX2\n");
158 }
160 printf(" Found AVX\n");
161 }
163 printf(" Found FMA\n");
164 }
166 printf(" Found SSE4.1\n");
167 }
168#endif
169#ifdef _OPENMP
170 printf(" Found OpenMP %d\n", _OPENMP);
171#endif
172#if defined(HAVE_LIBARCHIVE)
173# if ARCHIVE_VERSION_NUMBER >= 3002000
174 printf(" Found %s\n", archive_version_details());
175# else
176 printf(" Found %s\n", archive_version_string());
177# endif // ARCHIVE_VERSION_NUMBER
178#endif // HAVE_LIBARCHIVE
179#if defined(HAVE_LIBCURL)
180 printf(" Found %s\n", curl_version());
181#endif
182}
183
// Writes the list of page segmentation modes to stdout. When the legacy
// engine is compiled out, a note that the OSD modes are disabled follows.
static void PrintHelpForPSM() {
  static const char kModes[] =
      "Page segmentation modes:\n"
      " 0 Orientation and script detection (OSD) only.\n"
      " 1 Automatic page segmentation with OSD.\n"
      " 2 Automatic page segmentation, but no OSD, or OCR. (not "
      "implemented)\n"
      " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4 Assume a single column of text of variable sizes.\n"
      " 5 Assume a single uniform block of vertically aligned text.\n"
      " 6 Assume a single uniform block of text.\n"
      " 7 Treat the image as a single text line.\n"
      " 8 Treat the image as a single word.\n"
      " 9 Treat the image as a single word in a circle.\n"
      " 10 Treat the image as a single character.\n"
      " 11 Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12 Sparse text with OSD.\n"
      " 13 Raw line. Treat the image as a single text line,\n"
      " bypassing hacks that are Tesseract-specific.\n";

  fputs(kModes, stdout);
#ifdef DISABLED_LEGACY_ENGINE
  fputs("\nNOTE: The OSD modes are currently disabled.\n", stdout);
#endif
}
212
#ifndef DISABLED_LEGACY_ENGINE
// Writes the list of OCR engine modes to stdout.
static void PrintHelpForOEM() {
  static const char kModes[] =
      "OCR Engine modes:\n"
      " 0 Legacy engine only.\n"
      " 1 Neural nets LSTM engine only.\n"
      " 2 Legacy + LSTM engines.\n"
      " 3 Default, based on what is available.\n";

  fputs(kModes, stdout);
}
#endif // ndef DISABLED_LEGACY_ENGINE
225
226static void PrintHelpExtra(const char *program) {
227 printf(
228 "Usage:\n"
229 " %s --help | --help-extra | --help-psm | "
230#ifndef DISABLED_LEGACY_ENGINE
231 "--help-oem | "
232#endif
233 "--version\n"
234 " %s --list-langs [--tessdata-dir PATH]\n"
235#ifndef DISABLED_LEGACY_ENGINE
236 " %s --print-fonts-table [options...] [configfile...]\n"
237#endif // ndef DISABLED_LEGACY_ENGINE
238 " %s --print-parameters [options...] [configfile...]\n"
239 " %s imagename|imagelist|stdin outputbase|stdout [options...] "
240 "[configfile...]\n"
241 "\n"
242 "OCR options:\n"
243 " --tessdata-dir PATH Specify the location of tessdata path.\n"
244 " --user-words PATH Specify the location of user words file.\n"
245 " --user-patterns PATH Specify the location of user patterns file.\n"
246 " --dpi VALUE Specify DPI for input image.\n"
247 " --loglevel LEVEL Specify logging level. LEVEL can be\n"
248 " ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL or OFF.\n"
249 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
250 " -c VAR=VALUE Set value for config variables.\n"
251 " Multiple -c arguments are allowed.\n"
252 " --psm NUM Specify page segmentation mode.\n"
253#ifndef DISABLED_LEGACY_ENGINE
254 " --oem NUM Specify OCR Engine mode.\n"
255#endif
256 "NOTE: These options must occur before any configfile.\n"
257 "\n",
258 program, program, program, program
259#ifndef DISABLED_LEGACY_ENGINE
260 , program
261#endif // ndef DISABLED_LEGACY_ENGINE
262 );
263
264 PrintHelpForPSM();
265#ifndef DISABLED_LEGACY_ENGINE
266 printf("\n");
267 PrintHelpForOEM();
268#endif
269
270 printf(
271 "\n"
272 "Single options:\n"
273 " -h, --help Show minimal help message.\n"
274 " --help-extra Show extra help for advanced users.\n"
275 " --help-psm Show page segmentation modes.\n"
276#ifndef DISABLED_LEGACY_ENGINE
277 " --help-oem Show OCR Engine modes.\n"
278#endif
279 " -v, --version Show version information.\n"
280 " --list-langs List available languages for tesseract engine.\n"
281#ifndef DISABLED_LEGACY_ENGINE
282 " --print-fonts-table Print tesseract fonts table.\n"
283#endif // ndef DISABLED_LEGACY_ENGINE
284 " --print-parameters Print tesseract parameters.\n");
285}
286
// Writes the minimal help text to stdout. `program` is the name shown in
// the usage lines.
static void PrintHelpMessage(const char *program) {
  // Only the usage lines contain the program name ...
  printf(
      "Usage:\n"
      " %s --help | --help-extra | --version\n"
      " %s --list-langs\n"
      " %s imagename outputbase [options...] [configfile...]\n"
      "\n",
      program, program, program);
  // ... the remainder is fixed text.
  fputs(
      "OCR options:\n"
      " -l LANG[+LANG] Specify language(s) used for OCR.\n"
      "NOTE: These options must occur before any configfile.\n"
      "\n"
      "Single options:\n"
      " --help Show this help message.\n"
      " --help-extra Show extra help for advanced users.\n"
      " --version Show version information.\n"
      " --list-langs List available languages for tesseract "
      "engine.\n",
      stdout);
}
306
307static bool SetVariablesFromCLArgs(tesseract::TessBaseAPI &api, int argc, char **argv) {
308 bool success = true;
309 char opt1[256], opt2[255];
310 for (int i = 0; i < argc; i++) {
311 if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
312 strncpy(opt1, argv[i + 1], 255);
313 opt1[255] = '\0';
314 char *p = strchr(opt1, '=');
315 if (!p) {
316 fprintf(stderr, "Missing = in configvar assignment\n");
317 success = false;
318 break;
319 }
320 *p = 0;
321 strncpy(opt2, strchr(argv[i + 1], '=') + 1, sizeof(opt2) - 1);
322 opt2[254] = 0;
323 ++i;
324
325 if (!api.SetVariable(opt1, opt2)) {
326 fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
327 }
328 }
329 }
330 return success;
331}
332
333static void PrintLangsList(tesseract::TessBaseAPI &api) {
334 std::vector<std::string> languages;
335 api.GetAvailableLanguagesAsVector(&languages);
336 printf("List of available languages in \"%s\" (%zu):\n",
337 api.GetDatapath(), languages.size());
338 for (const auto &language : languages) {
339 printf("%s\n", language.c_str());
340 }
341 api.End();
342}
343
358static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
360 api.SetPageSegMode(pagesegmode);
361 }
362}
363
// Checks that `arg` lies in the half-open range [0, count).
//
// `mode` is the option name used in the diagnostic (for example "PSM").
// Returns true when the value is valid. Otherwise prints a message on
// stderr - not stdout, which may carry the OCR result - and returns false.
static bool checkArgValues(int arg, const char *mode, int count) {
  if (arg >= 0 && arg < count) {
    return true;
  }
  fprintf(stderr, "Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
  return false;
}
371
372// NOTE: arg_i is used here to avoid ugly *i so many times in this function
373static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
374 const char **outputbase, const char **datapath, l_int32 *dpi,
375 bool *list_langs, bool *print_parameters, bool *print_fonts_table,
376 std::vector<std::string> *vars_vec, std::vector<std::string> *vars_values,
377 l_int32 *arg_i, tesseract::PageSegMode *pagesegmode,
378 tesseract::OcrEngineMode *enginemode) {
379 bool noocr = false;
380 int i;
381 for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
382 if (*image != nullptr && *outputbase == nullptr) {
383 // outputbase follows image, don't allow options at that position.
384 *outputbase = argv[i];
385 } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
386 PrintHelpMessage(argv[0]);
387 noocr = true;
388 } else if (strcmp(argv[i], "--help-extra") == 0) {
389 PrintHelpExtra(argv[0]);
390 noocr = true;
391 } else if ((strcmp(argv[i], "--help-psm") == 0)) {
392 PrintHelpForPSM();
393 noocr = true;
394#ifndef DISABLED_LEGACY_ENGINE
395 } else if ((strcmp(argv[i], "--help-oem") == 0)) {
396 PrintHelpForOEM();
397 noocr = true;
398#endif
399 } else if ((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
400 PrintVersionInfo();
401 noocr = true;
402 } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
403 *lang = argv[i + 1];
404 ++i;
405 } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
406 *datapath = argv[i + 1];
407 ++i;
408 } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
409 *dpi = atoi(argv[i + 1]);
410 ++i;
411 } else if (strcmp(argv[i], "--loglevel") == 0 && i + 1 < argc) {
412 // Allow the log levels which are used by log4cxx.
413 const std::string loglevel_string = argv[++i];
414 static const std::map<const std::string, int> loglevels {
415 {"ALL", INT_MIN},
416 {"TRACE", 5000},
417 {"DEBUG", 10000},
418 {"INFO", 20000},
419 {"WARN", 30000},
420 {"ERROR", 40000},
421 {"FATAL", 50000},
422 {"OFF", INT_MAX},
423 };
424 try {
425 auto loglevel = loglevels.at(loglevel_string);
426 log_level = loglevel;
427 } catch (const std::out_of_range &e) {
428 // TODO: Allow numeric argument?
429 tprintf("Error, unsupported --loglevel %s\n", loglevel_string.c_str());
430 return false;
431 }
432 } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
433 vars_vec->push_back("user_words_file");
434 vars_values->push_back(argv[i + 1]);
435 ++i;
436 } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
437 vars_vec->push_back("user_patterns_file");
438 vars_values->push_back(argv[i + 1]);
439 ++i;
440 } else if (strcmp(argv[i], "--list-langs") == 0) {
441 noocr = true;
442 *list_langs = true;
443 } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
444 if (!checkArgValues(atoi(argv[i + 1]), "PSM", tesseract::PSM_COUNT)) {
445 return false;
446 }
447 *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
448 ++i;
449 } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
450#ifndef DISABLED_LEGACY_ENGINE
451 int oem = atoi(argv[i + 1]);
452 if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) {
453 return false;
454 }
455 *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
456#endif
457 ++i;
458 } else if (strcmp(argv[i], "--print-parameters") == 0) {
459 noocr = true;
460 *print_parameters = true;
461#ifndef DISABLED_LEGACY_ENGINE
462 } else if (strcmp(argv[i], "--print-fonts-table") == 0) {
463 noocr = true;
464 *print_fonts_table = true;
465#endif // ndef DISABLED_LEGACY_ENGINE
466 } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
467 // handled properly after api init
468 ++i;
469 } else if (*image == nullptr) {
470 *image = argv[i];
471 } else {
472 // Unexpected argument.
473 fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
474 return false;
475 }
476 }
477
478 *arg_i = i;
479
480 if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
481 // OSD = orientation and script detection.
482 if (*lang != nullptr && strcmp(*lang, "osd")) {
483 // If the user explicitly specifies a language (other than osd)
484 // or a script, only orientation can be detected.
485 fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
486 } else {
487 // That mode requires osd.traineddata to detect orientation and script.
488 *lang = "osd";
489 }
490 }
491
492 if (*outputbase == nullptr && noocr == false) {
493 PrintHelpMessage(argv[0]);
494 return false;
495 }
496
497 return true;
498}
499
500static void PreloadRenderers(tesseract::TessBaseAPI &api,
501 std::vector<std::unique_ptr<TessResultRenderer>> &renderers,
502 tesseract::PageSegMode pagesegmode, const char *outputbase) {
503 if (pagesegmode == tesseract::PSM_OSD_ONLY) {
504#ifndef DISABLED_LEGACY_ENGINE
505 renderers.push_back(std::make_unique<tesseract::TessOsdRenderer>(outputbase));
506#endif // ndef DISABLED_LEGACY_ENGINE
507 } else {
508 bool error = false;
509 bool b;
510 api.GetBoolVariable("tessedit_create_hocr", &b);
511 if (b) {
512 bool font_info;
513 api.GetBoolVariable("hocr_font_info", &font_info);
514 auto renderer = std::make_unique<tesseract::TessHOcrRenderer>(outputbase, font_info);
515 if (renderer->happy()) {
516 renderers.push_back(std::move(renderer));
517 } else {
518 tprintf("Error, could not create hOCR output file: %s\n", strerror(errno));
519 error = true;
520 }
521 }
522
523 api.GetBoolVariable("tessedit_create_alto", &b);
524 if (b) {
525 auto renderer = std::make_unique<tesseract::TessAltoRenderer>(outputbase);
526 if (renderer->happy()) {
527 renderers.push_back(std::move(renderer));
528 } else {
529 tprintf("Error, could not create ALTO output file: %s\n", strerror(errno));
530 error = true;
531 }
532 }
533
534 api.GetBoolVariable("tessedit_create_tsv", &b);
535 if (b) {
536 bool font_info;
537 api.GetBoolVariable("hocr_font_info", &font_info);
538 auto renderer = std::make_unique<tesseract::TessTsvRenderer>(outputbase, font_info);
539 if (renderer->happy()) {
540 renderers.push_back(std::move(renderer));
541 } else {
542 tprintf("Error, could not create TSV output file: %s\n", strerror(errno));
543 error = true;
544 }
545 }
546
547 api.GetBoolVariable("tessedit_create_pdf", &b);
548 if (b) {
549#ifdef WIN32
550 if (_setmode(_fileno(stdout), _O_BINARY) == -1)
551 tprintf("ERROR: cin to binary: %s", strerror(errno));
552#endif // WIN32
553 bool textonly;
554 api.GetBoolVariable("textonly_pdf", &textonly);
555 auto renderer = std::make_unique<tesseract::TessPDFRenderer>(outputbase, api.GetDatapath(), textonly);
556 if (renderer->happy()) {
557 renderers.push_back(std::move(renderer));
558 } else {
559 tprintf("Error, could not create PDF output file: %s\n", strerror(errno));
560 error = true;
561 }
562 }
563
564 api.GetBoolVariable("tessedit_write_unlv", &b);
565 if (b) {
566 api.SetVariable("unlv_tilde_crunching", "true");
567 auto renderer = std::make_unique<tesseract::TessUnlvRenderer>(outputbase);
568 if (renderer->happy()) {
569 renderers.push_back(std::move(renderer));
570 } else {
571 tprintf("Error, could not create UNLV output file: %s\n", strerror(errno));
572 error = true;
573 }
574 }
575
576 api.GetBoolVariable("tessedit_create_lstmbox", &b);
577 if (b) {
578 auto renderer = std::make_unique<tesseract::TessLSTMBoxRenderer>(outputbase);
579 if (renderer->happy()) {
580 renderers.push_back(std::move(renderer));
581 } else {
582 tprintf("Error, could not create LSTM BOX output file: %s\n", strerror(errno));
583 error = true;
584 }
585 }
586
587 api.GetBoolVariable("tessedit_create_boxfile", &b);
588 if (b) {
589 auto renderer = std::make_unique<tesseract::TessBoxTextRenderer>(outputbase);
590 if (renderer->happy()) {
591 renderers.push_back(std::move(renderer));
592 } else {
593 tprintf("Error, could not create BOX output file: %s\n", strerror(errno));
594 error = true;
595 }
596 }
597
598 api.GetBoolVariable("tessedit_create_wordstrbox", &b);
599 if (b) {
600 auto renderer = std::make_unique<tesseract::TessWordStrBoxRenderer>(outputbase);
601 if (renderer->happy()) {
602 renderers.push_back(std::move(renderer));
603 } else {
604 tprintf("Error, could not create WordStr BOX output file: %s\n", strerror(errno));
605 error = true;
606 }
607 }
608
609 api.GetBoolVariable("tessedit_create_txt", &b);
610 if (b || (!error && renderers.empty())) {
611 // Create text output if no other output was requested
612 // even if text output was not explicitly requested unless
613 // there was an error.
614 auto renderer = std::make_unique<tesseract::TessTextRenderer>(outputbase);
615 if (renderer->happy()) {
616 renderers.push_back(std::move(renderer));
617 } else {
618 tprintf("Error, could not create TXT output file: %s\n", strerror(errno));
619 }
620 }
621 }
622
623 // Null-out the renderers that are
624 // added to the root, and leave the root in the vector.
625 for (size_t r = 1; r < renderers.size(); ++r) {
626 renderers[0]->insert(renderers[r].get());
627 renderers[r].release(); // at the moment insert() is owning
628 }
629}
630
631/**********************************************************************
632 * main()
633 *
634 **********************************************************************/
635
636int main(int argc, char **argv) {
637#if defined(__USE_GNU) && defined(HAVE_FEENABLEEXCEPT)
638 // Raise SIGFPE.
639# if defined(__clang__)
640 // clang creates code which causes some FP exceptions, so don't enable those.
641 feenableexcept(FE_DIVBYZERO);
642# else
643 feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
644# endif
645#endif
646 const char *lang = nullptr;
647 const char *image = nullptr;
648 const char *outputbase = nullptr;
649 const char *datapath = nullptr;
650 bool list_langs = false;
651 bool print_parameters = false;
652 bool print_fonts_table = false;
653 l_int32 dpi = 0;
654 int arg_i = 1;
656#ifdef DISABLED_LEGACY_ENGINE
657 auto enginemode = tesseract::OEM_LSTM_ONLY;
658#else
660#endif
661 std::vector<std::string> vars_vec;
662 std::vector<std::string> vars_values;
663
664 if (std::getenv("LEPT_MSG_SEVERITY")) {
665 // Get Leptonica message level from environment variable.
666 setMsgSeverity(L_SEVERITY_EXTERNAL);
667 } else {
668 // Disable debugging and informational messages from Leptonica.
669 setMsgSeverity(L_SEVERITY_ERROR);
670 }
671
672#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
673 /* Show libtiff errors and warnings on console (not in GUI). */
674 TIFFSetErrorHandler(Win32ErrorHandler);
675 TIFFSetWarningHandler(Win32WarningHandler);
676#endif // HAVE_TIFFIO_H && _WIN32
677
678 if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
679 &print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i,
680 &pagesegmode, &enginemode)) {
681 return EXIT_FAILURE;
682 }
683
684 bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
685
686 if (lang == nullptr && in_recognition_mode) {
687 // Set default language model if none was given and a model file is needed.
688 lang = "eng";
689 }
690
691 if (image == nullptr && in_recognition_mode) {
692 return EXIT_SUCCESS;
693 }
694
695 // Call GlobalDawgCache here to create the global DawgCache object before
696 // the TessBaseAPI object. This fixes the order of destructor calls:
697 // first TessBaseAPI must be destructed, DawgCache must be the last object.
699
700 TessBaseAPI api;
701
702 api.SetOutputName(outputbase);
703
704 const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
705 &vars_vec, &vars_values, false);
706
707 if (!SetVariablesFromCLArgs(api, argc, argv)) {
708 return EXIT_FAILURE;
709 }
710
711 // SIMD settings might be overridden by config variable.
713
714 if (list_langs) {
715 PrintLangsList(api);
716 return EXIT_SUCCESS;
717 }
718
719 if (init_failed) {
720 fprintf(stderr, "Could not initialize tesseract.\n");
721 return EXIT_FAILURE;
722 }
723
724 if (print_parameters) {
725 FILE *fout = stdout;
726 fprintf(stdout, "Tesseract parameters:\n");
727 api.PrintVariables(fout);
728 api.End();
729 return EXIT_SUCCESS;
730 }
731
732#ifndef DISABLED_LEGACY_ENGINE
733 if (print_fonts_table) {
734 FILE *fout = stdout;
735 fprintf(stdout, "Tesseract fonts table:\n");
736 api.PrintFontsTable(fout);
737 api.End();
738 return EXIT_SUCCESS;
739 }
740#endif // ndef DISABLED_LEGACY_ENGINE
741
742 FixPageSegMode(api, pagesegmode);
743
744 if (dpi) {
745 auto dpi_string = std::to_string(dpi);
746 api.SetVariable("user_defined_dpi", dpi_string.c_str());
747 }
748
749 int ret_val = EXIT_SUCCESS;
750
751 if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
752 Pix *pixs = pixRead(image);
753 if (!pixs) {
754 fprintf(stderr, "Leptonica can't process input file: %s\n", image);
755 return 2;
756 }
757
758 api.SetImage(pixs);
759
760 tesseract::Orientation orientation;
763 float deskew_angle;
764
765 const std::unique_ptr<const tesseract::PageIterator> it(api.AnalyseLayout());
766 if (it) {
767 // TODO: Implement output of page segmentation, see documentation
768 // ("Automatic page segmentation, but no OSD, or OCR").
769 it->Orientation(&orientation, &direction, &order, &deskew_angle);
770 tprintf(
771 "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
772 "Deskew angle: %.4f\n",
773 orientation, direction, order, deskew_angle);
774 } else {
775 ret_val = EXIT_FAILURE;
776 }
777
778 pixDestroy(&pixs);
779 return ret_val;
780 }
781
782 // Set in_training_mode to true when using one of these configs:
783 // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
784 // In this mode no other OCR result files are written.
785 bool b = false;
786 bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
787 (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
788 (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
789 (api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
790
792 if (!api.tesseract()->AnyTessLang()) {
793 fprintf(stderr, "Error, OSD requires a model for the legacy engine\n");
794 return EXIT_FAILURE;
795 }
796 }
797#ifdef DISABLED_LEGACY_ENGINE
798 auto cur_psm = api.GetPageSegMode();
799 auto osd_warning = std::string("");
800 if (cur_psm == tesseract::PSM_OSD_ONLY) {
801 const char *disabled_osd_msg =
802 "\nERROR: The page segmentation mode 0 (OSD Only) is currently "
803 "disabled.\n\n";
804 fprintf(stderr, "%s", disabled_osd_msg);
805 return EXIT_FAILURE;
806 } else if (cur_psm == tesseract::PSM_AUTO_OSD) {
808 osd_warning +=
809 "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
810 "disabled. "
811 "Using PSM 3 (Auto) instead.\n\n";
812 } else if (cur_psm == tesseract::PSM_SPARSE_TEXT_OSD) {
814 osd_warning +=
815 "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
816 "currently disabled. "
817 "Using PSM 11 (Sparse text) instead.\n\n";
818 }
819#endif // def DISABLED_LEGACY_ENGINE
820
821 std::vector<std::unique_ptr<TessResultRenderer>> renderers;
822
823 if (in_training_mode) {
824 renderers.push_back(nullptr);
825 } else if (outputbase != nullptr) {
826 PreloadRenderers(api, renderers, pagesegmode, outputbase);
827 }
828
829 if (!renderers.empty()) {
830#ifdef DISABLED_LEGACY_ENGINE
831 if (!osd_warning.empty()) {
832 fprintf(stderr, "%s", osd_warning.c_str());
833 }
834#endif
835 bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0].get());
836 if (!succeed) {
837 fprintf(stderr, "Error during processing.\n");
838 ret_val = EXIT_FAILURE;
839 }
840 }
841
842 return ret_val;
843}
int main(int argc, char **argv)
Definition: tesseract.cpp:636
const char * p
int * count
@ PSM_OSD_ONLY
Orientation and script detection only.
Definition: publictypes.h:158
@ PSM_COUNT
Number of enum entries.
Definition: publictypes.h:177
@ PSM_SPARSE_TEXT
Find as much text as possible in no particular order.
Definition: publictypes.h:171
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:161
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:162
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:166
@ PSM_SPARSE_TEXT_OSD
Sparse text with orientation and script det.
Definition: publictypes.h:173
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int log_level
Definition: tprintf.cpp:36
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:511
const char * GetDatapath()
Definition: baseapi.cpp:935
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:279
static const char * Version()
Definition: baseapi.cpp:241
void GetAvailableLanguagesAsVector(std::vector< std::string > *langs) const
Definition: baseapi.cpp:471
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1071
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:519
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:368
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:356
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:576
Tesseract * tesseract() const
Definition: baseapi.h:711
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:812
void PrintFontsTable(FILE *fp) const
Definition: baseapi.cpp:338
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:304
void SetOutputName(const char *name)
Definition: baseapi.cpp:275
static bool IsNEONAvailable()
Definition: simddetect.h:63
static bool IsAVX512BWAvailable()
Definition: simddetect.h:47
static bool IsFMAAvailable()
Definition: simddetect.h:55
static bool IsAVXAvailable()
Definition: simddetect.h:35
static bool IsAVX512VNNIAvailable()
Definition: simddetect.h:51
static bool IsAVX512FAvailable()
Definition: simddetect.h:43
static bool IsSSEAvailable()
Definition: simddetect.h:59
static bool IsAVX2Available()
Definition: simddetect.h:39
static TESS_API void Update()
Definition: simddetect.cpp:272
bool AnyTessLang() const
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:172