21# include "config_auto.h"
34#include <allheaders.h>
37#if defined(USE_OPENCL)
49#if defined(HAVE_LIBARCHIVE)
52#if defined(HAVE_LIBCURL)
53# include <curl/curl.h>
59# if defined(HAVE_TIFFIO_H)
// Error callback taking (module, fmt, va_list); main installs it with
// TIFFSetErrorHandler. Writes to stderr, in order:
//   - "<module>: " when module is non-null,
//   - the message formatted from fmt/ap,
//   - a terminating ".\n".
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
  if (module != nullptr) {
    fprintf(stderr, "%s: ", module);
  }
  vfprintf(stderr, fmt, ap);
  fprintf(stderr, ".\n");
}
// Warning callback taking (module, fmt, va_list); main installs it with
// TIFFSetWarningHandler. Writes to stderr, in order:
//   - "<module>: " when module is non-null,
//   - the fixed prefix "Warning, ",
//   - the message formatted from fmt/ap,
//   - a terminating ".\n".
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
  if (module != nullptr) {
    fprintf(stderr, "%s: ", module);
  }
  fprintf(stderr, "Warning, ");
  vfprintf(stderr, fmt, ap);
  fprintf(stderr, ".\n");
}
82class AutoWin32ConsoleOutputCP {
84 explicit AutoWin32ConsoleOutputCP(UINT codeCP) {
85 oldCP_ = GetConsoleOutputCP();
86 SetConsoleOutputCP(codeCP);
88 ~AutoWin32ConsoleOutputCP() {
89 SetConsoleOutputCP(oldCP_);
96static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
// Prints version information to stdout.
// Visible steps: the Leptonica version string and the image library versions
// are fetched, printed and released with lept_free; then OpenCL platform and
// device details, "Found ..." lines for CPU features, the OpenMP version
// macro, and the libarchive / libcurl version strings are printed.
// NOTE(review): this listing omits several lines (declarations such as
// versionStrP, info and num_devices, the conditions guarding each "Found"
// line, and closing braces) - confirm against the complete file.
102static void PrintVersionInfo() {
107 versionStrP = getLeptonicaVersion();
108 printf(
" %s\n", versionStrP);
109 lept_free(versionStrP);
111 versionStrP = getImagelibVersions();
112 printf(
" %s\n", versionStrP);
113 lept_free(versionStrP);
// OpenCL section (presumably compiled only when USE_OPENCL is defined).
// At most 4 platform ids are requested.
116 cl_platform_id platform[4];
117 cl_uint num_platforms;
119 printf(
" OpenCL info:\n");
// NOTE(review): clGetPlatformIDs reports the number of platforms available,
// which can be larger than the 4 entries requested; the loop below runs to
// num_platforms without clamping in the lines shown here.
120 if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
121 printf(
" Found %u platform(s).\n", num_platforms);
122 for (
unsigned n = 0; n < num_platforms; n++) {
// Each info query passes 256 as the size of the info buffer.
124 if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
125 printf(
" Platform %u name: %s.\n", n + 1, info);
127 if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
128 printf(
" Version: %s.\n", info);
// At most 2 device ids are requested per platform.
// NOTE(review): same concern as above - num_devices may exceed 2 while the
// loop indexes devices[i] up to num_devices.
130 cl_device_id devices[2];
132 if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
133 printf(
" Found %u device(s).\n", num_devices);
134 for (
unsigned i = 0;
i < num_devices; ++
i) {
135 if (clGetDeviceInfo(devices[
i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
136 printf(
" Device %u name: %s.\n",
i + 1, info);
// CPU feature report; the runtime checks that guard each line are not
// visible in this listing.
143#if defined(HAVE_NEON) || defined(__aarch64__)
145 printf(
" Found NEON\n");
148 printf(
" Found AVX512BW\n");
151 printf(
" Found AVX512F\n");
154 printf(
" Found AVX512VNNI\n");
157 printf(
" Found AVX2\n");
160 printf(
" Found AVX\n");
163 printf(
" Found FMA\n");
166 printf(
" Found SSE4.1\n");
170 printf(
" Found OpenMP %d\n", _OPENMP);
// libarchive: archive_version_details() when ARCHIVE_VERSION_NUMBER is at
// least 3002000; the archive_version_string() call is presumably the
// fallback for older versions.
172#if defined(HAVE_LIBARCHIVE)
173# if ARCHIVE_VERSION_NUMBER >= 3002000
174 printf(
" Found %s\n", archive_version_details());
176 printf(
" Found %s\n", archive_version_string());
179#if defined(HAVE_LIBCURL)
180 printf(
" Found %s\n", curl_version());
// Holds the help text that lists the page segmentation modes 0 to 13.
// When DISABLED_LEGACY_ENGINE is defined, the text is printed followed by a
// note that the OSD modes are disabled.
// NOTE(review): the declaration of msg and the print path used when the
// legacy engine is enabled are not visible in this listing.
184static void PrintHelpForPSM() {
186 "Page segmentation modes:\n"
187 " 0 Orientation and script detection (OSD) only.\n"
188 " 1 Automatic page segmentation with OSD.\n"
189 " 2 Automatic page segmentation, but no OSD, or OCR. (not "
191 " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
192 " 4 Assume a single column of text of variable sizes.\n"
193 " 5 Assume a single uniform block of vertically aligned text.\n"
194 " 6 Assume a single uniform block of text.\n"
195 " 7 Treat the image as a single text line.\n"
196 " 8 Treat the image as a single word.\n"
197 " 9 Treat the image as a single word in a circle.\n"
198 " 10 Treat the image as a single character.\n"
199 " 11 Sparse text. Find as much text as possible in no"
200 " particular order.\n"
201 " 12 Sparse text with OSD.\n"
202 " 13 Raw line. Treat the image as a single text line,\n"
203 " bypassing hacks that are Tesseract-specific.\n";
205#ifdef DISABLED_LEGACY_ENGINE
206 const char *disabled_osd_msg =
"\nNOTE: The OSD modes are currently disabled.\n";
207 printf(
"%s%s", msg, disabled_osd_msg);
213#ifndef DISABLED_LEGACY_ENGINE
// Holds the help text that lists the OCR engine modes 0 to 3.
// The function sits behind "#ifndef DISABLED_LEGACY_ENGINE", so it exists
// only in builds that keep the legacy engine.
// NOTE(review): the statement that prints the text is not visible here.
214static void PrintHelpForOEM() {
216 "OCR Engine modes:\n"
217 " 0 Legacy engine only.\n"
218 " 1 Neural nets LSTM engine only.\n"
219 " 2 Legacy + LSTM engines.\n"
220 " 3 Default, based on what is available.\n";
// Extended usage text: invocation forms, OCR options and stand-alone options.
// program is substituted for every %s placeholder in the invocation forms.
// Entries tied to the legacy engine (--print-fonts-table, --oem, --help-oem)
// are included only when DISABLED_LEGACY_ENGINE is not defined.
// NOTE(review): the printf calls wrapping these literals and some literal
// lines are not visible in this listing.
226static void PrintHelpExtra(
const char *program) {
229 " %s --help | --help-extra | --help-psm | "
230#ifndef DISABLED_LEGACY_ENGINE
234 " %s --list-langs [--tessdata-dir PATH]\n"
235#ifndef DISABLED_LEGACY_ENGINE
236 " %s --print-fonts-table [options...] [configfile...]\n"
238 " %s --print-parameters [options...] [configfile...]\n"
239 " %s imagename|imagelist|stdin outputbase|stdout [options...] "
243 " --tessdata-dir PATH Specify the location of tessdata path.\n"
244 " --user-words PATH Specify the location of user words file.\n"
245 " --user-patterns PATH Specify the location of user patterns file.\n"
246 " --dpi VALUE Specify DPI for input image.\n"
247 " --loglevel LEVEL Specify logging level. LEVEL can be\n"
248 " ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL or OFF.\n"
249 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
250 " -c VAR=VALUE Set value for config variables.\n"
251 " Multiple -c arguments are allowed.\n"
252 " --psm NUM Specify page segmentation mode.\n"
253#ifndef DISABLED_LEGACY_ENGINE
254 " --oem NUM Specify OCR Engine mode.\n"
256 "NOTE: These options must occur before any configfile.\n"
// One program argument per %s above; further arguments presumably follow
// inside the conditional section for the legacy-only invocation forms.
258 program, program, program, program
259#ifndef DISABLED_LEGACY_ENGINE
265#ifndef DISABLED_LEGACY_ENGINE
273 " -h, --help Show minimal help message.\n"
274 " --help-extra Show extra help for advanced users.\n"
275 " --help-psm Show page segmentation modes.\n"
276#ifndef DISABLED_LEGACY_ENGINE
277 " --help-oem Show OCR Engine modes.\n"
279 " -v, --version Show version information.\n"
280 " --list-langs List available languages for tesseract engine.\n"
281#ifndef DISABLED_LEGACY_ENGINE
282 " --print-fonts-table Print tesseract fonts table.\n"
284 " --print-parameters Print tesseract parameters.\n");
// Minimal usage text: the basic invocation forms, the -l option and the
// stand-alone options. program is passed three times, once per %s.
// ParseArgs calls this for -h / --help and when no output base was given.
// NOTE(review): the printf call and some literal lines are not visible here.
287static void PrintHelpMessage(
const char *program) {
290 " %s --help | --help-extra | --version\n"
292 " %s imagename outputbase [options...] [configfile...]\n"
295 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
296 "NOTE: These options must occur before any configfile.\n"
299 " --help Show this help message.\n"
300 " --help-extra Show extra help for advanced users.\n"
301 " --version Show version information.\n"
302 " --list-langs List available languages for tesseract "
304 program, program, program);
// Body of the routine that applies "-c VAR=VALUE" command-line settings
// (presumably SetVariablesFromCLArgs, which main calls with api, argc, argv;
// the function header is not visible in this listing).
// For every "-c" that is followed by another argument, the argument is copied
// into opt1, '=' is located in that copy, and the text after '=' is copied
// into opt2. A missing '=' and a failure to set the option are reported on
// stderr.
// NOTE(review): strncpy does not write a terminator when the source fills the
// limit; confirm that the lines omitted here terminate opt1 and opt2.
309 char opt1[256], opt2[255];
310 for (
int i = 0;
i < argc;
i++) {
311 if (strcmp(argv[
i],
"-c") == 0 &&
i + 1 < argc) {
312 strncpy(opt1, argv[
i + 1], 255);
314 char *
p = strchr(opt1,
'=');
316 fprintf(stderr,
"Missing = in configvar assignment\n");
// The value is taken from the original argument, not from the opt1 copy.
321 strncpy(opt2, strchr(argv[
i + 1],
'=') + 1,
sizeof(opt2) - 1);
326 fprintf(stderr,
"Could not set option: %s=%s\n", opt1, opt2);
// Body of the language-listing routine (function header not visible in this
// listing; the name is not shown either - TODO confirm).
// Prints a heading containing a quoted path and a count, then every entry of
// languages on its own line.
// NOTE(review): the call that fills languages and the printf arguments for
// the heading are in lines omitted here.
334 std::vector<std::string> languages;
336 printf(
"List of available languages in \"%s\" (%zu):\n",
338 for (
const auto &language : languages) {
339 printf(
"%s\n", language.c_str());
// Range check for a numeric command-line value.
//   arg   - the value supplied by the user
//   mode  - option name inserted into the error message
//   count - number of valid values; the accepted range is 0 .. count - 1
// Returns true when arg lies in the accepted range. Otherwise prints
// "Invalid <mode> value, ..." to stdout and returns false.
static bool checkArgValues(int arg, const char *mode, int count) {
  if (arg < 0 || arg >= count) {
    printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
    return false;
  }
  return true;
}
// Parses the command line and stores the results through the out-parameters.
//
// The loop starts at argv[1] and continues while arguments remain and either
// no output base has been stored yet or the current argument starts with '-'.
// Options recognised in the visible lines:
//   -h / --help, --help-extra, --help-psm, --help-oem, -v / --version,
//   -l, --tessdata-dir, --dpi, --loglevel, --user-words, --user-patterns,
//   --list-langs, --psm, --oem, --print-parameters, --print-fonts-table, -c.
// Options that take a value are matched only when another argument follows
// (i + 1 < argc). An argument matching nothing is reported on stderr as an
// unknown command line argument.
//
// After the loop a warning is printed when *lang is set to something other
// than "osd", and the minimal help is printed when no output base was given
// and noocr is false.
// NOTE(review): this listing omits many lines (return statements, index
// increments, the conditions around the warning, closing braces); confirm
// the exact control flow against the complete file.
373static bool ParseArgs(
int argc,
char **argv,
const char **lang,
const char **image,
374 const char **outputbase,
const char **datapath, l_int32 *dpi,
375 bool *list_langs,
bool *print_parameters,
bool *print_fonts_table,
376 std::vector<std::string> *vars_vec, std::vector<std::string> *vars_values,
381 for (
i = 1;
i < argc && (*outputbase ==
nullptr || argv[
i][0] ==
'-');
i++) {
// Once the image name is known, the next argument becomes the output base.
382 if (*image !=
nullptr && *outputbase ==
nullptr) {
384 *outputbase = argv[
i];
385 }
else if ((strcmp(argv[
i],
"-h") == 0) || (strcmp(argv[
i],
"--help") == 0)) {
386 PrintHelpMessage(argv[0]);
388 }
else if (strcmp(argv[
i],
"--help-extra") == 0) {
389 PrintHelpExtra(argv[0]);
391 }
else if ((strcmp(argv[
i],
"--help-psm") == 0)) {
394#ifndef DISABLED_LEGACY_ENGINE
395 }
else if ((strcmp(argv[
i],
"--help-oem") == 0)) {
399 }
else if ((strcmp(argv[
i],
"-v") == 0) || (strcmp(argv[
i],
"--version") == 0)) {
402 }
else if (strcmp(argv[
i],
"-l") == 0 &&
i + 1 < argc) {
405 }
else if (strcmp(argv[
i],
"--tessdata-dir") == 0 &&
i + 1 < argc) {
406 *datapath = argv[
i + 1];
408 }
else if (strcmp(argv[
i],
"--dpi") == 0 &&
i + 1 < argc) {
// atoi yields 0 for non-numeric text; no validation of the value is visible
// in the lines shown here.
409 *dpi = atoi(argv[
i + 1]);
411 }
else if (strcmp(argv[
i],
"--loglevel") == 0 &&
i + 1 < argc) {
// ++i consumes the level name. Looking it up with at() throws
// std::out_of_range for an unknown name, which the catch below reports.
413 const std::string loglevel_string = argv[++
i];
414 static const std::map<const std::string, int> loglevels {
425 auto loglevel = loglevels.at(loglevel_string);
427 }
catch (
const std::out_of_range &e) {
429 tprintf(
"Error, unsupported --loglevel %s\n", loglevel_string.c_str());
432 }
else if (strcmp(argv[
i],
"--user-words") == 0 &&
i + 1 < argc) {
// --user-words and --user-patterns are stored as name/value pairs in
// vars_vec / vars_values.
433 vars_vec->push_back(
"user_words_file");
434 vars_values->push_back(argv[
i + 1]);
436 }
else if (strcmp(argv[
i],
"--user-patterns") == 0 &&
i + 1 < argc) {
437 vars_vec->push_back(
"user_patterns_file");
438 vars_values->push_back(argv[
i + 1]);
440 }
else if (strcmp(argv[
i],
"--list-langs") == 0) {
443 }
else if (strcmp(argv[
i],
"--psm") == 0 &&
i + 1 < argc) {
449 }
else if (strcmp(argv[
i],
"--oem") == 0 &&
i + 1 < argc) {
450#ifndef DISABLED_LEGACY_ENGINE
451 int oem = atoi(argv[
i + 1]);
458 }
else if (strcmp(argv[
i],
"--print-parameters") == 0) {
460 *print_parameters =
true;
461#ifndef DISABLED_LEGACY_ENGINE
462 }
else if (strcmp(argv[
i],
"--print-fonts-table") == 0) {
464 *print_fonts_table =
true;
466 }
else if (strcmp(argv[
i],
"-c") == 0 &&
i + 1 < argc) {
469 }
else if (*image ==
nullptr) {
473 fprintf(stderr,
"Error, unknown command line argument '%s'\n", argv[
i]);
// strcmp is non-zero when *lang differs from "osd".
482 if (*lang !=
nullptr && strcmp(*lang,
"osd")) {
485 fprintf(stderr,
"Warning, detects only orientation with -l %s\n", *lang);
492 if (*outputbase ==
nullptr && noocr ==
false) {
493 PrintHelpMessage(argv[0]);
// Parameter list tail and body of the renderer setup routine (presumably
// PreloadRenderers, which main calls with api, renderers, pagesegmode and
// outputbase; the first header line is not visible in this listing).
//
// The same pattern repeats for each output format (hOCR, ALTO, TSV, PDF,
// UNLV, LSTM box, box text, WordStr box, plain text): a renderer is created
// for outputbase, appended to renderers when happy() is true, and otherwise
// an error including strerror(errno) is printed with tprintf.
// NOTE(review): the conditions that select each format, and the definitions
// of b, error, font_info and textonly, are in lines omitted here.
501 std::vector<std::unique_ptr<TessResultRenderer>> &renderers,
504#ifndef DISABLED_LEGACY_ENGINE
505 renderers.push_back(std::make_unique<tesseract::TessOsdRenderer>(outputbase));
514 auto renderer = std::make_unique<tesseract::TessHOcrRenderer>(outputbase, font_info);
515 if (renderer->happy()) {
516 renderers.push_back(std::move(renderer));
518 tprintf(
"Error, could not create hOCR output file: %s\n", strerror(errno));
525 auto renderer = std::make_unique<tesseract::TessAltoRenderer>(outputbase);
526 if (renderer->happy()) {
527 renderers.push_back(std::move(renderer));
529 tprintf(
"Error, could not create ALTO output file: %s\n", strerror(errno));
538 auto renderer = std::make_unique<tesseract::TessTsvRenderer>(outputbase, font_info);
539 if (renderer->happy()) {
540 renderers.push_back(std::move(renderer));
542 tprintf(
"Error, could not create TSV output file: %s\n", strerror(errno));
// Switches stdout to binary mode before the PDF renderer is created.
// NOTE(review): the message below says "cin" although the call operates on
// stdout - the wording looks wrong.
550 if (_setmode(_fileno(stdout), _O_BINARY) == -1)
551 tprintf(
"ERROR: cin to binary: %s", strerror(errno));
555 auto renderer = std::make_unique<tesseract::TessPDFRenderer>(outputbase, api.
GetDatapath(), textonly);
556 if (renderer->happy()) {
557 renderers.push_back(std::move(renderer));
559 tprintf(
"Error, could not create PDF output file: %s\n", strerror(errno));
567 auto renderer = std::make_unique<tesseract::TessUnlvRenderer>(outputbase);
568 if (renderer->happy()) {
569 renderers.push_back(std::move(renderer));
571 tprintf(
"Error, could not create UNLV output file: %s\n", strerror(errno));
578 auto renderer = std::make_unique<tesseract::TessLSTMBoxRenderer>(outputbase);
579 if (renderer->happy()) {
580 renderers.push_back(std::move(renderer));
582 tprintf(
"Error, could not create LSTM BOX output file: %s\n", strerror(errno));
589 auto renderer = std::make_unique<tesseract::TessBoxTextRenderer>(outputbase);
590 if (renderer->happy()) {
591 renderers.push_back(std::move(renderer));
593 tprintf(
"Error, could not create BOX output file: %s\n", strerror(errno));
600 auto renderer = std::make_unique<tesseract::TessWordStrBoxRenderer>(outputbase);
601 if (renderer->happy()) {
602 renderers.push_back(std::move(renderer));
604 tprintf(
"Error, could not create WordStr BOX output file: %s\n", strerror(errno));
// The plain-text renderer is added when b is set, or as the fallback when no
// error occurred and no other renderer was created.
610 if (b || (!error && renderers.empty())) {
614 auto renderer = std::make_unique<tesseract::TessTextRenderer>(outputbase);
615 if (renderer->happy()) {
616 renderers.push_back(std::move(renderer));
618 tprintf(
"Error, could not create TXT output file: %s\n", strerror(errno));
// Every renderer after the first is handed to renderers[0] via insert();
// release() then gives up the unique_ptr so the vector will not delete it
// (presumably renderers[0] takes over ownership - confirm in insert()).
625 for (
size_t r = 1; r < renderers.size(); ++r) {
626 renderers[0]->insert(renderers[r].get());
627 renderers[r].release();
// Program entry point. Visible sequence:
//   1. Optionally enable floating-point exceptions (glibc feenableexcept).
//   2. Set the Leptonica message severity and, on Windows with libtiff,
//      install the TIFF error / warning handlers.
//   3. Parse the command line with ParseArgs.
//   4. Initialise the API with Init(), then apply the -c settings.
//   5. Handle the stand-alone requests (print parameters / fonts table).
//   6. Apply the page segmentation mode and the user-defined DPI.
//   7. Either analyse the layout of the image and print orientation data, or
//      set up renderers and run ProcessPages over the image.
// ret_val starts as EXIT_SUCCESS and is set to EXIT_FAILURE on the failure
// paths visible below.
// NOTE(review): many lines are omitted from this listing (declarations such
// as dpi, arg_i, pagesegmode, enginemode and api, several conditions, the
// return statements and closing braces); confirm against the complete file.
636int main(
int argc,
char **argv) {
// With clang only FE_DIVBYZERO is enabled; the three-flag call is presumably
// the branch for other compilers.
637#if defined(__USE_GNU) && defined(HAVE_FEENABLEEXCEPT)
639# if defined(__clang__)
641 feenableexcept(FE_DIVBYZERO);
643 feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
646 const char *lang =
nullptr;
647 const char *image =
nullptr;
648 const char *outputbase =
nullptr;
649 const char *datapath =
nullptr;
650 bool list_langs =
false;
651 bool print_parameters =
false;
652 bool print_fonts_table =
false;
656#ifdef DISABLED_LEGACY_ENGINE
661 std::vector<std::string> vars_vec;
662 std::vector<std::string> vars_values;
// When LEPT_MSG_SEVERITY is set in the environment the external severity is
// used; the L_SEVERITY_ERROR call is presumably the branch taken otherwise.
664 if (std::getenv(
"LEPT_MSG_SEVERITY")) {
666 setMsgSeverity(L_SEVERITY_EXTERNAL);
669 setMsgSeverity(L_SEVERITY_ERROR);
672#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
674 TIFFSetErrorHandler(Win32ErrorHandler);
675 TIFFSetWarningHandler(Win32WarningHandler);
678 if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
679 &print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i,
680 &pagesegmode, &enginemode)) {
// Recognition mode means none of the stand-alone requests was given.
684 bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
686 if (lang ==
nullptr && in_recognition_mode) {
691 if (image ==
nullptr && in_recognition_mode) {
// Arguments from index arg_i onward are passed to Init as config names.
704 const int init_failed = api.
Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
705 &vars_vec, &vars_values,
false);
707 if (!SetVariablesFromCLArgs(api, argc, argv)) {
720 fprintf(stderr,
"Could not initialize tesseract.\n");
724 if (print_parameters) {
726 fprintf(stdout,
"Tesseract parameters:\n");
732#ifndef DISABLED_LEGACY_ENGINE
733 if (print_fonts_table) {
735 fprintf(stdout,
"Tesseract fonts table:\n");
742 FixPageSegMode(api, pagesegmode);
// The DPI value is handed to the API as the string variable user_defined_dpi.
745 auto dpi_string = std::to_string(dpi);
746 api.
SetVariable(
"user_defined_dpi", dpi_string.c_str());
749 int ret_val = EXIT_SUCCESS;
752 Pix *pixs = pixRead(image);
754 fprintf(stderr,
"Leptonica can't process input file: %s\n", image);
// Layout analysis path: the iterator from AnalyseLayout() supplies the
// orientation, writing direction, textline order and deskew angle printed
// below.
765 const std::unique_ptr<const tesseract::PageIterator> it(api.
AnalyseLayout());
769 it->Orientation(&orientation, &direction, &order, &deskew_angle);
771 "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
772 "Deskew angle: %.4f\n",
773 orientation, direction, order, deskew_angle);
775 ret_val = EXIT_FAILURE;
786 bool in_training_mode = (api.
GetBoolVariable(
"tessedit_ambigs_training", &b) && b) ||
793 fprintf(stderr,
"Error, OSD requires a model for the legacy engine\n");
// Without the legacy engine, PSM 0 is reported as an error, while PSM 1 and
// PSM 12 produce warnings that name the replacement mode.
797#ifdef DISABLED_LEGACY_ENGINE
799 auto osd_warning = std::string(
"");
801 const char *disabled_osd_msg =
802 "\nERROR: The page segmentation mode 0 (OSD Only) is currently "
804 fprintf(stderr,
"%s", disabled_osd_msg);
809 "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
811 "Using PSM 3 (Auto) instead.\n\n";
815 "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
816 "currently disabled. "
817 "Using PSM 11 (Sparse text) instead.\n\n";
821 std::vector<std::unique_ptr<TessResultRenderer>> renderers;
// In training mode a single null renderer is stored; otherwise renderers are
// created only when an output base was given.
823 if (in_training_mode) {
824 renderers.push_back(
nullptr);
825 }
else if (outputbase !=
nullptr) {
826 PreloadRenderers(api, renderers, pagesegmode, outputbase);
829 if (!renderers.empty()) {
830#ifdef DISABLED_LEGACY_ENGINE
831 if (!osd_warning.empty()) {
832 fprintf(stderr,
"%s", osd_warning.c_str());
// Only renderers[0] is passed on; PreloadRenderers hands the remaining
// renderers to it via insert().
835 bool succeed = api.
ProcessPages(image,
nullptr, 0, renderers[0].get());
837 fprintf(stderr,
"Error during processing.\n");
838 ret_val = EXIT_FAILURE;
int main(int argc, char **argv)
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_COUNT
Number of enum entries.
@ PSM_SPARSE_TEXT
Find as much text as possible in no particular order.
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
@ PSM_SPARSE_TEXT_OSD
Sparse text with orientation and script det.
void tprintf(const char *format,...)
void SetPageSegMode(PageSegMode mode)
const char * GetDatapath()
bool SetVariable(const char *name, const char *value)
static const char * Version()
void GetAvailableLanguagesAsVector(std::vector< std::string > *langs) const
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
PageSegMode GetPageSegMode() const
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
void PrintVariables(FILE *fp) const
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Tesseract * tesseract() const
PageIterator * AnalyseLayout()
void PrintFontsTable(FILE *fp) const
bool GetBoolVariable(const char *name, bool *value) const
void SetOutputName(const char *name)
static bool IsNEONAvailable()
static bool IsAVX512BWAvailable()
static bool IsFMAAvailable()
static bool IsAVXAvailable()
static bool IsAVX512VNNIAvailable()
static bool IsAVX512FAvailable()
static bool IsSSEAvailable()
static bool IsAVX2Available()
static TESS_API void Update()
static DawgCache * GlobalDawgCache()