tesseract  4.0.0-beta.1-59-g2cc4
tesseractmain.cpp File Reference
#include <iostream>
#include "allheaders.h"
#include "baseapi.h"
#include "basedir.h"
#include "dict.h"
#include "openclwrapper.h"
#include "osdetect.h"
#include "renderer.h"
#include "simddetect.h"
#include "strngs.h"
#include "tprintf.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int  argc,
char **  argv 
)

This program reads in a text file consisting of feature samples from a training page in the following format:

   FontName UTF8-char-str xmin ymin xmax ymax page-number
    NumberOfFeatureTypes(N)
      FeatureTypeName1 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      FeatureTypeName2 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      ...
      FeatureTypeNameN NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
   FontName CharName ...

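The result of this program is a binary inttemp file used by the OCR engine.

Note: the description above does not match the listing below, and appears to have been carried over from a training tool's documentation. In the listing, main() parses the command line, initializes a tesseract::TessBaseAPI instance, and then either prints the language list, prints the parameters, reports layout analysis results (for PSM_AUTO_ONLY), or processes the input image via ProcessPages().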

Parameters
argc: number of command line arguments
argv: array of command line arguments
Returns
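EXIT_SUCCESS on success; EXIT_FAILURE if initialization, layout analysis, or page processing fails; 2 if the input image cannot be opened in PSM_AUTO_ONLY mode.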
Note
Exceptions: none
History: Fri Aug 18 08:56:17 1989, DSJ, Created.
History: Mon May 18 1998, Christy Russson, Revision started.

Definition at line 422 of file tesseractmain.cpp.

422  {
423  const char* lang = "eng";
424  const char* image = NULL;
425  const char* outputbase = NULL;
426  const char* datapath = NULL;
427  bool list_langs = false;
428  bool print_parameters = false;
429  int arg_i = 1;
432  /* main() calls functions like ParseArgs which call exit().
433  * This results in memory leaks if vars_vec and vars_values are
434  * declared as auto variables (destructor is not called then). */
435  static GenericVector<STRING> vars_vec;
436  static GenericVector<STRING> vars_values;
437 
438 #if !defined(DEBUG)
439  // Disable debugging and informational messages from Leptonica.
440  setMsgSeverity(L_SEVERITY_ERROR);
441 #endif
442 
443 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
444  /* Show libtiff warnings on console (not in GUI). */
445  TIFFSetWarningHandler(Win32WarningHandler);
446 #endif /* HAVE_TIFFIO_H && _WIN32 */
447 
448  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
449  &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
450  &enginemode);
451 
452  bool banner = false;
453  if (outputbase != NULL && strcmp(outputbase, "-") &&
454  strcmp(outputbase, "stdout")) {
455  banner = true;
456  }
457 
458  PERF_COUNT_START("Tesseract:main")
459 
460  // Call GlobalDawgCache here to create the global DawgCache object before
461  // the TessBaseAPI object. This fixes the order of destructor calls:
462  // first TessBaseAPI must be destructed, DawgCache must be the last object.
463  tesseract::Dict::GlobalDawgCache();
464 
465  // Avoid memory leak caused by auto variable when return is called.
466  static tesseract::TessBaseAPI api;
467 
468  api.SetOutputName(outputbase);
469 
470  int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
471  argc - arg_i, &vars_vec, &vars_values, false);
472 
473  SetVariablesFromCLArgs(&api, argc, argv);
474 
475  if (list_langs) {
476  PrintLangsList(&api);
477  return EXIT_SUCCESS;
478  }
479 
480  if (init_failed) {
481  fprintf(stderr, "Could not initialize tesseract.\n");
482  return EXIT_FAILURE;
483  }
484 
485  if (print_parameters) {
486  FILE* fout = stdout;
487  fprintf(stdout, "Tesseract parameters:\n");
488  api.PrintVariables(fout);
489  api.End();
490  return EXIT_SUCCESS;
491  }
492 
493  FixPageSegMode(&api, pagesegmode);
494 
495  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
496  int ret_val = EXIT_SUCCESS;
497 
498  Pix* pixs = pixRead(image);
499  if (!pixs) {
500  fprintf(stderr, "Cannot open input file: %s\n", image);
501  return 2;
502  }
503 
504  api.SetImage(pixs);
505 
506  tesseract::Orientation orientation;
509  float deskew_angle;
510 
511  tesseract::PageIterator* it = api.AnalyseLayout();
512  if (it) {
513  it->Orientation(&orientation, &direction, &order, &deskew_angle);
514  tprintf(
515  "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
516  "Deskew angle: %.4f\n",
517  orientation, direction, order, deskew_angle);
518  } else {
519  ret_val = EXIT_FAILURE;
520  }
521 
522  delete it;
523 
524  pixDestroy(&pixs);
525  return ret_val;
526  }
527 
528  // set in_training_mode to true when using one of these configs:
529  // ambigs.train, box.train, box.train.stderr, linebox, rebox
530  bool b = false;
531  bool in_training_mode =
532  (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
533  (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
534  (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
535 
536  // Avoid memory leak caused by auto variable when exit() is called.
538 
539  if (in_training_mode) {
540  renderers.push_back(NULL);
541  } else {
542  PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
543  }
544 
545  if (!renderers.empty()) {
546  if (banner) PrintBanner();
547  bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
548  if (!succeed) {
549  fprintf(stderr, "Error during processing.\n");
550  return EXIT_FAILURE;
551  }
552  }
553 
555 
556  return EXIT_SUCCESS;
557 }
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:168
bool empty() const
Definition: genericvector.h:91
#define PERF_COUNT_START(FUNCT_NAME)
struct TessBaseAPI TessBaseAPI
Definition: capi.h:83
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
int push_back(T * object)
#define tprintf(...)
Definition: tprintf.h:31
#define PERF_COUNT_END
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:167