All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::TessBaseAPI Class Reference

#include <baseapi.h>

Public Member Functions

 TessBaseAPI ()
 
virtual ~TessBaseAPI ()
 
void SetInputName (const char *name)
 
const char * GetInputName ()
 
void SetInputImage (Pix *pix)
 
Pix * GetInputImage ()
 
int GetSourceYResolution ()
 
const char * GetDatapath ()
 
void SetOutputName (const char *name)
 
bool SetVariable (const char *name, const char *value)
 
bool SetDebugVariable (const char *name, const char *value)
 
bool GetIntVariable (const char *name, int *value) const
 
bool GetBoolVariable (const char *name, bool *value) const
 
bool GetDoubleVariable (const char *name, double *value) const
 
const char * GetStringVariable (const char *name) const
 
void PrintVariables (FILE *fp) const
 
bool GetVariableAsString (const char *name, STRING *val)
 
int Init (const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
 
int Init (const char *datapath, const char *language, OcrEngineMode oem)
 
int Init (const char *datapath, const char *language)
 
const char * GetInitLanguagesAsString () const
 
void GetLoadedLanguagesAsVector (GenericVector< STRING > *langs) const
 
void GetAvailableLanguagesAsVector (GenericVector< STRING > *langs) const
 
int InitLangMod (const char *datapath, const char *language)
 
void InitForAnalysePage ()
 
void ReadConfigFile (const char *filename)
 
void ReadDebugConfigFile (const char *filename)
 
void SetPageSegMode (PageSegMode mode)
 
PageSegMode GetPageSegMode () const
 
char * TesseractRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
 
void ClearAdaptiveClassifier ()
 
void SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void SetImage (Pix *pix)
 
void SetSourceResolution (int ppi)
 
void SetRectangle (int left, int top, int width, int height)
 
void SetThresholder (ImageThresholder *thresholder)
 
Pix * GetThresholdedImage ()
 
Boxa * GetRegions (Pixa **pixa)
 
Boxa * GetTextlines (const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * GetStrips (Pixa **pixa, int **blockids)
 
Boxa * GetWords (Pixa **pixa)
 
Boxa * GetConnectedComponents (Pixa **cc)
 
Boxa * GetComponentImages (const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int GetThresholdedImageScaleFactor () const
 
void DumpPGM (const char *filename)
 
PageIterator * AnalyseLayout ()
 
PageIterator * AnalyseLayout (bool merge_similar_words)
 
int Recognize (ETEXT_DESC *monitor)
 
int RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * GetIterator ()
 
MutableIterator * GetMutableIterator ()
 
char * GetUTF8Text ()
 
char * GetHOCRText (int page_number)
 
char * GetBoxText (int page_number)
 
char * GetUNLVText ()
 
int MeanTextConf ()
 
int * AllWordConfidences ()
 
bool AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void Clear ()
 
void End ()
 
int IsValidWord (const char *word)
 
bool IsValidCharacter (const char *utf8_character)
 
bool GetTextDirection (int *out_offset, float *out_slope)
 
void SetDictFunc (DictFunc f)
 
void SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
void SetFillLatticeFunc (FillLatticeFunc f)
 
bool DetectOS (OSResults *)
 
void GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
void RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * GetUnichar (int unichar_id)
 
const Dawg * GetDawg (int i) const
 
int NumDawgs () const
 
Tesseract *const tesseract () const
 
OcrEngineMode const oem () const
 
void InitTruthCallback (TruthCallback *cb)
 
CubeRecoContext * GetCubeRecoContext () const
 
void set_min_orientation_margin (double margin)
 
void GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
BLOCK_LIST * FindLinesCreateBlockList ()
 

Static Public Member Functions

static const char * Version ()
 
static size_t getOpenCLDevice (void **device)
 
static void CatchSignals ()
 
static void ClearPersistentCache ()
 
static ROW * FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
static ROW * MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * MakeTBLOB (Pix *pix)
 
static void NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
static void DeleteBlockList (BLOCK_LIST *block_list)
 

Protected Member Functions

TESS_LOCAL bool InternalSetImage ()
 
virtual TESS_LOCAL void Threshold (Pix **pix)
 
TESS_LOCAL int FindLines ()
 
void ClearResults ()
 
TESS_LOCAL LTRResultIterator * GetLTRIterator ()
 
TESS_LOCAL int TextLength (int *blob_count)
 
TESS_LOCAL void AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
 
TESS_LOCAL PAGE_RES * RecognitionPass1 (BLOCK_LIST *block_list)
 
TESS_LOCAL PAGE_RES * RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
 
TESS_LOCAL void DetectParagraphs (bool after_text_recognition)
 
TESS_LOCAL const PAGE_RES * GetPageRes () const
 

Static Protected Member Functions

static TESS_LOCAL int TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
 

Protected Attributes

Tesseract * tesseract_
 The underlying data object. More...
 
Tesseract * osd_tesseract_
 For orientation & script detection. More...
 
EquationDetect * equ_detect_
 The equation detector. More...
 
ImageThresholder * thresholder_
 Image thresholding module. More...
 
GenericVector< ParagraphModel * > * paragraph_models_
 
BLOCK_LIST * block_list_
 The page layout. More...
 
PAGE_RES * page_res_
 The page-level data. More...
 
STRING * input_file_
 Name used by training code. More...
 
Pix * input_image_
 Image used for searchable PDF. More...
 
STRING * output_file_
 Name used by debug code. More...
 
STRING * datapath_
 Current location of tessdata. More...
 
STRING * language_
 Last initialized language. More...
 
OcrEngineMode last_oem_requested_
 Last OCR engine mode requested. More...
 
bool recognition_done_
 page_res_ contains recognition data. More...
 
TruthCallback * truth_cb_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 
int image_width_
 
int image_height_
 

Detailed Description

Base class for all tesseract APIs. Specific classes can add ability to work on different inputs or produce different outputs. This class is mostly an interface layer on top of the Tesseract instance class to hide the data types so that users of this class don't have to include any other Tesseract headers.

Definition at line 105 of file baseapi.h.

Constructor & Destructor Documentation

tesseract::TessBaseAPI::TessBaseAPI ( )

Definition at line 112 of file baseapi.cpp.

113  : tesseract_(NULL),
115  equ_detect_(NULL),
116  // Thresholder is initialized to NULL here, but will be set before use by:
117  // A constructor of a derived API, SetThresholder(), or
118  // created implicitly when used in InternalSetImage.
121  block_list_(NULL),
122  page_res_(NULL),
123  input_file_(NULL),
126  datapath_(NULL),
127  language_(NULL),
129  recognition_done_(false),
130  truth_cb_(NULL),
132  image_width_(0), image_height_(0) {
133 }
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:840
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:850
TruthCallback * truth_cb_
Definition: baseapi.h:852
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:842
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:846
STRING * language_
Last initialized language.
Definition: baseapi.h:849
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:848
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:839
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
tesseract::TessBaseAPI::~TessBaseAPI ( )
virtual

Definition at line 135 of file baseapi.cpp.

135  {
136  End();
137 }

Member Function Documentation

void tesseract::TessBaseAPI::CatchSignals ( )
static

Writes the thresholded image to stderr as a PBM file on receipt of a SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).

Definition at line 182 of file baseapi.cpp.

182  {
183 #ifdef __linux__
184  struct sigaction action;
185  memset(&action, 0, sizeof(action));
186  action.sa_handler = &signal_exit;
187  action.sa_flags = SA_RESETHAND;
188  sigaction(SIGSEGV, &action, NULL);
189  sigaction(SIGFPE, &action, NULL);
190  sigaction(SIGBUS, &action, NULL);
191 #else
192  // Warn API users that an implementation is needed.
193  tprintf("CatchSignals has no non-linux implementation!\n");
194 #endif
195 }
#define tprintf(...)
Definition: tprintf.h:31
void signal_exit(int signal_code)
Definition: globaloc.cpp:52
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::ClearAdaptiveClassifier ( )

Call between pages or documents etc to free up memory and forget adaptive data.

Definition at line 509 of file baseapi.cpp.

509  {
510  if (tesseract_ == NULL)
511  return;
514 }
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::ClearResults ( )
protected

Delete the pageres and block list ready for a new page.

Delete the pageres and clear the block list ready for a new page.

Definition at line 2126 of file baseapi.cpp.

2126  {
2127  if (tesseract_ != NULL) {
2128  tesseract_->Clear();
2129  }
2130  if (page_res_ != NULL) {
2131  delete page_res_;
2132  page_res_ = NULL;
2133  }
2134  recognition_done_ = false;
2135  if (block_list_ == NULL)
2136  block_list_ = new BLOCK_LIST;
2137  else
2138  block_list_->clear();
2139  if (paragraph_models_ != NULL) {
2141  delete paragraph_models_;
2143  }
2144  SavePixForCrash(0, NULL);
2145 }
void SavePixForCrash(int resolution, Pix *pix)
Definition: globaloc.cpp:34
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:842
void delete_data_pointers()
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
int tesseract::TessBaseAPI::FindLines ( )
protected

Find lines from the image making the BLOCK_LIST.

Returns
0 on success.

Find lines from the image making the BLOCK_LIST.

Definition at line 2063 of file baseapi.cpp.

2063  {
2064  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
2065  tprintf("Please call SetImage before attempting recognition.");
2066  return -1;
2067  }
2068  if (recognition_done_)
2069  ClearResults();
2070  if (!block_list_->empty()) {
2071  return 0;
2072  }
2073  if (tesseract_ == NULL) {
2074  tesseract_ = new Tesseract;
2076  }
2077  if (tesseract_->pix_binary() == NULL)
2079  if (tesseract_->ImageWidth() > MAX_INT16 ||
2081  tprintf("Image too large: (%d, %d)\n",
2083  return -1;
2084  }
2085 
2087 
2089  if (equ_detect_ == NULL && datapath_ != NULL) {
2090  equ_detect_ = new EquationDetect(datapath_->string(), NULL);
2091  }
2093  }
2094 
2095  Tesseract* osd_tess = osd_tesseract_;
2096  OSResults osr;
2097  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
2098  if (strcmp(language_->string(), "osd") == 0) {
2099  osd_tess = tesseract_;
2100  } else {
2101  osd_tesseract_ = new Tesseract;
2104  NULL, 0, NULL, NULL, false) == 0) {
2105  osd_tess = osd_tesseract_;
2108  } else {
2109  tprintf("Warning: Auto orientation and script detection requested,"
2110  " but osd language failed to load\n");
2111  delete osd_tesseract_;
2112  osd_tesseract_ = NULL;
2113  }
2114  }
2115  }
2116 
2117  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
2118  return -1;
2119  // If Devanagari is being recognized, we use different images for page seg
2120  // and for OCR.
2121  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2122  return 0;
2123 }
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:840
#define tprintf(...)
Definition: tprintf.h:31
int ImageHeight() const
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void set_source_resolution(int ppi)
STRING * language_
Last initialized language.
Definition: baseapi.h:849
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
int ImageWidth() const
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
int GetSourceYResolution() const
Definition: thresholder.h:90
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2022
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:848
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:285
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:839
void SetEquationDetect(EquationDetect *detector)
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define MAX_INT16
Definition: host.h:119
#define NULL
Definition: host.h:144
void InitAdaptiveClassifier(bool load_pre_trained_templates)
Definition: adaptmatch.cpp:527
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:179
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * string() const
Definition: strngs.cpp:193
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
void tesseract::TessBaseAPI::GetAvailableLanguagesAsVector ( GenericVector< STRING > *  langs) const

Returns the available languages in the vector of STRINGs.

Definition at line 370 of file baseapi.cpp.

371  {
372  langs->clear();
373  if (tesseract_ != NULL) {
374 #ifdef _WIN32
375  STRING pattern = tesseract_->datadir + "/*." + kTrainedDataSuffix;
376  char fname[_MAX_FNAME];
377  WIN32_FIND_DATA data;
378  BOOL result = TRUE;
379  HANDLE handle = FindFirstFile(pattern.string(), &data);
380  if (handle != INVALID_HANDLE_VALUE) {
381  for (; result; result = FindNextFile(handle, &data)) {
382  _splitpath(data.cFileName, NULL, NULL, fname, NULL);
383  langs->push_back(STRING(fname));
384  }
385  FindClose(handle);
386  }
387 #else // _WIN32
388  DIR *dir;
389  struct dirent *dirent;
390  char *dot;
391 
392  STRING extension = STRING(".") + kTrainedDataSuffix;
393 
394  dir = opendir(tesseract_->datadir.string());
395  if (dir != NULL) {
396  while ((dirent = readdir(dir))) {
397  // Skip '.', '..', and hidden files
398  if (dirent->d_name[0] != '.') {
399  if (strstr(dirent->d_name, extension.string()) != NULL) {
400  dot = strrchr(dirent->d_name, '.');
401  // This ensures that .traineddata is at the end of the file name
402  if (strncmp(dot, extension.string(),
403  strlen(extension.string())) == 0) {
404  *dot = '\0';
405  langs->push_back(STRING(dirent->d_name));
406  }
407  }
408  }
409  }
410  closedir(dir);
411  }
412 #endif
413  }
414 }
#define BOOL
Definition: capi.h:27
int push_back(T object)
#define DIR
Definition: polyaprx.cpp:39
STRING datadir
Definition: ccutil.h:67
#define TRUE
Definition: capi.h:28
Definition: strngs.h:44
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * string() const
Definition: strngs.cpp:193
bool tesseract::TessBaseAPI::GetBoolVariable ( const char *  name,
bool *  value 
) const

Definition at line 236 of file baseapi.cpp.

236  {
237  BoolParam *p = ParamUtils::FindParam<BoolParam>(
239  if (p == NULL) return false;
240  *value = (BOOL8)(*p);
241  return true;
242 }
unsigned char BOOL8
Definition: host.h:113
name_table name
GenericVector< BoolParam * > bool_params
Definition: params.h:45
ParamsVectors * params()
Definition: ccutil.h:65
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:33
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * tesseract::TessBaseAPI::GetDatapath ( )

Definition at line 954 of file baseapi.cpp.

954  {
955  return tesseract_->datadir.c_str();
956 }
STRING datadir
Definition: ccutil.h:67
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * c_str() const
Definition: strngs.cpp:204
bool tesseract::TessBaseAPI::GetDoubleVariable ( const char *  name,
double *  value 
) const

Definition at line 250 of file baseapi.cpp.

250  {
251  DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
253  if (p == NULL) return false;
254  *value = (double)(*p);
255  return true;
256 }
name_table name
ParamsVectors * params()
Definition: ccutil.h:65
GenericVector< DoubleParam * > double_params
Definition: params.h:47
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:33
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * tesseract::TessBaseAPI::GetInitLanguagesAsString ( ) const

Returns the languages string used in the last valid initialization. If the last initialization specified "deu+hin" then that will be returned. If hin loaded eng automatically as well, then that will not be included in this list. To find the languages actually loaded use GetLoadedLanguagesAsVector. The returned string should NOT be deleted.

Definition at line 346 of file baseapi.cpp.

346  {
347  return (language_ == NULL || language_->string() == NULL) ?
348  "" : language_->string();
349 }
STRING * language_
Last initialized language.
Definition: baseapi.h:849
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
Pix * tesseract::TessBaseAPI::GetInputImage ( )

Definition at line 944 of file baseapi.cpp.

944  {
945  return input_image_;
946 }
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:846
const char * tesseract::TessBaseAPI::GetInputName ( )

These functions are required for searchable PDF output. We need our hands on the input file so that we can include it in the PDF without transcoding. If that is not possible, we need the original image. Finally, resolution metadata is stored in the PDF so we need that as well.

Definition at line 948 of file baseapi.cpp.

948  {
949  if (input_file_)
950  return input_file_->c_str();
951  return NULL;
952 }
#define NULL
Definition: host.h:144
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
const char * c_str() const
Definition: strngs.cpp:204
bool tesseract::TessBaseAPI::GetIntVariable ( const char *  name,
int *  value 
) const

Returns true if the parameter was found among Tesseract parameters. Fills in value with the value of the parameter.

Definition at line 228 of file baseapi.cpp.

228  {
229  IntParam *p = ParamUtils::FindParam<IntParam>(
231  if (p == NULL) return false;
232  *value = (inT32)(*p);
233  return true;
234 }
name_table name
GenericVector< IntParam * > int_params
Definition: params.h:44
ParamsVectors * params()
Definition: ccutil.h:65
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:33
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int inT32
Definition: host.h:102
void tesseract::TessBaseAPI::GetLoadedLanguagesAsVector ( GenericVector< STRING > *  langs) const

Returns the loaded languages in the vector of STRINGs. Includes all languages loaded by the last Init, including those loaded as dependencies of other loaded languages.

Definition at line 356 of file baseapi.cpp.

357  {
358  langs->clear();
359  if (tesseract_ != NULL) {
360  langs->push_back(tesseract_->lang);
361  int num_subs = tesseract_->num_sub_langs();
362  for (int i = 0; i < num_subs; ++i)
363  langs->push_back(tesseract_->get_sub_lang(i)->lang);
364  }
365 }
int push_back(T object)
Tesseract * get_sub_lang(int index) const
int num_sub_langs() const
STRING lang
Definition: ccutil.h:69
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
LTRResultIterator * tesseract::TessBaseAPI::GetLTRIterator ( )
protected

Return an LTR Result Iterator – used only for training, as we really want to ignore all BiDi smarts at that point. Delete it once you're done with it.

Get a left-to-right iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use.

Definition at line 1254 of file baseapi.cpp.

1254  {
1255  if (tesseract_ == NULL || page_res_ == NULL)
1256  return NULL;
1257  return new LTRResultIterator(
1261 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
size_t tesseract::TessBaseAPI::getOpenCLDevice ( void **  data)
static

If compiled with OpenCL AND an available OpenCL device is deemed faster than serial code, then "device" is populated with the cl_device_id and returns sizeof(cl_device_id) otherwise *device=NULL and returns 0.

Definition at line 162 of file baseapi.cpp.

162  {
163 #ifdef USE_OPENCL
164 #if USE_DEVICE_SELECTION
165  ds_device device = OpenclDevice::getDeviceSelection();
166  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
167  *data = reinterpret_cast<void*>(new cl_device_id);
168  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
169  return sizeof(cl_device_id);
170  }
171 #endif
172 #endif
173 
174  *data = NULL;
175  return 0;
176 }
#define NULL
Definition: host.h:144
PageSegMode tesseract::TessBaseAPI::GetPageSegMode ( ) const

Return the current page segmentation mode.

Definition at line 467 of file baseapi.cpp.

467  {
468  if (tesseract_ == NULL)
469  return PSM_SINGLE_BLOCK;
470  return static_cast<PageSegMode>(
471  static_cast<int>(tesseract_->tessedit_pageseg_mode));
472 }
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int tesseract::TessBaseAPI::GetSourceYResolution ( )

Definition at line 958 of file baseapi.cpp.

958  {
960 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
int GetSourceYResolution() const
Definition: thresholder.h:90
const char * tesseract::TessBaseAPI::GetStringVariable ( const char *  name) const

Returns the pointer to the string that represents the value of the parameter if it was found among Tesseract parameters.

Definition at line 244 of file baseapi.cpp.

244  {
245  StringParam *p = ParamUtils::FindParam<StringParam>(
247  return (p != NULL) ? p->string() : NULL;
248 }
name_table name
ParamsVectors * params()
Definition: ccutil.h:65
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:33
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
GenericVector< StringParam * > string_params
Definition: params.h:46
bool tesseract::TessBaseAPI::GetVariableAsString ( const char *  name,
STRING *  val 
)

Get value of named variable as a string, if it exists.

Definition at line 259 of file baseapi.cpp.

259  {
261 }
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
Definition: params.cpp:142
name_table name
ParamsVectors * params()
Definition: ccutil.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_non_debug_params 
)

Instances are now mostly thread-safe and totally independent, but some global parameters remain. Basically it is safe to use multiple TessBaseAPIs in different threads in parallel, UNLESS: you use SetVariable on some of the Params in classify and textord. If you do, then the effect will be to change it for all your instances.

Start tesseract. Returns zero on success and -1 on failure. NOTE that the only members that may be called before Init are those listed above here in the class definition.

The datapath must be the name of the parent directory of tessdata and must end in / . Any name after the last / will be stripped. The language is (usually) an ISO 639-3 string or NULL will default to eng. It is entirely safe (and eventually will be efficient too) to call Init multiple times on the same instance to change language, or just to reset the classifier. The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating that multiple languages are to be loaded. Eg hin+eng will load Hindi and English. Languages may specify internally that they want to be loaded with one or more other languages, so the ~ sign is available to override that. Eg if hin were set to load eng by default, then hin+~eng would force loading only hin. The number of loaded languages is limited only by memory, with the caveat that loading additional languages will impact both speed and accuracy, as there is more work to do to decide on the applicable language, and there is more chance of hallucinating incorrect words. WARNING: On changing languages, all Tesseract parameters are reset back to their default values. (Which may vary between languages.) If you have a rare need to set a Variable that controls initialization for a second call to Init you should explicitly call End() and then use SetVariable before Init. This is only a very rare use case, since there are very few uses that require any parameters to be set before Init.

If set_only_non_debug_params is true, only params that do not contain "debug" in the name will be set.

The datapath must be the name of the data directory (no ending /) or some other file in which the data directory resides (for instance argv[0].) The language is (usually) an ISO 639-3 string or NULL will default to eng. If numeric_mode is true, then only digits and Roman numerals will be returned.

Returns
0 on success and -1 on initialization failure.

Definition at line 276 of file baseapi.cpp.

280  {
281  PERF_COUNT_START("TessBaseAPI::Init")
282  // Default language is "eng".
283  if (language == NULL) language = "eng";
284  // If the datapath, OcrEngineMode or the language have changed - start again.
285  // Note that the language_ field stores the last requested language that was
286  // initialized successfully, while tesseract_->lang stores the language
287  // actually used. They differ only if the requested language was NULL, in
288  // which case tesseract_->lang is set to the Tesseract default ("eng").
289  if (tesseract_ != NULL &&
290  (datapath_ == NULL || language_ == NULL ||
291  *datapath_ != datapath || last_oem_requested_ != oem ||
292  (*language_ != language && tesseract_->lang != language))) {
293  delete tesseract_;
294  tesseract_ = NULL;
295  }
296  // PERF_COUNT_SUB("delete tesseract_")
297 #ifdef USE_OPENCL
298  OpenclDevice od;
299  od.InitEnv();
300 #endif
301  PERF_COUNT_SUB("OD::InitEnv()")
302  bool reset_classifier = true;
303  if (tesseract_ == NULL) {
304  reset_classifier = false;
305  tesseract_ = new Tesseract;
307  datapath, output_file_ != NULL ? output_file_->string() : NULL,
308  language, oem, configs, configs_size, vars_vec, vars_values,
309  set_only_non_debug_params) != 0) {
310  return -1;
311  }
312  }
313  PERF_COUNT_SUB("update tesseract_")
314  // Update datapath and language requested for the last valid initialization.
315  if (datapath_ == NULL)
316  datapath_ = new STRING(datapath);
317  else
318  *datapath_ = datapath;
319  if ((strcmp(datapath_->string(), "") == 0) &&
320  (strcmp(tesseract_->datadir.string(), "") != 0))
321  *datapath_ = tesseract_->datadir;
322 
323  if (language_ == NULL)
324  language_ = new STRING(language);
325  else
326  *language_ = language;
328  // PERF_COUNT_SUB("update last_oem_requested_")
329  // For same language and datapath, just reset the adaptive classifier.
330  if (reset_classifier) {
332  PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
333  }
335  return 0;
336 }
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:850
#define PERF_COUNT_SUB(SUB)
STRING * language_
Last initialized language.
Definition: baseapi.h:849
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:848
#define PERF_COUNT_START(FUNCT_NAME)
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:285
#define PERF_COUNT_END
Definition: strngs.h:44
#define NULL
Definition: host.h:144
OcrEngineMode const oem() const
Definition: baseapi.h:732
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * string() const
Definition: strngs.cpp:193
int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem 
)
inline

Definition at line 234 of file baseapi.h.

234  {
235  return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
236  }
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:276
#define NULL
Definition: host.h:144
OcrEngineMode const oem() const
Definition: baseapi.h:732
int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language 
)
inline

Definition at line 237 of file baseapi.h.

237  {
238  return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
239  }
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:276
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::InitForAnalysePage ( )

Init only for page layout analysis. Use only for calls to SetImage and AnalysePage. Calls that attempt recognition will generate an error.

Definition at line 434 of file baseapi.cpp.

434  {
435  if (tesseract_ == NULL) {
436  tesseract_ = new Tesseract;
438  }
439 }
#define NULL
Definition: host.h:144
void InitAdaptiveClassifier(bool load_pre_trained_templates)
Definition: adaptmatch.cpp:527
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int tesseract::TessBaseAPI::InitLangMod ( const char *  datapath,
const char *  language 
)

Init only the lang model component of Tesseract. The only functions that work after this init are SetVariable and IsValidWord. WARNING: temporary! This function will be removed from here and placed in a separate API at some future time.

Definition at line 422 of file baseapi.cpp.

422  {
423  if (tesseract_ == NULL)
424  tesseract_ = new Tesseract;
425  else
427  return tesseract_->init_tesseract_lm(datapath, NULL, language);
428 }
static void ResetToDefaults(ParamsVectors *member_params)
Definition: params.cpp:205
ParamsVectors * params()
Definition: ccutil.h:65
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language)
Definition: tessedit.cpp:460
bool tesseract::TessBaseAPI::InternalSetImage ( )
protected

Common code for setting the image. Returns true if Init has been called.

Common code for setting the image.

Definition at line 2005 of file baseapi.cpp.

2005  {
2006  if (tesseract_ == NULL) {
2007  tprintf("Please call Init before attempting to set an image.");
2008  return false;
2009  }
2010  if (thresholder_ == NULL)
2011  thresholder_ = new ImageThresholder;
2012  ClearResults();
2013  return true;
2014 }
#define tprintf(...)
Definition: tprintf.h:31
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::PrintVariables ( FILE *  fp) const

Print Tesseract parameters to the given file.

Definition at line 264 of file baseapi.cpp.

264  {
266 }
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:180
ParamsVectors * params()
Definition: ccutil.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::ReadConfigFile ( const char *  filename)

Read a "config" file containing a set of param, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name. Note: only non-init params will be set (init params are set by Init()).

Read a "config" file containing a set of parameter name, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name.

Definition at line 446 of file baseapi.cpp.

446  {
448 }
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:52
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::ReadDebugConfigFile ( const char *  filename)

Same as above, but only set debug params from the given config file.

Definition at line 451 of file baseapi.cpp.

451  {
453 }
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:52
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool tesseract::TessBaseAPI::SetDebugVariable ( const char *  name,
const char *  value 
)

Definition at line 222 of file baseapi.cpp.

222  {
223  if (tesseract_ == NULL) tesseract_ = new Tesseract;
225  tesseract_->params());
226 }
name_table name
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:98
ParamsVectors * params()
Definition: ccutil.h:65
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::SetInputImage ( Pix *  pix)

Definition at line 936 of file baseapi.cpp.

936  {
937  if (input_image_)
938  pixDestroy(&input_image_);
939  input_image_ = NULL;
940  if (pix)
941  input_image_ = pixCopy(NULL, pix);
942 }
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:846
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::SetInputName ( const char *  name)

Set the name of the input file. Needed for training and reading a UNLV zone file, and for searchable PDF output.

Set the name of the input file. Needed only for training and loading a UNLV zone file.

Definition at line 201 of file baseapi.cpp.

201  {
202  if (input_file_ == NULL)
203  input_file_ = new STRING(name);
204  else
205  *input_file_ = name;
206 }
name_table name
Definition: strngs.h:44
#define NULL
Definition: host.h:144
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
void tesseract::TessBaseAPI::SetOutputName ( const char *  name)

Set the name of the bonus output files. Needed only for debugging.

Set the name of the output files. Needed only for debugging.

Definition at line 209 of file baseapi.cpp.

209  {
210  if (output_file_ == NULL)
211  output_file_ = new STRING(name);
212  else
213  *output_file_ = name;
214 }
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
name_table name
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::SetPageSegMode ( PageSegMode  mode)

Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Set the current page segmentation mode. Defaults to PSM_AUTO. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Definition at line 460 of file baseapi.cpp.

460  {
461  if (tesseract_ == NULL)
462  tesseract_ = new Tesseract;
464 }
CMD_EVENTS mode
Definition: pgedit.cpp:116
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool tesseract::TessBaseAPI::SetVariable ( const char *  name,
const char *  value 
)

Set the value of an internal "parameter". Supply the name of the parameter and the value as a string, just as you would in a config file. Returns false if the name lookup failed. For example, SetVariable("tessedit_char_blacklist", "xyz"); ignores x, y and z, and SetVariable("classify_bln_numeric_mode", "1"); sets numeric-only mode. SetVariable may be used before Init, but settings will revert to defaults on End().

Note: Must be called after Init(). Only works for non-init variables (init variables should be passed to Init()).

Definition at line 216 of file baseapi.cpp.

216  {
217  if (tesseract_ == NULL) tesseract_ = new Tesseract;
219  tesseract_->params());
220 }
name_table name
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:98
ParamsVectors * params()
Definition: ccutil.h:65
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
char * tesseract::TessBaseAPI::TesseractRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
)

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a 1 represents WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Note that TesseractRect is the simplified convenience interface. For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and one or more of the Get*Text functions below.

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a one pixel is WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Definition at line 487 of file baseapi.cpp.

491  {
492  if (tesseract_ == NULL || width < kMinRectSize || height < kMinRectSize)
493  return NULL; // Nothing worth doing.
494 
495  // Since this original api didn't give the exact size of the image,
496  // we have to invent a reasonable value.
497  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
498  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
499  bytes_per_pixel, bytes_per_line);
500  SetRectangle(left, top, width, height);
501 
502  return GetUTF8Text();
503 }
const int kMinRectSize
Definition: baseapi.cpp:86
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:561
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:525
int tesseract::TessBaseAPI::TextLength ( int *  blob_count)
protected

Return the length of the output text string, as UTF8, assuming one newline per line and one per block, with a terminator, and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Return the length of the output text string, as UTF8, assuming liberally two spacing marks after each word (as paragraphs end with two newlines), and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Definition at line 2154 of file baseapi.cpp.

2154  {
2155  if (tesseract_ == NULL || page_res_ == NULL)
2156  return 0;
2157 
2158  PAGE_RES_IT page_res_it(page_res_);
2159  int total_length = 2;
2160  int total_blobs = 0;
2161  // Iterate over the data structures to extract the recognition result.
2162  for (page_res_it.restart_page(); page_res_it.word () != NULL;
2163  page_res_it.forward()) {
2164  WERD_RES *word = page_res_it.word();
2165  WERD_CHOICE* choice = word->best_choice;
2166  if (choice != NULL) {
2167  total_blobs += choice->length() + 2;
2168  total_length += choice->unichar_string().length() + 2;
2169  for (int i = 0; i < word->reject_map.length(); ++i) {
2170  if (word->reject_map[i].rejected())
2171  ++total_length;
2172  }
2173  }
2174  }
2175  if (blob_count != NULL)
2176  *blob_count = total_blobs;
2177  return total_length;
2178 }
inT32 length() const
Definition: rejctmap.h:237
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
inT32 length() const
Definition: strngs.cpp:188
const STRING & unichar_string() const
Definition: ratngs.h:524
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
WERD * word
Definition: pageres.h:175
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::Threshold ( Pix **  pix)
protected virtual

Run the thresholder to make the thresholded image. If pix is not NULL, the source is thresholded to pix instead of the internal IMAGE.

Run the thresholder to make the thresholded image, returned in pix, which must not be NULL. *pix must be initialized to NULL, or point to an existing pixDestroyable Pix. The usual argument to Threshold is Tesseract::mutable_pix_binary().

Definition at line 2022 of file baseapi.cpp.

2022  {
2023  ASSERT_HOST(pix != NULL);
2024  if (*pix != NULL)
2025  pixDestroy(pix);
2026  // Zero resolution messes up the algorithms, so make sure it is credible.
2027  int y_res = thresholder_->GetScaledYResolution();
2028  if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
2029  // Use the minimum default resolution, as it is safer to under-estimate
2030  // than over-estimate resolution.
2032  }
2033  PageSegMode pageseg_mode =
2034  static_cast<PageSegMode>(
2035  static_cast<int>(tesseract_->tessedit_pageseg_mode));
2036  thresholder_->ThresholdToPix(pageseg_mode, pix);
2040  if (!thresholder_->IsBinary()) {
2043  } else {
2046  }
2047  // Set the internal resolution that is used for layout parameters from the
2048  // estimated resolution, rather than the image resolution, which may be
2049  // fabricated, but we will use the image resolution, if there is one, to
2050  // report output point sizes.
2051  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
2054  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2055  tprintf("Estimated resolution %d out of range! Corrected to %d\n",
2056  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2057  }
2058  tesseract_->set_source_resolution(estimated_res);
2059  SavePixForCrash(estimated_res, *pix);
2060 }
void SavePixForCrash(int resolution, Pix *pix)
Definition: globaloc.cpp:34
void set_pix_thresholds(Pix *thresholds)
virtual Pix * GetPixRectThresholds()
const int kMaxCredibleResolution
Definition: baseapi.cpp:110
#define tprintf(...)
Definition: tprintf.h:31
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void set_pix_grey(Pix *grey_pix)
void set_source_resolution(int ppi)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:75
int GetScaledYResolution() const
Definition: thresholder.h:93
virtual Pix * GetPixRectGrey()
const int kMinCredibleResolution
Minimum believable resolution.
Definition: baseapi.cpp:108
int GetScaledEstimatedResolution() const
Definition: thresholder.h:106
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * tesseract::TessBaseAPI::Version ( )
static

Returns the version identifier as a static string. Do not delete.

Definition at line 142 of file baseapi.cpp.

142  {
143 #if defined(GIT_REV) && (defined(DEBUG) || defined(_DEBUG))
144  return GIT_REV;
145 #else
146  return TESSERACT_VERSION_STR;
147 #endif
148 }
#define GIT_REV
Definition: vcsversion.h:1
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23

Member Data Documentation

BLOCK_LIST* tesseract::TessBaseAPI::block_list_
protected

The page layout.

Definition at line 843 of file baseapi.h.

STRING* tesseract::TessBaseAPI::datapath_
protected

Current location of tessdata.

Definition at line 848 of file baseapi.h.

EquationDetect* tesseract::TessBaseAPI::equ_detect_
protected

The equation detector.

Definition at line 840 of file baseapi.h.

STRING* tesseract::TessBaseAPI::input_file_
protected

Name used by training code.

Definition at line 845 of file baseapi.h.

Pix* tesseract::TessBaseAPI::input_image_
protected

Image used for searchable PDF.

Definition at line 846 of file baseapi.h.

STRING* tesseract::TessBaseAPI::language_
protected

Last initialized language.

Definition at line 849 of file baseapi.h.

OcrEngineMode tesseract::TessBaseAPI::last_oem_requested_
protected

Last OCR engine mode requested.

Definition at line 850 of file baseapi.h.

Tesseract* tesseract::TessBaseAPI::osd_tesseract_
protected

For orientation & script detection.

Definition at line 839 of file baseapi.h.

STRING* tesseract::TessBaseAPI::output_file_
protected

Name used by debug code.

Definition at line 847 of file baseapi.h.

PAGE_RES* tesseract::TessBaseAPI::page_res_
protected

The page-level data.

Definition at line 844 of file baseapi.h.

GenericVector<ParagraphModel *>* tesseract::TessBaseAPI::paragraph_models_
protected

Definition at line 842 of file baseapi.h.

bool tesseract::TessBaseAPI::recognition_done_
protected

page_res_ contains recognition data.

Definition at line 851 of file baseapi.h.

Tesseract* tesseract::TessBaseAPI::tesseract_
protected

The underlying data object.

Definition at line 833 of file baseapi.h.

ImageThresholder* tesseract::TessBaseAPI::thresholder_
protected

Image thresholding module.

Definition at line 841 of file baseapi.h.

TruthCallback* tesseract::TessBaseAPI::truth_cb_
protected

Definition at line 852 of file baseapi.h.


The documentation for this class was generated from the following files: