All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
baseapi.h
Go to the documentation of this file.
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 // Created: Fri Oct 06 15:35:01 PDT 2006
6 //
7 // (C) Copyright 2006, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_API_BASEAPI_H__
21 #define TESSERACT_API_BASEAPI_H__
22 
// Library version, as a string and as a packed integer. The integer follows
// the layout produced by MAKE_VERSION below: 0x030400 == MAKE_VERSION(3, 4, 0).
23 #define TESSERACT_VERSION_STR "3.04.00"
24 #define TESSERACT_VERSION 0x030400
// Packs a version triple into a single integer: major shifted left by 16,
// minor shifted left by 8, patch in the low bits. Each argument is
// parenthesised, so expressions may be passed; minor and patch are not
// range-checked, so values above 255 would overlap the neighbouring field.
25 #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
26  (patch))
27 
28 #include <stdio.h>
29 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
30 // complexity of includes here. Use forward declarations wherever possible
31 // and hide includes of complex types in baseapi.cpp.
32 #include "platform.h"
33 #include "apitypes.h"
34 #include "thresholder.h"
35 #include "unichar.h"
36 #include "tesscallback.h"
37 #include "publictypes.h"
38 #include "pageiterator.h"
39 #include "resultiterator.h"
40 
41 template <typename T> class GenericVector;
42 class PAGE_RES;
43 class PAGE_RES_IT;
44 class ParagraphModel;
45 struct BlamerBundle;
46 class BLOCK_LIST;
47 class DENORM;
48 class MATRIX;
49 class ROW;
50 class STRING;
51 class WERD;
52 struct Pix;
53 struct Box;
54 struct Pixa;
55 struct Boxa;
56 class ETEXT_DESC;
57 struct OSResults;
58 class TBOX;
59 class UNICHARSET;
60 class WERD_CHOICE_LIST;
61 
// Restored from this listing's definition index, which records
// "INT_FEATURE_STRUCT * INT_FEATURE" at baseapi.h:62. The line was dropped by
// the extraction, yet GetFeaturesForBlob() below takes an INT_FEATURE_STRUCT*.
// The elaborated type specifier also forward-declares INT_FEATURE_STRUCT.
// NOTE(review): listing line 63 is missing as well and cannot be recovered
// from the index — confirm against the upstream header.
62 typedef struct INT_FEATURE_STRUCT *INT_FEATURE;
64 struct TBLOB;
65 
66 namespace tesseract {
67 
68 class CubeRecoContext;
69 class Dawg;
70 class Dict;
71 class EquationDetect;
72 class PageIterator;
73 class LTRResultIterator;
74 class ResultIterator;
75 class MutableIterator;
76 class TessResultRenderer;
77 class Tesseract;
78 class Trie;
79 class Wordrec;
80 
// Pointer to a const member function of Dict taking (void*, UNICHAR_ID, bool)
// and returning int. Accepted by TessBaseAPI::SetDictFunc.
81 typedef int (Dict::*DictFunc)(void* void_dawg_args,
82  UNICHAR_ID unichar_id, bool word_end) const;
// Pointer to a non-const member function of Dict returning double; the two
// string arguments are each paired with an explicit byte count. Accepted by
// TessBaseAPI::SetProbabilityInContextFunc.
83 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
84  const char* context,
85  int context_bytes,
86  const char* character,
87  int character_bytes);
// Pointer to a non-const member function of Dict returning float. No setter
// for this type is visible in this header.
88 typedef float (Dict::*ParamsModelClassifyFunc)(
89  const char *lang, void *path);
// Pointer to a non-const member function of Wordrec returning void. The first
// three arguments are const references; blamer_bundle is a mutable pointer.
// Accepted by TessBaseAPI::SetFillLatticeFunc.
90 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
91  const WERD_CHOICE_LIST &best_choices,
92  const UNICHARSET &unicharset,
93  BlamerBundle *blamer_bundle);
// Restored from this listing's definition index (TruthCallback,
// baseapi.h:95): the typedef was dropped by the extraction, but
// InitTruthCallback() and the truth_cb_ member depend on it.
94 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
95  TruthCallback;
96
// Class head restored: the extraction dropped listing line 105, leaving the
// access specifier below and the closing "}; // class TessBaseAPI." without an
// opening. TESS_API (platform.h) appears in this file's definition index but
// in no surviving line, so the class head is where it is used.
// NOTE(review): confirm the export macro placement against the upstream header.
105 class TESS_API TessBaseAPI {
106  public:
107  TessBaseAPI();
// Virtual destructor: the class exposes a protected virtual hook
// (Threshold), so it can be subclassed and deleted through a base pointer.
108  virtual ~TessBaseAPI();
109 
113  static const char* Version();
114 
122  static size_t getOpenCLDevice(void **device);
123 
128  static void CatchSignals();
129 
134  void SetInputName(const char* name);
142  const char* GetInputName();
143  void SetInputImage(Pix *pix);
144  Pix* GetInputImage();
145  int GetSourceYResolution();
146  const char* GetDatapath();
147 
149  void SetOutputName(const char* name);
150 
164  bool SetVariable(const char* name, const char* value);
165  bool SetDebugVariable(const char* name, const char* value);
166 
// Typed variable getters. Each is a const member taking a variable name and a
// pointer out-parameter of the matching type, and returns bool.
// NOTE(review): presumably the bool reports whether `name` was found and
// *value written — confirm against baseapi.cpp.
171  bool GetIntVariable(const char *name, int *value) const;
172  bool GetBoolVariable(const char *name, bool *value) const;
173  bool GetDoubleVariable(const char *name, double *value) const;
174 
179  const char *GetStringVariable(const char *name) const;
180 
184  void PrintVariables(FILE *fp) const;
185 
189  bool GetVariableAsString(const char *name, STRING *val);
190 
// Full initialiser. The two shorter Init overloads below forward to it.
// Declared only; the meaning of the int result and the ownership of
// configs / vars_vec / vars_values are not visible in this header.
229  int Init(const char* datapath, const char* language, OcrEngineMode mode,
230  char **configs, int configs_size,
231  const GenericVector<STRING> *vars_vec,
232  const GenericVector<STRING> *vars_values,
233  bool set_only_non_debug_params);
// Convenience overload: forwards to the 8-argument Init with configs = NULL,
// configs_size = 0, vars_vec = NULL, vars_values = NULL and
// set_only_non_debug_params = false, and returns its result.
234  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
235  return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
236  }
// Convenience overload: same forwarding as above, with OEM_DEFAULT supplied
// as the engine mode.
237  int Init(const char* datapath, const char* language) {
238  return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
239  }
240 
249  const char* GetInitLanguagesAsString() const;
250 
256  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
257 
261  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
262 
269  int InitLangMod(const char* datapath, const char* language);
270 
275  void InitForAnalysePage();
276 
283  void ReadConfigFile(const char* filename);
285  void ReadDebugConfigFile(const char* filename);
286 
292  void SetPageSegMode(PageSegMode mode);
293 
295  PageSegMode GetPageSegMode() const;
296 
314  char* TesseractRect(const unsigned char* imagedata,
315  int bytes_per_pixel, int bytes_per_line,
316  int left, int top, int width, int height);
317 
322  void ClearAdaptiveClassifier();
323 
330  /* @{ */
331 
341  void SetImage(const unsigned char* imagedata, int width, int height,
342  int bytes_per_pixel, int bytes_per_line);
343 
354  void SetImage(Pix* pix);
355 
360  void SetSourceResolution(int ppi);
361 
367  void SetRectangle(int left, int top, int width, int height);
368 
// Replaces the installed thresholder with `thresholder`, deleting the
// previously installed one, then calls ClearResults(). The object takes over
// the new pointer: a later call deletes it.
// Re-installing the pointer that is already held is treated as a no-op for
// deletion; otherwise the object would be deleted and then stored dangling.
// No separate null check is needed: deleting a null pointer has no effect.
376  void SetThresholder(ImageThresholder* thresholder) {
377  if (thresholder_ != thresholder)
378  delete thresholder_;
379  thresholder_ = thresholder;
380  ClearResults();
381  }
382 
388  Pix* GetThresholdedImage();
389 
395  Boxa* GetRegions(Pixa** pixa);
396 
// Full form. Returns a Boxa*; pixa, blockids and paraids are pointer-to-pointer
// out-parameters. The helper overload below passes NULL for paraids, so at
// least that argument is expected to be optional.
// NOTE(review): ownership of the returned Boxa and of the out-arrays is not
// visible in this header — confirm before documenting who frees them.
408  Boxa* GetTextlines(const bool raw_image, const int raw_padding,
409  Pixa** pixa, int** blockids, int** paraids);
410  /*
411  Helper method to extract from the thresholded image. (most common usage)
412  */
// Forwards to the 5-argument overload with raw_image = false,
// raw_padding = 0 and paraids = NULL.
413  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
414  return GetTextlines(false, 0, pixa, blockids, NULL);
415  }
416 
425  Boxa* GetStrips(Pixa** pixa, int** blockids);
426 
432  Boxa* GetWords(Pixa** pixa);
433 
442  Boxa* GetConnectedComponents(Pixa** cc);
443 
457  Boxa* GetComponentImages(const PageIteratorLevel level,
458  const bool text_only, const bool raw_image,
459  const int raw_padding,
460  Pixa** pixa, int** blockids, int** paraids);
461  // Helper function to get binary images with no padding (most common usage).
// Forwards to the 7-argument overload with raw_image = false,
// raw_padding = 0 and paraids = NULL. The first signature line (462) had been
// dropped by the extraction; it is restored from this listing's definition
// index entry for baseapi.h:462.
462  Boxa* GetComponentImages(const PageIteratorLevel level,
463  const bool text_only,
464  Pixa** pixa, int** blockids) {
465  return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
466  }
467 
474  int GetThresholdedImageScaleFactor() const;
475 
481  void DumpPGM(const char* filename);
482 
// Convenience overload: same as AnalyseLayout(false), i.e. with
// merge_similar_words switched off. The signature line (498) had been dropped
// by the extraction; it is restored from this listing's definition index entry
// for baseapi.h:498.
498  PageIterator* AnalyseLayout() {
499  return AnalyseLayout(false);
500  }
// Full form, declared only. Returns a PageIterator*; ownership of the returned
// iterator is not visible in this header.
501  PageIterator* AnalyseLayout(bool merge_similar_words);
502 
509  int Recognize(ETEXT_DESC* monitor);
510 
517  int RecognizeForChopTest(ETEXT_DESC* monitor);
518 
// Public entry point and its worker share one parameter list: an input
// filename, a retry_config name, a timeout in milliseconds and a renderer.
// Both return bool and both are public.
// NOTE(review): whether retry_config and renderer may be NULL, and what a
// timeout of 0 means, is not visible here — confirm against baseapi.cpp.
541  bool ProcessPages(const char* filename, const char* retry_config,
542  int timeout_millisec, TessResultRenderer* renderer);
543  // Does the real work of ProcessPages.
544  bool ProcessPagesInternal(const char* filename, const char* retry_config,
545  int timeout_millisec, TessResultRenderer* renderer);
546 
556  bool ProcessPage(Pix* pix, int page_index, const char* filename,
557  const char* retry_config, int timeout_millisec,
558  TessResultRenderer* renderer);
559 
568  ResultIterator* GetIterator();
569 
578  MutableIterator* GetMutableIterator();
579 
584  char* GetUTF8Text();
585 
591  char* GetHOCRText(int page_number);
592 
600  char* GetBoxText(int page_number);
606  char* GetUNLVText();
608  int MeanTextConf();
615  int* AllWordConfidences();
616 
627  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
628 
635  void Clear();
636 
643  void End();
644 
652  static void ClearPersistentCache();
653 
// NOTE(review): IsValidWord returns int while its sibling IsValidCharacter
// returns bool. The meaning of the int is not visible in this header — check
// baseapi.cpp before treating it as a plain true/false.
660  int IsValidWord(const char *word);
661  // Returns true if utf8_character is defined in the UniCharset.
662  bool IsValidCharacter(const char *utf8_character);
663 
664 
665  bool GetTextDirection(int* out_offset, float* out_slope);
666 
668  void SetDictFunc(DictFunc f);
669 
673  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
674 
676  void SetFillLatticeFunc(FillLatticeFunc f);
677 
682  bool DetectOS(OSResults*);
683 
685  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
686  int* num_features, int* feature_outline_index);
687 
692  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
693  int right, int bottom);
694 
699  void RunAdaptiveClassifier(TBLOB* blob,
700  int num_max_matches,
701  int* unichar_ids,
702  float* ratings,
703  int* num_matches_returned);
704 
706  const char* GetUnichar(int unichar_id);
707 
709  const Dawg *GetDawg(int i) const;
710 
712  int NumDawgs() const;
713 
715  static ROW *MakeTessOCRRow(float baseline, float xheight,
716  float descender, float ascender);
717 
719  static TBLOB *MakeTBLOB(Pix *pix);
720 
726  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
727 
// Returns the tesseract_ pointer held by this object. The method is const but
// hands out a pointer to a non-const Tesseract. The returned pointer is
// deliberately not const-qualified: cv-qualifiers on a by-value pointer return
// are ignored by the language and only trigger ignored-qualifier warnings.
728  Tesseract* tesseract() const {
729  return tesseract_;
730  }
731 
// Returns last_oem_requested_ by value. The return type carries no const: a
// cv-qualifier on a by-value enum return is ignored by the language and only
// triggers ignored-qualifier warnings.
732  OcrEngineMode oem() const {
733  return last_oem_requested_;
734  }
735 
// Stores cb in truth_cb_, replacing whatever pointer was there. The pointer is
// only assigned here: nothing is deleted, and this header does not show who
// owns or frees the callback.
736  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
737 
739  CubeRecoContext *GetCubeRecoContext() const;
740 
741  void set_min_orientation_margin(double margin);
742 
747  void GetBlockTextOrientations(int** block_orientation,
748  bool** vertical_writing);
749 
751  BLOCK_LIST* FindLinesCreateBlockList();
752 
758  static void DeleteBlockList(BLOCK_LIST* block_list);
759  /* @} */
760 
761  protected:
762 
764  TESS_LOCAL bool InternalSetImage();
765 
770  TESS_LOCAL virtual void Threshold(Pix** pix);
771 
776  TESS_LOCAL int FindLines();
777 
779  void ClearResults();
780 
786  TESS_LOCAL LTRResultIterator* GetLTRIterator();
787 
794  TESS_LOCAL int TextLength(int* blob_count);
795 
797  /* @{ */
798 
803  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
804  int length,
805  float baseline,
806  float xheight,
807  float descender,
808  float ascender);
809 
811  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
812  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
813  PAGE_RES* pass1_result);
814 
816  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
817 
822  TESS_LOCAL static int TesseractExtractResult(char** text,
823  int** lengths,
824  float** costs,
825  int** x0,
826  int** y0,
827  int** x1,
828  int** y1,
829  PAGE_RES* page_res);
830 
// Read-only access to page_res_: returns the member as a pointer-to-const
// without transferring ownership. May be NULL if page_res_ is NULL; no check
// is made here.
831  TESS_LOCAL const PAGE_RES* GetPageRes() const {
832  return page_res_;
833  }
834  /* @} */
835 
836 
837  protected:
// Data members. Listing lines 839-842 and 844-852 had been dropped by the
// extraction although inline methods above use thresholder_, truth_cb_,
// last_oem_requested_ and page_res_. They are restored from this listing's
// definition index, with the brief descriptions recorded there.
838  Tesseract* tesseract_;
839  Tesseract* osd_tesseract_;  // For orientation & script detection.
840  EquationDetect* equ_detect_;  // The equation detector.
841  ImageThresholder* thresholder_;  // Image thresholding module.
842  GenericVector<ParagraphModel *>* paragraph_models_;
843  BLOCK_LIST* block_list_;  // The page layout.
844  PAGE_RES* page_res_;  // The page-level data.
845  STRING* input_file_;  // Name used by training code.
846  Pix* input_image_;  // Image used for searchable PDF.
847  STRING* output_file_;  // Name used by debug code.
848  STRING* datapath_;  // Current location of tessdata.
849  STRING* language_;  // Last initialized language.
850  OcrEngineMode last_oem_requested_;  // Last ocr language mode requested.
851  bool recognition_done_;  // page_res_ contains recognition data.
852  TruthCallback* truth_cb_;
853
858  /* @{ */
865  /* @} */
866 
867  private:
// Private helpers for two input kinds. Both return bool and take the same
// retry_config / timeout_millisec / renderer arguments as the public
// ProcessPages, plus a tessedit_page_number.
// NOTE(review): presumably tessedit_page_number selects a single page
// (negative meaning "all") — confirm against baseapi.cpp.
868  // A list of image filenames gets special consideration
// Reads from an already-open FILE*; buf is a caller-supplied STRING.
// NOTE(review): whether buf carries pre-read input or is scratch space is not
// visible here.
869  bool ProcessPagesFileList(FILE *fp,
870  STRING *buf,
871  const char* retry_config, int timeout_millisec,
872  TessResultRenderer* renderer,
873  int tessedit_page_number);
874  // TIFF supports multipage so gets special consideration
// Takes the image either as an in-memory buffer (data, size) or by filename.
// NOTE(review): which of the two takes precedence when both are supplied is
// not visible here.
875  bool ProcessPagesMultipageTiff(const unsigned char *data,
876  size_t size,
877  const char* filename,
878  const char* retry_config,
879  int timeout_millisec,
880  TessResultRenderer* renderer,
881  int tessedit_page_number);
882 }; // class TessBaseAPI.
883 
// Free function in namespace tesseract, defined in baseapi.cpp. Takes a C
// string and returns a STRING by value, so the input is not modified in place.
// NOTE(review): presumably escapes markup-significant characters for hOCR
// output; the exact character set is not visible in this header.
885 STRING HOcrEscape(const char* text);
886 } // namespace tesseract.
887 
888 #endif // TESSERACT_API_BASEAPI_H__
Definition: blobs.h:261
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:840
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:234
TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
Definition: baseapi.h:95
struct TessResultRenderer TessResultRenderer
Definition: capi.h:61
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:376
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:850
TruthCallback * truth_cb_
Definition: baseapi.h:852
PageIterator * AnalyseLayout()
Definition: baseapi.h:498
TESS_LOCAL const PAGE_RES * GetPageRes() const
Definition: baseapi.h:831
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:842
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:846
STRING * language_
Last initialized language.
Definition: baseapi.h:849
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:413
CMD_EVENTS mode
Definition: pgedit.cpp:116
Definition: ocrrow.h:32
void InitTruthCallback(TruthCallback *cb)
Definition: baseapi.h:736
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:90
struct TessBaseAPI TessBaseAPI
Definition: capi.h:67
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
float(Dict::* ParamsModelClassifyFunc)(const char *lang, void *path)
Definition: baseapi.h:88
Tesseract *const tesseract() const
Definition: baseapi.h:728
name_table name
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:848
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2644
int Init(const char *datapath, const char *language)
Definition: baseapi.h:237
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:83
int UNICHAR_ID
Definition: unichar.h:33
#define TESS_API
Definition: platform.h:73
Definition: werd.h:60
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:839
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
Definition: rect.h:30
Definition: matrix.h:289
Definition: strngs.h:44
INT_FEATURE_STRUCT * INT_FEATURE
Definition: baseapi.h:62
#define NULL
Definition: host.h:144
OcrEngineMode const oem() const
Definition: baseapi.h:732
int(Dict::* DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:81
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
#define TESS_LOCAL
Definition: platform.h:74
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:462