tesseract  4.00.00dev
baseapi.h
Go to the documentation of this file.
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 // Created: Fri Oct 06 15:35:01 PDT 2006
6 //
7 // (C) Copyright 2006, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_API_BASEAPI_H_
21 #define TESSERACT_API_BASEAPI_H_
22 
23 #include <stdio.h>
24 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
25 // complexity of includes here. Use forward declarations wherever possible
26 // and hide includes of complex types in baseapi.cpp.
27 #include "apitypes.h"
28 #include "pageiterator.h"
29 #include "platform.h"
30 #include "publictypes.h"
31 #include "resultiterator.h"
32 #include "serialis.h"
33 #include "tesscallback.h"
34 #include "thresholder.h"
35 #include "unichar.h"
36 
37 template <typename T> class GenericVector;
38 class PAGE_RES;
39 class PAGE_RES_IT;
40 class ParagraphModel;
41 struct BlamerBundle;
42 class BLOCK_LIST;
43 class DENORM;
44 class MATRIX;
45 class ROW;
46 class STRING;
47 class WERD;
48 struct Pix;
49 struct Box;
50 struct Pixa;
51 struct Boxa;
52 class ETEXT_DESC;
53 struct OSResults;
54 class TBOX;
55 class UNICHARSET;
56 class WERD_CHOICE_LIST;
57 
60 struct TBLOB;
61 
62 namespace tesseract {
63 
64 class Dawg;
65 class Dict;
66 class EquationDetect;
67 class PageIterator;
68 class LTRResultIterator;
69 class ResultIterator;
70 class MutableIterator;
71 class TessResultRenderer;
72 class Tesseract;
73 class Trie;
74 class Wordrec;
75 
// Aliases for pointers to member functions of Dict and Wordrec.
// DictFunc, ProbabilityInContextFunc and FillLatticeFunc are the parameter
// types of TessBaseAPI::SetDictFunc, SetProbabilityInContextFunc and
// SetFillLatticeFunc respectively.
76 using DictFunc = int (Dict::*)(void* void_dawg_args,
77  UNICHAR_ID unichar_id, bool word_end) const;
78 using ProbabilityInContextFunc = double (Dict::*)(const char* lang,
79  const char* context,
80  int context_bytes,
81  const char* character,
82  int character_bytes);
83 using ParamsModelClassifyFunc = float (Dict::*)(
84  const char *lang, void *path);
85 using FillLatticeFunc = void (Wordrec::*)(const MATRIX &ratings,
86  const WERD_CHOICE_LIST &best_choices,
87  const UNICHARSET &unicharset,
88  BlamerBundle *blamer_bundle);
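89 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
90  TruthCallback;
91 
100 class TESS_API TessBaseAPI {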
101  public:
102  TessBaseAPI();
103  virtual ~TessBaseAPI();
104 
108  static const char* Version();
109 
117  static size_t getOpenCLDevice(void **device);
118 
123  static void CatchSignals();
124 
129  void SetInputName(const char* name);
137  const char* GetInputName();
138  // Takes ownership of the input pix.
139  void SetInputImage(Pix *pix);
140  Pix* GetInputImage();
141  int GetSourceYResolution();
142  const char* GetDatapath();
143 
145  void SetOutputName(const char* name);
146 
160  bool SetVariable(const char* name, const char* value);
161  bool SetDebugVariable(const char* name, const char* value);
162 
167  bool GetIntVariable(const char *name, int *value) const;
168  bool GetBoolVariable(const char *name, bool *value) const;
169  bool GetDoubleVariable(const char *name, double *value) const;
170 
175  const char *GetStringVariable(const char *name) const;
176 
180  void PrintVariables(FILE *fp) const;
181 
185  bool GetVariableAsString(const char *name, STRING *val);
186 
225  int Init(const char* datapath, const char* language, OcrEngineMode mode,
226  char **configs, int configs_size,
227  const GenericVector<STRING> *vars_vec,
228  const GenericVector<STRING> *vars_values,
229  bool set_only_non_debug_params);
// Convenience overload: forwards to the eight-argument Init() with no config
// files (nullptr, 0), no variable name/value vectors, and
// set_only_non_debug_params = false. Returns whatever that overload returns.
230  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
231  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
232  }
// Convenience overload: same as the three-argument Init() with the engine
// mode fixed to OEM_DEFAULT; no config files and no variable vectors are
// passed, and set_only_non_debug_params is false.
233  int Init(const char* datapath, const char* language) {
234  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
235  }
236  // In-memory version reads the traineddata file directly from the given
237  // data[data_size] array, and/or reads data via a FileReader.
238  int Init(const char* data, int data_size, const char* language,
239  OcrEngineMode mode, char** configs, int configs_size,
240  const GenericVector<STRING>* vars_vec,
241  const GenericVector<STRING>* vars_values,
242  bool set_only_non_debug_params, FileReader reader);
243 
252  const char* GetInitLanguagesAsString() const;
253 
259  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
260 
264  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
265 
272  int InitLangMod(const char* datapath, const char* language);
273 
278  void InitForAnalysePage();
279 
286  void ReadConfigFile(const char* filename);
288  void ReadDebugConfigFile(const char* filename);
289 
295  void SetPageSegMode(PageSegMode mode);
296 
298  PageSegMode GetPageSegMode() const;
299 
317  char* TesseractRect(const unsigned char* imagedata,
318  int bytes_per_pixel, int bytes_per_line,
319  int left, int top, int width, int height);
320 
325  void ClearAdaptiveClassifier();
326 
333  /* @{ */
334 
342  void SetImage(const unsigned char* imagedata, int width, int height,
343  int bytes_per_pixel, int bytes_per_line);
344 
353  void SetImage(Pix* pix);
354 
359  void SetSourceResolution(int ppi);
360 
366  void SetRectangle(int left, int top, int width, int height);
367 
// Installs `thresholder` as the current thresholder_ and clears any results.
// The previously installed thresholder_ is deleted here, so a pointer handed
// to this method is deleted by a later call that replaces it.
// Passing the pointer that is already installed must not delete it: the
// unguarded delete would leave thresholder_ dangling.
375  void SetThresholder(ImageThresholder* thresholder) {
376  if (thresholder != thresholder_) delete thresholder_;
377  thresholder_ = thresholder;
378  ClearResults();
379  }
380 
386  Pix* GetThresholdedImage();
387 
393  Boxa* GetRegions(Pixa** pixa);
394 
406  Boxa* GetTextlines(const bool raw_image, const int raw_padding,
407  Pixa** pixa, int** blockids, int** paraids);
408  // Helper method to extract text lines from the thresholded image (the most
409  // common usage): forwards to the full overload with raw_image = false,
410  // raw_padding = 0 and no paraids output.
411  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
412  return GetTextlines(false, 0, pixa, blockids, nullptr);
413  }
414 
423  Boxa* GetStrips(Pixa** pixa, int** blockids);
424 
430  Boxa* GetWords(Pixa** pixa);
431 
440  Boxa* GetConnectedComponents(Pixa** cc);
441 
455  Boxa* GetComponentImages(const PageIteratorLevel level,
456  const bool text_only, const bool raw_image,
457  const int raw_padding,
458  Pixa** pixa, int** blockids, int** paraids);
459  // Helper function to get binary images with no padding (most common usage).
// Forwards to the full overload with raw_image = false, raw_padding = 0 and
// no paraids output.
460  Boxa* GetComponentImages(const PageIteratorLevel level,
461  const bool text_only,
462  Pixa** pixa, int** blockids) {
463  return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
464  }
465 
472  int GetThresholdedImageScaleFactor() const;
473 
479  void DumpPGM(const char* filename);
480 
496  PageIterator* AnalyseLayout();
497  PageIterator* AnalyseLayout(bool merge_similar_words);
498 
505  int Recognize(ETEXT_DESC* monitor);
506 
513  int RecognizeForChopTest(ETEXT_DESC* monitor);
514 
537  bool ProcessPages(const char* filename, const char* retry_config,
538  int timeout_millisec, TessResultRenderer* renderer);
539  // Does the real work of ProcessPages.
540  bool ProcessPagesInternal(const char* filename, const char* retry_config,
541  int timeout_millisec, TessResultRenderer* renderer);
542 
552  bool ProcessPage(Pix* pix, int page_index, const char* filename,
553  const char* retry_config, int timeout_millisec,
554  TessResultRenderer* renderer);
555 
564  ResultIterator* GetIterator();
565 
574  MutableIterator* GetMutableIterator();
575 
580  char* GetUTF8Text();
581 
591  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
592 
599  char* GetHOCRText(int page_number);
600 
606  char* GetTSVText(int page_number);
607 
615  char* GetBoxText(int page_number);
616 
622  char* GetUNLVText();
623 
633  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
634  const char** script_name, float* script_conf);
635 
641  char* GetOsdText(int page_number);
642 
644  int MeanTextConf();
651  int* AllWordConfidences();
652 
663  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
664 
671  void Clear();
672 
679  void End();
680 
688  static void ClearPersistentCache();
689 
696  int IsValidWord(const char *word);
697  // Returns true if utf8_character is defined in the UniCharset.
698  bool IsValidCharacter(const char *utf8_character);
699 
700 
701  bool GetTextDirection(int* out_offset, float* out_slope);
702 
704  void SetDictFunc(DictFunc f);
705 
709  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
710 
712  void SetFillLatticeFunc(FillLatticeFunc f);
713 
718  bool DetectOS(OSResults*);
719 
721  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
722  int* num_features, int* feature_outline_index);
723 
728  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
729  int right, int bottom);
730 
735  void RunAdaptiveClassifier(TBLOB* blob,
736  int num_max_matches,
737  int* unichar_ids,
738  float* ratings,
739  int* num_matches_returned);
740 
742  const char* GetUnichar(int unichar_id);
743 
745  const Dawg *GetDawg(int i) const;
746 
748  int NumDawgs() const;
749 
751  static ROW *MakeTessOCRRow(float baseline, float xheight,
752  float descender, float ascender);
753 
755  static TBLOB *MakeTBLOB(Pix *pix);
756 
762  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
763 
// Accessor: returns the tesseract_ member pointer unchanged.
764  Tesseract* tesseract() const { return tesseract_; }
765 
// Accessor: returns last_oem_requested_, the stored OcrEngineMode.
766  OcrEngineMode oem() const { return last_oem_requested_; }
767 
// Stores the callback pointer in truth_cb_ (the pointer is copied, not the
// callback object).
// NOTE(review): ownership and lifetime of cb are not visible here - confirm
// against baseapi.cpp.
768  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
769 
770  void set_min_orientation_margin(double margin);
771 
776  void GetBlockTextOrientations(int** block_orientation,
777  bool** vertical_writing);
778 
780  BLOCK_LIST* FindLinesCreateBlockList();
781 
787  static void DeleteBlockList(BLOCK_LIST* block_list);
788  /* @} */
789 
790  protected:
791 
793  TESS_LOCAL bool InternalSetImage();
794 
799  TESS_LOCAL virtual bool Threshold(Pix** pix);
800 
805  TESS_LOCAL int FindLines();
806 
808  void ClearResults();
809 
815  TESS_LOCAL LTRResultIterator* GetLTRIterator();
816 
823  TESS_LOCAL int TextLength(int* blob_count);
824 
826  /* @{ */
827 
832  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
833  int length,
834  float baseline,
835  float xheight,
836  float descender,
837  float ascender);
838 
840  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
841  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
842  PAGE_RES* pass1_result);
843 
845  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
846 
851  TESS_LOCAL static int TesseractExtractResult(char** text,
852  int** lengths,
853  float** costs,
854  int** x0,
855  int** y0,
856  int** x1,
857  int** y1,
858  PAGE_RES* page_res);
859 
// Read-only accessor: returns the page_res_ member as a pointer to const.
860  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
861  /* @} */
862 
863 
864  protected:
865  Tesseract* tesseract_; ///< The underlying data object.
866  Tesseract* osd_tesseract_; ///< For orientation & script detection.
867  EquationDetect* equ_detect_; ///< The equation detector.
868  FileReader reader_; ///< Reads files from any filesystem.
869  ImageThresholder* thresholder_; ///< Image thresholding module.
870  GenericVector<ParagraphModel *>* paragraph_models_;
871  BLOCK_LIST* block_list_; ///< The page layout.
872  PAGE_RES* page_res_; ///< The page-level data.
873  STRING* input_file_; ///< Name used by training code.
874  STRING* output_file_; ///< Name used by debug code.
875  STRING* datapath_; ///< Current location of tessdata.
876  STRING* language_; ///< Last initialized language.
877  OcrEngineMode last_oem_requested_; ///< Last OCR engine mode requested.
878  bool recognition_done_; ///< page_res_ contains recognition data.
879  TruthCallback* truth_cb_; ///< Callback pointer stored by InitTruthCallback().
880 
885  /* @{ */
892  /* @} */
893 
894  private:
895  // A list of image filenames gets special consideration
896  bool ProcessPagesFileList(FILE *fp,
897  STRING *buf,
898  const char* retry_config, int timeout_millisec,
899  TessResultRenderer* renderer,
900  int tessedit_page_number);
901  // TIFF supports multipage so gets special consideration.
902  bool ProcessPagesMultipageTiff(const unsigned char *data,
903  size_t size,
904  const char* filename,
905  const char* retry_config,
906  int timeout_millisec,
907  TessResultRenderer* renderer,
908  int tessedit_page_number);
909  // There's currently no way to pass a document title from the
910  // Tesseract command line, and we have multiple places that choose
911  // to set the title to an empty string. Using a single named
912  // variable will hopefully reduce confusion if the situation changes
913  // in the future.
914  const char *unknown_title_ = "";
915 }; // class TessBaseAPI.
916 
918 STRING HOcrEscape(const char* text);
919 } // namespace tesseract.
920 
921 #endif // TESSERACT_API_BASEAPI_H_
Tesseract * tesseract() const
Definition: baseapi.h:764
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:866
#define TESS_LOCAL
Definition: platform.h:88
STRING * language_
Last initialized language.
Definition: baseapi.h:876
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:870
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:460
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:867
struct TessBaseAPI TessBaseAPI
Definition: capi.h:83
void InitTruthCallback(TruthCallback *cb)
Definition: baseapi.h:768
TruthCallback * truth_cb_
Definition: baseapi.h:879
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:874
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:375
STRING * input_file_
Name used by training code.
Definition: baseapi.h:873
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:78
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:868
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:411
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2837
Definition: ocrrow.h:32
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:872
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:878
INT_FEATURE_STRUCT * INT_FEATURE
Definition: baseapi.h:58
float(Dict::* ParamsModelClassifyFunc)(const char *lang, void *path)
Definition: baseapi.h:83
int(Dict::* DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:76
int Init(const char *datapath, const char *language)
Definition: baseapi.h:233
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:877
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:869
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:875
Definition: strngs.h:45
Definition: rect.h:30
OcrEngineMode oem() const
Definition: baseapi.h:766
Definition: matrix.h:570
Definition: blobs.h:261
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:871
CMD_EVENTS mode
Definition: pgedit.cpp:116
#define TESS_API
Definition: platform.h:87
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:230
TESS_LOCAL const PAGE_RES * GetPageRes() const
Definition: baseapi.h:860
struct TessResultRenderer TessResultRenderer
Definition: capi.h:77
Definition: werd.h:60
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models)
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:85
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
int UNICHAR_ID
Definition: unichar.h:35
TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
Definition: baseapi.h:90