tesseract v5.3.3.20231005
baseapi.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// File: baseapi.h
3// Description: Simple API for calling tesseract.
4// Author: Ray Smith
5//
6// (C) Copyright 2006, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17#ifndef TESSERACT_API_BASEAPI_H_
18#define TESSERACT_API_BASEAPI_H_
19
20#ifdef HAVE_CONFIG_H
21# include "config_auto.h" // DISABLED_LEGACY_ENGINE
22#endif
23
24#include "export.h"
25#include "pageiterator.h"
26#include "publictypes.h"
27#include "resultiterator.h"
28#include "unichar.h"
29
30#include <tesseract/version.h>
31
32#include <cstdio>
33#include <vector> // for std::vector
34
35struct Pix;
36struct Pixa;
37struct Boxa;
38
39namespace tesseract {
40
// Forward declarations of the types this header names only through pointers,
// references or member-function-pointer aliases.
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;

class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;   // pointee type of thresholder_
class LTRResultIterator;  // returned by GetLTRIterator()
class ResultIterator;
class MutableIterator;
class TessResultRenderer; // taken by ProcessPages() / ProcessPage()
class Tesseract;
58
59// Function to read a std::vector<char> from a whole file.
60// Returns false on failure.
61using FileReader = bool (*)(const char *filename, std::vector<char> *data);
62
// Pointer to a const member function of Dict that takes
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returns int.
// This is the parameter type of TessBaseAPI::SetDictFunc().
63using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
64 bool) const;
// Pointer to a non-const member function of Dict that takes
// (const char *, const char *, int, const char *, int) and returns double.
// This is the parameter type of TessBaseAPI::SetProbabilityInContextFunc().
65using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
66 int, const char *, int);
67
77public:
79 virtual ~TessBaseAPI();
80 // Copy constructor and assignment operator are currently unsupported.
81 TessBaseAPI(TessBaseAPI const &) = delete;
82 TessBaseAPI &operator=(TessBaseAPI const &) = delete;
83
87 static const char *Version();
88
96 static size_t getOpenCLDevice(void **device);
97
102 void SetInputName(const char *name);
110 const char *GetInputName();
111 // Takes ownership of the input pix.
112 void SetInputImage(Pix *pix);
113 Pix *GetInputImage();
114 int GetSourceYResolution();
115 const char *GetDatapath();
116
118 void SetOutputName(const char *name);
119
133 bool SetVariable(const char *name, const char *value);
134 bool SetDebugVariable(const char *name, const char *value);
135
140 bool GetIntVariable(const char *name, int *value) const;
141 bool GetBoolVariable(const char *name, bool *value) const;
142 bool GetDoubleVariable(const char *name, double *value) const;
143
148 const char *GetStringVariable(const char *name) const;
149
150#ifndef DISABLED_LEGACY_ENGINE
151
155 void PrintFontsTable(FILE *fp) const;
156
157#endif
158
162 void PrintVariables(FILE *fp) const;
163
167 bool GetVariableAsString(const char *name, std::string *val) const;
168
206 int Init(const char *datapath, const char *language, OcrEngineMode mode,
207 char **configs, int configs_size,
208 const std::vector<std::string> *vars_vec,
209 const std::vector<std::string> *vars_values,
210 bool set_only_non_debug_params);
211 int Init(const char *datapath, const char *language, OcrEngineMode oem) {
212 return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
213 }
214 int Init(const char *datapath, const char *language) {
215 return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
216 false);
217 }
218 // In-memory version reads the traineddata file directly from the given
219 // data[data_size] array, and/or reads data via a FileReader.
220 int Init(const char *data, int data_size, const char *language,
221 OcrEngineMode mode, char **configs, int configs_size,
222 const std::vector<std::string> *vars_vec,
223 const std::vector<std::string> *vars_values,
224 bool set_only_non_debug_params, FileReader reader);
225
234 const char *GetInitLanguagesAsString() const;
235
241 void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
242
246 void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
247
252 void InitForAnalysePage();
253
260 void ReadConfigFile(const char *filename);
262 void ReadDebugConfigFile(const char *filename);
263
269 void SetPageSegMode(PageSegMode mode);
270
272 PageSegMode GetPageSegMode() const;
273
291 char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
292 int bytes_per_line, int left, int top, int width,
293 int height);
294
299 void ClearAdaptiveClassifier();
300
307 /* @{ */
308
316 void SetImage(const unsigned char *imagedata, int width, int height,
317 int bytes_per_pixel, int bytes_per_line);
318
327 void SetImage(Pix *pix);
328
333 void SetSourceResolution(int ppi);
334
340 void SetRectangle(int left, int top, int width, int height);
341
347 Pix *GetThresholdedImage();
348
354 Boxa *GetRegions(Pixa **pixa);
355
367 Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
368 int **blockids, int **paraids);
369 /*
370 Helper method to extract from the thresholded image. (most common usage)
371*/
372 Boxa *GetTextlines(Pixa **pixa, int **blockids) {
373 return GetTextlines(false, 0, pixa, blockids, nullptr);
374 }
375
384 Boxa *GetStrips(Pixa **pixa, int **blockids);
385
391 Boxa *GetWords(Pixa **pixa);
392
401 Boxa *GetConnectedComponents(Pixa **cc);
402
415 Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
416 bool raw_image, int raw_padding, Pixa **pixa,
417 int **blockids, int **paraids);
418 // Helper function to get binary images with no padding (most common usage).
419 Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
420 Pixa **pixa, int **blockids) {
421 return GetComponentImages(level, text_only, false, 0, pixa, blockids,
422 nullptr);
423 }
424
431 int GetThresholdedImageScaleFactor() const;
432
448 PageIterator *AnalyseLayout();
449 PageIterator *AnalyseLayout(bool merge_similar_words);
450
457 int Recognize(ETEXT_DESC *monitor);
458
486 bool ProcessPages(const char *filename, const char *retry_config,
487 int timeout_millisec, TessResultRenderer *renderer);
488 // Does the real work of ProcessPages.
489 bool ProcessPagesInternal(const char *filename, const char *retry_config,
490 int timeout_millisec, TessResultRenderer *renderer);
491
501 bool ProcessPage(Pix *pix, int page_index, const char *filename,
502 const char *retry_config, int timeout_millisec,
503 TessResultRenderer *renderer);
504
513 ResultIterator *GetIterator();
514
523 MutableIterator *GetMutableIterator();
524
529 char *GetUTF8Text();
530
540 char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
541
548 char *GetHOCRText(int page_number);
549
554 char *GetAltoText(ETEXT_DESC *monitor, int page_number);
555
560 char *GetAltoText(int page_number);
561
567 char *GetTSVText(int page_number);
568
575 char *GetLSTMBoxText(int page_number);
576
584 char *GetBoxText(int page_number);
585
592 char *GetWordStrBoxText(int page_number);
593
599 char *GetUNLVText();
600
610 bool DetectOrientationScript(int *orient_deg, float *orient_conf,
611 const char **script_name, float *script_conf);
612
618 char *GetOsdText(int page_number);
619
621 int MeanTextConf();
628 int *AllWordConfidences();
629
630#ifndef DISABLED_LEGACY_ENGINE
641 bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
642#endif // ndef DISABLED_LEGACY_ENGINE
643
650 void Clear();
651
658 void End();
659
667 static void ClearPersistentCache();
668
675 int IsValidWord(const char *word) const;
676 // Returns true if utf8_character is defined in the UniCharset.
677 bool IsValidCharacter(const char *utf8_character) const;
678
679 bool GetTextDirection(int *out_offset, float *out_slope);
680
682 void SetDictFunc(DictFunc f);
683
687 void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
688
693 bool DetectOS(OSResults *);
694
699 void GetBlockTextOrientations(int **block_orientation,
700 bool **vertical_writing);
701
703 const char *GetUnichar(int unichar_id) const;
704
706 const Dawg *GetDawg(int i) const;
707
709 int NumDawgs() const;
710
712 return tesseract_;
713 }
714
716 return last_oem_requested_;
717 }
718
719 void set_min_orientation_margin(double margin);
720 /* @} */
721
722protected:
725 bool InternalSetImage();
726
731 virtual bool Threshold(Pix **pix);
732
737 int FindLines();
738
740 void ClearResults();
741
747 LTRResultIterator *GetLTRIterator();
748
755 int TextLength(int *blob_count) const;
756
758 void DetectParagraphs(bool after_text_recognition);
759
760 const PAGE_RES *GetPageRes() const {
761 return page_res_;
762 }
763
764protected:
770 std::vector<ParagraphModel *> *paragraph_models_;
771 BLOCK_LIST *block_list_;
773 std::string input_file_;
774 std::string output_file_;
775 std::string datapath_;
776 std::string language_;
779
784 /* @{ */
791 /* @} */
792
793private:
794 // A list of image filenames gets special consideration
795 bool ProcessPagesFileList(FILE *fp, std::string *buf,
796 const char *retry_config, int timeout_millisec,
797 TessResultRenderer *renderer,
798 int tessedit_page_number);
799 // TIFF supports multipage so gets special consideration.
800 bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
801 const char *filename, const char *retry_config,
802 int timeout_millisec,
803 TessResultRenderer *renderer,
804 int tessedit_page_number);
805}; // class TessBaseAPI.
806
808std::string HOcrEscape(const char *text);
809
810} // namespace tesseract
811
812#endif // TESSERACT_API_BASEAPI_H_
struct TessBaseAPI TessBaseAPI
Definition: capi.h:60
int value
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
Definition: baseapi.h:64
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
Definition: baseapi.h:66
std::string HOcrEscape(const char *text)
Definition: baseapi.cpp:2378
int UNICHAR_ID
Definition: unichar.h:34
bool(*)(const char *filename, std::vector< char > *data) FileReader
Definition: baseapi.h:61
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:767
std::string input_file_
Name used by training code.
Definition: baseapi.h:773
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:772
TessBaseAPI(TessBaseAPI const &)=delete
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:765
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:372
const PAGE_RES * GetPageRes() const
Definition: baseapi.h:760
std::string language_
Last initialized language.
Definition: baseapi.h:776
std::string datapath_
Current location of tessdata.
Definition: baseapi.h:775
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:419
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:211
std::vector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:770
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:778
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:768
int Init(const char *datapath, const char *language)
Definition: baseapi.h:214
OcrEngineMode oem() const
Definition: baseapi.h:715
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:769
std::string output_file_
Name used by debug code.
Definition: baseapi.h:774
Tesseract * tesseract() const
Definition: baseapi.h:711
TessBaseAPI & operator=(TessBaseAPI const &)=delete
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:771
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:766
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:777
#define TESS_API
Definition: export.h:32