tesseract v5.3.3.20231005
tesseract::TessBaseAPI Class Reference

#include <baseapi.h>

Inheritance diagram for tesseract::TessBaseAPI:
tesseract::FriendlyTessBaseAPI

Public Member Functions

 TessBaseAPI ()
 
virtual ~TessBaseAPI ()
 
 TessBaseAPI (TessBaseAPI const &)=delete
 
TessBaseAPI & operator= (TessBaseAPI const &)=delete
 
void SetInputName (const char *name)
 
const char * GetInputName ()
 
void SetInputImage (Pix *pix)
 
Pix * GetInputImage ()
 
int GetSourceYResolution ()
 
const char * GetDatapath ()
 
void SetOutputName (const char *name)
 
bool SetVariable (const char *name, const char *value)
 
bool SetDebugVariable (const char *name, const char *value)
 
bool GetIntVariable (const char *name, int *value) const
 
bool GetBoolVariable (const char *name, bool *value) const
 
bool GetDoubleVariable (const char *name, double *value) const
 
const char * GetStringVariable (const char *name) const
 
void PrintFontsTable (FILE *fp) const
 
void PrintVariables (FILE *fp) const
 
bool GetVariableAsString (const char *name, std::string *val) const
 
int Init (const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
 
int Init (const char *datapath, const char *language, OcrEngineMode oem)
 
int Init (const char *datapath, const char *language)
 
int Init (const char *data, int data_size, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, FileReader reader)
 
const char * GetInitLanguagesAsString () const
 
void GetLoadedLanguagesAsVector (std::vector< std::string > *langs) const
 
void GetAvailableLanguagesAsVector (std::vector< std::string > *langs) const
 
void InitForAnalysePage ()
 
void ReadConfigFile (const char *filename)
 
void ReadDebugConfigFile (const char *filename)
 
void SetPageSegMode (PageSegMode mode)
 
PageSegMode GetPageSegMode () const
 
char * TesseractRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
 
void ClearAdaptiveClassifier ()
 
void SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void SetImage (Pix *pix)
 
void SetSourceResolution (int ppi)
 
void SetRectangle (int left, int top, int width, int height)
 
Pix * GetThresholdedImage ()
 
Boxa * GetRegions (Pixa **pixa)
 
Boxa * GetTextlines (bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * GetStrips (Pixa **pixa, int **blockids)
 
Boxa * GetWords (Pixa **pixa)
 
Boxa * GetConnectedComponents (Pixa **cc)
 
Boxa * GetComponentImages (PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int GetThresholdedImageScaleFactor () const
 
PageIterator * AnalyseLayout ()
 
PageIterator * AnalyseLayout (bool merge_similar_words)
 
int Recognize (ETEXT_DESC *monitor)
 
bool ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * GetIterator ()
 
MutableIterator * GetMutableIterator ()
 
char * GetUTF8Text ()
 
char * GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * GetHOCRText (int page_number)
 
char * GetAltoText (ETEXT_DESC *monitor, int page_number)
 
char * GetAltoText (int page_number)
 
char * GetTSVText (int page_number)
 
char * GetLSTMBoxText (int page_number)
 
char * GetBoxText (int page_number)
 
char * GetWordStrBoxText (int page_number)
 
char * GetUNLVText ()
 
bool DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * GetOsdText (int page_number)
 
int MeanTextConf ()
 
int * AllWordConfidences ()
 
bool AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void Clear ()
 
void End ()
 
int IsValidWord (const char *word) const
 
bool IsValidCharacter (const char *utf8_character) const
 
bool GetTextDirection (int *out_offset, float *out_slope)
 
void SetDictFunc (DictFunc f)
 
void SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
bool DetectOS (OSResults *)
 
void GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
const char * GetUnichar (int unichar_id) const
 
const Dawg * GetDawg (int i) const
 
int NumDawgs () const
 
Tesseract * tesseract () const
 
OcrEngineMode oem () const
 
void set_min_orientation_margin (double margin)
 

Static Public Member Functions

static const char * Version ()
 
static size_t getOpenCLDevice (void **device)
 
static void ClearPersistentCache ()
 

Protected Member Functions

bool InternalSetImage ()
 
virtual bool Threshold (Pix **pix)
 
int FindLines ()
 
void ClearResults ()
 
LTRResultIterator * GetLTRIterator ()
 
int TextLength (int *blob_count) const
 
void DetectParagraphs (bool after_text_recognition)
 
const PAGE_RES * GetPageRes () const
 

Protected Attributes

Tesseract * tesseract_
 The underlying data object. More...
 
Tesseract * osd_tesseract_
 For orientation & script detection. More...
 
EquationDetect * equ_detect_
 The equation detector. More...
 
FileReader reader_
 Reads files from any filesystem. More...
 
ImageThresholder * thresholder_
 Image thresholding module. More...
 
std::vector< ParagraphModel * > * paragraph_models_
 
BLOCK_LIST * block_list_
 The page layout. More...
 
PAGE_RES * page_res_
 The page-level data. More...
 
std::string input_file_
 Name used by training code. More...
 
std::string output_file_
 Name used by debug code. More...
 
std::string datapath_
 Current location of tessdata. More...
 
std::string language_
 Last initialized language. More...
 
OcrEngineMode last_oem_requested_
 Last ocr language mode requested. More...
 
bool recognition_done_
 page_res_ contains recognition data. More...
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 
int image_width_
 
int image_height_
 

Detailed Description

Base class for all tesseract APIs. Specific classes can add ability to work on different inputs or produce different outputs. This class is mostly an interface layer on top of the Tesseract instance class to hide the data types so that users of this class don't have to include any other Tesseract headers.

Definition at line 76 of file baseapi.h.

Constructor & Destructor Documentation

◆ TessBaseAPI() [1/2]

tesseract::TessBaseAPI::TessBaseAPI ( )

Definition at line 211 of file baseapi.cpp.

212 : tesseract_(nullptr)
213 , osd_tesseract_(nullptr)
214 , equ_detect_(nullptr)
215 , reader_(nullptr)
216 ,
217 // thresholder_ is initialized to nullptr here, but will be set before use
218 // by: A constructor of a derived API or created
219 // implicitly when used in InternalSetImage.
220 thresholder_(nullptr)
221 , paragraph_models_(nullptr)
222 , block_list_(nullptr)
223 , page_res_(nullptr)
224 , last_oem_requested_(OEM_DEFAULT)
225 , recognition_done_(false)
226 , rect_left_(0)
227 , rect_top_(0)
228 , rect_width_(0)
229 , rect_height_(0)
230 , image_width_(0)
231 , image_height_(0) {
232}
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:767
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:772
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:765
std::vector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:770
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:778
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:768
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:769
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:771
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:766
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:777

◆ ~TessBaseAPI()

tesseract::TessBaseAPI::~TessBaseAPI ( )
virtual

Definition at line 234 of file baseapi.cpp.

234 {
235 End();
236}

◆ TessBaseAPI() [2/2]

tesseract::TessBaseAPI::TessBaseAPI ( TessBaseAPI const &  )
delete

Member Function Documentation

◆ AdaptToWordStr()

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaptation was not possible for some reason.

Definition at line 1835 of file baseapi.cpp.

1835 {
1836 int debug = 0;
1837 GetIntVariable("applybox_debug", &debug);
1838 bool success = true;
1839 PageSegMode current_psm = GetPageSegMode();
1840 SetPageSegMode(mode);
1841 SetVariable("classify_enable_learning", "0");
1842 const std::unique_ptr<const char[]> text(GetUTF8Text());
1843 if (debug) {
1844 tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1845 }
1846 if (text != nullptr) {
1847 PAGE_RES_IT it(page_res_);
1848 WERD_RES *word_res = it.word();
1849 if (word_res != nullptr) {
1850 word_res->word->set_text(wordstr);
1851 // Check to see if text matches wordstr.
1852 int w = 0;
1853 int t;
1854 for (t = 0; text[t] != '\0'; ++t) {
1855 if (text[t] == '\n' || text[t] == ' ') {
1856 continue;
1857 }
1858 while (wordstr[w] == ' ') {
1859 ++w;
1860 }
1861 if (text[t] != wordstr[w]) {
1862 break;
1863 }
1864 ++w;
1865 }
1866 if (text[t] != '\0' || wordstr[w] != '\0') {
1867 // No match.
1868 delete page_res_;
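1869 std::vector<TBOX> boxes;
1870 page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
1871 tesseract_->ReSegmentByClassification(page_res_);
1872 tesseract_->TidyUp(page_res_);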
1873 PAGE_RES_IT pr_it(page_res_);
1874 if (pr_it.word() == nullptr) {
1875 success = false;
1876 } else {
1877 word_res = pr_it.word();
1878 }
1879 } else {
1880 word_res->BestChoiceToCorrectText();
1881 }
1882 if (success) {
1883 tesseract_->EnableLearning = true;
1884 tesseract_->LearnWord(nullptr, word_res);
1885 }
1886 } else {
1887 success = false;
1888 }
1889 } else {
1890 success = false;
1891 }
1892 SetPageSegMode(current_psm);
1893 return success;
1894}
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:511
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:294
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:279
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:519
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:685
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:495
PAGE_RES * SetupApplyBoxes(const std::vector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:197
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:262

◆ AllWordConfidences()

int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space- delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1795 of file baseapi.cpp.

1795 {
1796 if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1797 return nullptr;
1798 }
1799 int n_word = 0;
1800 PAGE_RES_IT res_it(page_res_);
1801 for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
1802 n_word++;
1803 }
1804
1805 int *conf = new int[n_word + 1];
1806 n_word = 0;
1807 for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
1808 WERD_RES *word = res_it.word();
1809 WERD_CHOICE *choice = word->best_choice;
1810 int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1811 // This is the eq for converting Tesseract confidence to 1..100
1812 if (w_conf < 0) {
1813 w_conf = 0;
1814 }
1815 if (w_conf > 100) {
1816 w_conf = 100;
1817 }
1818 conf[n_word++] = w_conf;
1819 }
1820 conf[n_word] = -1;
1821 return conf;
1822}
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:834

◆ AnalyseLayout() [1/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns nullptr on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 812 of file baseapi.cpp.

812 {
813 return AnalyseLayout(false);
814}
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:812

◆ AnalyseLayout() [2/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 816 of file baseapi.cpp.

816 {
817 if (FindLines() == 0) {
818 if (block_list_->empty()) {
819 return nullptr; // The page was empty.
820 }
821 page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
822 DetectParagraphs(false);
823 return new PageIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
824 thresholder_->GetScaledYResolution(), rect_left_, rect_top_,
825 rect_width_, rect_height_);
826 }
827 return nullptr;
828}
void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2344
int GetScaledYResolution() const
Definition: thresholder.h:102

◆ Clear()

void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 1903 of file baseapi.cpp.

1903 {
1904 if (thresholder_ != nullptr) {
1905 thresholder_->Clear();
1906 }
1907 ClearResults();
1908 if (tesseract_ != nullptr) {
1909 SetInputImage(nullptr);
1910 }
1911}
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:920
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:59

◆ ClearAdaptiveClassifier()

void tesseract::TessBaseAPI::ClearAdaptiveClassifier ( )

Call between pages or documents etc to free up memory and forget adaptive data.

Definition at line 560 of file baseapi.cpp.

560 {
561 if (tesseract_ == nullptr) {
562 return;
563 }
564 tesseract_->ResetAdaptiveClassifier();
565 tesseract_->ResetDocumentDictionary();
566}

◆ ClearPersistentCache()

void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 1956 of file baseapi.cpp.

1956 {
1957 Dict::GlobalDawgCache()->DeleteUnusedDawgs();
1958}
void DeleteUnusedDawgs()
Definition: dawg_cache.h:42
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:172

◆ ClearResults()

void tesseract::TessBaseAPI::ClearResults ( )
protected

Delete the pageres and block list ready for a new page.

Delete the pageres and clear the block list ready for a new page.

Definition at line 2205 of file baseapi.cpp.

2205 {
2206 if (tesseract_ != nullptr) {
2207 tesseract_->Clear();
2208 }
2209 delete page_res_;
2210 page_res_ = nullptr;
2211 recognition_done_ = false;
2212 if (block_list_ == nullptr) {
2213 block_list_ = new BLOCK_LIST;
2214 } else {
2215 block_list_->clear();
2216 }
2217 if (paragraph_models_ != nullptr) {
2218 for (auto model : *paragraph_models_) {
2219 delete model;
2220 }
2221 delete paragraph_models_;
2222 paragraph_models_ = nullptr;
2223 }
2224}

◆ DetectOrientationScript()

bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270); orient_conf is the confidence (15.0 is reasonably confident); script_name is an ASCII string, the name of the script, e.g. "Latin"; script_conf is the confidence level in the script. Returns true on success and writes values to each parameter as an output.

Definition at line 1708 of file baseapi.cpp.

1709 {
1710 OSResults osr;
1711
1712 bool osd = DetectOS(&osr);
1713 if (!osd) {
1714 return false;
1715 }
1716
1717 int orient_id = osr.best_result.orientation_id;
1718 int script_id = osr.get_best_script(orient_id);
1719 if (orient_conf) {
1720 *orient_conf = osr.best_result.oconfidence;
1721 }
1722 if (orient_deg) {
1723 *orient_deg = orient_id * 90; // convert quadrant to degrees
1724 }
1725
1726 if (script_name) {
1727 const char *script = osr.unicharset->get_script_from_script_id(script_id);
1728
1729 *script_name = script;
1730 }
1731
1732 if (script_conf) {
1733 *script_conf = osr.best_result.sconfidence;
1734 }
1735
1736 return true;
1737}
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2266

◆ DetectOS()

bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2266 of file baseapi.cpp.

2266 {
2267 if (tesseract_ == nullptr) {
2268 return false;
2269 }
2270 ClearResults();
2271 if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
2272 return false;
2273 }
2274
2275 if (input_file_.empty()) {
2276 input_file_ = kInputFile;
2277 }
2278 return orientation_and_script_detection(input_file_.c_str(), osr, tesseract_) > 0;
2279}
int orientation_and_script_detection(const char *filename, OSResults *, tesseract::Tesseract *)
Definition: osdetect.cpp:188
std::string input_file_
Name used by training code.
Definition: baseapi.h:773
virtual bool Threshold(Pix **pix)
Definition: baseapi.cpp:2053
Image * mutable_pix_binary()
Image pix_binary() const
Pix * pix_
Definition: image.h:27

◆ DetectParagraphs()

void tesseract::TessBaseAPI::DetectParagraphs ( bool  after_text_recognition)
protected

Definition at line 2344 of file baseapi.cpp.

2344 {
2345 int debug_level = 0;
2346 GetIntVariable("paragraph_debug_level", &debug_level);
2347 if (paragraph_models_ == nullptr) {
2348 paragraph_models_ = new std::vector<ParagraphModel *>;
2349 }
2350 MutableIterator *result_it = GetMutableIterator();
2351 do { // Detect paragraphs for this block
2352 std::vector<ParagraphModel *> models;
2353 ::tesseract::DetectParagraphs(debug_level, after_text_recognition, result_it, &models);
2354 paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end());
2355 } while (result_it->Next(RIL_BLOCK));
2356 delete result_it;
2357}
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1354

◆ End()

void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 1919 of file baseapi.cpp.

1919 {
1920 Clear();
1921 delete thresholder_;
1922 thresholder_ = nullptr;
1923 delete page_res_;
1924 page_res_ = nullptr;
1925 delete block_list_;
1926 block_list_ = nullptr;
1927 if (paragraph_models_ != nullptr) {
1928 for (auto model : *paragraph_models_) {
1929 delete model;
1930 }
1931 delete paragraph_models_;
1932 paragraph_models_ = nullptr;
1933 }
1934#ifndef DISABLED_LEGACY_ENGINE
1935 if (osd_tesseract_ == tesseract_) {
1936 osd_tesseract_ = nullptr;
1937 }
1938 delete osd_tesseract_;
1939 osd_tesseract_ = nullptr;
1940 delete equ_detect_;
1941 equ_detect_ = nullptr;
1942#endif // ndef DISABLED_LEGACY_ENGINE
1943 delete tesseract_;
1944 tesseract_ = nullptr;
1945 input_file_.clear();
1946 output_file_.clear();
1947 datapath_.clear();
1948 language_.clear();
1949}
std::string language_
Last initialized language.
Definition: baseapi.h:776
std::string datapath_
Current location of tessdata.
Definition: baseapi.h:775
std::string output_file_
Name used by debug code.
Definition: baseapi.h:774

◆ FindLines()

int tesseract::TessBaseAPI::FindLines ( )
protected

Find lines from the image making the BLOCK_LIST.

Returns
0 on success.

Find lines from the image making the BLOCK_LIST.

Definition at line 2128 of file baseapi.cpp.

2128 {
2129 if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
2130 tprintf("Please call SetImage before attempting recognition.\n");
2131 return -1;
2132 }
2133 if (recognition_done_) {
2134 ClearResults();
2135 }
2136 if (!block_list_->empty()) {
2137 return 0;
2138 }
2139 if (tesseract_ == nullptr) {
2140 tesseract_ = new Tesseract;
2141#ifndef DISABLED_LEGACY_ENGINE
2142 tesseract_->InitAdaptiveClassifier(nullptr);
2143#endif
2144 }
2145 if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
2146 return -1;
2147 }
2148
2149 tesseract_->PrepareForPageseg();
2150
2151#ifndef DISABLED_LEGACY_ENGINE
2152 if (tesseract_->textord_equation_detect) {
2153 if (equ_detect_ == nullptr && !datapath_.empty()) {
2154 equ_detect_ = new EquationDetect(datapath_.c_str(), nullptr);
2155 }
2156 if (equ_detect_ == nullptr) {
2157 tprintf("Warning: Could not set equation detector\n");
2158 } else {
2159 tesseract_->SetEquationDetect(equ_detect_);
2160 }
2161 }
2162#endif // ndef DISABLED_LEGACY_ENGINE
2163
2164 Tesseract *osd_tess = osd_tesseract_;
2165 OSResults osr;
2166#ifndef DISABLED_LEGACY_ENGINE
2167 if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == nullptr) {
2168 if (strcmp(language_.c_str(), "osd") == 0) {
2169 osd_tess = tesseract_;
2170 } else {
2171 osd_tesseract_ = new Tesseract;
2172 TessdataManager mgr(reader_);
2173 if (datapath_.empty()) {
2174 tprintf(
2175 "Warning: Auto orientation and script detection requested,"
2176 " but data path is undefined\n");
2177 delete osd_tesseract_;
2178 osd_tesseract_ = nullptr;
2179 } else if (osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY,
2180 nullptr, 0, nullptr, nullptr, false, &mgr) == 0) {
2181 osd_tess = osd_tesseract_;
2182 osd_tesseract_->set_source_resolution(thresholder_->GetSourceYResolution());
2183 } else {
2184 tprintf(
2185 "Warning: Auto orientation and script detection requested,"
2186 " but osd language failed to load\n");
2187 delete osd_tesseract_;
2188 osd_tesseract_ = nullptr;
2189 }
2190 }
2191 }
2192#endif // ndef DISABLED_LEGACY_ENGINE
2193
2194 if (tesseract_->SegmentPage(input_file_.c_str(), block_list_, osd_tess, &osr) < 0) {
2195 return -1;
2196 }
2197
2198 // If Devanagari is being recognized, we use different images for page seg
2199 // and for OCR.
2200 tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2201 return 0;
2202}
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:264
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:186
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const std::string &arg0, const std::string &textbase, const std::string &language, OcrEngineMode oem, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, TessdataManager *mgr)
Definition: tessedit.cpp:288
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
void set_source_resolution(int ppi)
int GetSourceYResolution() const
Definition: thresholder.h:99
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:64
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527

◆ GetAltoText() [1/2]

char * tesseract::TessBaseAPI::GetAltoText ( ETEXT_DESC *  monitor,
int  page_number 
)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 137 of file altorenderer.cpp.

137 {
138 if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
139 return nullptr;
140 }
141
142 int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
143
144 if (input_file_.empty()) {
145 SetInputName(nullptr);
146 }
147
148#ifdef _WIN32
149 // convert input name from ANSI encoding to utf-8
150 int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
151 wchar_t *uni16_str = new WCHAR[str16_len];
152 str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
153 int utf8_len =
154 WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
155 char *utf8_str = new char[utf8_len];
156 WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
157 input_file_ = utf8_str;
158 delete[] uni16_str;
159 delete[] utf8_str;
160#endif
161
162 std::stringstream alto_str;
163 // Use "C" locale (needed for int values larger than 999).
164 alto_str.imbue(std::locale::classic());
165 alto_str << "\t\t<Page WIDTH=\"" << rect_width_ << "\" HEIGHT=\"" << rect_height_
166 << "\" PHYSICAL_IMG_NR=\"" << page_number << "\""
167 << " ID=\"page_" << page_number << "\">\n"
168 << "\t\t\t<PrintSpace HPOS=\"0\" VPOS=\"0\""
169 << " WIDTH=\"" << rect_width_ << "\""
170 << " HEIGHT=\"" << rect_height_ << "\">\n";
171
172 ResultIterator *res_it = GetIterator();
173 while (!res_it->Empty(RIL_BLOCK)) {
174 if (res_it->Empty(RIL_WORD)) {
175 res_it->Next(RIL_WORD);
176 continue;
177 }
178
179 int left, top, right, bottom;
180 auto block_type = res_it->BlockType();
181
182 switch (block_type) {
183 case PT_FLOWING_IMAGE:
184 case PT_HEADING_IMAGE:
185 case PT_PULLOUT_IMAGE: {
186 // Handle all kinds of images.
187 // TODO: optionally add TYPE, for example TYPE="photo".
188 alto_str << "\t\t\t\t<Illustration ID=\"cblock_" << bcnt++ << "\"";
189 AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
190 alto_str << "</Illustration>\n";
191 res_it->Next(RIL_BLOCK);
192 continue;
193 }
194 case PT_HORZ_LINE:
195 case PT_VERT_LINE:
196 // Handle horizontal and vertical lines.
197 alto_str << "\t\t\t\t<GraphicalElement ID=\"cblock_" << bcnt++ << "\"";
198 AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
199 alto_str << "</GraphicalElement >\n";
200 res_it->Next(RIL_BLOCK);
201 continue;
202 case PT_NOISE:
203 tprintf("TODO: Please report image which triggers the noise case.\n");
204 ASSERT_HOST(false);
205 default:
206 break;
207 }
208
209 if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
210 alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
211 AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
212 alto_str << "\n";
213 }
214
215 if (res_it->IsAtBeginningOf(RIL_PARA)) {
216 alto_str << "\t\t\t\t\t<TextBlock ID=\"block_" << tcnt << "\"";
217 AddBoxToAlto(res_it, RIL_PARA, alto_str);
218 alto_str << "\n";
219 }
220
221 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
222 alto_str << "\t\t\t\t\t\t<TextLine ID=\"line_" << lcnt << "\"";
223 AddBoxToAlto(res_it, RIL_TEXTLINE, alto_str);
224 alto_str << "\n";
225 }
226
227 alto_str << "\t\t\t\t\t\t\t<String ID=\"string_" << wcnt << "\"";
228 AddBoxToAlto(res_it, RIL_WORD, alto_str);
229 alto_str << " CONTENT=\"";
230
231 bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
232 bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
233 bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
234
235 res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
236
237 do {
238 const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
239 if (grapheme && grapheme[0] != 0) {
240 alto_str << HOcrEscape(grapheme.get()).c_str();
241 }
242 res_it->Next(RIL_SYMBOL);
243 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
244
245 alto_str << "\"/>";
246
247 wcnt++;
248
249 if (last_word_in_line) {
250 alto_str << "\n\t\t\t\t\t\t</TextLine>\n";
251 lcnt++;
252 } else {
253 int hpos = right;
254 int vpos = top;
255 res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
256 int width = left - hpos;
257 alto_str << "<SP WIDTH=\"" << width << "\" VPOS=\"" << vpos << "\" HPOS=\"" << hpos
258 << "\"/>\n";
259 }
260
261 if (last_word_in_tblock) {
262 alto_str << "\t\t\t\t\t</TextBlock>\n";
263 tcnt++;
264 }
265
266 if (last_word_in_cblock) {
267 alto_str << "\t\t\t\t</ComposedBlock>\n";
268 bcnt++;
269 }
270 }
271
272 alto_str << "\t\t\t</PrintSpace>\n"
273 << "\t\t</Page>\n";
274 const std::string &text = alto_str.str();
275
276 char *result = new char[text.length() + 1];
277 strcpy(result, text.c_str());
278 delete res_it;
279 return result;
280}
#define ASSERT_HOST(x)
Definition: errcode.h:54
std::string HOcrEscape(const char *text)
Definition: baseapi.cpp:2378
@ PT_PULLOUT_IMAGE
Definition: publictypes.h:63
@ PT_HEADING_IMAGE
Definition: publictypes.h:62
@ PT_HORZ_LINE
Definition: publictypes.h:64
@ PT_FLOWING_IMAGE
Definition: publictypes.h:61
@ PT_VERT_LINE
Definition: publictypes.h:65
ResultIterator * GetIterator()
Definition: baseapi.cpp:1337
void SetInputName(const char *name)
Definition: baseapi.cpp:270

◆ GetAltoText() [2/2]

char * tesseract::TessBaseAPI::GetAltoText ( int  page_number)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 129 of file altorenderer.cpp.

129 {
130 return GetAltoText(nullptr, page_number);
131}
char * GetAltoText(ETEXT_DESC *monitor, int page_number)

◆ GetAvailableLanguagesAsVector()

void tesseract::TessBaseAPI::GetAvailableLanguagesAsVector ( std::vector< std::string > *  langs) const

Returns the available languages in the sorted vector of std::string.

Definition at line 471 of file baseapi.cpp.

471 {
472 langs->clear();
473 if (tesseract_ != nullptr) {
474 addAvailableLanguages(tesseract_->datadir, "", langs);
475 std::sort(langs->begin(), langs->end());
476 }
477}
std::string datadir
Definition: ccutil.h:57

◆ GetBlockTextOrientations()

void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as in the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextLines().

Definition at line 2300 of file baseapi.cpp.

2300 {
2301 delete[] * block_orientation;
2302 *block_orientation = nullptr;
2303 delete[] * vertical_writing;
2304 *vertical_writing = nullptr;
2305 BLOCK_IT block_it(block_list_);
2306
2307 block_it.move_to_first();
2308 int num_blocks = 0;
2309 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2310 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2311 continue;
2312 }
2313 ++num_blocks;
2314 }
2315 if (!num_blocks) {
2316 tprintf("WARNING: Found no blocks\n");
2317 return;
2318 }
2319 *block_orientation = new int[num_blocks];
2320 *vertical_writing = new bool[num_blocks];
2321 block_it.move_to_first();
2322 int i = 0;
2323 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2324 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2325 continue;
2326 }
2327 FCOORD re_rotation = block_it.data()->re_rotation();
2328 float re_theta = re_rotation.angle();
2329 FCOORD classify_rotation = block_it.data()->classify_rotation();
2330 float classify_theta = classify_rotation.angle();
2331 double rot_theta = -(re_theta - classify_theta) * 2.0 / M_PI;
2332 if (rot_theta < 0) {
2333 rot_theta += 4;
2334 }
2335 int num_rotations = static_cast<int>(rot_theta + 0.5);
2336 (*block_orientation)[i] = num_rotations;
2337 // The classify_rotation is non-zero only if the text has vertical
2338 // writing direction.
2339 (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2340 ++i;
2341 }
2342}

◆ GetBoolVariable()

bool tesseract::TessBaseAPI::GetBoolVariable ( const char *  name,
bool *  value 
) const

Definition at line 304 of file baseapi.cpp.

304 {
305 auto *p = ParamUtils::FindParam<BoolParam>(name, GlobalParams()->bool_params,
307 if (p == nullptr) {
308 return false;
309 }
310 *value = bool(*p);
311 return true;
312}
int value
const char * p
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:36
ParamsVectors * params()
Definition: ccutil.h:53
std::vector< BoolParam * > bool_params
Definition: params.h:48

◆ GetBoxText()

char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

The recognized text is returned as a char* which is coded as a UTF8 box file. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 1552 of file baseapi.cpp.

1552 {
1553 if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1554 return nullptr;
1555 }
1556 int blob_count;
1557 int utf8_length = TextLength(&blob_count);
1558 int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + kMaxBytesPerLine;
1559 char *result = new char[total_length];
1560 result[0] = '\0';
1561 int output_length = 0;
1562 LTRResultIterator *it = GetLTRIterator();
1563 do {
1564 int left, top, right, bottom;
1565 if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1566 const std::unique_ptr</*non-const*/ char[]> text(it->GetUTF8Text(RIL_SYMBOL));
1567 // Tesseract uses space for recognition failure. Fix to a reject
1568 // character, kTesseractReject so we don't create illegal box files.
1569 for (int i = 0; text[i] != '\0'; ++i) {
1570 if (text[i] == ' ') {
1571 text[i] = kTesseractReject;
1572 }
1573 }
1574 snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n",
1575 text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number);
1576 output_length += strlen(result + output_length);
1577 // Just in case...
1578 if (output_length + kMaxBytesPerLine > total_length) {
1579 break;
1580 }
1581 }
1582 } while (it->Next(RIL_SYMBOL));
1583 delete it;
1584 return result;
1585}
const char kTesseractReject
Definition: baseapi.cpp:109
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1535
const int kMaxBytesPerLine
Definition: baseapi.cpp:1544
int TextLength(int *blob_count) const
Definition: baseapi.cpp:2233
LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1320

◆ GetComponentImages() [1/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 419 of file baseapi.h.

420 {
421 return GetComponentImages(level, text_only, false, 0, pixa, blockids,
422 nullptr);
423 }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:702

◆ GetComponentImages() [2/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( PageIteratorLevel  level,
bool  text_only,
bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not nullptr, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 702 of file baseapi.cpp.

704 {
705 /*non-const*/ std::unique_ptr</*non-const*/ PageIterator> page_it(GetIterator());
706 if (page_it == nullptr) {
707 page_it.reset(AnalyseLayout());
708 }
709 if (page_it == nullptr) {
710 return nullptr; // Failed.
711 }
712
713 // Count the components to get a size for the arrays.
714 int component_count = 0;
715 int left, top, right, bottom;
716
717 if (raw_image) {
718 // Get bounding box in original raw image with padding.
719 do {
720 if (page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom) &&
721 (!text_only || PTIsTextType(page_it->BlockType()))) {
722 ++component_count;
723 }
724 } while (page_it->Next(level));
725 } else {
726 // Get bounding box from binarized imaged. Note that this could be
727 // differently scaled from the original image.
728 do {
729 if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
730 (!text_only || PTIsTextType(page_it->BlockType()))) {
731 ++component_count;
732 }
733 } while (page_it->Next(level));
734 }
735
736 Boxa *boxa = boxaCreate(component_count);
737 if (pixa != nullptr) {
738 *pixa = pixaCreate(component_count);
739 }
740 if (blockids != nullptr) {
741 *blockids = new int[component_count];
742 }
743 if (paraids != nullptr) {
744 *paraids = new int[component_count];
745 }
746
747 int blockid = 0;
748 int paraid = 0;
749 int component_index = 0;
750 page_it->Begin();
751 do {
752 bool got_bounding_box;
753 if (raw_image) {
754 got_bounding_box = page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom);
755 } else {
756 got_bounding_box = page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom);
757 }
758 if (got_bounding_box && (!text_only || PTIsTextType(page_it->BlockType()))) {
759 Box *lbox = boxCreate(left, top, right - left, bottom - top);
760 boxaAddBox(boxa, lbox, L_INSERT);
761 if (pixa != nullptr) {
762 Pix *pix = nullptr;
763 if (raw_image) {
764 pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, &top);
765 } else {
766 pix = page_it->GetBinaryImage(level);
767 }
768 pixaAddPix(*pixa, pix, L_INSERT);
769 pixaAddBox(*pixa, lbox, L_CLONE);
770 }
771 if (paraids != nullptr) {
772 (*paraids)[component_index] = paraid;
773 if (page_it->IsAtFinalElement(RIL_PARA, level)) {
774 ++paraid;
775 }
776 }
777 if (blockids != nullptr) {
778 (*blockids)[component_index] = blockid;
779 if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
780 ++blockid;
781 paraid = 0;
782 }
783 }
784 ++component_index;
785 }
786 } while (page_it->Next(level));
787 return boxa;
788}
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:80

◆ GetConnectedComponents()

Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the pixa array.

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 690 of file baseapi.cpp.

690 {
691 return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
692}

◆ GetDatapath()

const char * tesseract::TessBaseAPI::GetDatapath ( )

Definition at line 935 of file baseapi.cpp.

935 {
936 return tesseract_->datadir.c_str();
937}

◆ GetDawg()

const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2365 of file baseapi.cpp.

2365 {
2366 if (tesseract_ == nullptr || i >= NumDawgs()) {
2367 return nullptr;
2368 }
2369 return tesseract_->getDict().GetDawg(i);
2370}
int NumDawgs() const
Definition: baseapi.cpp:2373
Dict & getDict() override
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:385

◆ GetDoubleVariable()

bool tesseract::TessBaseAPI::GetDoubleVariable ( const char *  name,
double *  value 
) const

Definition at line 320 of file baseapi.cpp.

320 {
321 auto *p = ParamUtils::FindParam<DoubleParam>(name, GlobalParams()->double_params,
323 if (p == nullptr) {
324 return false;
325 }
326 *value = (double)(*p);
327 return true;
328}
std::vector< DoubleParam * > double_params
Definition: params.h:50

◆ GetHOCRText() [1/2]

char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC monitor,
int  page_number 
)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and to receive progress callbacks. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 134 of file hocrrenderer.cpp.

134 {
135 if (tesseract_ == nullptr ||
136 (page_res_ == nullptr && Recognize(monitor) < 0)) {
137 return nullptr;
138 }
139
140 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, ccnt = 1;
141 int page_id = page_number + 1; // hOCR uses 1-based page numbers.
142 bool para_is_ltr = true; // Default direction is LTR
143 const char *paragraph_lang = nullptr;
144 bool font_info = false;
145 bool hocr_boxes = false;
146 GetBoolVariable("hocr_font_info", &font_info);
147 GetBoolVariable("hocr_char_boxes", &hocr_boxes);
148
149 if (input_file_.empty()) {
150 SetInputName(nullptr);
151 }
152
153#ifdef _WIN32
154 // convert input name from ANSI encoding to utf-8
155 int str16_len =
156 MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
157 wchar_t *uni16_str = new WCHAR[str16_len];
158 str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str,
159 str16_len);
160 int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
161 0, nullptr, nullptr);
162 char *utf8_str = new char[utf8_len];
163 WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
164 nullptr, nullptr);
165 input_file_ = utf8_str;
166 delete[] uni16_str;
167 delete[] utf8_str;
168#endif
169
170 std::stringstream hocr_str;
171 // Use "C" locale (needed for double values x_size and x_descenders).
172 hocr_str.imbue(std::locale::classic());
173 // Use 8 digits for double values.
174 hocr_str.precision(8);
175 hocr_str << " <div class='ocr_page'"
176 << " id='"
177 << "page_" << page_id << "'"
178 << " title='image \"";
179 if (!input_file_.empty()) {
180 hocr_str << HOcrEscape(input_file_.c_str());
181 } else {
182 hocr_str << "unknown";
183 }
184
185 hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
186 << rect_width_ << " " << rect_height_ << "; ppageno " << page_number
187 << "; scan_res " << GetSourceYResolution() << " "
188 << GetSourceYResolution() << "'>\n";
189
190 std::unique_ptr<ResultIterator> res_it(GetIterator());
191 while (!res_it->Empty(RIL_BLOCK)) {
192 int left, top, right, bottom;
193 auto block_type = res_it->BlockType();
194 switch (block_type) {
195 case PT_FLOWING_IMAGE:
196 case PT_HEADING_IMAGE:
197 case PT_PULLOUT_IMAGE: {
198 // Handle all kinds of images.
199 res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
200 hocr_str << " <div class='ocr_photo' id='block_" << page_id << '_'
201 << bcnt++ << "' title=\"bbox " << left << " " << top << " "
202 << right << " " << bottom << "\"></div>\n";
203 res_it->Next(RIL_BLOCK);
204 continue;
205 }
206 case PT_HORZ_LINE:
207 case PT_VERT_LINE:
208 // Handle horizontal and vertical lines.
209 res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
210 hocr_str << " <div class='ocr_separator' id='block_" << page_id << '_'
211 << bcnt++ << "' title=\"bbox " << left << " " << top << " "
212 << right << " " << bottom << "\"></div>\n";
213 res_it->Next(RIL_BLOCK);
214 continue;
215 case PT_NOISE:
216 tprintf("TODO: Please report image which triggers the noise case.\n");
217 ASSERT_HOST(false);
218 default:
219 break;
220 }
221
222 if (res_it->Empty(RIL_WORD)) {
223 res_it->Next(RIL_WORD);
224 continue;
225 }
226
227 // Open any new block/paragraph/textline.
228 if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
229 para_is_ltr = true; // reset to default direction
230 hocr_str << " <div class='ocr_carea'"
231 << " id='"
232 << "block_" << page_id << "_" << bcnt << "'";
233 AddBoxTohOCR(res_it.get(), RIL_BLOCK, hocr_str);
234 }
235 if (res_it->IsAtBeginningOf(RIL_PARA)) {
236 hocr_str << "\n <p class='ocr_par'";
237 para_is_ltr = res_it->ParagraphIsLtr();
238 if (!para_is_ltr) {
239 hocr_str << " dir='rtl'";
240 }
241 hocr_str << " id='"
242 << "par_" << page_id << "_" << pcnt << "'";
243 paragraph_lang = res_it->WordRecognitionLanguage();
244 if (paragraph_lang) {
245 hocr_str << " lang='" << paragraph_lang << "'";
246 }
247 AddBoxTohOCR(res_it.get(), RIL_PARA, hocr_str);
248 }
249 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
250 hocr_str << "\n <span class='";
251 switch (block_type) {
252 case PT_HEADING_TEXT:
253 hocr_str << "ocr_header";
254 break;
255 case PT_PULLOUT_TEXT:
256 hocr_str << "ocr_textfloat";
257 break;
258 case PT_CAPTION_TEXT:
259 hocr_str << "ocr_caption";
260 break;
261 case PT_FLOWING_IMAGE:
262 case PT_HEADING_IMAGE:
263 case PT_PULLOUT_IMAGE:
264 ASSERT_HOST(false);
265 break;
266 default:
267 hocr_str << "ocr_line";
268 }
269 hocr_str << "' id='"
270 << "line_" << page_id << "_" << lcnt << "'";
271 AddBoxTohOCR(res_it.get(), RIL_TEXTLINE, hocr_str);
272 }
273
274 // Now, process the word...
275 int32_t lstm_choice_mode = tesseract_->lstm_choice_mode;
276 std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
277 *rawTimestepMap = nullptr;
278 std::vector<std::vector<std::pair<const char *, float>>> *CTCMap = nullptr;
279 if (lstm_choice_mode) {
280 CTCMap = res_it->GetBestLSTMSymbolChoices();
281 rawTimestepMap = res_it->GetRawLSTMTimesteps();
282 }
283 hocr_str << "\n <span class='ocrx_word'"
284 << " id='"
285 << "word_" << page_id << "_" << wcnt << "'";
286 bool bold, italic, underlined, monospace, serif, smallcaps;
287 int pointsize, font_id;
288 res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
289 const char *font_name =
290 res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
291 &serif, &smallcaps, &pointsize, &font_id);
292 hocr_str << " title='bbox " << left << " " << top << " " << right << " "
293 << bottom << "; x_wconf "
294 << static_cast<int>(res_it->Confidence(RIL_WORD));
295 if (font_info) {
296 if (font_name) {
297 hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
298 }
299 hocr_str << "; x_fsize " << pointsize;
300 }
301 hocr_str << "'";
302 const char *lang = res_it->WordRecognitionLanguage();
303 if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
304 hocr_str << " lang='" << lang << "'";
305 }
306 switch (res_it->WordDirection()) {
307 // Only emit direction if different from current paragraph direction
309 if (!para_is_ltr) {
310 hocr_str << " dir='ltr'";
311 }
312 break;
314 if (para_is_ltr) {
315 hocr_str << " dir='rtl'";
316 }
317 break;
318 case DIR_MIX:
319 case DIR_NEUTRAL:
320 default: // Do nothing.
321 break;
322 }
323 hocr_str << ">";
324 bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
325 bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
326 bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
327 if (bold) {
328 hocr_str << "<strong>";
329 }
330 if (italic) {
331 hocr_str << "<em>";
332 }
333 do {
334 const std::unique_ptr<const char[]> grapheme(
335 res_it->GetUTF8Text(RIL_SYMBOL));
336 if (grapheme && grapheme[0] != 0) {
337 if (hocr_boxes) {
338 res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
339 hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
340 << left << " " << top << " " << right << " " << bottom
341 << "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
342 }
343 hocr_str << HOcrEscape(grapheme.get()).c_str();
344 if (hocr_boxes) {
345 hocr_str << "</span>";
346 tesseract::ChoiceIterator ci(*res_it);
347 if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) {
348 std::vector<std::vector<std::pair<const char *, float>>> *symbol =
349 ci.Timesteps();
350 hocr_str << "\n <span class='ocr_symbol'"
351 << " id='"
352 << "symbol_" << page_id << "_" << wcnt << "_" << scnt
353 << "'>";
354 for (const auto &timestep : *symbol) {
355 hocr_str << "\n <span class='ocrx_cinfo'"
356 << " id='"
357 << "timestep" << page_id << "_" << wcnt << "_" << tcnt
358 << "'>";
359 for (auto conf : timestep) {
360 hocr_str << "\n <span class='ocrx_cinfo'"
361 << " id='"
362 << "choice_" << page_id << "_" << wcnt << "_" << ccnt
363 << "'"
364 << " title='x_confs " << int(conf.second * 100) << "'>"
365 << HOcrEscape(conf.first).c_str() << "</span>";
366 ++ccnt;
367 }
368 hocr_str << "</span>";
369 ++tcnt;
370 }
371 hocr_str << "\n </span>";
372 ++scnt;
373 } else if (lstm_choice_mode == 2) {
374 hocr_str << "\n <span class='ocrx_cinfo'"
375 << " id='"
376 << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
377 << "'>";
378 do {
379 const char *choice = ci.GetUTF8Text();
380 float choiceconf = ci.Confidence();
381 if (choice != nullptr) {
382 hocr_str << "\n <span class='ocrx_cinfo'"
383 << " id='"
384 << "choice_" << page_id << "_" << wcnt << "_" << ccnt
385 << "'"
386 << " title='x_confs " << choiceconf << "'>"
387 << HOcrEscape(choice).c_str() << "</span>";
388 ccnt++;
389 }
390 } while (ci.Next());
391 hocr_str << "\n </span>";
392 tcnt++;
393 }
394 }
395 }
396 res_it->Next(RIL_SYMBOL);
397 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
398 if (italic) {
399 hocr_str << "</em>";
400 }
401 if (bold) {
402 hocr_str << "</strong>";
403 }
404 // If the lstm choice mode is required it is added here
405 if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
406 for (const auto &symbol : *rawTimestepMap) {
407 hocr_str << "\n <span class='ocr_symbol'"
408 << " id='"
409 << "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
410 for (const auto &timestep : symbol) {
411 hocr_str << "\n <span class='ocrx_cinfo'"
412 << " id='"
413 << "timestep" << page_id << "_" << wcnt << "_" << tcnt
414 << "'>";
415 for (auto &&conf : timestep) {
416 hocr_str << "\n <span class='ocrx_cinfo'"
417 << " id='"
418 << "choice_" << page_id << "_" << wcnt << "_" << ccnt
419 << "'"
420 << " title='x_confs " << int(conf.second * 100) << "'>"
421 << HOcrEscape(conf.first).c_str() << "</span>";
422 ++ccnt;
423 }
424 hocr_str << "</span>";
425 ++tcnt;
426 }
427 hocr_str << "</span>";
428 ++scnt;
429 }
430 } else if (lstm_choice_mode == 2 && !hocr_boxes && CTCMap != nullptr) {
431 for (const auto &timestep : *CTCMap) {
432 if (timestep.size() > 0) {
433 hocr_str << "\n <span class='ocrx_cinfo'"
434 << " id='"
435 << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
436 << "'>";
437 for (auto &j : timestep) {
438 float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
439 if (conf < 0.0f) {
440 conf = 0.0f;
441 }
442 if (conf > 100.0f) {
443 conf = 100.0f;
444 }
445 hocr_str << "\n <span class='ocrx_cinfo'"
446 << " id='"
447 << "choice_" << page_id << "_" << wcnt << "_" << ccnt
448 << "'"
449 << " title='x_confs " << conf << "'>"
450 << HOcrEscape(j.first).c_str() << "</span>";
451 ccnt++;
452 }
453 hocr_str << "</span>";
454 tcnt++;
455 }
456 }
457 }
458 // Close ocrx_word.
459 if (hocr_boxes || lstm_choice_mode > 0) {
460 hocr_str << "\n ";
461 }
462 hocr_str << "</span>";
463 tcnt = 1;
464 ccnt = 1;
465 wcnt++;
466 // Close any ending block/paragraph/textline.
467 if (last_word_in_line) {
468 hocr_str << "\n </span>";
469 lcnt++;
470 }
471 if (last_word_in_para) {
472 hocr_str << "\n </p>\n";
473 pcnt++;
474 para_is_ltr = true; // back to default direction
475 }
476 if (last_word_in_block) {
477 hocr_str << " </div>\n";
478 bcnt++;
479 }
480 }
481 hocr_str << " </div>\n";
482
483 const std::string &text = hocr_str.str();
484 char *result = new char[text.length() + 1];
485 strcpy(result, text.c_str());
486 return result;
487}
@ DIR_MIX
Definition: unichar.h:45
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:43
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:44
@ DIR_NEUTRAL
Definition: unichar.h:42
@ PT_CAPTION_TEXT
Definition: publictypes.h:60
@ PT_PULLOUT_TEXT
Definition: publictypes.h:55
@ PT_HEADING_TEXT
Definition: publictypes.h:54
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:304

◆ GetHOCRText() [2/2]

char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 121 of file hocrrenderer.cpp.

121 {
122 return GetHOCRText(nullptr, page_number);
123}
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)

◆ GetInitLanguagesAsString()

const char * tesseract::TessBaseAPI::GetInitLanguagesAsString ( ) const

Returns the languages string used in the last valid initialization. If the last initialization specified "deu+hin" then that will be returned. If hin loaded eng automatically as well, then that will not be included in this list. To find the languages actually loaded use GetLoadedLanguagesAsVector. The returned string should NOT be deleted.

Definition at line 448 of file baseapi.cpp.

448 {
449 return language_.c_str();
450}

◆ GetInputImage()

Pix * tesseract::TessBaseAPI::GetInputImage ( )

Definition at line 924 of file baseapi.cpp.

924 {
925 return tesseract_->pix_original();
926}
Image pix_original() const

◆ GetInputName()

const char * tesseract::TessBaseAPI::GetInputName ( )

These functions are required for searchable PDF output. We need our hands on the input file so that we can include it in the PDF without transcoding. If that is not possible, we need the original image. Finally, resolution metadata is stored in the PDF so we need that as well.

Definition at line 928 of file baseapi.cpp.

928 {
929 if (!input_file_.empty()) {
930 return input_file_.c_str();
931 }
932 return nullptr;
933}

◆ GetIntVariable()

bool tesseract::TessBaseAPI::GetIntVariable ( const char *  name,
int *  value 
) const

Returns true if the parameter was found among Tesseract parameters. Fills in value with the value of the parameter.

Definition at line 294 of file baseapi.cpp.

294 {
295 auto *p = ParamUtils::FindParam<IntParam>(name, GlobalParams()->int_params,
297 if (p == nullptr) {
298 return false;
299 }
300 *value = (int32_t)(*p);
301 return true;
302}
std::vector< IntParam * > int_params
Definition: params.h:47

◆ GetIterator()

ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1337 of file baseapi.cpp.

1337 {
1338 if (tesseract_ == nullptr || page_res_ == nullptr) {
1339 return nullptr;
1340 }
1341 return ResultIterator::StartOfParagraph(LTRResultIterator(
1344}
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)

◆ GetLoadedLanguagesAsVector()

void tesseract::TessBaseAPI::GetLoadedLanguagesAsVector ( std::vector< std::string > *  langs) const

Returns the loaded languages in the vector of std::string. Includes all languages loaded by the last Init, including those loaded as dependencies of other loaded languages.

Definition at line 457 of file baseapi.cpp.

457 {
458 langs->clear();
459 if (tesseract_ != nullptr) {
460 langs->push_back(tesseract_->lang);
461 int num_subs = tesseract_->num_sub_langs();
462 for (int i = 0; i < num_subs; ++i) {
463 langs->push_back(tesseract_->get_sub_lang(i)->lang);
464 }
465 }
466}
int num_sub_langs() const
Tesseract * get_sub_lang(int index) const
std::string lang
Definition: ccutil.h:59

◆ GetLSTMBoxText()

char * tesseract::TessBaseAPI::GetLSTMBoxText ( int  page_number = 0)

Make a box file for LSTM training from the internal data structures. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 38 of file lstmboxrenderer.cpp.

38 {
39 if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
40 return nullptr;
41 }
42
43 std::string lstm_box_str;
44 bool first_word = true;
45 int left = 0, top = 0, right = 0, bottom = 0;
46
47 LTRResultIterator *res_it = GetLTRIterator();
48 while (!res_it->Empty(RIL_BLOCK)) {
49 if (res_it->Empty(RIL_SYMBOL)) {
50 res_it->Next(RIL_SYMBOL);
51 continue;
52 }
53 if (!first_word) {
54 if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
55 if (res_it->IsAtBeginningOf(RIL_WORD)) {
56 lstm_box_str += " " + std::to_string(left);
57 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
58 lstm_box_str += "\n"; // end of row for word
59 } // word
60 } else {
61 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
62 lstm_box_str += "\t " + std::to_string(left);
63 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
64 lstm_box_str += "\n"; // end of row for line
65 } // line
66 }
67 } // not first word
68 first_word = false;
69 // Use bounding box for whole line for everything
70 res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
71 do {
72 lstm_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
73 res_it->Next(RIL_SYMBOL);
74 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
75 lstm_box_str += " " + std::to_string(left);
76 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
77 lstm_box_str += "\n"; // end of row for symbol
78 }
79 if (!first_word) { // if first_word is true => empty page
80 lstm_box_str += "\t " + std::to_string(left);
81 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
82 lstm_box_str += "\n"; // end of PAGE
83 }
84 char *ret = new char[lstm_box_str.length() + 1];
85 strcpy(ret, lstm_box_str.c_str());
86 delete res_it;
87 return ret;
88}

◆ GetLTRIterator()

LTRResultIterator * tesseract::TessBaseAPI::GetLTRIterator ( )
protected

Return an LTR Result Iterator – used only for training, as we really want to ignore all BiDi smarts at that point. Delete it once you're done with it.

Get a left-to-right iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use.

Definition at line 1320 of file baseapi.cpp.

1320 {
1321 if (tesseract_ == nullptr || page_res_ == nullptr) {
1322 return nullptr;
1323 }
1324 return new LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
1327}

◆ GetMutableIterator()

MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1354 of file baseapi.cpp.

1354 {
1355 if (tesseract_ == nullptr || page_res_ == nullptr) {
1356 return nullptr;
1357 }
1358 return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
1361}

◆ getOpenCLDevice()

size_t tesseract::TessBaseAPI::getOpenCLDevice ( void **  data)
static

If compiled with OpenCL AND an available OpenCL device is deemed faster than serial code, then *data is populated with the cl_device_id and the function returns sizeof(cl_device_id); otherwise *data is set to nullptr and the function returns 0.

Definition at line 252 of file baseapi.cpp.

252 {
253#ifdef USE_OPENCL
254 ds_device device = OpenclDevice::getDeviceSelection();
255 if (device.type == DS_DEVICE_OPENCL_DEVICE) {
256 *data = new cl_device_id;
257 memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
258 return sizeof(cl_device_id);
259 }
260#endif
261
262 *data = nullptr;
263 return 0;
264}

◆ GetOsdText()

char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1744 of file baseapi.cpp.

1744 {
1745 int orient_deg;
1746 float orient_conf;
1747 const char *script_name;
1748 float script_conf;
1749
1750 if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
1751 return nullptr;
1752 }
1753
1754 // clockwise rotation needed to make the page upright
1755 int rotate = OrientationIdToValue(orient_deg / 90);
1756
1757 std::stringstream stream;
1758 // Use "C" locale (needed for float values orient_conf and script_conf).
1759 stream.imbue(std::locale::classic());
1760 // Use fixed notation with 2 digits after the decimal point for float values.
1761 stream.precision(2);
1762 stream << std::fixed << "Page number: " << page_number << "\n"
1763 << "Orientation in degrees: " << orient_deg << "\n"
1764 << "Rotate: " << rotate << "\n"
1765 << "Orientation confidence: " << orient_conf << "\n"
1766 << "Script: " << script_name << "\n"
1767 << "Script confidence: " << script_conf << "\n";
1768 const std::string &text = stream.str();
1769 char *result = new char[text.length() + 1];
1770 strcpy(result, text.c_str());
1771 return result;
1772}
TESS_API int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:566
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1708

◆ GetPageRes()

const PAGE_RES * tesseract::TessBaseAPI::GetPageRes ( ) const
inlineprotected

Definition at line 760 of file baseapi.h.

760 {
761 return page_res_;
762 }

◆ GetPageSegMode()

PageSegMode tesseract::TessBaseAPI::GetPageSegMode ( ) const

Return the current page segmentation mode.

Definition at line 519 of file baseapi.cpp.

519 {
520 if (tesseract_ == nullptr) {
521 return PSM_SINGLE_BLOCK;
522 }
523 return static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
524}
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:166

◆ GetRegions()

Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 646 of file baseapi.cpp.

646 {
647 return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
648}

◆ GetSourceYResolution()

int tesseract::TessBaseAPI::GetSourceYResolution ( )

Definition at line 939 of file baseapi.cpp.

939 {
940 if (thresholder_ == nullptr)
941 return -1;
943}

◆ GetStringVariable()

const char * tesseract::TessBaseAPI::GetStringVariable ( const char *  name) const

Returns the pointer to the string that represents the value of the parameter if it was found among Tesseract parameters.

Definition at line 314 of file baseapi.cpp.

314 {
315 auto *p = ParamUtils::FindParam<StringParam>(name, GlobalParams()->string_params,
317 return (p != nullptr) ? p->c_str() : nullptr;
318}
std::vector< StringParam * > string_params
Definition: params.h:49

◆ GetStrips()

Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 671 of file baseapi.cpp.

671 {
672 return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
673}

◆ GetTextDirection()

bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 1974 of file baseapi.cpp.

1974 {
1975 const std::unique_ptr<const PageIterator> it(AnalyseLayout());
1976 if (it == nullptr) {
1977 return false;
1978 }
1979 int x1, x2, y1, y2;
1980 it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
1981 // Calculate offset and slope (NOTE: Kind of ugly)
1982 if (x2 <= x1) {
1983 x2 = x1 + 1;
1984 }
1985 // Convert the point pair to slope/offset of the baseline (in image coords.)
1986 *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
1987 *out_offset = static_cast<int>(y1 - *out_slope * x1);
1988 // Get the y-coord of the baseline at the left and right edges of the
1989 // textline's bounding box.
1990 int left, top, right, bottom;
1991 if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1992 return false;
1993 }
1994 int left_y = IntCastRounded(*out_slope * left + *out_offset);
1995 int right_y = IntCastRounded(*out_slope * right + *out_offset);
1996 // Shift the baseline down so it passes through the nearest bottom-corner
1997 // of the textline's bounding box. This is the difference between the y
1998 // at the lowest (max) edge of the box and the actual box bottom.
1999 *out_offset += bottom - std::max(left_y, right_y);
2000 // Switch back to bottom-up tesseract coordinates. Requires negation of
2001 // the slope and height - offset for the offset.
2002 *out_slope = -*out_slope;
2003 *out_offset = rect_height_ - *out_offset;
2004
2005 return true;
2006}
int IntCastRounded(double x)
Definition: helpers.h:170

◆ GetTextlines() [1/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 658 of file baseapi.cpp.

659 {
660 return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, pixa, blockids, paraids);
661}

◆ GetTextlines() [2/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 372 of file baseapi.h.

372 {
373 return GetTextlines(false, 0, pixa, blockids, nullptr);
374 }
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:658

◆ GetThresholdedImage()

Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 631 of file baseapi.cpp.

631 {
632 if (tesseract_ == nullptr || thresholder_ == nullptr) {
633 return nullptr;
634 }
635 if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
636 return nullptr;
637 }
638 return tesseract_->pix_binary().clone();
639}
Image clone() const
Definition: image.cpp:24

◆ GetThresholdedImageScaleFactor()

int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 790 of file baseapi.cpp.

790 {
791 if (thresholder_ == nullptr) {
792 return 0;
793 }
795}

◆ GetTSVText()

char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Definition at line 1412 of file baseapi.cpp.

1412 {
1413 if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
1414 return nullptr;
1415 }
1416
1417 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1418 int page_id = page_number + 1; // we use 1-based page numbers.
1419
1420 int page_num = page_id;
1421 int block_num = 0;
1422 int par_num = 0;
1423 int line_num = 0;
1424 int word_num = 0;
1425
1426 std::string tsv_str;
1427 tsv_str += "1\t" + std::to_string(page_num); // level 1 - page
1428 tsv_str += "\t" + std::to_string(block_num);
1429 tsv_str += "\t" + std::to_string(par_num);
1430 tsv_str += "\t" + std::to_string(line_num);
1431 tsv_str += "\t" + std::to_string(word_num);
1432 tsv_str += "\t" + std::to_string(rect_left_);
1433 tsv_str += "\t" + std::to_string(rect_top_);
1434 tsv_str += "\t" + std::to_string(rect_width_);
1435 tsv_str += "\t" + std::to_string(rect_height_);
1436 tsv_str += "\t-1\t\n";
1437
1438 const std::unique_ptr</*non-const*/ ResultIterator> res_it(GetIterator());
1439 while (!res_it->Empty(RIL_BLOCK)) {
1440 if (res_it->Empty(RIL_WORD)) {
1441 res_it->Next(RIL_WORD);
1442 continue;
1443 }
1444
1445 // Add rows for any new block/paragraph/textline.
1446 if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1447 block_num++;
1448 par_num = 0;
1449 line_num = 0;
1450 word_num = 0;
1451 tsv_str += "2\t" + std::to_string(page_num); // level 2 - block
1452 tsv_str += "\t" + std::to_string(block_num);
1453 tsv_str += "\t" + std::to_string(par_num);
1454 tsv_str += "\t" + std::to_string(line_num);
1455 tsv_str += "\t" + std::to_string(word_num);
1456 AddBoxToTSV(res_it.get(), RIL_BLOCK, tsv_str);
1457 tsv_str += "\t-1\t\n"; // end of row for block
1458 }
1459 if (res_it->IsAtBeginningOf(RIL_PARA)) {
1460 par_num++;
1461 line_num = 0;
1462 word_num = 0;
1463 tsv_str += "3\t" + std::to_string(page_num); // level 3 - paragraph
1464 tsv_str += "\t" + std::to_string(block_num);
1465 tsv_str += "\t" + std::to_string(par_num);
1466 tsv_str += "\t" + std::to_string(line_num);
1467 tsv_str += "\t" + std::to_string(word_num);
1468 AddBoxToTSV(res_it.get(), RIL_PARA, tsv_str);
1469 tsv_str += "\t-1\t\n"; // end of row for para
1470 }
1471 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1472 line_num++;
1473 word_num = 0;
1474 tsv_str += "4\t" + std::to_string(page_num); // level 4 - line
1475 tsv_str += "\t" + std::to_string(block_num);
1476 tsv_str += "\t" + std::to_string(par_num);
1477 tsv_str += "\t" + std::to_string(line_num);
1478 tsv_str += "\t" + std::to_string(word_num);
1479 AddBoxToTSV(res_it.get(), RIL_TEXTLINE, tsv_str);
1480 tsv_str += "\t-1\t\n"; // end of row for line
1481 }
1482
1483 // Now, process the word...
1484 int left, top, right, bottom;
1485 res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1486 word_num++;
1487 tsv_str += "5\t" + std::to_string(page_num); // level 5 - word
1488 tsv_str += "\t" + std::to_string(block_num);
1489 tsv_str += "\t" + std::to_string(par_num);
1490 tsv_str += "\t" + std::to_string(line_num);
1491 tsv_str += "\t" + std::to_string(word_num);
1492 tsv_str += "\t" + std::to_string(left);
1493 tsv_str += "\t" + std::to_string(top);
1494 tsv_str += "\t" + std::to_string(right - left);
1495 tsv_str += "\t" + std::to_string(bottom - top);
1496 tsv_str += "\t" + std::to_string(res_it->Confidence(RIL_WORD));
1497 tsv_str += "\t";
1498
1499 // Increment counts if at end of block/paragraph/textline.
1500 if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) {
1501 lcnt++;
1502 }
1503 if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) {
1504 pcnt++;
1505 }
1506 if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) {
1507 bcnt++;
1508 }
1509
1510 do {
1511 tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1512 res_it->Next(RIL_SYMBOL);
1513 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1514 tsv_str += "\n"; // end of row
1515 wcnt++;
1516 }
1517
1518 char *ret = new char[tsv_str.length() + 1];
1519 strcpy(ret, tsv_str.c_str());
1520 return ret;
1521}

◆ GetUnichar()

const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id) const

This method returns the string form of the specified unichar.

Definition at line 2360 of file baseapi.cpp.

2360 {
2361 return tesseract_->unicharset.id_to_unichar(unichar_id);
2362}
UNICHARSET unicharset
Definition: ccutil.h:61
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279

◆ GetUNLVText()

char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes. Returned string must be freed with the delete [] operator.

Definition at line 1601 of file baseapi.cpp.

1601 {
1602 if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1603 return nullptr;
1604 }
1605 bool tilde_crunch_written = false;
1606 bool last_char_was_newline = true;
1607 bool last_char_was_tilde = false;
1608
1609 int total_length = TextLength(nullptr);
1610 PAGE_RES_IT page_res_it(page_res_);
1611 char *result = new char[total_length];
1612 char *ptr = result;
1613 for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
1614 WERD_RES *word = page_res_it.word();
1615 // Process the current word.
1616 if (word->unlv_crunch_mode != CR_NONE) {
1617 if (word->unlv_crunch_mode != CR_DELETE &&
1618 (!tilde_crunch_written ||
1619 (word->unlv_crunch_mode == CR_KEEP_SPACE && word->word->space() > 0 &&
1620 !word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)))) {
1621 if (!word->word->flag(W_BOL) && word->word->space() > 0 && !word->word->flag(W_FUZZY_NON) &&
1622 !word->word->flag(W_FUZZY_SP)) {
1623 /* Write a space to separate from preceding good text */
1624 *ptr++ = ' ';
1625 last_char_was_tilde = false;
1626 }
1627 if (!last_char_was_tilde) {
1628 // Write a reject char.
1629 last_char_was_tilde = true;
1630 *ptr++ = kUNLVReject;
1631 tilde_crunch_written = true;
1632 last_char_was_newline = false;
1633 }
1634 }
1635 } else {
1636 // NORMAL PROCESSING of non tilde crunched words.
1637 tilde_crunch_written = false;
1639 const char *wordstr = word->best_choice->unichar_string().c_str();
1640 const auto &lengths = word->best_choice->unichar_lengths();
1641 int length = lengths.length();
1642 int i = 0;
1643 int offset = 0;
1644
1645 if (last_char_was_tilde && word->word->space() == 0 && wordstr[offset] == ' ') {
1646 // Prevent adjacent tilde across words - we know that adjacent tildes
1647 // within words have been removed.
1648 // Skip the first character.
1649 offset = lengths[i++];
1650 }
1651 if (i < length && wordstr[offset] != 0) {
1652 if (!last_char_was_newline) {
1653 *ptr++ = ' ';
1654 } else {
1655 last_char_was_newline = false;
1656 }
1657 for (; i < length; offset += lengths[i++]) {
1658 if (wordstr[offset] == ' ' || wordstr[offset] == kTesseractReject) {
1659 *ptr++ = kUNLVReject;
1660 last_char_was_tilde = true;
1661 } else {
1662 if (word->reject_map[i].rejected()) {
1663 *ptr++ = kUNLVSuspect;
1664 }
1665 UNICHAR ch(wordstr + offset, lengths[i]);
1666 int uni_ch = ch.first_uni();
1667 for (int j = 0; kUniChs[j] != 0; ++j) {
1668 if (kUniChs[j] == uni_ch) {
1669 uni_ch = kLatinChs[j];
1670 break;
1671 }
1672 }
1673 if (uni_ch <= 0xff) {
1674 *ptr++ = static_cast<char>(uni_ch);
1675 last_char_was_tilde = false;
1676 } else {
1677 *ptr++ = kUNLVReject;
1678 last_char_was_tilde = true;
1679 }
1680 }
1681 }
1682 }
1683 }
1684 if (word->word->flag(W_EOL) && !last_char_was_newline) {
1685 /* Add a new line output */
1686 *ptr++ = '\n';
1687 tilde_crunch_written = false;
1688 last_char_was_newline = true;
1689 last_char_was_tilde = false;
1690 }
1691 }
1692 *ptr++ = '\n';
1693 *ptr = '\0';
1694 return result;
1695}
@ W_BOL
start of line
Definition: werd.h:34
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:41
@ W_EOL
end of line
Definition: werd.h:35
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:42
@ CR_NONE
Definition: pageres.h:160
@ CR_KEEP_SPACE
Definition: pageres.h:160
@ CR_DELETE
Definition: pageres.h:160
const int kLatinChs[]
Definition: baseapi.cpp:1594
const char kUNLVReject
Definition: baseapi.cpp:111
const char kUNLVSuspect
Definition: baseapi.cpp:113
const int kUniChs[]
Definition: baseapi.cpp:1592
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:270

◆ GetUTF8Text()

char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1364 of file baseapi.cpp.

1364 {
1365 if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1366 return nullptr;
1367 }
1368 std::string text("");
1369 const std::unique_ptr</*non-const*/ ResultIterator> it(GetIterator());
1370 do {
1371 if (it->Empty(RIL_PARA)) {
1372 continue;
1373 }
1374 auto block_type = it->BlockType();
1375 switch (block_type) {
1376 case PT_FLOWING_IMAGE:
1377 case PT_HEADING_IMAGE:
1378 case PT_PULLOUT_IMAGE:
1379 case PT_HORZ_LINE:
1380 case PT_VERT_LINE:
1381 // Ignore images and lines for text output.
1382 continue;
1383 case PT_NOISE:
1384 tprintf("TODO: Please report image which triggers the noise case.\n");
1385 ASSERT_HOST(false);
1386 default:
1387 break;
1388 }
1389
1390 const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1391 text += para_text.get();
1392 } while (it->Next(RIL_PARA));
1393 char *result = new char[text.length() + 1];
1394 strncpy(result, text.c_str(), text.length() + 1);
1395 return result;
1396}

◆ GetVariableAsString()

bool tesseract::TessBaseAPI::GetVariableAsString ( const char *  name,
std::string *  val 
) const

Get value of named variable as a string, if it exists.

Definition at line 331 of file baseapi.cpp.

331 {
332 return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
333}
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, std::string *value)
Definition: params.cpp:130

◆ GetWords()

Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 680 of file baseapi.cpp.

680 {
681 return GetComponentImages(RIL_WORD, true, pixa, nullptr);
682}

◆ GetWordStrBoxText()

char * tesseract::TessBaseAPI::GetWordStrBoxText ( int  page_number = 0)

The recognized text is returned as a char* which is coded in the same format as a WordStr box file used in training. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Create a UTF8 box file with WordStr strings from the internal data structures. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 31 of file wordstrboxrenderer.cpp.

31 {
32 if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
33 return nullptr;
34 }
35
36 std::string wordstr_box_str;
37 int left = 0, top = 0, right = 0, bottom = 0;
38
39 bool first_line = true;
40
41 LTRResultIterator *res_it = GetLTRIterator();
42 while (!res_it->Empty(RIL_BLOCK)) {
43 if (res_it->Empty(RIL_WORD)) {
44 res_it->Next(RIL_WORD);
45 continue;
46 }
47
48 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
49 if (!first_line) {
50 wordstr_box_str += "\n\t " + std::to_string(right + 1);
51 wordstr_box_str += " " + std::to_string(image_height_ - bottom);
52 wordstr_box_str += " " + std::to_string(right + 5);
53 wordstr_box_str += " " + std::to_string(image_height_ - top);
54 wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
55 wordstr_box_str += "\n";
56 } else {
57 first_line = false;
58 }
59 // Use bounding box for whole line for WordStr
60 res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
61 wordstr_box_str += "WordStr " + std::to_string(left);
62 wordstr_box_str += " " + std::to_string(image_height_ - bottom);
63 wordstr_box_str += " " + std::to_string(right);
64 wordstr_box_str += " " + std::to_string(image_height_ - top);
65 wordstr_box_str += " " + std::to_string(page_number); // word
66 wordstr_box_str += " #";
67 }
68 do {
69 wordstr_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
70 wordstr_box_str += " ";
71 res_it->Next(RIL_WORD);
72 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
73 }
74
75 if (left != 0 && top != 0 && right != 0 && bottom != 0) {
76 wordstr_box_str += "\n\t " + std::to_string(right + 1);
77 wordstr_box_str += " " + std::to_string(image_height_ - bottom);
78 wordstr_box_str += " " + std::to_string(right + 5);
79 wordstr_box_str += " " + std::to_string(image_height_ - top);
80 wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
81 wordstr_box_str += "\n";
82 }
83 char *ret = new char[wordstr_box_str.length() + 1];
84 strcpy(ret, wordstr_box_str.c_str());
85 delete res_it;
86 return ret;
87}

◆ Init() [1/4]

int tesseract::TessBaseAPI::Init ( const char *  data,
int  data_size,
const char *  language,
OcrEngineMode  mode,
char **  configs,
int  configs_size,
const std::vector< std::string > *  vars_vec,
const std::vector< std::string > *  vars_values,
bool  set_only_non_debug_params,
FileReader  reader 
)

Definition at line 378 of file baseapi.cpp.

381 {
382 if (language == nullptr) {
383 language = "";
384 }
385 if (data == nullptr) {
386 data = "";
387 }
388 std::string datapath = data_size == 0 ? data : language;
389 // If the datapath, OcrEngineMode or the language have changed - start again.
390 // Note that the language_ field stores the last requested language that was
391 // initialized successfully, while tesseract_->lang stores the language
392 // actually used. They differ only if the requested language was nullptr, in
393 // which case tesseract_->lang is set to the Tesseract default ("eng").
394 if (tesseract_ != nullptr &&
395 (datapath_.empty() || language_.empty() || datapath_ != datapath ||
396 last_oem_requested_ != oem || (language_ != language && tesseract_->lang != language))) {
397 delete tesseract_;
398 tesseract_ = nullptr;
399 }
400#ifdef USE_OPENCL
401 OpenclDevice od;
402 od.InitEnv();
403#endif
404 bool reset_classifier = true;
405 if (tesseract_ == nullptr) {
406 reset_classifier = false;
407 tesseract_ = new Tesseract;
408 if (reader != nullptr) {
409 reader_ = reader;
410 }
411 TessdataManager mgr(reader_);
412 if (data_size != 0) {
413 mgr.LoadMemBuffer(language, data, data_size);
414 }
415 if (tesseract_->init_tesseract(datapath, output_file_, language, oem, configs,
416 configs_size, vars_vec, vars_values, set_only_non_debug_params,
417 &mgr) != 0) {
418 return -1;
419 }
420 }
421
422 // Update datapath and language requested for the last valid initialization.
423 datapath_ = datapath;
424 if (datapath_.empty() && !tesseract_->datadir.empty()) {
426 }
427
428 language_ = language;
430
431#ifndef DISABLED_LEGACY_ENGINE
432 // For same language and datapath, just reset the adaptive classifier.
433 if (reset_classifier) {
435 }
436#endif // ndef DISABLED_LEGACY_ENGINE
437 return 0;
438}
OcrEngineMode oem() const
Definition: baseapi.h:715

◆ Init() [2/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language 
)
inline

Definition at line 214 of file baseapi.h.

214 {
215 return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
216 false);
217 }
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:368

◆ Init() [3/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const std::vector< std::string > *  vars_vec,
const std::vector< std::string > *  vars_values,
bool  set_only_non_debug_params 
)

Instances are now mostly thread-safe and totally independent, but some global parameters remain. Basically it is safe to use multiple TessBaseAPIs in different threads in parallel, UNLESS: you use SetVariable on some of the Params in classify and textord. If you do, then the effect will be to change it for all your instances.

Start tesseract. Returns zero on success and -1 on failure. NOTE that the only members that may be called before Init are those listed above here in the class definition.

The datapath must be the name of the tessdata directory. The language is (usually) an ISO 639-3 string; nullptr will default to eng. It is entirely safe (and eventually will be efficient too) to call Init multiple times on the same instance to change language, or just to reset the classifier. The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating that multiple languages are to be loaded. E.g. hin+eng will load Hindi and English. Languages may specify internally that they want to be loaded with one or more other languages, so the ~ sign is available to override that. E.g. if hin were set to load eng by default, then hin+~eng would force loading only hin. The number of loaded languages is limited only by memory, with the caveat that loading additional languages will impact both speed and accuracy, as there is more work to do to decide on the applicable language, and there is more chance of hallucinating incorrect words. WARNING: On changing languages, all Tesseract parameters are reset back to their default values. (Which may vary between languages.) If you have a rare need to set a Variable that controls initialization for a second call to Init you should explicitly call End() and then use SetVariable before Init. This is only a very rare use case, since there are very few uses that require any parameters to be set before Init.

If set_only_non_debug_params is true, only params that do not contain "debug" in the name will be set.

The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].) The language is (usually) an ISO 639-3 string or nullptr will default to eng. If numeric_mode is true, then only digits and Roman numerals will be returned.

Returns
0 on success and -1 on initialization failure.

Definition at line 368 of file baseapi.cpp.

370 {
371 return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, vars_values,
372 set_only_non_debug_params, nullptr);
373}

◆ Init() [4/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem 
)
inline

Definition at line 211 of file baseapi.h.

211 {
212 return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
213 }

◆ InitForAnalysePage()

void tesseract::TessBaseAPI::InitForAnalysePage ( )

Init only for page layout analysis. Use only for calls to SetImage and AnalysePage. Calls that attempt recognition will generate an error.

Definition at line 483 of file baseapi.cpp.

483 {
484 if (tesseract_ == nullptr) {
485 tesseract_ = new Tesseract;
486#ifndef DISABLED_LEGACY_ENGINE
488#endif
489 }
490}

◆ InternalSetImage()

bool tesseract::TessBaseAPI::InternalSetImage ( )
protected

Common code for setting the image. Returns true if Init has been called.

Common code for setting the image.

Definition at line 2035 of file baseapi.cpp.

2035 {
2036 if (tesseract_ == nullptr) {
2037 tprintf("Please call Init before attempting to set an image.\n");
2038 return false;
2039 }
2040 if (thresholder_ == nullptr) {
2041 thresholder_ = new ImageThresholder;
2042 }
2043 ClearResults();
2044 return true;
2045}

◆ IsValidCharacter()

bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character) const

Definition at line 1968 of file baseapi.cpp.

1968 {
1969 return tesseract_->unicharset.contains_unichar(utf8_character);
1970}
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:695

◆ IsValidWord()

int tesseract::TessBaseAPI::IsValidWord ( const char *  word) const

Check whether a word is valid according to Tesseract's language model.

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 1964 of file baseapi.cpp.

1964 {
1965 return tesseract_->getDict().valid_word(word);
1966}
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:801

◆ MeanTextConf()

int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1777 of file baseapi.cpp.

1777 {
1778 int *conf = AllWordConfidences();
1779 if (!conf) {
1780 return 0;
1781 }
1782 int sum = 0;
1783 int *pt = conf;
1784 while (*pt >= 0) {
1785 sum += *pt++;
1786 }
1787 if (pt != conf) {
1788 sum /= pt - conf;
1789 }
1790 delete[] conf;
1791 return sum;
1792}

◆ NumDawgs()

int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2373 of file baseapi.cpp.

2373 {
2374 return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
2375}
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:381

◆ oem()

OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 715 of file baseapi.h.

715 {
716 return last_oem_requested_;
717 }

◆ operator=()

TessBaseAPI & tesseract::TessBaseAPI::operator= ( TessBaseAPI const &  )
delete

◆ PrintFontsTable()

void tesseract::TessBaseAPI::PrintFontsTable ( FILE *  fp) const

Print Tesseract fonts table to the given file.

Definition at line 338 of file baseapi.cpp.

338 {
339 const int fontinfo_size = tesseract_->get_fontinfo_table().size();
340 for (int font_index = 1; font_index < fontinfo_size; ++font_index) {
341 FontInfo font = tesseract_->get_fontinfo_table().at(font_index);
342 fprintf(fp, "ID=%3d: %s is_italic=%s is_bold=%s"
343 " is_fixed_pitch=%s is_serif=%s is_fraktur=%s\n",
344 font_index, font.name,
345 font.is_italic() ? "true" : "false",
346 font.is_bold() ? "true" : "false",
347 font.is_fixed_pitch() ? "true" : "false",
348 font.is_serif() ? "true" : "false",
349 font.is_fraktur() ? "true" : "false");
350 }
351}
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324

◆ PrintVariables()

void tesseract::TessBaseAPI::PrintVariables ( FILE *  fp) const

Print Tesseract parameters to the given file.

Definition at line 356 of file baseapi.cpp.

356 {
358}
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:164

◆ ProcessPage()

bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1253 of file baseapi.cpp.

1255 {
1256 SetInputName(filename);
1257 SetImage(pix);
1258 bool failed = false;
1259
1260 if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1261 // Disabled character recognition
1262 if (! std::unique_ptr<const PageIterator>(AnalyseLayout())) {
1263 failed = true;
1264 }
1265 } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
1266 failed = FindLines() != 0;
1267 } else if (timeout_millisec > 0) {
1268 // Running with a timeout.
1269 ETEXT_DESC monitor;
1270 monitor.cancel = nullptr;
1271 monitor.cancel_this = nullptr;
1272 monitor.set_deadline_msecs(timeout_millisec);
1273
1274 // Now run the main recognition.
1275 failed = Recognize(&monitor) < 0;
1276 } else {
1277 // Normal layout and character recognition with no timeout.
1278 failed = Recognize(nullptr) < 0;
1279 }
1280
1281 if (tesseract_->tessedit_write_images) {
1282 Pix *page_pix = GetThresholdedImage();
1283 std::string output_filename = output_file_ + ".processed";
1284 if (page_index > 0) {
1285 output_filename += std::to_string(page_index);
1286 }
1287 output_filename += ".tif";
1288 pixWrite(output_filename.c_str(), page_pix, IFF_TIFF_G4);
1289 pixDestroy(&page_pix);
1290 }
1291
1292 if (failed && retry_config != nullptr && retry_config[0] != '\0') {
1293 // Save current config variables before switching modes.
1294 FILE *fp = fopen(kOldVarsFile, "wb");
1295 if (fp == nullptr) {
1296 tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
1297 } else {
1298 PrintVariables(fp);
1299 fclose(fp);
1300 }
1301 // Switch to alternate mode for retry.
1302 ReadConfigFile(retry_config);
1303 SetImage(pix);
1304 Recognize(nullptr);
1305 // Restore saved config variables.
1306 ReadConfigFile(kOldVarsFile);
1307 }
1308
1309 if (renderer && !failed) {
1310 failed = !renderer->AddImage(this);
1311 }
1312
1313 return !failed;
1314}
struct ETEXT_DESC ETEXT_DESC
Definition: capi.h:135
@ PSM_OSD_ONLY
Orientation and script detection only.
Definition: publictypes.h:158
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:161
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:356
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:576
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:497
Pix * GetThresholdedImage()
Definition: baseapi.cpp:631

◆ ProcessPages()

bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not nullptr, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1071 of file baseapi.cpp.

1072 {
1073 bool result = ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1074#ifndef DISABLED_LEGACY_ENGINE
1075 if (result) {
1076 if (tesseract_->tessedit_train_from_boxes && !tesseract_->WriteTRFile(output_file_.c_str())) {
1077 tprintf("Write of TR file failed: %s\n", output_file_.c_str());
1078 return false;
1079 }
1080 }
1081#endif // ndef DISABLED_LEGACY_ENGINE
1082 return result;
1083}
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1105
bool WriteTRFile(const char *filename)
Definition: blobclass.cpp:60

◆ ProcessPagesInternal()

bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Definition at line 1105 of file baseapi.cpp.

1106 {
1107 bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1108 if (stdInput) {
1109#ifdef WIN32
1110 if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1111 tprintf("ERROR: cin to binary: %s", strerror(errno));
1112#endif // WIN32
1113 }
1114
1115 if (stream_filelist) {
1116 return ProcessPagesFileList(stdin, nullptr, retry_config, timeout_millisec, renderer,
1117 tesseract_->tessedit_page_number);
1118 }
1119
1120 // At this point we are officially in autodection territory.
1121 // That means any data in stdin must be buffered, to make it
1122 // seekable.
1123 std::string buf;
1124 const l_uint8 *data = nullptr;
1125 if (stdInput) {
1126 buf.assign((std::istreambuf_iterator<char>(std::cin)), (std::istreambuf_iterator<char>()));
1127 data = reinterpret_cast<const l_uint8 *>(buf.data());
1128 } else if (strstr(filename, "://") != nullptr) {
1129 // Get image or image list by URL.
1130#ifdef HAVE_LIBCURL
1131 CURL *curl = curl_easy_init();
1132 if (curl == nullptr) {
1133 fprintf(stderr, "Error, curl_easy_init failed\n");
1134 return false;
1135 } else {
1136 CURLcode curlcode;
1137 auto error = [curl, &curlcode](const char *function) {
1138 fprintf(stderr, "Error, %s failed with error %s\n", function, curl_easy_strerror(curlcode));
1139 curl_easy_cleanup(curl);
1140 return false;
1141 };
1142 curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1143 if (curlcode != CURLE_OK) {
1144 return error("curl_easy_setopt");
1145 }
1146 // Follow HTTP, HTTPS, FTP and FTPS redirects.
1147 curlcode = curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
1148 if (curlcode != CURLE_OK) {
1149 return error("curl_easy_setopt");
1150 }
1151 // Allow no more than 8 redirections to prevent endless loops.
1152 curlcode = curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 8);
1153 if (curlcode != CURLE_OK) {
1154 return error("curl_easy_setopt");
1155 }
1156 int timeout = curl_timeout;
1157 if (timeout > 0) {
1158 curlcode = curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
1159 if (curlcode != CURLE_OK) {
1160 return error("curl_easy_setopt");
1161 }
1162 curlcode = curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
1163 if (curlcode != CURLE_OK) {
1164 return error("curl_easy_setopt");
1165 }
1166 }
1167 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1168 if (curlcode != CURLE_OK) {
1169 return error("curl_easy_setopt");
1170 }
1171 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1172 if (curlcode != CURLE_OK) {
1173 return error("curl_easy_setopt");
1174 }
1175 curlcode = curl_easy_perform(curl);
1176 if (curlcode != CURLE_OK) {
1177 return error("curl_easy_perform");
1178 }
1179 curl_easy_cleanup(curl);
1180 data = reinterpret_cast<const l_uint8 *>(buf.data());
1181 }
1182#else
1183 fprintf(stderr, "Error, this tesseract has no URL support\n");
1184 return false;
1185#endif
1186 } else {
1187 // Check whether the input file can be read.
1188 if (FILE *file = fopen(filename, "rb")) {
1189 fclose(file);
1190 } else {
1191 fprintf(stderr, "Error, cannot read input file %s: %s\n", filename, strerror(errno));
1192 return false;
1193 }
1194 }
1195
1196 // Here is our autodetection
1197 int format;
1198 int r =
1199 (data != nullptr) ? findFileFormatBuffer(data, &format) : findFileFormat(filename, &format);
1200
1201 // Maybe we have a filelist
1202 if (r != 0 || format == IFF_UNKNOWN) {
1203 std::string s;
1204 if (data != nullptr) {
1205 s = buf.c_str();
1206 } else {
1207 std::ifstream t(filename);
1208 std::string u((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
1209 s = u.c_str();
1210 }
1211 return ProcessPagesFileList(nullptr, &s, retry_config, timeout_millisec, renderer,
1212 tesseract_->tessedit_page_number);
1213 }
1214
1215 // Maybe we have a TIFF which is potentially multipage
1216 bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || format == IFF_TIFF_RLE ||
1217 format == IFF_TIFF_G3 || format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1218#if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1219 format == IFF_TIFF_JPEG ||
1220#endif
1221 format == IFF_TIFF_ZIP);
1222
1223 // Fail early if we can, before producing any output
1224 Pix *pix = nullptr;
1225 if (!tiff) {
1226 pix = (data != nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1227 if (pix == nullptr) {
1228 return false;
1229 }
1230 }
1231
1232 // Begin the output
1233 if (renderer && !renderer->BeginDocument(document_title.c_str())) {
1234 pixDestroy(&pix);
1235 return false;
1236 }
1237
1238 // Produce output
1239 r = (tiff) ? ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, timeout_millisec,
1240 renderer, tesseract_->tessedit_page_number)
1241 : ProcessPage(pix, 0, filename, retry_config, timeout_millisec, renderer);
1242
1243 // Clean up memory as needed
1244 pixDestroy(&pix);
1245
1246 // End the output
1247 if (!r || (renderer && !renderer->EndDocument())) {
1248 return false;
1249 }
1250 return true;
1251}
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1253

◆ ReadConfigFile()

void tesseract::TessBaseAPI::ReadConfigFile ( const char *  filename)

Read a "config" file containing a set of param, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name. Note: only non-init params will be set (init params are set by Init()).

Read a "config" file containing a set of parameter name, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name.

Definition at line 497 of file baseapi.cpp.

497 {
499}
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
Definition: params.h:43
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:46

◆ ReadDebugConfigFile()

void tesseract::TessBaseAPI::ReadDebugConfigFile ( const char *  filename)

Same as above, but only set debug params from the given config file.

Definition at line 502 of file baseapi.cpp.

502 {
504}
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
Definition: params.h:41

◆ Recognize()

int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 834 of file baseapi.cpp.

834 {
835 if (tesseract_ == nullptr) {
836 return -1;
837 }
838 if (FindLines() != 0) {
839 return -1;
840 }
841 delete page_res_;
842 if (block_list_->empty()) {
843 page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_);
844 return 0; // Empty page.
845 }
846
848 recognition_done_ = true;
849#ifndef DISABLED_LEGACY_ENGINE
850 if (tesseract_->tessedit_resegment_from_line_boxes) {
852 } else if (tesseract_->tessedit_resegment_from_boxes) {
854 } else
855#endif // ndef DISABLED_LEGACY_ENGINE
856 {
857 page_res_ =
859 }
860
861 if (page_res_ == nullptr) {
862 return -1;
863 }
864
865 if (tesseract_->tessedit_train_line_recognizer) {
867 return -1;
868 }
870 return 0;
871 }
872#ifndef DISABLED_LEGACY_ENGINE
873 if (tesseract_->tessedit_make_boxes_from_boxes) {
875 return 0;
876 }
877#endif // ndef DISABLED_LEGACY_ENGINE
878
879 int result = 0;
880 if (tesseract_->interactive_display_mode) {
881#ifndef GRAPHICS_DISABLED
883#endif // !GRAPHICS_DISABLED
884 // The page_res is invalid after an interactive session, so cleanup
885 // in a way that lets us continue to the next page without crashing.
886 delete page_res_;
887 page_res_ = nullptr;
888 return -1;
889#ifndef DISABLED_LEGACY_ENGINE
890 } else if (tesseract_->tessedit_train_from_boxes) {
891 std::string fontname;
892 ExtractFontName(output_file_.c_str(), &fontname);
894 } else if (tesseract_->tessedit_ambigs_training) {
895 FILE *training_output_file = tesseract_->init_recog_training(input_file_.c_str());
896 // OCR the page segmented into words by tesseract.
898 training_output_file);
899 fclose(training_output_file);
900#endif // ndef DISABLED_LEGACY_ENGINE
901 } else {
902 // Now run the main recognition.
903 bool wait_for_text = true;
904 GetBoolVariable("paragraph_text_based", &wait_for_text);
905 if (!wait_for_text) {
906 DetectParagraphs(false);
907 }
908 if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
909 if (wait_for_text) {
910 DetectParagraphs(true);
911 }
912 } else {
913 result = -1;
914 }
915 }
916 return result;
917}
bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:41
PAGE_RES * ApplyBoxes(const char *filename, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:110
void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:751
void recog_training_segmented(const char *filename, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:764
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:355
FILE * init_recog_training(const char *filename)
bool AnyLSTMLang() const
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:287
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:387

◆ set_min_orientation_margin()

void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2282 of file baseapi.cpp.

2282 {
2283 tesseract_->min_orientation_margin.set_value(margin);
2284}

◆ SetDebugVariable()

bool tesseract::TessBaseAPI::SetDebugVariable ( const char *  name,
const char *  value 
)

Definition at line 287 of file baseapi.cpp.

287 {
288 if (tesseract_ == nullptr) {
289 tesseract_ = new Tesseract;
290 }
292}
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:81

◆ SetDictFunc()

void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 2009 of file baseapi.cpp.

2009 {
2010 if (tesseract_ != nullptr) {
2012 }
2013}
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:345

◆ SetImage() [1/2]

void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 576 of file baseapi.cpp.

577 {
578 if (InternalSetImage()) {
579 thresholder_->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
581 }
582}
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:76

◆ SetImage() [2/2]

void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 600 of file baseapi.cpp.

600 {
601 if (InternalSetImage()) {
602 if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
603 // remove alpha channel from png
604 Pix *p1 = pixRemoveAlpha(pix);
605 pixSetSpp(p1, 3);
606 (void)pixCopy(pix, p1);
607 pixDestroy(&p1);
608 }
611 }
612}

◆ SetInputImage()

void tesseract::TessBaseAPI::SetInputImage ( Pix *  pix)

Definition at line 920 of file baseapi.cpp.

920 {
922}
void set_pix_original(Image original_pix)

◆ SetInputName()

void tesseract::TessBaseAPI::SetInputName ( const char *  name)

Set the name of the input file. Needed for training and reading a UNLV zone file, and for searchable PDF output.

Set the name of the input file. Needed only for training and loading a UNLV zone file.

Definition at line 270 of file baseapi.cpp.

270 {
271 input_file_ = name ? name : "";
272}

◆ SetOutputName()

void tesseract::TessBaseAPI::SetOutputName ( const char *  name)

Set the name of the bonus output files. Needed only for debugging.

Set the name of the output files. Needed only for debugging.

Definition at line 275 of file baseapi.cpp.

275 {
276 output_file_ = name ? name : "";
277}

◆ SetPageSegMode()

void tesseract::TessBaseAPI::SetPageSegMode ( PageSegMode  mode)

Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Set the current page segmentation mode. Defaults to PSM_AUTO. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Definition at line 511 of file baseapi.cpp.

511 {
512 if (tesseract_ == nullptr) {
513 tesseract_ = new Tesseract;
514 }
515 tesseract_->tessedit_pageseg_mode.set_value(mode);
516}

◆ SetProbabilityInContextFunc()

void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 2023 of file baseapi.cpp.

2023 {
2024 if (tesseract_ != nullptr) {
2026 // Set it for the sublangs too.
2027 int num_subs = tesseract_->num_sub_langs();
2028 for (int i = 0; i < num_subs; ++i) {
2030 }
2031 }
2032}
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:354

◆ SetRectangle()

void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 619 of file baseapi.cpp.

619 {
620 if (thresholder_ == nullptr) {
621 return;
622 }
623 thresholder_->SetRectangle(left, top, width, height);
624 ClearResults();
625}
void SetRectangle(int left, int top, int width, int height)

◆ SetSourceResolution()

void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 584 of file baseapi.cpp.

584 {
585 if (thresholder_) {
587 } else {
588 tprintf("Please call SetImage before SetSourceResolution.\n");
589 }
590}
void SetSourceYResolution(int ppi)
Definition: thresholder.h:95

◆ SetVariable()

bool tesseract::TessBaseAPI::SetVariable ( const char *  name,
const char *  value 
)

Set the value of an internal "parameter." Supply the name of the parameter and the value as a string, just as you would in a config file. Returns false if the name lookup failed. Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. SetVariable may be used before Init, but settings will revert to defaults on End().

Note: Must be called after Init(). Only works for non-init variables (init variables should be passed to Init()).

Definition at line 279 of file baseapi.cpp.

279 {
280 if (tesseract_ == nullptr) {
281 tesseract_ = new Tesseract;
282 }
284 tesseract_->params());
285}

◆ tesseract()

Tesseract * tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 711 of file baseapi.h.

711 {
712 return tesseract_;
713 }

◆ TesseractRect()

char * tesseract::TessBaseAPI::TesseractRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
)

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a 1 represents WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Note that TesseractRect is the simplified convenience interface. For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and one or more of the Get*Text functions below.

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a one pixel is WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Definition at line 539 of file baseapi.cpp.

540 {
541 if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) {
542 return nullptr; // Nothing worth doing.
543 }
544
545 // Since this original api didn't give the exact size of the image,
546 // we have to invent a reasonable value.
547 int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
548 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, bytes_per_pixel,
549 bytes_per_line);
550 SetRectangle(left, top, width, height);
551
552 return GetUTF8Text();
553}
const int kMinRectSize
Definition: baseapi.cpp:107
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:619

◆ TextLength()

int tesseract::TessBaseAPI::TextLength ( int *  blob_count) const
protected

Return the length of the output text string, as UTF8, assuming one newline per line and one per block, with a terminator, and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Return the length of the output text string, as UTF8, assuming liberally two spacing marks after each word (as paragraphs end with two newlines), and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Definition at line 2233 of file baseapi.cpp.

2233 {
2234 if (tesseract_ == nullptr || page_res_ == nullptr) {
2235 return 0;
2236 }
2237
2238 PAGE_RES_IT page_res_it(page_res_);
2239 int total_length = 2;
2240 int total_blobs = 0;
2241 // Iterate over the data structures to extract the recognition result.
2242 for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
2243 WERD_RES *word = page_res_it.word();
2244 WERD_CHOICE *choice = word->best_choice;
2245 if (choice != nullptr) {
2246 total_blobs += choice->length() + 2;
2247 total_length += choice->unichar_string().length() + 2;
2248 for (int i = 0; i < word->reject_map.length(); ++i) {
2249 if (word->reject_map[i].rejected()) {
2250 ++total_length;
2251 }
2252 }
2253 }
2254 }
2255 if (blob_count != nullptr) {
2256 *blob_count = total_blobs;
2257 }
2258 return total_length;
2259}

◆ Threshold()

bool tesseract::TessBaseAPI::Threshold ( Pix **  pix)
protected virtual

Run the thresholder to make the thresholded image. If pix is not nullptr, the source is thresholded to pix instead of the internal IMAGE.

Run the thresholder to make the thresholded image, returned in pix, which must not be nullptr. *pix must be initialized to nullptr, or point to an existing pixDestroyable Pix. The usual argument to Threshold is Tesseract::mutable_pix_binary().

Definition at line 2053 of file baseapi.cpp.

2053 {
2054 ASSERT_HOST(pix != nullptr);
2055 if (*pix != nullptr) {
2056 pixDestroy(pix);
2057 }
2058 // Zero resolution messes up the algorithms, so make sure it is credible.
2059 int user_dpi = 0;
2060 GetIntVariable("user_defined_dpi", &user_dpi);
2061 int y_res = thresholder_->GetScaledYResolution();
2062 if (user_dpi && (user_dpi < kMinCredibleResolution || user_dpi > kMaxCredibleResolution)) {
2063 tprintf(
2064 "Warning: User defined image dpi is outside of expected range "
2065 "(%d - %d)!\n",
2067 }
2068 // Always use user defined dpi
2069 if (user_dpi) {
2071 } else if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
2072 if (y_res != 0) {
2073 // Show warning only if a resolution was given.
2074 tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
2075 y_res, kMinCredibleResolution);
2076 }
2078 }
2079
2080 auto thresholding_method = static_cast<ThresholdMethod>(static_cast<int>(tesseract_->thresholding_method));
2081
2082 if (thresholding_method == ThresholdMethod::Otsu) {
2083 Image pix_binary(*pix);
2084 if (!thresholder_->ThresholdToPix(&pix_binary)) {
2085 return false;
2086 }
2087 *pix = pix_binary;
2088
2089 if (!thresholder_->IsBinary()) {
2092 } else {
2094 tesseract_->set_pix_grey(nullptr);
2095 }
2096 } else {
2097 auto [ok, pix_grey, pix_binary, pix_thresholds] = thresholder_->Threshold(this, thresholding_method);
2098
2099 if (!ok) {
2100 return false;
2101 }
2102 *pix = pix_binary;
2103
2104 tesseract_->set_pix_thresholds(pix_thresholds);
2105 tesseract_->set_pix_grey(pix_grey);
2106 }
2107
2109 &image_height_);
2110
2111 // Set the internal resolution that is used for layout parameters from the
2112 // estimated resolution, rather than the image resolution, which may be
2113 // fabricated, but we will use the image resolution, if there is one, to
2114 // report output point sizes.
2117 if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2118 tprintf(
2119 "Estimated internal resolution %d out of range! "
2120 "Corrected to %d.\n",
2121 thresholder_->GetScaledEstimatedResolution(), estimated_res);
2122 }
2123 tesseract_->set_source_resolution(estimated_res);
2124 return true;
2125}
constexpr int kMaxCredibleResolution
Definition: publictypes.h:38
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:105
constexpr int kMinCredibleResolution
Definition: publictypes.h:36
void set_pix_grey(Image grey_pix)
void set_pix_thresholds(Image thresholds)
int GetScaledEstimatedResolution() const
Definition: thresholder.h:115
virtual Image GetPixRectThresholds()
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
virtual std::tuple< bool, Image, Image, Image > Threshold(TessBaseAPI *api, ThresholdMethod method)
virtual Image GetPixRectGrey()
virtual bool ThresholdToPix(Image *pix)
Returns false on error.
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:84

◆ Version()

const char * tesseract::TessBaseAPI::Version ( )
static

Returns the version identifier as a static string. Do not delete.

Definition at line 241 of file baseapi.cpp.

241 {
242 return TESSERACT_VERSION_STR;
243}

Member Data Documentation

◆ block_list_

BLOCK_LIST* tesseract::TessBaseAPI::block_list_
protected

The page layout.

Definition at line 771 of file baseapi.h.

◆ datapath_

std::string tesseract::TessBaseAPI::datapath_
protected

Current location of tessdata.

Definition at line 775 of file baseapi.h.

◆ equ_detect_

EquationDetect* tesseract::TessBaseAPI::equ_detect_
protected

The equation detector.

Definition at line 767 of file baseapi.h.

◆ image_height_

int tesseract::TessBaseAPI::image_height_
protected

Definition at line 790 of file baseapi.h.

◆ image_width_

int tesseract::TessBaseAPI::image_width_
protected

Definition at line 789 of file baseapi.h.

◆ input_file_

std::string tesseract::TessBaseAPI::input_file_
protected

Name used by training code.

Definition at line 773 of file baseapi.h.

◆ language_

std::string tesseract::TessBaseAPI::language_
protected

Last initialized language.

Definition at line 776 of file baseapi.h.

◆ last_oem_requested_

OcrEngineMode tesseract::TessBaseAPI::last_oem_requested_
protected

Last ocr language mode requested.

Definition at line 777 of file baseapi.h.

◆ osd_tesseract_

Tesseract* tesseract::TessBaseAPI::osd_tesseract_
protected

For orientation & script detection.

Definition at line 766 of file baseapi.h.

◆ output_file_

std::string tesseract::TessBaseAPI::output_file_
protected

Name used by debug code.

Definition at line 774 of file baseapi.h.

◆ page_res_

PAGE_RES* tesseract::TessBaseAPI::page_res_
protected

The page-level data.

Definition at line 772 of file baseapi.h.

◆ paragraph_models_

std::vector<ParagraphModel *>* tesseract::TessBaseAPI::paragraph_models_
protected

Definition at line 770 of file baseapi.h.

◆ reader_

FileReader tesseract::TessBaseAPI::reader_
protected

Reads files from any filesystem.

Definition at line 768 of file baseapi.h.

◆ recognition_done_

bool tesseract::TessBaseAPI::recognition_done_
protected

page_res_ contains recognition data.

Definition at line 778 of file baseapi.h.

◆ rect_height_

int tesseract::TessBaseAPI::rect_height_
protected

Definition at line 788 of file baseapi.h.

◆ rect_left_

int tesseract::TessBaseAPI::rect_left_
protected

Definition at line 785 of file baseapi.h.

◆ rect_top_

int tesseract::TessBaseAPI::rect_top_
protected

Definition at line 786 of file baseapi.h.

◆ rect_width_

int tesseract::TessBaseAPI::rect_width_
protected

Definition at line 787 of file baseapi.h.

◆ tesseract_

Tesseract* tesseract::TessBaseAPI::tesseract_
protected

The underlying data object.

Definition at line 765 of file baseapi.h.

◆ thresholder_

ImageThresholder* tesseract::TessBaseAPI::thresholder_
protected

Image thresholding module.

Definition at line 769 of file baseapi.h.


The documentation for this class was generated from the following files: