21 return TessBaseAPI::Version();
24static char *MakeText(
const std::string& srcText) {
25 auto *text =
new char[srcText.size() + 1];
26 srcText.copy(text, srcText.size());
27 text[srcText.size()] = 0;
35static char **MakeTextArray(
const std::vector<std::string>& srcArr) {
36 auto **arr =
new char *[srcArr.size() + 1];
37 for (
size_t i = 0;
i < srcArr.size(); ++
i) {
38 arr[
i] = MakeText(srcArr[
i]);
40 arr[srcArr.size()] =
nullptr;
45 for (
char **pos = arr; *pos !=
nullptr; ++pos) {
101 renderer->insert(
next);
105 return renderer->next();
109 return static_cast<int>(renderer->BeginDocument(title));
113 return static_cast<int>(renderer->AddImage(api));
117 return static_cast<int>(renderer->EndDocument());
121 return renderer->file_extension();
125 return renderer->title();
129 return renderer->imagenum();
141 return TessBaseAPI::getOpenCLDevice(device);
145 handle->SetInputName(name);
149 return handle->GetInputName();
153 handle->SetInputImage(pix);
157 return handle->GetInputImage();
161 return handle->GetSourceYResolution();
165 return handle->GetDatapath();
169 handle->SetOutputName(name);
173 return static_cast<int>(handle->SetVariable(name,
value));
177 return static_cast<int>(handle->SetDebugVariable(name,
value));
181 return static_cast<int>(handle->GetIntVariable(name,
value));
186 bool result = handle->GetBoolVariable(name, &boolValue);
188 *
value =
static_cast<int>(boolValue);
190 return static_cast<int>(result);
194 return static_cast<int>(handle->GetDoubleVariable(name,
value));
198 return handle->GetStringVariable(name);
202 handle->PrintVariables(fp);
206 FILE *fp = fopen(filename,
"w");
208 handle->PrintVariables(fp);
217 char **vars_values,
size_t vars_vec_size,
BOOL set_only_non_debug_params) {
218 std::vector<std::string> varNames;
219 std::vector<std::string> varValues;
220 if (vars_vec !=
nullptr && vars_values !=
nullptr) {
221 for (
size_t i = 0;
i < vars_vec_size;
i++) {
222 varNames.emplace_back(vars_vec[
i]);
223 varValues.emplace_back(vars_values[
i]);
227 return handle->Init(datapath, language, mode, configs, configs_size, &varNames, &varValues,
228 set_only_non_debug_params != 0);
233 return handle->Init(datapath, language, oem, configs, configs_size,
nullptr,
nullptr,
false);
238 return handle->Init(datapath, language, oem);
242 return handle->Init(datapath, language);
247 char **vars_values,
size_t vars_vec_size,
BOOL set_only_non_debug_params) {
248 std::vector<std::string> varNames;
249 std::vector<std::string> varValues;
250 if (vars_vec !=
nullptr && vars_values !=
nullptr) {
251 for (
size_t i = 0;
i < vars_vec_size;
i++) {
252 varNames.emplace_back(vars_vec[
i]);
253 varValues.emplace_back(vars_values[
i]);
257 return handle->Init(data, data_size, language, mode, configs, configs_size, &varNames, &varValues,
258 set_only_non_debug_params != 0,
nullptr);
262 return handle->GetInitLanguagesAsString();
266 std::vector<std::string> languages;
267 handle->GetLoadedLanguagesAsVector(&languages);
268 return MakeTextArray(languages);
272 std::vector<std::string> languages;
273 handle->GetAvailableLanguagesAsVector(&languages);
274 return MakeTextArray(languages);
278 handle->InitForAnalysePage();
282 handle->ReadConfigFile(filename);
286 handle->ReadDebugConfigFile(filename);
290 handle->SetPageSegMode(mode);
294 return handle->GetPageSegMode();
298 int bytes_per_line,
int left,
int top,
int width,
int height) {
299 return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width,
303#ifndef DISABLED_LEGACY_ENGINE
305 handle->ClearAdaptiveClassifier();
310 int bytes_per_pixel,
int bytes_per_line) {
311 handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
315 return handle->SetImage(pix);
319 handle->SetSourceResolution(ppi);
323 handle->SetRectangle(left, top, width, height);
327 return handle->GetThresholdedImage();
331 TessBaseAPI::ClearPersistentCache();
334#ifndef DISABLED_LEGACY_ENGINE
337 const char **script_name,
float *script_conf) {
338 auto success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
339 return static_cast<BOOL>(success);
345 return handle->GetRegions(pixa);
349 return handle->GetTextlines(pixa, blockids);
353 const int raw_padding,
struct Pixa **pixa,
int **blockids,
355 return handle->GetTextlines(raw_image != 0, raw_padding, pixa, blockids, paraids);
359 return handle->GetStrips(pixa, blockids);
363 return handle->GetWords(pixa);
367 return handle->GetConnectedComponents(cc);
371 BOOL text_only,
struct Pixa **pixa,
int **blockids) {
372 return handle->GetComponentImages(level,
static_cast<bool>(text_only), pixa, blockids);
376 const BOOL text_only,
const BOOL raw_image,
377 const int raw_padding,
struct Pixa **pixa,
378 int **blockids,
int **paraids) {
379 return handle->GetComponentImages(level,
static_cast<bool>(text_only), raw_image != 0,
380 raw_padding, pixa, blockids, paraids);
384 return handle->GetThresholdedImageScaleFactor();
388 return handle->AnalyseLayout();
392 return handle->Recognize(monitor);
397 return static_cast<int>(handle->ProcessPages(filename, retry_config, timeout_millisec, renderer));
401 const char *filename,
const char *retry_config,
int timeout_millisec,
403 return static_cast<int>(
404 handle->ProcessPage(pix, page_index, filename, retry_config, timeout_millisec, renderer));
408 return handle->GetIterator();
412 return handle->GetMutableIterator();
416 return handle->GetUTF8Text();
420 return handle->GetHOCRText(
nullptr, page_number);
424 return handle->GetAltoText(page_number);
428 return handle->GetTSVText(page_number);
432 return handle->GetBoxText(page_number);
436 return handle->GetWordStrBoxText(page_number);
440 return handle->GetLSTMBoxText(page_number);
444 return handle->GetUNLVText();
448 return handle->MeanTextConf();
452 return handle->AllWordConfidences();
455#ifndef DISABLED_LEGACY_ENGINE
457 return static_cast<int>(handle->AdaptToWordStr(mode, wordstr));
470 return handle->IsValidWord(word);
474 return static_cast<int>(handle->GetTextDirection(out_offset, out_slope));
478 return handle->GetUnichar(unichar_id);
482 handle->set_min_orientation_margin(margin);
486 return handle->NumDawgs();
490 return handle->oem();
494 bool **vertical_writing) {
495 handle->GetBlockTextOrientations(block_orientation, vertical_writing);
511 return static_cast<int>(handle->Next(level));
515 return static_cast<int>(handle->IsAtBeginningOf(level));
520 return static_cast<int>(handle->IsAtFinalElement(level, element));
524 int *left,
int *top,
int *right,
int *bottom) {
525 return static_cast<int>(handle->BoundingBox(level, left, top, right, bottom));
529 return handle->BlockType();
534 return handle->GetBinaryImage(level);
538 int padding,
struct Pix *original_image,
int *left,
int *top) {
539 return handle->GetImage(level, padding, original_image, left, top);
543 int *y1,
int *x2,
int *y2) {
544 return static_cast<int>(handle->Baseline(level, x1, y1, x2, y2));
550 handle->Orientation(orientation, writing_direction, textline_order, deskew_angle);
555 BOOL *is_crown,
int *first_line_indent) {
556 bool bool_is_list_item;
558 handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, first_line_indent);
559 if (is_list_item !=
nullptr) {
560 *is_list_item =
static_cast<int>(bool_is_list_item);
562 if (is_crown !=
nullptr) {
563 *is_crown =
static_cast<int>(bool_is_crown);
588 return static_cast<int>(handle->Next(level));
592 return handle->GetUTF8Text(level);
596 return handle->Confidence(level);
600 return handle->WordRecognitionLanguage();
604 BOOL *is_italic,
BOOL *is_underlined,
606 BOOL *is_smallcaps,
int *pointsize,
int *font_id) {
609 bool bool_is_underlined;
610 bool bool_is_monospace;
612 bool bool_is_smallcaps;
613 const char *ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined,
614 &bool_is_monospace, &bool_is_serif,
615 &bool_is_smallcaps, pointsize, font_id);
616 if (is_bold !=
nullptr) {
617 *is_bold =
static_cast<int>(bool_is_bold);
619 if (is_italic !=
nullptr) {
620 *is_italic =
static_cast<int>(bool_is_italic);
622 if (is_underlined !=
nullptr) {
623 *is_underlined =
static_cast<int>(bool_is_underlined);
625 if (is_monospace !=
nullptr) {
626 *is_monospace =
static_cast<int>(bool_is_monospace);
628 if (is_serif !=
nullptr) {
629 *is_serif =
static_cast<int>(bool_is_serif);
631 if (is_smallcaps !=
nullptr) {
632 *is_smallcaps =
static_cast<int>(bool_is_smallcaps);
638 return static_cast<int>(handle->WordIsFromDictionary());
642 return static_cast<int>(handle->WordIsNumeric());
646 return static_cast<int>(handle->SymbolIsSuperscript());
650 return static_cast<int>(handle->SymbolIsSubscript());
654 return static_cast<int>(handle->SymbolIsDropcap());
662 return static_cast<int>(handle->Next());
666 return handle->GetUTF8Text();
670 return handle->Confidence();
682 monitor->cancel = cancelFunc;
686 monitor->cancel_this = cancelThis;
690 return monitor->cancel_this;
694 monitor->progress_callback2 = progressFunc;
698 return monitor->progress;
702 monitor->set_deadline_msecs(deadline);
TessParagraphJustification
struct TessMutableIterator TessMutableIterator
bool(* TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top, int bottom)
struct TessBaseAPI TessBaseAPI
struct TessResultRenderer TessResultRenderer
struct TessChoiceIterator TessChoiceIterator
struct TessPageIterator TessPageIterator
bool(* TessCancelFunc)(void *cancel_this, int words)
struct TessResultIterator TessResultIterator
struct ETEXT_DESC ETEXT_DESC
BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle, int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle, const char *filename)
int TessBaseAPIMeanTextConf(TessBaseAPI *handle)
BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle, const char *name, double *value)
BOOL TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle)
char * TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle, int page_number)
BOOL TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle)
void TessBaseAPIClear(TessBaseAPI *handle)
BOOL TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle)
struct Pix * TessPageIteratorGetImage(const TessPageIterator *handle, TessPageIteratorLevel level, int padding, struct Pix *original_image, int *left, int *top)
TessResultRenderer * TessBoxTextRendererCreate(const char *outputbase)
void TessDeleteIntArray(const int *arr)
void TessBaseAPISetImage(TessBaseAPI *handle, const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
TessPageIterator * TessBaseAPIAnalyseLayout(TessBaseAPI *handle)
BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle, const char *name, BOOL *value)
int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath, const char *language, TessOcrEngineMode oem, char **configs, int configs_size)
void TessMonitorDelete(ETEXT_DESC *monitor)
void TessPageIteratorOrientation(TessPageIterator *handle, TessOrientation *orientation, TessWritingDirection *writing_direction, TessTextlineOrder *textline_order, float *deskew_angle)
void TessChoiceIteratorDelete(TessChoiceIterator *handle)
int * TessBaseAPIAllWordConfidences(TessBaseAPI *handle)
struct Boxa * TessBaseAPIGetTextlines(TessBaseAPI *handle, struct Pixa **pixa, int **blockids)
const char * TessVersion()
int TessResultRendererImageNum(TessResultRenderer *renderer)
BOOL TessPageIteratorNext(TessPageIterator *handle, TessPageIteratorLevel level)
TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle)
Pix * TessBaseAPIGetInputImage(TessBaseAPI *handle)
struct Boxa * TessBaseAPIGetConnectedComponents(TessBaseAPI *handle, struct Pixa **cc)
void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp)
const char * TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id)
void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle)
TessPageIterator * TessResultIteratorGetPageIterator(TessResultIterator *handle)
void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix)
char * TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number)
BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset, float *out_slope)
const char * TessChoiceIteratorGetUTF8Text(const TessChoiceIterator *handle)
void TessResultIteratorDelete(TessResultIterator *handle)
char * TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number)
void TessPageIteratorDelete(TessPageIterator *handle)
char * TessBaseAPIRect(TessBaseAPI *handle, const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
struct Pix * TessBaseAPIGetThresholdedImage(TessBaseAPI *handle)
TessMutableIterator * TessBaseAPIGetMutableIterator(TessBaseAPI *handle)
const char * TessResultIteratorWordFontAttributes(const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic, BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps, int *pointsize, int *font_id)
TessResultRenderer * TessUnlvRendererCreate(const char *outputbase)
char * TessBaseAPIGetUNLVText(TessBaseAPI *handle)
TessResultRenderer * TessLSTMBoxRendererCreate(const char *outputbase)
BOOL TessResultRendererEndDocument(TessResultRenderer *renderer)
void * TessMonitorGetCancelThis(ETEXT_DESC *monitor)
int TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI *handle)
char * TessResultIteratorGetUTF8Text(const TessResultIterator *handle, TessPageIteratorLevel level)
struct Pix * TessPageIteratorGetBinaryImage(const TessPageIterator *handle, TessPageIteratorLevel level)
TessResultIterator * TessBaseAPIGetIterator(TessBaseAPI *handle)
BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
const char * TessResultRendererExtention(TessResultRenderer *renderer)
void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle, double margin)
struct Boxa * TessBaseAPIGetComponentImages1(TessBaseAPI *handle, const TessPageIteratorLevel level, const BOOL text_only, const BOOL raw_image, const int raw_padding, struct Pixa **pixa, int **blockids, int **paraids)
struct Boxa * TessBaseAPIGetTextlines1(TessBaseAPI *handle, const BOOL raw_image, const int raw_padding, struct Pixa **pixa, int **blockids, int **paraids)
void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi)
void TessBaseAPIReadConfigFile(TessBaseAPI *handle, const char *filename)
int TessBaseAPINumDawgs(const TessBaseAPI *handle)
TessChoiceIterator * TessResultIteratorGetChoiceIterator(const TessResultIterator *handle)
struct Boxa * TessBaseAPIGetRegions(TessBaseAPI *handle, struct Pixa **pixa)
int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word)
void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top, int width, int height)
int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath, const char *language)
float TessChoiceIteratorConfidence(const TessChoiceIterator *handle)
TessPageIterator * TessPageIteratorCopy(const TessPageIterator *handle)
void TessBaseAPIClearPersistentCache(TessBaseAPI *)
BOOL TessPageIteratorBaseline(const TessPageIterator *handle, TessPageIteratorLevel level, int *x1, int *y1, int *x2, int *y2)
const TessPageIterator * TessResultIteratorGetPageIteratorConst(const TessResultIterator *handle)
const char * TessBaseAPIGetDatapath(TessBaseAPI *handle)
BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle, TessPageIteratorLevel level, int *left, int *top, int *right, int *bottom)
TessResultRenderer * TessTsvRendererCreate(const char *outputbase)
char * TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number)
void TessMonitorSetCancelFunc(ETEXT_DESC *monitor, TessCancelFunc cancelFunc)
const char * TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI *handle)
BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle)
struct Boxa * TessBaseAPIGetWords(TessBaseAPI *handle, struct Pixa **pixa)
char ** TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI *handle)
TessResultRenderer * TessHOcrRendererCreate(const char *outputbase)
void TessBaseAPIDelete(TessBaseAPI *handle)
BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle, TessPageSegMode mode, const char *wordstr)
const char * TessResultRendererTitle(TessResultRenderer *renderer)
void TessPageIteratorParagraphInfo(TessPageIterator *handle, TessParagraphJustification *justification, BOOL *is_list_item, BOOL *is_crown, int *first_line_indent)
BOOL TessResultRendererAddImage(TessResultRenderer *renderer, TessBaseAPI *api)
void TessMonitorSetProgressFunc(ETEXT_DESC *monitor, TessProgressFunc progressFunc)
void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name)
TessResultRenderer * TessHOcrRendererCreate2(const char *outputbase, BOOL font_info)
void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle, const char *filename)
BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle, const char *name, int *value)
void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name)
int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath, const char *language, TessOcrEngineMode oem)
int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath, const char *language, TessOcrEngineMode mode, char **configs, int configs_size, char **vars_vec, char **vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params)
int TessMonitorGetProgress(ETEXT_DESC *monitor)
void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis)
BOOL TessResultIteratorNext(TessResultIterator *handle, TessPageIteratorLevel level)
char * TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number)
TessResultRenderer * TessPDFRendererCreate(const char *outputbase, const char *datadir, BOOL textonly)
BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name, const char *value)
float TessResultIteratorConfidence(const TessResultIterator *handle, TessPageIteratorLevel level)
void TessResultRendererInsert(TessResultRenderer *renderer, TessResultRenderer *next)
TessResultIterator * TessResultIteratorCopy(const TessResultIterator *handle)
void TessPageIteratorBegin(TessPageIterator *handle)
void TessBaseGetBlockTextOrientations(TessBaseAPI *handle, int **block_orientation, bool **vertical_writing)
BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle, TessPageIteratorLevel level, TessPageIteratorLevel element)
void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline)
TessResultRenderer * TessWordStrBoxRendererCreate(const char *outputbase)
BOOL TessChoiceIteratorNext(TessChoiceIterator *handle)
TessResultRenderer * TessResultRendererNext(TessResultRenderer *renderer)
ETEXT_DESC * TessMonitorCreate()
const char * TessBaseAPIGetStringVariable(const TessBaseAPI *handle, const char *name)
TessResultRenderer * TessAltoRendererCreate(const char *outputbase)
TessResultRenderer * TessTextRendererCreate(const char *outputbase)
BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle, TessPageIteratorLevel level)
void TessBaseAPIEnd(TessBaseAPI *handle)
TessPolyBlockType TessPageIteratorBlockType(const TessPageIterator *handle)
int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size, const char *language, TessOcrEngineMode mode, char **configs, int configs_size, char **vars_vec, char **vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params)
const char * TessBaseAPIGetInputName(TessBaseAPI *handle)
void TessBaseAPISetPageSegMode(TessBaseAPI *handle, TessPageSegMode mode)
void TessBaseAPISetInputImage(TessBaseAPI *handle, Pix *pix)
BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name, const char *value)
TessBaseAPI * TessBaseAPICreate()
const char * TessResultIteratorWordRecognitionLanguage(const TessResultIterator *handle)
BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
char * TessBaseAPIGetUTF8Text(TessBaseAPI *handle)
void TessDeleteText(const char *text)
int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle)
char ** TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI *handle)
struct Boxa * TessBaseAPIGetStrips(TessBaseAPI *handle, struct Pixa **pixa, int **blockids)
size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *, void **device)
char * TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number)
BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer, const char *title)
int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor)
struct Boxa * TessBaseAPIGetComponentImages(TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only, struct Pixa **pixa, int **blockids)
void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle)
BOOL TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle)
void TessDeleteTextArray(char **arr)
void TessDeleteResultRenderer(TessResultRenderer *renderer)
TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle)