19#define _USE_MATH_DEFINES
23# include "config_auto.h"
32#ifndef DISABLED_LEGACY_ENGINE
39#ifndef DISABLED_LEGACY_ENGINE
44#if defined(USE_OPENCL)
78#include <allheaders.h>
80# include <curl/curl.h>
94# include <sys/types.h>
100static BOOL_VAR(stream_filelist,
false,
"Stream a filelist from stdin");
101static STRING_VAR(document_title,
"",
"Title of output document (used for hOCR and PDF output)");
103static INT_VAR(curl_timeout, 0,
"Timeout for curl in seconds");
117static const char *kOldVarsFile =
"failed_vars.txt";
119#ifndef DISABLED_LEGACY_ENGINE
124static const char *kInputFile =
"noname.tif";
125static const char kUnknownFontName[] =
"UnknownFont";
127static STRING_VAR(classify_font_name, kUnknownFontName,
128 "Default font name to be used in training");
135static void ExtractFontName(
const char* filename, std::string* fontname) {
136 *fontname = classify_font_name;
137 if (*fontname == kUnknownFontName) {
140 const char *basename = strrchr(filename,
'/');
141 const char *firstdot = strchr(basename ? basename : filename,
'.');
142 const char *lastdot = strrchr(filename,
'.');
143 if (firstdot != lastdot && firstdot !=
nullptr && lastdot !=
nullptr) {
145 *fontname = firstdot;
146 fontname->resize(lastdot - firstdot);
154static void addAvailableLanguages(
const std::string &datadir,
const std::string &base,
155 std::vector<std::string> *langs) {
157 if (!base2.empty()) {
160 const size_t extlen =
sizeof(kTrainedDataSuffix);
162 WIN32_FIND_DATA data;
163 HANDLE handle = FindFirstFile((datadir + base2 +
"*").c_str(), &data);
164 if (handle != INVALID_HANDLE_VALUE) {
167 char *name = data.cFileName;
169 if (name[0] !=
'.') {
170 if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY) {
171 addAvailableLanguages(datadir, base2 + name, langs);
173 size_t len = strlen(name);
174 if (len > extlen && name[len - extlen] ==
'.' &&
175 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
176 name[len - extlen] =
'\0';
177 langs->push_back(base2 + name);
181 result = FindNextFile(handle, &data);
186 DIR *dir = opendir((datadir + base).c_str());
187 if (dir !=
nullptr) {
189 while ((de = readdir(dir))) {
190 char *name = de->d_name;
192 if (name[0] !=
'.') {
194 if (stat((datadir + base2 + name).c_str(), &st) == 0 && (st.st_mode & S_IFDIR) == S_IFDIR) {
195 addAvailableLanguages(datadir, base2 + name, langs);
197 size_t len = strlen(name);
198 if (len > extlen && name[len - extlen] ==
'.' &&
199 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
200 name[len - extlen] =
'\0';
201 langs->push_back(base2 + name);
212 : tesseract_(nullptr)
213 , osd_tesseract_(nullptr)
214 , equ_detect_(nullptr)
220 thresholder_(nullptr)
221 , paragraph_models_(nullptr)
222 , block_list_(nullptr)
225 , recognition_done_(false)
242 return TESSERACT_VERSION_STR;
254 ds_device device = OpenclDevice::getDeviceSelection();
255 if (device.type == DS_DEVICE_OPENCL_DEVICE) {
256 *data =
new cl_device_id;
257 memcpy(*data, &device.oclDeviceID,
sizeof(cl_device_id));
258 return sizeof(cl_device_id);
295 auto *
p = ParamUtils::FindParam<IntParam>(name,
GlobalParams()->int_params,
305 auto *
p = ParamUtils::FindParam<BoolParam>(name,
GlobalParams()->bool_params,
315 auto *
p = ParamUtils::FindParam<StringParam>(name,
GlobalParams()->string_params,
317 return (
p !=
nullptr) ?
p->c_str() :
nullptr;
321 auto *
p = ParamUtils::FindParam<DoubleParam>(name,
GlobalParams()->double_params,
335#ifndef DISABLED_LEGACY_ENGINE
340 for (
int font_index = 1; font_index < fontinfo_size; ++font_index) {
342 fprintf(fp,
"ID=%3d: %s is_italic=%s is_bold=%s"
343 " is_fixed_pitch=%s is_serif=%s is_fraktur=%s\n",
344 font_index, font.
name,
346 font.
is_bold() ?
"true" :
"false",
369 int configs_size,
const std::vector<std::string> *vars_vec,
370 const std::vector<std::string> *vars_values,
bool set_only_non_debug_params) {
371 return Init(datapath, 0, language,
oem, configs, configs_size, vars_vec, vars_values,
372 set_only_non_debug_params,
nullptr);
379 char **configs,
int configs_size,
const std::vector<std::string> *vars_vec,
380 const std::vector<std::string> *vars_values,
bool set_only_non_debug_params,
382 if (language ==
nullptr) {
385 if (data ==
nullptr) {
388 std::string datapath = data_size == 0 ? data : language;
404 bool reset_classifier =
true;
406 reset_classifier =
false;
408 if (reader !=
nullptr) {
412 if (data_size != 0) {
416 configs_size, vars_vec, vars_values, set_only_non_debug_params,
431#ifndef DISABLED_LEGACY_ENGINE
433 if (reset_classifier) {
462 for (
int i = 0;
i < num_subs; ++
i) {
475 std::sort(langs->begin(), langs->end());
486#ifndef DISABLED_LEGACY_ENGINE
515 tesseract_->tessedit_pageseg_mode.set_value(mode);
540 int bytes_per_line,
int left,
int top,
int width,
int height) {
547 int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
548 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, bytes_per_pixel,
555#ifndef DISABLED_LEGACY_ENGINE
577 int bytes_per_pixel,
int bytes_per_line) {
588 tprintf(
"Please call SetImage before SetSourceResolution.\n");
602 if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
604 Pix *p1 = pixRemoveAlpha(pix);
606 (void)pixCopy(pix, p1);
659 int **blockids,
int **paraids) {
703 const int raw_padding, Pixa **pixa,
int **blockids,
706 if (page_it ==
nullptr) {
709 if (page_it ==
nullptr) {
714 int component_count = 0;
715 int left, top, right, bottom;
720 if (page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom) &&
724 }
while (page_it->Next(level));
729 if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
733 }
while (page_it->Next(level));
736 Boxa *boxa = boxaCreate(component_count);
737 if (pixa !=
nullptr) {
738 *pixa = pixaCreate(component_count);
740 if (blockids !=
nullptr) {
741 *blockids =
new int[component_count];
743 if (paraids !=
nullptr) {
744 *paraids =
new int[component_count];
749 int component_index = 0;
752 bool got_bounding_box;
754 got_bounding_box = page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom);
756 got_bounding_box = page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom);
758 if (got_bounding_box && (!text_only ||
PTIsTextType(page_it->BlockType()))) {
759 Box *lbox = boxCreate(left, top, right - left, bottom - top);
760 boxaAddBox(boxa, lbox, L_INSERT);
761 if (pixa !=
nullptr) {
764 pix = page_it->GetImage(level, raw_padding,
GetInputImage(), &left, &top);
766 pix = page_it->GetBinaryImage(level);
768 pixaAddPix(*pixa, pix, L_INSERT);
769 pixaAddBox(*pixa, lbox, L_CLONE);
771 if (paraids !=
nullptr) {
772 (*paraids)[component_index] = paraid;
773 if (page_it->IsAtFinalElement(
RIL_PARA, level)) {
777 if (blockids !=
nullptr) {
778 (*blockids)[component_index] = blockid;
779 if (page_it->IsAtFinalElement(
RIL_BLOCK, level)) {
786 }
while (page_it->Next(level));
849#ifndef DISABLED_LEGACY_ENGINE
850 if (
tesseract_->tessedit_resegment_from_line_boxes) {
852 }
else if (
tesseract_->tessedit_resegment_from_boxes) {
865 if (
tesseract_->tessedit_train_line_recognizer) {
872#ifndef DISABLED_LEGACY_ENGINE
873 if (
tesseract_->tessedit_make_boxes_from_boxes) {
881#ifndef GRAPHICS_DISABLED
889#ifndef DISABLED_LEGACY_ENGINE
890 }
else if (
tesseract_->tessedit_train_from_boxes) {
891 std::string fontname;
894 }
else if (
tesseract_->tessedit_ambigs_training) {
898 training_output_file);
899 fclose(training_output_file);
903 bool wait_for_text =
true;
905 if (!wait_for_text) {
950bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf,
const char *retry_config,
952 int tessedit_page_number) {
953 if (!flist && !buf) {
956 unsigned page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
959 std::vector<std::string> lines;
962 for (
const auto ch : *buf) {
964 lines.push_back(line);
972 lines.push_back(line);
980 for (
unsigned i = 0;
i < page;
i++) {
982 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr) {
989 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
996 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr) {
1000 if (page >= lines.size()) {
1003 snprintf(pagename,
sizeof(pagename),
"%s", lines[page].c_str());
1006 Pix *pix = pixRead(pagename);
1007 if (pix ==
nullptr) {
1008 tprintf(
"Image file %s cannot be read!\n", pagename);
1011 tprintf(
"Page %u : %s\n", page, pagename);
1012 bool r =
ProcessPage(pix, page, pagename, retry_config, timeout_millisec, renderer);
1017 if (tessedit_page_number >= 0) {
1030bool TessBaseAPI::ProcessPagesMultipageTiff(
const l_uint8 *data,
size_t size,
const char *filename,
1031 const char *retry_config,
int timeout_millisec,
1033 int tessedit_page_number) {
1035 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1038 if (tessedit_page_number >= 0) {
1039 page = tessedit_page_number;
1040 pix = (data) ? pixReadMemTiff(data, size, page) : pixReadTiff(filename, page);
1042 pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
1043 : pixReadFromMultipageTiff(filename, &offset);
1045 if (pix ==
nullptr) {
1048 if (offset || page > 0) {
1050 tprintf(
"Page %d\n", page + 1);
1052 auto page_string = std::to_string(page);
1053 SetVariable(
"applybox_page", page_string.c_str());
1054 bool r =
ProcessPage(pix, page, filename, retry_config, timeout_millisec, renderer);
1059 if (tessedit_page_number >= 0) {
1074#ifndef DISABLED_LEGACY_ENGINE
1086static size_t WriteMemoryCallback(
void *contents,
size_t size,
size_t nmemb,
void *userp) {
1087 size = size * nmemb;
1088 auto *buf =
reinterpret_cast<std::string *
>(userp);
1089 buf->append(
reinterpret_cast<const char *
>(contents), size);
1107 bool stdInput = !strcmp(filename,
"stdin") || !strcmp(filename,
"-");
1110 if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1111 tprintf(
"ERROR: cin to binary: %s", strerror(errno));
1115 if (stream_filelist) {
1116 return ProcessPagesFileList(stdin,
nullptr, retry_config, timeout_millisec, renderer,
1124 const l_uint8 *data =
nullptr;
1126 buf.assign((std::istreambuf_iterator<char>(std::cin)), (std::istreambuf_iterator<char>()));
1127 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1128 }
else if (strstr(filename,
"://") !=
nullptr) {
1131 CURL *curl = curl_easy_init();
1132 if (curl ==
nullptr) {
1133 fprintf(stderr,
"Error, curl_easy_init failed\n");
1137 auto error = [curl, &curlcode](
const char *function) {
1138 fprintf(stderr,
"Error, %s failed with error %s\n", function, curl_easy_strerror(curlcode));
1139 curl_easy_cleanup(curl);
1142 curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1143 if (curlcode != CURLE_OK) {
1144 return error(
"curl_easy_setopt");
1147 curlcode = curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
1148 if (curlcode != CURLE_OK) {
1149 return error(
"curl_easy_setopt");
1152 curlcode = curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 8);
1153 if (curlcode != CURLE_OK) {
1154 return error(
"curl_easy_setopt");
1156 int timeout = curl_timeout;
1158 curlcode = curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
1159 if (curlcode != CURLE_OK) {
1160 return error(
"curl_easy_setopt");
1162 curlcode = curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
1163 if (curlcode != CURLE_OK) {
1164 return error(
"curl_easy_setopt");
1167 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1168 if (curlcode != CURLE_OK) {
1169 return error(
"curl_easy_setopt");
1171 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1172 if (curlcode != CURLE_OK) {
1173 return error(
"curl_easy_setopt");
1175 curlcode = curl_easy_perform(curl);
1176 if (curlcode != CURLE_OK) {
1177 return error(
"curl_easy_perform");
1179 curl_easy_cleanup(curl);
1180 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1183 fprintf(stderr,
"Error, this tesseract has no URL support\n");
1188 if (FILE *
file = fopen(filename,
"rb")) {
1191 fprintf(stderr,
"Error, cannot read input file %s: %s\n", filename, strerror(errno));
1199 (data !=
nullptr) ? findFileFormatBuffer(data, &format) : findFileFormat(filename, &format);
1202 if (r != 0 || format == IFF_UNKNOWN) {
1204 if (data !=
nullptr) {
1207 std::ifstream t(filename);
1208 std::string u((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
1211 return ProcessPagesFileList(
nullptr, &s, retry_config, timeout_millisec, renderer,
1216 bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || format == IFF_TIFF_RLE ||
1217 format == IFF_TIFF_G3 || format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1218#if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1219 format == IFF_TIFF_JPEG ||
1221 format == IFF_TIFF_ZIP);
1226 pix = (data !=
nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1227 if (pix ==
nullptr) {
1233 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
1239 r = (tiff) ? ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, timeout_millisec,
1241 :
ProcessPage(pix, 0, filename, retry_config, timeout_millisec, renderer);
1247 if (!r || (renderer && !renderer->
EndDocument())) {
1254 const char *retry_config,
int timeout_millisec,
1258 bool failed =
false;
1262 if (! std::unique_ptr<const PageIterator>(
AnalyseLayout())) {
1267 }
else if (timeout_millisec > 0) {
1270 monitor.
cancel =
nullptr;
1283 std::string output_filename =
output_file_ +
".processed";
1284 if (page_index > 0) {
1285 output_filename += std::to_string(page_index);
1287 output_filename +=
".tif";
1288 pixWrite(output_filename.c_str(), page_pix, IFF_TIFF_G4);
1289 pixDestroy(&page_pix);
1292 if (failed && retry_config !=
nullptr && retry_config[0] !=
'\0') {
1294 FILE *fp = fopen(kOldVarsFile,
"wb");
1295 if (fp ==
nullptr) {
1296 tprintf(
"Error, failed to open file \"%s\"\n", kOldVarsFile);
1309 if (renderer && !failed) {
1310 failed = !renderer->
AddImage(
this);
1368 std::string text(
"");
1374 auto block_type = it->BlockType();
1375 switch (block_type) {
1384 tprintf(
"TODO: Please report image which triggers the noise case.\n");
1390 const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(
RIL_PARA));
1391 text += para_text.get();
1393 char *result =
new char[text.length() + 1];
1394 strncpy(result, text.c_str(), text.length() + 1);
1399 int left, top, right, bottom;
1400 it->
BoundingBox(level, &left, &top, &right, &bottom);
1401 text +=
"\t" + std::to_string(left);
1402 text +=
"\t" + std::to_string(top);
1403 text +=
"\t" + std::to_string(right - left);
1404 text +=
"\t" + std::to_string(bottom - top);
1417 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1418 int page_id = page_number + 1;
1420 int page_num = page_id;
1426 std::string tsv_str;
1427 tsv_str +=
"1\t" + std::to_string(page_num);
1428 tsv_str +=
"\t" + std::to_string(block_num);
1429 tsv_str +=
"\t" + std::to_string(par_num);
1430 tsv_str +=
"\t" + std::to_string(line_num);
1431 tsv_str +=
"\t" + std::to_string(word_num);
1432 tsv_str +=
"\t" + std::to_string(
rect_left_);
1433 tsv_str +=
"\t" + std::to_string(
rect_top_);
1436 tsv_str +=
"\t-1\t\n";
1446 if (res_it->IsAtBeginningOf(
RIL_BLOCK)) {
1451 tsv_str +=
"2\t" + std::to_string(page_num);
1452 tsv_str +=
"\t" + std::to_string(block_num);
1453 tsv_str +=
"\t" + std::to_string(par_num);
1454 tsv_str +=
"\t" + std::to_string(line_num);
1455 tsv_str +=
"\t" + std::to_string(word_num);
1456 AddBoxToTSV(res_it.get(),
RIL_BLOCK, tsv_str);
1457 tsv_str +=
"\t-1\t\n";
1459 if (res_it->IsAtBeginningOf(
RIL_PARA)) {
1463 tsv_str +=
"3\t" + std::to_string(page_num);
1464 tsv_str +=
"\t" + std::to_string(block_num);
1465 tsv_str +=
"\t" + std::to_string(par_num);
1466 tsv_str +=
"\t" + std::to_string(line_num);
1467 tsv_str +=
"\t" + std::to_string(word_num);
1468 AddBoxToTSV(res_it.get(),
RIL_PARA, tsv_str);
1469 tsv_str +=
"\t-1\t\n";
1474 tsv_str +=
"4\t" + std::to_string(page_num);
1475 tsv_str +=
"\t" + std::to_string(block_num);
1476 tsv_str +=
"\t" + std::to_string(par_num);
1477 tsv_str +=
"\t" + std::to_string(line_num);
1478 tsv_str +=
"\t" + std::to_string(word_num);
1480 tsv_str +=
"\t-1\t\n";
1484 int left, top, right, bottom;
1485 res_it->BoundingBox(
RIL_WORD, &left, &top, &right, &bottom);
1487 tsv_str +=
"5\t" + std::to_string(page_num);
1488 tsv_str +=
"\t" + std::to_string(block_num);
1489 tsv_str +=
"\t" + std::to_string(par_num);
1490 tsv_str +=
"\t" + std::to_string(line_num);
1491 tsv_str +=
"\t" + std::to_string(word_num);
1492 tsv_str +=
"\t" + std::to_string(left);
1493 tsv_str +=
"\t" + std::to_string(top);
1494 tsv_str +=
"\t" + std::to_string(right - left);
1495 tsv_str +=
"\t" + std::to_string(bottom - top);
1496 tsv_str +=
"\t" + std::to_string(res_it->Confidence(
RIL_WORD));
1511 tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(
RIL_SYMBOL)).get();
1518 char *ret =
new char[tsv_str.length() + 1];
1519 strcpy(ret, tsv_str.c_str());
1559 char *result =
new char[total_length];
1561 int output_length = 0;
1564 int left, top, right, bottom;
1569 for (
int i = 0; text[
i] !=
'\0'; ++
i) {
1570 if (text[
i] ==
' ') {
1574 snprintf(result + output_length, total_length - output_length,
"%s %d %d %d %d %d\n",
1576 output_length += strlen(result + output_length);
1592const int kUniChs[] = {0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0};
1594const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0};
1605 bool tilde_crunch_written =
false;
1606 bool last_char_was_newline =
true;
1607 bool last_char_was_tilde =
false;
1611 char *result =
new char[total_length];
1618 (!tilde_crunch_written ||
1625 last_char_was_tilde =
false;
1627 if (!last_char_was_tilde) {
1629 last_char_was_tilde =
true;
1631 tilde_crunch_written =
true;
1632 last_char_was_newline =
false;
1637 tilde_crunch_written =
false;
1641 int length = lengths.length();
1645 if (last_char_was_tilde && word->
word->
space() == 0 && wordstr[offset] ==
' ') {
1649 offset = lengths[
i++];
1651 if (
i < length && wordstr[offset] != 0) {
1652 if (!last_char_was_newline) {
1655 last_char_was_newline =
false;
1657 for (;
i < length; offset += lengths[
i++]) {
1660 last_char_was_tilde =
true;
1666 int uni_ch =
ch.first_uni();
1667 for (
int j = 0;
kUniChs[j] != 0; ++j) {
1673 if (uni_ch <= 0xff) {
1674 *ptr++ =
static_cast<char>(uni_ch);
1675 last_char_was_tilde =
false;
1678 last_char_was_tilde =
true;
1687 tilde_crunch_written =
false;
1688 last_char_was_newline =
true;
1689 last_char_was_tilde =
false;
1697#ifndef DISABLED_LEGACY_ENGINE
1709 const char **script_name,
float *script_conf) {
1723 *orient_deg = orient_id * 90;
1729 *script_name = script;
1747 const char *script_name;
1757 std::stringstream stream;
1759 stream.imbue(std::locale::classic());
1761 stream.precision(2);
1762 stream << std::fixed <<
"Page number: " << page_number <<
"\n"
1763 <<
"Orientation in degrees: " << orient_deg <<
"\n"
1764 <<
"Rotate: " << rotate <<
"\n"
1765 <<
"Orientation confidence: " << orient_conf <<
"\n"
1766 <<
"Script: " << script_name <<
"\n"
1767 <<
"Script confidence: " << script_conf <<
"\n";
1768 const std::string &text = stream.str();
1769 char *result =
new char[text.length() + 1];
1770 strcpy(result, text.c_str());
1805 int *conf =
new int[n_word + 1];
1810 int w_conf =
static_cast<int>(100 + 5 * choice->
certainty());
1818 conf[n_word++] = w_conf;
1824#ifndef DISABLED_LEGACY_ENGINE
1838 bool success =
true;
1842 const std::unique_ptr<const char[]> text(
GetUTF8Text());
1844 tprintf(
"Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1846 if (text !=
nullptr) {
1849 if (word_res !=
nullptr) {
1854 for (t = 0; text[t] !=
'\0'; ++t) {
1855 if (text[t] ==
'\n' || text[t] ==
' ') {
1858 while (wordstr[w] ==
' ') {
1861 if (text[t] != wordstr[w]) {
1866 if (text[t] !=
'\0' || wordstr[w] !=
'\0') {
1869 std::vector<TBOX> boxes;
1874 if (pr_it.
word() ==
nullptr) {
1877 word_res = pr_it.
word();
1934#ifndef DISABLED_LEGACY_ENGINE
1975 const std::unique_ptr<const PageIterator> it(
AnalyseLayout());
1976 if (it ==
nullptr) {
1986 *out_slope =
static_cast<float>(y2 - y1) / (x2 - x1);
1987 *out_offset =
static_cast<int>(y1 - *out_slope * x1);
1990 int left, top, right, bottom;
1991 if (!it->BoundingBox(
RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1999 *out_offset += bottom - std::max(left_y, right_y);
2002 *out_slope = -*out_slope;
2028 for (
int i = 0;
i < num_subs; ++
i) {
2037 tprintf(
"Please call Init before attempting to set an image.\n");
2055 if (*pix !=
nullptr) {
2064 "Warning: User defined image dpi is outside of expected range "
2074 tprintf(
"Warning: Invalid resolution %d dpi. Using %d instead.\n",
2083 Image pix_binary(*pix);
2097 auto [ok, pix_grey, pix_binary, pix_thresholds] =
thresholder_->
Threshold(
this, thresholding_method);
2119 "Estimated internal resolution %d out of range! "
2120 "Corrected to %d.\n",
2130 tprintf(
"Please call SetImage before attempting recognition.\n");
2141#ifndef DISABLED_LEGACY_ENGINE
2151#ifndef DISABLED_LEGACY_ENGINE
2157 tprintf(
"Warning: Could not set equation detector\n");
2166#ifndef DISABLED_LEGACY_ENGINE
2168 if (strcmp(
language_.c_str(),
"osd") == 0) {
2175 "Warning: Auto orientation and script detection requested,"
2176 " but data path is undefined\n");
2180 nullptr, 0,
nullptr,
nullptr,
false, &mgr) == 0) {
2185 "Warning: Auto orientation and script detection requested,"
2186 " but osd language failed to load\n");
2239 int total_length = 2;
2240 int total_blobs = 0;
2245 if (choice !=
nullptr) {
2246 total_blobs += choice->
length() + 2;
2255 if (blob_count !=
nullptr) {
2256 *blob_count = total_blobs;
2258 return total_length;
2261#ifndef DISABLED_LEGACY_ENGINE
2283 tesseract_->min_orientation_margin.set_value(margin);
2301 delete[] * block_orientation;
2302 *block_orientation =
nullptr;
2303 delete[] * vertical_writing;
2304 *vertical_writing =
nullptr;
2307 block_it.move_to_first();
2309 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2310 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2316 tprintf(
"WARNING: Found no blocks\n");
2319 *block_orientation =
new int[num_blocks];
2320 *vertical_writing =
new bool[num_blocks];
2321 block_it.move_to_first();
2323 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2324 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2327 FCOORD re_rotation = block_it.data()->re_rotation();
2328 float re_theta = re_rotation.
angle();
2329 FCOORD classify_rotation = block_it.data()->classify_rotation();
2330 float classify_theta = classify_rotation.
angle();
2331 double rot_theta = -(re_theta - classify_theta) * 2.0 / M_PI;
2332 if (rot_theta < 0) {
2335 int num_rotations =
static_cast<int>(rot_theta + 0.5);
2336 (*block_orientation)[
i] = num_rotations;
2339 (*vertical_writing)[
i] = classify_rotation.
y() != 0.0f;
2345 int debug_level = 0;
2352 std::vector<ParagraphModel *> models;
2381 for (ptr = text; *ptr; ptr++) {
struct TessResultRenderer TessResultRenderer
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
#define STRING_VAR(name, val, comment)
@ W_FUZZY_NON
fuzzy nonspace
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
const char kTesseractReject
const int kBytesPerBoxFileLine
TESS_API int OrientationIdToValue(const int &id)
bool PSM_OSD_ENABLED(int pageseg_mode)
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
void tprintf(const char *format,...)
int IntCastRounded(double x)
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
void chomp_string(char *str)
const int kBytesPer64BitNumber
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
const int kMaxBytesPerLine
int orientation_and_script_detection(const char *filename, OSResults *, tesseract::Tesseract *)
constexpr int kMaxCredibleResolution
std::string HOcrEscape(const char *text)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
const int kBytesPerNumber
tesseract::ParamsVectors * GlobalParams()
const int kNumbersPerBlob
bool(*)(const char *filename, std::vector< char > *data) FileReader
constexpr int kMinCredibleResolution
bool PTIsTextType(PolyBlockType type)
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
EquationDetect * equ_detect_
The equation detector.
const char * GetInitLanguagesAsString() const
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
const char * GetInputName()
std::string input_file_
Name used by training code.
virtual bool Threshold(Pix **pix)
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
int Recognize(ETEXT_DESC *monitor)
PAGE_RES * page_res_
The page-level data.
void SetPageSegMode(PageSegMode mode)
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
bool SetDebugVariable(const char *name, const char *value)
const char * GetDatapath()
bool GetVariableAsString(const char *name, std::string *val) const
void InitForAnalysePage()
Tesseract * tesseract_
The underlying data object.
bool GetIntVariable(const char *name, int *value) const
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
void SetRectangle(int left, int top, int width, int height)
MutableIterator * GetMutableIterator()
int IsValidWord(const char *word) const
bool SetVariable(const char *name, const char *value)
bool IsValidCharacter(const char *utf8_character) const
void DetectParagraphs(bool after_text_recognition)
static const char * Version()
Boxa * GetWords(Pixa **pixa)
std::string language_
Last initialized language.
int * AllWordConfidences()
int GetSourceYResolution()
void GetAvailableLanguagesAsVector(std::vector< std::string > *langs) const
void SetSourceResolution(int ppi)
void ReadDebugConfigFile(const char *filename)
ResultIterator * GetIterator()
bool GetTextDirection(int *out_offset, float *out_slope)
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
int TextLength(int *blob_count) const
std::string datapath_
Current location of tessdata.
int GetThresholdedImageScaleFactor() const
bool DetectOS(OSResults *)
PageSegMode GetPageSegMode() const
static void ClearPersistentCache()
std::vector< ParagraphModel * > * paragraph_models_
void SetDictFunc(DictFunc f)
bool recognition_done_
page_res_ contains recognition data.
const Dawg * GetDawg(int i) const
FileReader reader_
Reads files from any filesystem.
char * GetTSVText(int page_number)
void SetInputName(const char *name)
char * GetOsdText(int page_number)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
OcrEngineMode oem() const
void PrintVariables(FILE *fp) const
void GetLoadedLanguagesAsVector(std::vector< std::string > *langs) const
ImageThresholder * thresholder_
Image thresholding module.
static size_t getOpenCLDevice(void **device)
std::string output_file_
Name used by debug code.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
PageIterator * AnalyseLayout()
char * GetBoxText(int page_number)
const char * GetStringVariable(const char *name) const
void ReadConfigFile(const char *filename)
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
BLOCK_LIST * block_list_
The page layout.
void set_min_orientation_margin(double margin)
Boxa * GetStrips(Pixa **pixa, int **blockids)
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
void PrintFontsTable(FILE *fp) const
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
LTRResultIterator * GetLTRIterator()
Tesseract * osd_tesseract_
For orientation & script detection.
bool GetBoolVariable(const char *name, bool *value) const
void ClearAdaptiveClassifier()
bool GetDoubleVariable(const char *name, double *value) const
Pix * GetThresholdedImage()
const char * GetUnichar(int unichar_id) const
Boxa * GetConnectedComponents(Pixa **cc)
void SetInputImage(Pix *pix)
void SetOutputName(const char *name)
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Boxa * GetRegions(Pixa **pixa)
char * GetUTF8Text(PageIteratorLevel level) const
void * cancel_this
monitor-aware progress callback
void set_deadline_msecs(int32_t deadline_msecs)
CANCEL_FUNC cancel
for errcode use
TESS_API int get_best_script(int orientation_id) const
virtual bool Next(PageIteratorLevel level)
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool AddImage(TessBaseAPI *api)
bool BeginDocument(const char *title)
bool Next(PageIteratorLevel level) override
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const std::string &arg0, const std::string &textbase, const std::string &language, OcrEngineMode oem, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, TessdataManager *mgr)
void set_unlv_suspects(WERD_RES *word)
void set_pix_grey(Image grey_pix)
void SetBlackAndWhitelist()
bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename, BLOCK_LIST *block_list)
PAGE_RES * ApplyBoxes(const char *filename, bool find_segmentation, BLOCK_LIST *block_list)
int num_sub_langs() const
void TidyUp(PAGE_RES *page_res)
void read_config_file(const char *filename, SetParamConstraint constraint)
void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res)
void ReSegmentByClassification(PAGE_RES *page_res)
void set_pix_thresholds(Image thresholds)
Dict & getDict() override
Image pix_original() const
Image * mutable_pix_binary()
void recog_training_segmented(const char *filename, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
Tesseract * get_sub_lang(int index) const
void set_pix_original(Image original_pix)
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
void ResetAdaptiveClassifier()
int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
void set_source_resolution(int ppi)
void CorrectClassifyWords(PAGE_RES *page_res)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
void ResetDocumentDictionary()
FILE * init_recog_training(const char *filename)
PAGE_RES * SetupApplyBoxes(const std::vector< TBOX > &boxes, BLOCK_LIST *block_list)
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
int GetScaledEstimatedResolution() const
virtual Image GetPixRectThresholds()
int GetSourceYResolution() const
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
bool IsEmpty() const
Return true if no image has been set.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
int GetScaledYResolution() const
virtual std::tuple< bool, Image, Image, Image > Threshold(TessBaseAPI *api, ThresholdMethod method)
void SetRectangle(int left, int top, int width, int height)
virtual Image GetPixRectGrey()
int GetScaleFactor() const
virtual bool ThresholdToPix(Image *pix)
Returns false on error.
bool IsBinary() const
Returns true if the source image is binary.
void SetSourceYResolution(int ppi)
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
bool is_fixed_pitch() const
WERD_CHOICE * best_choice
CRUNCH_MODE unlv_crunch_mode
void BestChoiceToCorrectText()
WERD_RES * restart_page()
float angle() const
find angle
const std::string & unichar_lengths() const
std::string & unichar_string()
bool flag(WERD_FLAGS mask) const
void set_text(const char *new_text)
std::vector< BoolParam * > bool_params
std::vector< StringParam * > string_params
std::vector< IntParam * > int_params
std::vector< DoubleParam * > double_params
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, std::string *value)
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
bool LoadMemBuffer(const char *name, const char *data, int size)
const char * get_script_from_script_id(int id) const
const char * id_to_unichar(UNICHAR_ID id) const
bool contains_unichar(const char *const unichar_repr) const
void LearnWord(const char *fontname, WERD_RES *word)
bool WriteTRFile(const char *filename)
void InitAdaptiveClassifier(TessdataManager *mgr)
UnicityTable< FontInfo > & get_fontinfo_table()
static DawgCache * GlobalDawgCache()
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
WERD_CHOICE * prev_word_best_choice_