All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Advanced API

Functions

void tesseract::TessBaseAPI::SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void tesseract::TessBaseAPI::SetImage (Pix *pix)
 
void tesseract::TessBaseAPI::SetSourceResolution (int ppi)
 
void tesseract::TessBaseAPI::SetRectangle (int left, int top, int width, int height)
 
void tesseract::TessBaseAPI::SetThresholder (ImageThresholder *thresholder)
 
Pix * tesseract::TessBaseAPI::GetThresholdedImage ()
 
Boxa * tesseract::TessBaseAPI::GetRegions (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetStrips (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetWords (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetConnectedComponents (Pixa **cc)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor () const
 
void tesseract::TessBaseAPI::DumpPGM (const char *filename)
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ()
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout (bool merge_similar_words)
 
int tesseract::TessBaseAPI::Recognize (ETEXT_DESC *monitor)
 
int tesseract::TessBaseAPI::RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool tesseract::TessBaseAPI::ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * tesseract::TessBaseAPI::GetIterator ()
 
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ()
 
char * tesseract::TessBaseAPI::GetUTF8Text ()
 
char * tesseract::TessBaseAPI::GetHOCRText (int page_number)
 
char * tesseract::TessBaseAPI::GetBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetUNLVText ()
 
int tesseract::TessBaseAPI::MeanTextConf ()
 
int * tesseract::TessBaseAPI::AllWordConfidences ()
 
bool tesseract::TessBaseAPI::AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void tesseract::TessBaseAPI::Clear ()
 
void tesseract::TessBaseAPI::End ()
 
static void tesseract::TessBaseAPI::ClearPersistentCache ()
 
int tesseract::TessBaseAPI::IsValidWord (const char *word)
 
bool tesseract::TessBaseAPI::IsValidCharacter (const char *utf8_character)
 
bool tesseract::TessBaseAPI::GetTextDirection (int *out_offset, float *out_slope)
 
void tesseract::TessBaseAPI::SetDictFunc (DictFunc f)
 
void tesseract::TessBaseAPI::SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
void tesseract::TessBaseAPI::SetFillLatticeFunc (FillLatticeFunc f)
 
bool tesseract::TessBaseAPI::DetectOS (OSResults *)
 
void tesseract::TessBaseAPI::GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
static ROW * tesseract::TessBaseAPI::FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
void tesseract::TessBaseAPI::RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * tesseract::TessBaseAPI::GetUnichar (int unichar_id)
 
const Dawg * tesseract::TessBaseAPI::GetDawg (int i) const
 
int tesseract::TessBaseAPI::NumDawgs () const
 
static ROW * tesseract::TessBaseAPI::MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * tesseract::TessBaseAPI::MakeTBLOB (Pix *pix)
 
static void tesseract::TessBaseAPI::NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
Tesseract *const tesseract::TessBaseAPI::tesseract () const
 
OcrEngineMode const tesseract::TessBaseAPI::oem () const
 
void tesseract::TessBaseAPI::InitTruthCallback (TruthCallback *cb)
 
CubeRecoContext * tesseract::TessBaseAPI::GetCubeRecoContext () const
 
void tesseract::TessBaseAPI::set_min_orientation_margin (double margin)
 
void tesseract::TessBaseAPI::GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ()
 
static void tesseract::TessBaseAPI::DeleteBlockList (BLOCK_LIST *block_list)
 

Detailed Description

The following methods break TesseractRect into pieces, so you can get hold of the thresholded image, get the text in different formats, get bounding boxes, confidences etc.

Function Documentation

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 ("l i k e t h i s"), so that the boundaries of the graphemes can be determined. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaptation was not possible for some reason.

Definition at line 1784 of file baseapi.cpp.

1784  {
1785  int debug = 0;
1786  GetIntVariable("applybox_debug", &debug);
1787  bool success = true;
1788  PageSegMode current_psm = GetPageSegMode();
1790  SetVariable("classify_enable_learning", "0");
1791  char* text = GetUTF8Text();
1792  if (debug) {
1793  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
1794  }
1795  if (text != NULL) {
1796  PAGE_RES_IT it(page_res_);
1797  WERD_RES* word_res = it.word();
1798  if (word_res != NULL) {
1799  word_res->word->set_text(wordstr);
1800  } else {
1801  success = false;
1802  }
1803  // Check to see if text matches wordstr.
1804  int w = 0;
1805  int t = 0;
1806  for (t = 0; text[t] != '\0'; ++t) {
1807  if (text[t] == '\n' || text[t] == ' ')
1808  continue;
1809  while (wordstr[w] != '\0' && wordstr[w] == ' ')
1810  ++w;
1811  if (text[t] != wordstr[w])
1812  break;
1813  ++w;
1814  }
1815  if (text[t] != '\0' || wordstr[w] != '\0') {
1816  // No match.
1817  delete page_res_;
1818  GenericVector<TBOX> boxes;
1822  PAGE_RES_IT pr_it(page_res_);
1823  if (pr_it.word() == NULL)
1824  success = false;
1825  else
1826  word_res = pr_it.word();
1827  } else {
1828  word_res->BestChoiceToCorrectText();
1829  }
1830  if (success) {
1831  tesseract_->EnableLearning = true;
1832  tesseract_->LearnWord(NULL, word_res);
1833  }
1834  delete [] text;
1835  } else {
1836  success = false;
1837  }
1838  SetPageSegMode(current_psm);
1839  return success;
1840 }
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:228
void set_text(const char *new_text)
Definition: werd.h:126
#define tprintf(...)
Definition: tprintf.h:31
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:216
CMD_EVENTS mode
Definition: pgedit.cpp:116
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:467
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
WERD * word
Definition: pageres.h:175
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define NULL
Definition: host.h:144
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:460
void BestChoiceToCorrectText()
Definition: pageres.cpp:917
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509
int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1750 of file baseapi.cpp.

1750  {
1751  if (tesseract_ == NULL ||
1752  (!recognition_done_ && Recognize(NULL) < 0))
1753  return NULL;
1754  int n_word = 0;
1755  PAGE_RES_IT res_it(page_res_);
1756  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
1757  n_word++;
1758 
1759  int* conf = new int[n_word+1];
1760  n_word = 0;
1761  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
1762  WERD_RES *word = res_it.word();
1763  WERD_CHOICE* choice = word->best_choice;
1764  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1765  // This is the eq for converting Tesseract confidence to 1..100
1766  if (w_conf < 0) w_conf = 0;
1767  if (w_conf > 100) w_conf = 100;
1768  conf[n_word++] = w_conf;
1769  }
1770  conf[n_word] = -1;
1771  return conf;
1772 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
WERD_CHOICE * best_choice
Definition: pageres.h:219
float certainty() const
Definition: ratngs.h:327
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
WERD * word
Definition: pageres.h:175
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
PageIterator* tesseract::TessBaseAPI::AnalyseLayout ( )
inline

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns NULL on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 498 of file baseapi.h.

498  {
499  return AnalyseLayout(false);
500  }
PageIterator * AnalyseLayout()
Definition: baseapi.h:498
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns NULL on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 812 of file baseapi.cpp.

812  {
813  if (FindLines() == 0) {
814  if (block_list_->empty())
815  return NULL; // The page was empty.
816  page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
817  DetectParagraphs(false);
818  return new PageIterator(
822  }
823  return NULL;
824 }
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2407
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2063
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 1848 of file baseapi.cpp.

1848  {
1849  if (thresholder_ != NULL)
1850  thresholder_->Clear();
1851  ClearResults();
1853 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:936
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 1920 of file baseapi.cpp.

1920  {
1922 }
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:186
void DeleteUnusedDawgs()
Definition: dawg_cache.h:46
void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep BLOCK_LIST pointer opaque and let go of including the other headers.

Definition at line 2273 of file baseapi.cpp.

2273  {
2274  delete block_list;
2275 }
bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2184 of file baseapi.cpp.

2184  {
2185  if (tesseract_ == NULL)
2186  return false;
2187  ClearResults();
2188  if (tesseract_->pix_binary() == NULL)
2190  if (input_file_ == NULL)
2191  input_file_ = new STRING(kInputFile);
2193 }
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2022
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:189
const char * kInputFile
Definition: baseapi.cpp:97
Definition: strngs.h:44
#define NULL
Definition: host.h:144
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
void tesseract::TessBaseAPI::DumpPGM ( const char *  filename)

Dump the internal binary image to a PGM file.

Deprecated:
Use GetThresholdedImage and write the image using pixWrite instead if possible.

Dump the internal binary image to a PGM file.

Definition at line 732 of file baseapi.cpp.

732  {
733  if (tesseract_ == NULL)
734  return;
735  FILE *fp = fopen(filename, "wb");
736  Pix* pix = tesseract_->pix_binary();
737  int width = pixGetWidth(pix);
738  int height = pixGetHeight(pix);
739  l_uint32* data = pixGetData(pix);
740  fprintf(fp, "P5 %d %d 255\n", width, height);
741  for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
742  for (int x = 0; x < width; ++x) {
743  uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255;
744  fwrite(&b, 1, 1, fp);
745  }
746  }
747  fclose(fp);
748 }
#define NULL
Definition: host.h:144
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
unsigned char uinT8
Definition: host.h:99
void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 1861 of file baseapi.cpp.

1861  {
1862  if (thresholder_ != NULL) {
1863  delete thresholder_;
1864  thresholder_ = NULL;
1865  }
1866  if (page_res_ != NULL) {
1867  delete page_res_;
1868  page_res_ = NULL;
1869  }
1870  if (block_list_ != NULL) {
1871  delete block_list_;
1872  block_list_ = NULL;
1873  }
1874  if (paragraph_models_ != NULL) {
1876  delete paragraph_models_;
1878  }
1879  if (tesseract_ != NULL) {
1880  delete tesseract_;
1881  if (osd_tesseract_ == tesseract_)
1882  osd_tesseract_ = NULL;
1883  tesseract_ = NULL;
1884  }
1885  if (osd_tesseract_ != NULL) {
1886  delete osd_tesseract_;
1887  osd_tesseract_ = NULL;
1888  }
1889  if (equ_detect_ != NULL) {
1890  delete equ_detect_;
1891  equ_detect_ = NULL;
1892  }
1893  if (input_file_ != NULL) {
1894  delete input_file_;
1895  input_file_ = NULL;
1896  }
1897  if (input_image_ != NULL) {
1898  pixDestroy(&input_image_);
1899  input_image_ = NULL;
1900  }
1901  if (output_file_ != NULL) {
1902  delete output_file_;
1903  output_file_ = NULL;
1904  }
1905  if (datapath_ != NULL) {
1906  delete datapath_;
1907  datapath_ = NULL;
1908  }
1909  if (language_ != NULL) {
1910  delete language_;
1911  language_ = NULL;
1912  }
1913 }
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:840
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:842
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:846
STRING * language_
Last initialized language.
Definition: baseapi.h:849
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
void delete_data_pointers()
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:848
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:839
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2261 of file baseapi.cpp.

2261  {
2262  FindLines();
2263  BLOCK_LIST* result = block_list_;
2264  block_list_ = NULL;
2265  return result;
2266 }
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2063
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
#define NULL
Definition: host.h:144
ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns NULL.

Definition at line 2573 of file baseapi.cpp.

2574  {
2575  TBOX box(left, bottom, right, top);
2576  BLOCK_IT b_it(blocks);
2577  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2578  BLOCK* block = b_it.data();
2579  if (!box.major_overlap(block->bounding_box()))
2580  continue;
2581  ROW_IT r_it(block->row_list());
2582  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2583  ROW* row = r_it.data();
2584  if (!box.major_overlap(row->bounding_box()))
2585  continue;
2586  WERD_IT w_it(row->word_list());
2587  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2588  WERD* word = w_it.data();
2589  if (box.major_overlap(word->bounding_box()))
2590  return row;
2591  }
2592  }
2593  }
2594  return NULL;
2595 }
TBOX bounding_box() const
Definition: werd.cpp:160
Definition: ocrrow.h:32
Definition: ocrblock.h:30
TBOX bounding_box() const
Definition: ocrrow.h:85
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
Definition: werd.h:60
Definition: rect.h:30
#define NULL
Definition: host.h:144
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
WERD_LIST * word_list()
Definition: ocrrow.h:52
void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextlines().

Definition at line 2213 of file baseapi.cpp.

2214  {
2215  delete[] *block_orientation;
2216  *block_orientation = NULL;
2217  delete[] *vertical_writing;
2218  *vertical_writing = NULL;
2219  BLOCK_IT block_it(block_list_);
2220 
2221  block_it.move_to_first();
2222  int num_blocks = 0;
2223  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2224  if (!block_it.data()->poly_block()->IsText()) {
2225  continue;
2226  }
2227  ++num_blocks;
2228  }
2229  if (!num_blocks) {
2230  tprintf("WARNING: Found no blocks\n");
2231  return;
2232  }
2233  *block_orientation = new int[num_blocks];
2234  *vertical_writing = new bool[num_blocks];
2235  block_it.move_to_first();
2236  int i = 0;
2237  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2238  block_it.forward()) {
2239  if (!block_it.data()->poly_block()->IsText()) {
2240  continue;
2241  }
2242  FCOORD re_rotation = block_it.data()->re_rotation();
2243  float re_theta = re_rotation.angle();
2244  FCOORD classify_rotation = block_it.data()->classify_rotation();
2245  float classify_theta = classify_rotation.angle();
2246  double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
2247  if (rot_theta < 0) rot_theta += 4;
2248  int num_rotations = static_cast<int>(rot_theta + 0.5);
2249  (*block_orientation)[i] = num_rotations;
2250  // The classify_rotation is non-zero only if the text has vertical
2251  // writing direction.
2252  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2253  ++i;
2254  }
2255 }
#define tprintf(...)
Definition: tprintf.h:31
float angle() const
find angle
Definition: points.h:249
#define PI
Definition: const.h:19
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
float y() const
Definition: points.h:212
#define NULL
Definition: host.h:144
Definition: points.h:189
char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Returned string must be freed with the delete [] operator. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file.

The recognized text is returned as a char* which is coded as a UTF8 box file and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the box file.

Definition at line 1581 of file baseapi.cpp.

1581  {
1582  if (tesseract_ == NULL ||
1583  (!recognition_done_ && Recognize(NULL) < 0))
1584  return NULL;
1585  int blob_count;
1586  int utf8_length = TextLength(&blob_count);
1587  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1589  char* result = new char[total_length];
1590  strcpy(result, "\0");
1591  int output_length = 0;
1592  LTRResultIterator* it = GetLTRIterator();
1593  do {
1594  int left, top, right, bottom;
1595  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1596  char* text = it->GetUTF8Text(RIL_SYMBOL);
1597  // Tesseract uses space for recognition failure. Fix to a reject
1598  // character, kTesseractReject so we don't create illegal box files.
1599  for (int i = 0; text[i] != '\0'; ++i) {
1600  if (text[i] == ' ')
1601  text[i] = kTesseractReject;
1602  }
1603  snprintf(result + output_length, total_length - output_length,
1604  "%s %d %d %d %d %d\n",
1605  text, left, image_height_ - bottom,
1606  right, image_height_ - top, page_number);
1607  output_length += strlen(result + output_length);
1608  delete [] text;
1609  // Just in case...
1610  if (output_length + kMaxBytesPerLine > total_length)
1611  break;
1612  }
1613  } while (it->Next(RIL_SYMBOL));
1614  delete it;
1615  return result;
1616 }
const char kTesseractReject
Definition: baseapi.cpp:88
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1564
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2154
const int kMaxBytesPerLine
Definition: baseapi.cpp:1573
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1254
Boxa * tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not NULL, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 642 of file baseapi.cpp.

646  {
647  PageIterator* page_it = GetIterator();
648  if (page_it == NULL)
649  page_it = AnalyseLayout();
650  if (page_it == NULL)
651  return NULL; // Failed.
652 
653  // Count the components to get a size for the arrays.
654  int component_count = 0;
655  int left, top, right, bottom;
656 
657  TessResultCallback<bool>* get_bbox = NULL;
658  if (raw_image) {
659  // Get bounding box in original raw image with padding.
661  level, raw_padding,
662  &left, &top, &right, &bottom);
663  } else {
664  // Get bounding box from binarized imaged. Note that this could be
665  // differently scaled from the original image.
666  get_bbox = NewPermanentTessCallback(page_it,
668  level, &left, &top, &right, &bottom);
669  }
670  do {
671  if (get_bbox->Run() &&
672  (!text_only || PTIsTextType(page_it->BlockType())))
673  ++component_count;
674  } while (page_it->Next(level));
675 
676  Boxa* boxa = boxaCreate(component_count);
677  if (pixa != NULL)
678  *pixa = pixaCreate(component_count);
679  if (blockids != NULL)
680  *blockids = new int[component_count];
681  if (paraids != NULL)
682  *paraids = new int[component_count];
683 
684  int blockid = 0;
685  int paraid = 0;
686  int component_index = 0;
687  page_it->Begin();
688  do {
689  if (get_bbox->Run() &&
690  (!text_only || PTIsTextType(page_it->BlockType()))) {
691  Box* lbox = boxCreate(left, top, right - left, bottom - top);
692  boxaAddBox(boxa, lbox, L_INSERT);
693  if (pixa != NULL) {
694  Pix* pix = NULL;
695  if (raw_image) {
696  pix = page_it->GetImage(level, raw_padding, input_image_,
697  &left, &top);
698  } else {
699  pix = page_it->GetBinaryImage(level);
700  }
701  pixaAddPix(*pixa, pix, L_INSERT);
702  pixaAddBox(*pixa, lbox, L_CLONE);
703  }
704  if (paraids != NULL) {
705  (*paraids)[component_index] = paraid;
706  if (page_it->IsAtFinalElement(RIL_PARA, level))
707  ++paraid;
708  }
709  if (blockids != NULL) {
710  (*blockids)[component_index] = blockid;
711  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
712  ++blockid;
713  paraid = 0;
714  }
715  }
716  ++component_index;
717  }
718  } while (page_it->Next(level));
719  delete page_it;
720  delete get_bbox;
721  return boxa;
722 }
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
PageIterator * AnalyseLayout()
Definition: baseapi.h:498
ResultIterator * GetIterator()
Definition: baseapi.cpp:1271
virtual R Run()=0
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:846
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
#define NULL
Definition: host.h:144
Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 462 of file baseapi.h.

464  {
465  return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
466  }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
#define NULL
Definition: host.h:144
Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the Pixa array.

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 630 of file baseapi.cpp.

630  {
631  return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
632 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
#define NULL
Definition: host.h:144
CubeRecoContext * tesseract::TessBaseAPI::GetCubeRecoContext ( ) const

Return a pointer to underlying CubeRecoContext object if present.

Definition at line 2638 of file baseapi.cpp.

2638  {
2639  return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
2640 }
CubeRecoContext * GetCubeRecoContext()
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2626 of file baseapi.cpp.

2626  {
2627  if (tesseract_ == NULL || i >= NumDawgs()) return NULL;
2628  return tesseract_->getDict().GetDawg(i);
2629 }
int NumDawgs() const
Definition: baseapi.cpp:2632
Dict & getDict()
Definition: classify.h:65
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:406
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB blob,
INT_FEATURE_STRUCT int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input blob.

This method returns the features associated with the input blob.

Definition at line 2545 of file baseapi.cpp.

2548  {
2549  GenericVector<int> outline_counts;
2552  INT_FX_RESULT_STRUCT fx_info;
2553  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2554  &cn_features, &fx_info, &outline_counts);
2555  if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) {
2556  *num_features = 0;
2557  return; // Feature extraction failed.
2558  }
2559  *num_features = cn_features.size();
2560  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2561  // TODO(rays) Pass outline_counts back and simplify the calling code.
2562  if (feature_outline_index != NULL) {
2563  int f = 0;
2564  for (int i = 0; i < outline_counts.size(); ++i) {
2565  while (f < outline_counts[i])
2566  feature_outline_index[f++] = i;
2567  }
2568  }
2569 }
int size() const
Definition: genericvector.h:72
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based.

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays.

Definition at line 1399 of file baseapi.cpp.

1399  {
1400  if (tesseract_ == NULL ||
1401  (page_res_ == NULL && Recognize(NULL) < 0))
1402  return NULL;
1403 
1404  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1405  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1406  bool font_info = false;
1407  GetBoolVariable("hocr_font_info", &font_info);
1408 
1409  STRING hocr_str("");
1410 
1411  if (input_file_ == NULL)
1412  SetInputName(NULL);
1413 
1414 #ifdef _WIN32
1415  // convert input name from ANSI encoding to utf-8
1416  int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1417  NULL, NULL);
1418  wchar_t *uni16_str = new WCHAR[str16_len];
1419  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1420  uni16_str, str16_len);
1421  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
1422  NULL, NULL, NULL);
1423  char *utf8_str = new char[utf8_len];
1424  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1425  utf8_len, NULL, NULL);
1426  *input_file_ = utf8_str;
1427  delete[] uni16_str;
1428  delete[] utf8_str;
1429 #endif
1430 
1431  hocr_str.add_str_int(" <div class='ocr_page' id='page_", page_id);
1432  hocr_str += "' title='image \"";
1433  if (input_file_) {
1434  hocr_str += HOcrEscape(input_file_->string());
1435  } else {
1436  hocr_str += "unknown";
1437  }
1438  hocr_str.add_str_int("\"; bbox ", rect_left_);
1439  hocr_str.add_str_int(" ", rect_top_);
1440  hocr_str.add_str_int(" ", rect_width_);
1441  hocr_str.add_str_int(" ", rect_height_);
1442  hocr_str.add_str_int("; ppageno ", page_number);
1443  hocr_str += "'>\n";
1444 
1445  ResultIterator *res_it = GetIterator();
1446  while (!res_it->Empty(RIL_BLOCK)) {
1447  if (res_it->Empty(RIL_WORD)) {
1448  res_it->Next(RIL_WORD);
1449  continue;
1450  }
1451 
1452  // Open any new block/paragraph/textline.
1453  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1454  hocr_str.add_str_int(" <div class='ocr_carea' id='block_", page_id);
1455  hocr_str.add_str_int("_", bcnt);
1456  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1457  }
1458  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1459  if (res_it->ParagraphIsLtr()) {
1460  hocr_str.add_str_int("\n <p class='ocr_par' dir='ltr' id='par_",
1461  page_id);
1462  hocr_str.add_str_int("_", pcnt);
1463  } else {
1464  hocr_str.add_str_int("\n <p class='ocr_par' dir='rtl' id='par_",
1465  page_id);
1466  hocr_str.add_str_int("_", pcnt);
1467  }
1468  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1469  }
1470  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1471  hocr_str.add_str_int("\n <span class='ocr_line' id='line_", page_id);
1472  hocr_str.add_str_int("_", lcnt);
1473  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1474  }
1475 
1476  // Now, process the word...
1477  hocr_str.add_str_int("<span class='ocrx_word' id='word_", page_id);
1478  hocr_str.add_str_int("_", wcnt);
1479  int left, top, right, bottom;
1480  bool bold, italic, underlined, monospace, serif, smallcaps;
1481  int pointsize, font_id;
1482  const char *font_name;
1483  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1484  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1485  &monospace, &serif, &smallcaps,
1486  &pointsize, &font_id);
1487  hocr_str.add_str_int("' title='bbox ", left);
1488  hocr_str.add_str_int(" ", top);
1489  hocr_str.add_str_int(" ", right);
1490  hocr_str.add_str_int(" ", bottom);
1491  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1492  if (font_info) {
1493  if (font_name) {
1494  hocr_str += "; x_font ";
1495  hocr_str += HOcrEscape(font_name);
1496  }
1497  hocr_str.add_str_int("; x_fsize ", pointsize);
1498  }
1499  hocr_str += "'";
1500  if (res_it->WordRecognitionLanguage()) {
1501  hocr_str += " lang='";
1502  hocr_str += res_it->WordRecognitionLanguage();
1503  hocr_str += "'";
1504  }
1505  switch (res_it->WordDirection()) {
1506  case DIR_LEFT_TO_RIGHT: hocr_str += " dir='ltr'"; break;
1507  case DIR_RIGHT_TO_LEFT: hocr_str += " dir='rtl'"; break;
1508  default: // Do nothing.
1509  break;
1510  }
1511  hocr_str += ">";
1512  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1513  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1514  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1515  if (bold) hocr_str += "<strong>";
1516  if (italic) hocr_str += "<em>";
1517  do {
1518  const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
1519  if (grapheme && grapheme[0] != 0) {
1520  hocr_str += HOcrEscape(grapheme);
1521  }
1522  delete []grapheme;
1523  res_it->Next(RIL_SYMBOL);
1524  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1525  if (italic) hocr_str += "</em>";
1526  if (bold) hocr_str += "</strong>";
1527  hocr_str += "</span> ";
1528  wcnt++;
1529  // Close any ending block/paragraph/textline.
1530  if (last_word_in_line) {
1531  hocr_str += "\n </span>";
1532  lcnt++;
1533  }
1534  if (last_word_in_para) {
1535  hocr_str += "\n </p>\n";
1536  pcnt++;
1537  }
1538  if (last_word_in_block) {
1539  hocr_str += " </div>\n";
1540  bcnt++;
1541  }
1542  }
1543  hocr_str += " </div>\n";
1544 
1545  char *ret = new char[hocr_str.length() + 1];
1546  strcpy(ret, hocr_str.string());
1547  delete res_it;
1548  return ret;
1549 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
ResultIterator * GetIterator()
Definition: baseapi.cpp:1271
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
void SetInputName(const char *name)
Definition: baseapi.cpp:201
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2644
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
Definition: strngs.h:44
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * string() const
Definition: strngs.cpp:193
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1271 of file baseapi.cpp.

1271  {
1272  if (tesseract_ == NULL || page_res_ == NULL)
1273  return NULL;
1274  return ResultIterator::StartOfParagraph(LTRResultIterator(
1278 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1288 of file baseapi.cpp.

1288  {
1289  if (tesseract_ == NULL || page_res_ == NULL)
1290  return NULL;
1291  return new MutableIterator(page_res_, tesseract_,
1295 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 585 of file baseapi.cpp.

585  {
586  return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
587 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
#define NULL
Definition: host.h:144
Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 611 of file baseapi.cpp.

611  {
612  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
613 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 1939 of file baseapi.cpp.

1939  {
1940  PageIterator* it = AnalyseLayout();
1941  if (it == NULL) {
1942  return false;
1943  }
1944  int x1, x2, y1, y2;
1945  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
1946  // Calculate offset and slope (NOTE: Kind of ugly)
1947  if (x2 <= x1) x2 = x1 + 1;
1948  // Convert the point pair to slope/offset of the baseline (in image coords.)
1949  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
1950  *out_offset = static_cast<int>(y1 - *out_slope * x1);
1951  // Get the y-coord of the baseline at the left and right edges of the
1952  // textline's bounding box.
1953  int left, top, right, bottom;
1954  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1955  delete it;
1956  return false;
1957  }
1958  int left_y = IntCastRounded(*out_slope * left + *out_offset);
1959  int right_y = IntCastRounded(*out_slope * right + *out_offset);
1960  // Shift the baseline down so it passes through the nearest bottom-corner
1961  // of the textline's bounding box. This is the difference between the y
1962  // at the lowest (max) edge of the box and the actual box bottom.
1963  *out_offset += bottom - MAX(left_y, right_y);
1964  // Switch back to bottom-up tesseract coordinates. Requires negation of
1965  // the slope and height - offset for the offset.
1966  *out_slope = -*out_slope;
1967  *out_offset = rect_height_ - *out_offset;
1968  delete it;
1969 
1970  return true;
1971 }
#define MAX(x, y)
Definition: ndminx.h:24
PageIterator * AnalyseLayout()
Definition: baseapi.h:498
int IntCastRounded(double x)
Definition: helpers.h:172
#define NULL
Definition: host.h:144
Boxa * tesseract::TessBaseAPI::GetTextlines ( const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 597 of file baseapi.cpp.

598  {
599  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
600  pixa, blockids, paraids);
601 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 413 of file baseapi.h.

413  {
414  return GetTextlines(false, 0, pixa, blockids, NULL);
415  }
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:597
#define NULL
Definition: host.h:144
Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 572 of file baseapi.cpp.

572  {
573  if (tesseract_ == NULL || thresholder_ == NULL)
574  return NULL;
575  if (tesseract_->pix_binary() == NULL)
577  return pixClone(tesseract_->pix_binary());
578 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2022
#define NULL
Definition: host.h:144
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 724 of file baseapi.cpp.

724  {
725  if (thresholder_ == NULL) {
726  return 0;
727  }
728  return thresholder_->GetScaleFactor();
729 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
#define NULL
Definition: host.h:144
const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2621 of file baseapi.cpp.

2621  {
2622  return tesseract_->unicharset.id_to_unichar(unichar_id);
2623 }
UNICHARSET unicharset
Definition: ccutil.h:72
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes and must be freed with the delete [] operator.

Definition at line 1636 of file baseapi.cpp.

1636  {
1637  if (tesseract_ == NULL ||
1638  (!recognition_done_ && Recognize(NULL) < 0))
1639  return NULL;
1640  bool tilde_crunch_written = false;
1641  bool last_char_was_newline = true;
1642  bool last_char_was_tilde = false;
1643 
1644  int total_length = TextLength(NULL);
1645  PAGE_RES_IT page_res_it(page_res_);
1646  char* result = new char[total_length];
1647  char* ptr = result;
1648  for (page_res_it.restart_page(); page_res_it.word () != NULL;
1649  page_res_it.forward()) {
1650  WERD_RES *word = page_res_it.word();
1651  // Process the current word.
1652  if (word->unlv_crunch_mode != CR_NONE) {
1653  if (word->unlv_crunch_mode != CR_DELETE &&
1654  (!tilde_crunch_written ||
1655  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1656  word->word->space() > 0 &&
1657  !word->word->flag(W_FUZZY_NON) &&
1658  !word->word->flag(W_FUZZY_SP)))) {
1659  if (!word->word->flag(W_BOL) &&
1660  word->word->space() > 0 &&
1661  !word->word->flag(W_FUZZY_NON) &&
1662  !word->word->flag(W_FUZZY_SP)) {
1663  /* Write a space to separate from preceeding good text */
1664  *ptr++ = ' ';
1665  last_char_was_tilde = false;
1666  }
1667  if (!last_char_was_tilde) {
1668  // Write a reject char.
1669  last_char_was_tilde = true;
1670  *ptr++ = kUNLVReject;
1671  tilde_crunch_written = true;
1672  last_char_was_newline = false;
1673  }
1674  }
1675  } else {
1676  // NORMAL PROCESSING of non tilde crunched words.
1677  tilde_crunch_written = false;
1679  const char* wordstr = word->best_choice->unichar_string().string();
1680  const STRING& lengths = word->best_choice->unichar_lengths();
1681  int length = lengths.length();
1682  int i = 0;
1683  int offset = 0;
1684 
1685  if (last_char_was_tilde &&
1686  word->word->space() == 0 && wordstr[offset] == ' ') {
1687  // Prevent adjacent tilde across words - we know that adjacent tildes
1688  // within words have been removed.
1689  // Skip the first character.
1690  offset = lengths[i++];
1691  }
1692  if (i < length && wordstr[offset] != 0) {
1693  if (!last_char_was_newline)
1694  *ptr++ = ' ';
1695  else
1696  last_char_was_newline = false;
1697  for (; i < length; offset += lengths[i++]) {
1698  if (wordstr[offset] == ' ' ||
1699  wordstr[offset] == kTesseractReject) {
1700  *ptr++ = kUNLVReject;
1701  last_char_was_tilde = true;
1702  } else {
1703  if (word->reject_map[i].rejected())
1704  *ptr++ = kUNLVSuspect;
1705  UNICHAR ch(wordstr + offset, lengths[i]);
1706  int uni_ch = ch.first_uni();
1707  for (int j = 0; kUniChs[j] != 0; ++j) {
1708  if (kUniChs[j] == uni_ch) {
1709  uni_ch = kLatinChs[j];
1710  break;
1711  }
1712  }
1713  if (uni_ch <= 0xff) {
1714  *ptr++ = static_cast<char>(uni_ch);
1715  last_char_was_tilde = false;
1716  } else {
1717  *ptr++ = kUNLVReject;
1718  last_char_was_tilde = true;
1719  }
1720  }
1721  }
1722  }
1723  }
1724  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1725  /* Add a new line output */
1726  *ptr++ = '\n';
1727  tilde_crunch_written = false;
1728  last_char_was_newline = true;
1729  last_char_was_tilde = false;
1730  }
1731  }
1732  *ptr++ = '\n';
1733  *ptr = '\0';
1734  return result;
1735 }
const char kUNLVSuspect
Definition: baseapi.cpp:92
const char kTesseractReject
Definition: baseapi.cpp:88
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
const STRING & unichar_lengths() const
Definition: ratngs.h:531
inT32 length() const
Definition: strngs.cpp:188
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2154
const STRING & unichar_string() const
Definition: ratngs.h:524
Definition: werd.h:35
Definition: werd.h:36
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
const int kLatinChs[]
Definition: baseapi.cpp:1627
const char kUNLVReject
Definition: baseapi.cpp:90
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:307
WERD * word
Definition: pageres.h:175
uinT8 space()
Definition: werd.h:104
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
Definition: strngs.h:44
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * string() const
Definition: strngs.cpp:193
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
const int kUniChs[]
Definition: baseapi.cpp:1623
char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1298 of file baseapi.cpp.

1298  {
1299  if (tesseract_ == NULL ||
1300  (!recognition_done_ && Recognize(NULL) < 0))
1301  return NULL;
1302  STRING text("");
1303  ResultIterator *it = GetIterator();
1304  do {
1305  if (it->Empty(RIL_PARA)) continue;
1306  char *para_text = it->GetUTF8Text(RIL_PARA);
1307  text += para_text;
1308  delete []para_text;
1309  } while (it->Next(RIL_PARA));
1310  char* result = new char[text.length() + 1];
1311  strncpy(result, text.string(), text.length() + 1);
1312  delete it;
1313  return result;
1314 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
ResultIterator * GetIterator()
Definition: baseapi.cpp:1271
Definition: strngs.h:44
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 620 of file baseapi.cpp.

620  {
621  return GetComponentImages(RIL_WORD, true, pixa, NULL);
622 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback cb)
inline

Definition at line 736 of file baseapi.h.

736 { truth_cb_ = cb; }
TruthCallback * truth_cb_
Definition: baseapi.h:852
bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 1932 of file baseapi.cpp.

1932  {
1933  return tesseract_->unicharset.contains_unichar(utf8_character);
1934 }
UNICHARSET unicharset
Definition: ccutil.h:72
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 1928 of file baseapi.cpp.

1928  {
1929  return tesseract_->getDict().valid_word(word);
1930 }
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:705
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2295 of file baseapi.cpp.

2295  {
2296  int width = pixGetWidth(pix);
2297  int height = pixGetHeight(pix);
2298  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2299 
2300  // Create C_BLOBs from the page
2301  extract_edges(pix, &block);
2302 
2303  // Merge all C_BLOBs
2304  C_BLOB_LIST *list = block.blob_list();
2305  C_BLOB_IT c_blob_it(list);
2306  if (c_blob_it.empty())
2307  return NULL;
2308  // Move all the outlines to the first blob.
2309  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2310  for (c_blob_it.forward();
2311  !c_blob_it.at_first();
2312  c_blob_it.forward()) {
2313  C_BLOB *c_blob = c_blob_it.data();
2314  ol_it.add_list_after(c_blob->out_list());
2315  }
2316  // Convert the first blob to the output TBLOB.
2317  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2318 }
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:344
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
Definition: ocrblock.h:30
#define TRUE
Definition: capi.h:28
#define NULL
Definition: host.h:144
ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2278 of file baseapi.cpp.

2281  {
2282  inT32 xstarts[] = {-32000};
2283  double quad_coeffs[] = {0, 0, baseline};
2284  return new ROW(1,
2285  xstarts,
2286  quad_coeffs,
2287  xheight,
2288  ascender - (baseline + xheight),
2289  descender - baseline,
2290  0,
2291  0);
2292 }
Definition: ocrrow.h:32
int inT32
Definition: host.h:102
int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1738 of file baseapi.cpp.

1738  {
1739  int* conf = AllWordConfidences();
1740  if (!conf) return 0;
1741  int sum = 0;
1742  int *pt = conf;
1743  while (*pt >= 0) sum += *pt++;
1744  if (pt != conf) sum /= pt - conf;
1745  delete [] conf;
1746  return sum;
1747 }
void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB tblob,
ROW row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2325 of file baseapi.cpp.

2325  {
2326  TBOX box = tblob->bounding_box();
2327  float x_center = (box.left() + box.right()) / 2.0f;
2328  float baseline = row->base_line(x_center);
2329  float scale = kBlnXHeight / row->x_height();
2330  tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2331  0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL);
2332 }
const int kBlnXHeight
Definition: normalis.h:28
float x_height() const
Definition: ocrrow.h:61
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:413
inT16 right() const
Definition: rect.h:75
float base_line(float xpos) const
Definition: ocrrow.h:56
inT16 left() const
Definition: rect.h:68
const int kBlnBaselineOffset
Definition: normalis.h:29
Definition: rect.h:30
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2632 of file baseapi.cpp.

2632  {
2633  return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
2634 }
Dict & getDict()
Definition: classify.h:65
const int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:404
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
OcrEngineMode const tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 732 of file baseapi.h.

732  {
733  return last_oem_requested_;
734  }
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:850
bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1194 of file baseapi.cpp.

1196  {
1197  PERF_COUNT_START("ProcessPage")
1199  SetImage(pix);
1200  bool failed = false;
1201  if (timeout_millisec > 0) {
1202  // Running with a timeout.
1203  ETEXT_DESC monitor;
1204  monitor.cancel = NULL;
1205  monitor.cancel_this = NULL;
1206  monitor.set_deadline_msecs(timeout_millisec);
1207  // Now run the main recognition.
1208  failed = Recognize(&monitor) < 0;
1211  // Disabled character recognition.
1212  PageIterator* it = AnalyseLayout();
1213  if (it == NULL) {
1214  failed = true;
1215  } else {
1216  delete it;
1218  return true;
1219  }
1220  } else {
1221  // Normal layout and character recognition with no timeout.
1222  failed = Recognize(NULL) < 0;
1223  }
1225 #ifndef ANDROID_BUILD
1226  Pix* page_pix = GetThresholdedImage();
1227  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1228 #endif
1229  }
1230  if (failed && retry_config != NULL && retry_config[0] != '\0') {
1231  // Save current config variables before switching modes.
1232  FILE* fp = fopen(kOldVarsFile, "wb");
1233  PrintVariables(fp);
1234  fclose(fp);
1235  // Switch to alternate mode for retry.
1236  ReadConfigFile(retry_config);
1237  SetImage(pix);
1238  Recognize(NULL);
1239  // Restore saved config variables.
1241  }
1242 
1243  if (renderer && !failed) {
1244  failed = !renderer->AddImage(this);
1245  }
1247  return !failed;
1248 }
Pix * GetThresholdedImage()
Definition: baseapi.cpp:572
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:132
PageIterator * AnalyseLayout()
Definition: baseapi.h:498
void * cancel_this
Definition: ocrclass.h:120
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:446
CANCEL_FUNC cancel
Definition: ocrclass.h:119
void SetInputName(const char *name)
Definition: baseapi.cpp:201
Orientation and script detection only.
Definition: publictypes.h:152
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
const char * kOldVarsFile
Definition: baseapi.cpp:101
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:264
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:525
bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not NULL, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, only that single page will be processed. This works for a multi-page TIFF file or a filelist.

Returns true if successful, false on error.

Definition at line 1070 of file baseapi.cpp.

1072  {
1073  bool result =
1074  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1075  if (result) {
1078  tprintf("Write of TR file failed: %s\n", output_file_->string());
1079  return false;
1080  }
1081  }
1082  return result;
1083 }
#define tprintf(...)
Definition: tprintf.h:31
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:97
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
const char * string() const
Definition: strngs.cpp:193
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1097
bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Definition at line 1097 of file baseapi.cpp.

1100  {
1101 #ifndef ANDROID_BUILD
1102  PERF_COUNT_START("ProcessPages")
1103  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1104  if (stdInput) {
1105 #ifdef WIN32
1106  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1107  tprintf("ERROR: cin to binary: %s", strerror(errno));
1108 #endif // WIN32
1109  }
1110 
1111  if (stream_filelist) {
1112  return ProcessPagesFileList(stdin, NULL, retry_config,
1113  timeout_millisec, renderer,
1115  }
1116 
1117  // At this point we are officially in autodection territory.
1118  // That means we are going to buffer stdin so that it is
1119  // seekable. To keep code simple we will also buffer data
1120  // coming from a file.
1121  std::string buf;
1122  if (stdInput) {
1123  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1124  (std::istreambuf_iterator<char>()));
1125  } else {
1126  std::ifstream ifs(filename, std::ios::binary);
1127  if (ifs) {
1128  buf.assign((std::istreambuf_iterator<char>(ifs)),
1129  (std::istreambuf_iterator<char>()));
1130  } else {
1131  tprintf("ERROR: Can not open input file %s\n", filename);
1132  return false;
1133  }
1134  }
1135 
1136  // Here is our autodetection
1137  int format;
1138  const l_uint8 * data = reinterpret_cast<const l_uint8 *>(buf.c_str());
1139  findFileFormatBuffer(data, &format);
1140 
1141  // Maybe we have a filelist
1142  if (format == IFF_UNKNOWN) {
1143  STRING s(buf.c_str());
1144  return ProcessPagesFileList(NULL, &s, retry_config,
1145  timeout_millisec, renderer,
1147  }
1148 
1149  // Maybe we have a TIFF which is potentially multipage
1150  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1151  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1152  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1153  format == IFF_TIFF_ZIP);
1154 
1155  // Fail early if we can, before producing any output
1156  Pix *pix = NULL;
1157  if (!tiff) {
1158  pix = pixReadMem(data, buf.size());
1159  if (pix == NULL) {
1160  return false;
1161  }
1162  }
1163 
1164  // Begin the output
1165  const char* kUnknownTitle = "";
1166  if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
1167  pixDestroy(&pix);
1168  return false;
1169  }
1170 
1171  // Produce output
1172  bool r = false;
1173  if (tiff) {
1174  r = ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1175  timeout_millisec, renderer,
1177  } else {
1178  r = ProcessPage(pix, 0, filename, retry_config,
1179  timeout_millisec, renderer);
1180  pixDestroy(&pix);
1181  }
1182 
1183  // End the output
1184  if (!r || (renderer && !renderer->EndDocument())) {
1185  return false;
1186  }
1188  return true;
1189 #else
1190  return false;
1191 #endif
1192 }
bool stream_filelist
Definition: baseapi.cpp:81
#define tprintf(...)
Definition: tprintf.h:31
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1194
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
Definition: strngs.h:44
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC *  monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 830 of file baseapi.cpp.

830  {
831  if (tesseract_ == NULL)
832  return -1;
833  if (FindLines() != 0)
834  return -1;
835  if (page_res_ != NULL)
836  delete page_res_;
837  if (block_list_->empty()) {
838  page_res_ = new PAGE_RES(false, block_list_,
840  return 0; // Empty page.
841  }
842 
844  recognition_done_ = true;
849  } else {
850  // TODO(rays) LSTM here.
851  page_res_ = new PAGE_RES(false,
853  }
856  return 0;
857  }
858 
859  if (truth_cb_ != NULL) {
860  tesseract_->wordrec_run_blamer.set_value(true);
861  PageIterator *page_it = new PageIterator(
866  image_height_, page_it, this->tesseract()->pix_grey());
867  delete page_it;
868  }
869 
870  int result = 0;
872  #ifndef GRAPHICS_DISABLED
874  #endif // GRAPHICS_DISABLED
875  // The page_res is invalid after an interactive session, so cleanup
876  // in a way that lets us continue to the next page without crashing.
877  delete page_res_;
878  page_res_ = NULL;
879  return -1;
881  STRING fontname;
882  ExtractFontName(*output_file_, &fontname);
884  } else if (tesseract_->tessedit_ambigs_training) {
885  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
886  // OCR the page segmented into words by tesseract.
888  *input_file_, page_res_, monitor, training_output_file);
889  fclose(training_output_file);
890  } else {
891  // Now run the main recognition.
892  bool wait_for_text = true;
893  GetBoolVariable("paragraph_text_based", &wait_for_text);
894  if (!wait_for_text) DetectParagraphs(false);
895  if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
896  if (wait_for_text) DetectParagraphs(true);
897  } else {
898  result = -1;
899  }
900  }
901  return result;
902 }
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:46
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
TruthCallback * truth_cb_
Definition: baseapi.h:852
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2407
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
FILE * init_recog_training(const STRING &fname)
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:287
int GetScaledYResolution() const
Definition: thresholder.h:93
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:847
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
bool wordrec_run_blamer
Definition: wordrec.h:168
Tesseract *const tesseract() const
Definition: baseapi.h:728
bool tessedit_resegment_from_line_boxes
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337
Pix * pix_grey() const
Dict & getDict()
Definition: classify.h:65
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2063
virtual void Run(A1, A2, A3, A4)=0
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796
const UNICHARSET & getUnicharset() const
Definition: dict.h:96
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
Definition: strngs.h:44
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
STRING * input_file_
Name used by training code.
Definition: baseapi.h:845
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC *  monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 905 of file baseapi.cpp.

905  {
906  if (tesseract_ == NULL)
907  return -1;
908  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
909  tprintf("Please call SetImage before attempting recognition.");
910  return -1;
911  }
912  if (page_res_ != NULL)
913  ClearResults();
914  if (FindLines() != 0)
915  return -1;
916  // Additional conditions under which chopper test cannot be run
917  if (tesseract_->interactive_display_mode) return -1;
918 
919  recognition_done_ = true;
920 
921  page_res_ = new PAGE_RES(false, block_list_,
923 
924  PAGE_RES_IT page_res_it(page_res_);
925 
926  while (page_res_it.word() != NULL) {
927  WERD_RES *word_res = page_res_it.word();
928  GenericVector<TBOX> boxes;
929  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
930  page_res_it.row()->row, word_res);
931  page_res_it.forward();
932  }
933  return 0;
934 }
#define tprintf(...)
Definition: tprintf.h:31
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:844
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2063
WERD * word
Definition: pageres.h:175
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:843
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:851
void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB *  blob,
int  num_max_matches,
int *  unichar_ids,
float *  ratings,
int *  num_matches_returned 
)

Method to run adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2598 of file baseapi.cpp.

2602  {
2603  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2604  tesseract_->AdaptiveClassifier(blob, choices);
2605  BLOB_CHOICE_IT choices_it(choices);
2606  int& index = *num_matches_returned;
2607  index = 0;
2608  for (choices_it.mark_cycle_pt();
2609  !choices_it.cycled_list() && index < num_max_matches;
2610  choices_it.forward()) {
2611  BLOB_CHOICE* choice = choices_it.data();
2612  unichar_ids[index] = choice->unichar_id();
2613  ratings[index] = choice->rating();
2614  ++index;
2615  }
2616  *num_matches_returned = index;
2617  delete choices;
2618 }
float rating() const
Definition: ratngs.h:79
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2195 of file baseapi.cpp.

2195  {
2196  tesseract_->min_orientation_margin.set_value(margin);
2197 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 1974 of file baseapi.cpp.

1974  {
1975  if (tesseract_ != NULL) {
1977  }
1978 }
Dict & getDict()
Definition: classify.h:65
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:347
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 2000 of file baseapi.cpp.

2000  {
2001  if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
2002 }
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:420
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Does not copy the image buffer, or take ownership. The source image may be destroyed after Recognize is called, either explicitly or implicitly via one of the Get*Text functions. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 525 of file baseapi.cpp.

527  {
528  if (InternalSetImage())
529  thresholder_->SetImage(imagedata, width, height,
530  bytes_per_pixel, bytes_per_line);
531 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2005
void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract doesn't take a copy or ownership or pixDestroy the image, so it must persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. A future version of Tesseract may choose to use Pix as its internal representation and discard IMAGE altogether. Because of that, an implementation that sources and targets Pix may end up with fewer copies than an implementation that does not.

Definition at line 550 of file baseapi.cpp.

550  {
551  if (InternalSetImage())
552  thresholder_->SetImage(pix);
553  SetInputImage(pix);
554 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2005
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:936
void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 1988 of file baseapi.cpp.

1988  {
1989  if (tesseract_ != NULL) {
1991  // Set it for the sublangs too.
1992  int num_subs = tesseract_->num_sub_langs();
1993  for (int i = 0; i < num_subs; ++i) {
1995  }
1996  }
1997 }
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:357
Tesseract * get_sub_lang(int index) const
int num_sub_langs() const
Dict & getDict()
Definition: classify.h:65
#define NULL
Definition: host.h:144
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833
void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 561 of file baseapi.cpp.

561  {
562  if (thresholder_ == NULL)
563  return;
564  thresholder_->SetRectangle(left, top, width, height);
565  ClearResults();
566 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void SetRectangle(int left, int top, int width, int height)
#define NULL
Definition: host.h:144
void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 533 of file baseapi.cpp.

533  {
534  if (thresholder_)
536  else
537  tprintf("Please call SetImage before SetSourceResolution.\n");
538 }
#define tprintf(...)
Definition: tprintf.h:31
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder *  thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 376 of file baseapi.h.

376  {
377  if (thresholder_ != NULL)
378  delete thresholder_;
379  thresholder_ = thresholder;
380  ClearResults();
381  }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:841
#define NULL
Definition: host.h:144
Tesseract* const tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 728 of file baseapi.h.

728  {
729  return tesseract_;
730  }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:833