20#include <allheaders.h>
42 BLOCK_LIST *block_list) {
43 std::string lstmf_name = output_basename +
".lstmf";
45 if (applybox_page > 0) {
47 if (!images.
LoadDocument(lstmf_name.c_str(), 0, 0,
nullptr)) {
48 tprintf(
"Failed to read training data from %s!\n", lstmf_name.c_str());
52 std::vector<TBOX> boxes;
53 std::vector<std::string> texts;
55 if (!
ReadAllBoxes(applybox_page,
false, input_imagename, &boxes, &texts,
nullptr,
nullptr) ||
57 tprintf(
"Failed to read boxes from %s\n", input_imagename);
62 tprintf(
"Failed to read pages from %s\n", input_imagename);
67 tprintf(
"Failed to write training data to %s!\n", lstmf_name.c_str());
78 auto box_count = boxes.size();
83 while (end_box < texts.size() && texts[end_box] ==
"\t") {
86 for (
auto start_box = end_box; start_box < box_count; start_box = end_box) {
88 TBOX line_box = boxes[start_box];
89 std::string line_str = texts[start_box];
90 for (end_box = start_box + 1; end_box < box_count && texts[end_box] !=
"\t"; ++end_box) {
91 line_box += boxes[end_box];
92 line_str += texts[end_box];
95 BLOCK *best_block =
nullptr;
97 BLOCK_IT b_it(block_list);
98 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
99 BLOCK *block = b_it.data();
107 if (overlap_box.
area() > best_overlap) {
108 best_overlap = overlap_box.
area();
114 if (best_block ==
nullptr) {
115 tprintf(
"No block overlapping textline: %s\n", line_str.c_str());
117 imagedata =
GetLineData(line_box, boxes, texts, start_box, end_box, *best_block);
119 if (imagedata !=
nullptr) {
124 while (end_box < texts.size() && texts[end_box] ==
"\t") {
134 const std::vector<std::string> &texts,
int start_box,
int end_box,
135 const BLOCK &block) {
138 if (image_data ==
nullptr) {
145 std::vector<TBOX> line_boxes;
146 std::vector<std::string> line_texts;
147 for (
int b = start_box; b < end_box; ++b) {
149 box.
rotate(block_rotation);
151 line_boxes.push_back(box);
152 line_texts.push_back(texts[b]);
154 std::vector<int> page_numbers(line_boxes.size(), applybox_page);
155 image_data->
AddBoxes(line_boxes, line_texts, page_numbers);
166 TBOX *revised_box)
const {
168 wbox.
pad(padding, padding);
172 int num_rotations = 0;
188 int width = pixGetWidth(pix);
189 int height = pixGetHeight(pix);
190 TBOX image_box(0, 0, width, height);
192 *revised_box &= image_box;
196 Box *clip_box = boxCreate(revised_box->
left(), height - revised_box->
top(), revised_box->
width(),
198 Image box_pix = pixClipRectangle(pix, clip_box,
nullptr);
199 boxDestroy(&clip_box);
200 if (box_pix ==
nullptr) {
203 if (num_rotations > 0) {
204 Image rot_pix = pixRotateOrth(box_pix, num_rotations);
209 int depth = pixGetDepth(box_pix);
212 grey = pixConvertTo8(box_pix,
false);
216 bool vertical_text =
false;
217 if (num_rotations > 0) {
220 revised_box->
rotate(rotation);
221 if (num_rotations != 2) {
222 vertical_text =
true;
225 return new ImageData(vertical_text, box_pix);
248 if (im_data ==
nullptr) {
252 bool do_invert = tessedit_do_invert;
253 float threshold = do_invert ? double(invert_threshold) : 0.0f;
254 lstm_recognizer_->
RecognizeLine(*im_data, threshold, classify_debug_level > 0,
256 lstm_choice_mode, lstm_choice_iterations);
269 const Dict *stopper_dict = lstm_recognizer_->
GetDict();
270 if (stopper_dict ==
nullptr) {
273 for (
unsigned w = 0; w < words->
size(); ++w) {
292 if (
getDict().stopper_debug_level >= 1) {
293 tprintf(
"Best choice certainty=%g, space=%g, scaled=%g, final=%g\n",
const float kWorstDictCertainty
@ PSM_SINGLE_WORD
Treat the image as a single word.
const float kCertaintyScale
void tprintf(const char *format,...)
bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)
void LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word, PointerVector< WERD_RES > *words)
bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename, BLOCK_LIST *block_list)
ImageData * GetLineData(const TBOX &line_box, const std::vector< TBOX > &boxes, const std::vector< std::string > &texts, int start_box, int end_box, const BLOCK &block)
Dict & getDict() override
ImageData * GetRectImage(const TBOX &box, const BLOCK &block, int padding, TBOX *revised_box) const
void SearchWords(PointerVector< WERD_RES > *words)
void TrainFromBoxes(const std::vector< TBOX > &boxes, const std::vector< std::string > &texts, BLOCK_LIST *block_list, DocumentData *training_data)
void AddBoxes(const std::vector< TBOX > &boxes, const std::vector< std::string > &texts, const std::vector< int > &box_pages)
void set_page_number(int num)
TESS_API bool SaveDocument(const char *filename, FileWriter writer)
TESS_API bool LoadDocument(const char *filename, int start_page, int64_t max_memory, FileReader reader)
TESS_API void AddPageToDocument(ImageData *page)
FCOORD re_rotation() const
PDBLK pdblk
Page Description Block.
float base_line(float xpos) const
tesseract::Tesseract * tesseract
WERD_CHOICE * best_choice
std::vector< int > best_state
void SetupFake(const UNICHARSET &uch)
POLY_BLOCK * poly_block() const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
void set_certainty(float new_val)
unsigned state(unsigned index) const
TDimension height() const
void move(const ICOORD vec)
void rotate(const FCOORD &vec)
const ICOORD & botleft() const
TBOX intersection(const TBOX &box) const
TDimension bottom() const
void pad(int xpad, int ypad)
bool major_overlap(const TBOX &box) const
void initialise(uint16_t length)
TBOX bounding_box() const
bool AcceptableResult(WERD_RES *word) const
void RecognizeLine(const ImageData &image_data, float invert_threshold, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0, int lstm_choice_amount=5)
const Dict * GetDict() const
const UNICHARSET & GetUnicharset() const