tesseract v5.3.3.20231005
lstmboxrenderer.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: lstmboxrenderer.cpp
3 * Description: Renderer for creating box file for LSTM training.
4 * based on the tsv renderer.
5 *
6 * (C) Copyright 2019, Google Inc.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#include <tesseract/baseapi.h> // for TessBaseAPI
20#include <tesseract/renderer.h>
21#include "tesseractclass.h" // for Tesseract
22
23namespace tesseract {
24
/**
 * Appends the four trailing numeric fields of one box row to `text`.
 *
 * Each field is preceded by a single space, in this order:
 *   image_height - bottom, right + 5, image_height - top, page_num.
 *
 * NOTE(review): subtracting from image_height presumably converts the y
 * coordinates to a bottom-up origin, and the reason for the fixed +5 on
 * `right` is not visible in this file -- confirm before changing either.
 *
 * @param right        Right edge of the box.
 * @param bottom       Bottom edge of the box.
 * @param top          Top edge of the box.
 * @param image_height Height that `bottom` and `top` are subtracted from.
 * @param page_num     Page index written as the last field.
 * @param text         String the fields are appended to (modified in place).
 */
static void AddBoxToLSTM(int right, int bottom, int top, int image_height, int page_num,
                         std::string &text) {
  const int fields[] = {image_height - bottom, right + 5, image_height - top, page_num};
  for (const int field : fields) {
    text += ' ';
    text += std::to_string(field);
  }
}
37
38char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
39 if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
40 return nullptr;
41 }
42
43 std::string lstm_box_str;
44 bool first_word = true;
45 int left = 0, top = 0, right = 0, bottom = 0;
46
48 while (!res_it->Empty(RIL_BLOCK)) {
49 if (res_it->Empty(RIL_SYMBOL)) {
50 res_it->Next(RIL_SYMBOL);
51 continue;
52 }
53 if (!first_word) {
54 if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
55 if (res_it->IsAtBeginningOf(RIL_WORD)) {
56 lstm_box_str += " " + std::to_string(left);
57 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
58 lstm_box_str += "\n"; // end of row for word
59 } // word
60 } else {
61 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
62 lstm_box_str += "\t " + std::to_string(left);
63 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
64 lstm_box_str += "\n"; // end of row for line
65 } // line
66 }
67 } // not first word
68 first_word = false;
69 // Use bounding box for whole line for everything
70 res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
71 do {
72 lstm_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
73 res_it->Next(RIL_SYMBOL);
74 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
75 lstm_box_str += " " + std::to_string(left);
76 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
77 lstm_box_str += "\n"; // end of row for symbol
78 }
79 if (!first_word) { // if first_word is true => empty page
80 lstm_box_str += "\t " + std::to_string(left);
81 AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
82 lstm_box_str += "\n"; // end of PAGE
83 }
84 char *ret = new char[lstm_box_str.length() + 1];
85 strcpy(ret, lstm_box_str.c_str());
86 delete res_it;
87 return ret;
88}
89
90/**********************************************************************
91 * LSTMBox Renderer interface implementation
92 **********************************************************************/
94 : TessResultRenderer(outputbase, "box") {}
95
97 const std::unique_ptr<const char[]> lstmbox(api->GetLSTMBoxText(imagenum()));
98 if (lstmbox == nullptr) {
99 return false;
100 }
101
102 AppendString(lstmbox.get());
103
104 return true;
105}
106
107} // namespace tesseract.
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:834
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:772
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:765
char * GetLSTMBoxText(int page_number)
LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1320
char * GetUTF8Text(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
bool Empty(PageIteratorLevel level) const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
void AppendString(const char *s)
Definition: renderer.cpp:111
bool AddImageHandler(TessBaseAPI *api) override
TessLSTMBoxRenderer(const char *outputbase)