tesseract v5.3.3.20231005
renderer.cpp
Go to the documentation of this file.
1
2// File: renderer.cpp
3// Description: Rendering interface to inject into TessBaseAPI
4//
5// (C) Copyright 2011, Google Inc.
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9// http://www.apache.org/licenses/LICENSE-2.0
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
17
18#ifdef HAVE_CONFIG_H
19# include "config_auto.h"
20#endif
21#include <tesseract/baseapi.h>
22#include <tesseract/renderer.h>
23#include <cstring>
24#include <memory> // std::unique_ptr
25#include <string> // std::string
26#include "serialis.h" // Serialize
27
28namespace tesseract {
29
30/**********************************************************************
31 * Base Renderer interface implementation
32 **********************************************************************/
33TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension)
34 : next_(nullptr)
35 , fout_(stdout)
36 , file_extension_(extension)
37 , title_("")
38 , imagenum_(-1)
39 , happy_(true) {
40 if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
41 std::string outfile = std::string(outputbase) + "." + extension;
42 fout_ = fopen(outfile.c_str(), "wb");
43 if (fout_ == nullptr) {
44 happy_ = false;
45 }
46 }
47}
48
50 if (fout_ != nullptr) {
51 if (fout_ != stdout) {
52 fclose(fout_);
53 } else {
54 clearerr(fout_);
55 }
56 }
57 delete next_;
58}
59
61 if (next == nullptr) {
62 return;
63 }
64
65 TessResultRenderer *remainder = next_;
66 next_ = next;
67 if (remainder) {
68 while (next->next_ != nullptr) {
69 next = next->next_;
70 }
71 next->next_ = remainder;
72 }
73}
74
75bool TessResultRenderer::BeginDocument(const char *title) {
76 if (!happy_) {
77 return false;
78 }
79 title_ = title;
80 imagenum_ = -1;
81 bool ok = BeginDocumentHandler();
82 if (next_) {
83 ok = next_->BeginDocument(title) && ok;
84 }
85 return ok;
86}
87
89 if (!happy_) {
90 return false;
91 }
92 ++imagenum_;
93 bool ok = AddImageHandler(api);
94 if (next_) {
95 ok = next_->AddImage(api) && ok;
96 }
97 return ok;
98}
99
101 if (!happy_) {
102 return false;
103 }
104 bool ok = EndDocumentHandler();
105 if (next_) {
106 ok = next_->EndDocument() && ok;
107 }
108 return ok;
109}
110
112 if (s == nullptr) {
113 return;
114 }
115 AppendData(s, strlen(s));
116}
117
118void TessResultRenderer::AppendData(const char *s, int len) {
119 if (!tesseract::Serialize(fout_, s, len)) {
120 happy_ = false;
121 }
122 fflush(fout_);
123}
124
126 return happy_;
127}
128
130 return happy_;
131}
132
133/**********************************************************************
134 * UTF8 Text Renderer interface implementation
135 **********************************************************************/
137 : TessResultRenderer(outputbase, "txt") {}
138
140 const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
141 if (utf8 == nullptr) {
142 return false;
143 }
144
145 const char *pageSeparator = api->GetStringVariable("page_separator");
146 if (pageSeparator != nullptr && *pageSeparator != '\0' && imagenum() > 0) {
147 AppendString(pageSeparator);
148 }
149
150 AppendString(utf8.get());
151
152 return true;
153}
154
155/**********************************************************************
156 * TSV Text Renderer interface implementation
157 **********************************************************************/
158TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") {
159 font_info_ = false;
160}
161
162TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info)
163 : TessResultRenderer(outputbase, "tsv") {
164 font_info_ = font_info;
165}
166
168 // Output TSV column headings
170 "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
171 "num\tleft\ttop\twidth\theight\tconf\ttext\n");
172 return true;
173}
174
176 return true;
177}
178
180 const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
181 if (tsv == nullptr) {
182 return false;
183 }
184
185 AppendString(tsv.get());
186
187 return true;
188}
189
190/**********************************************************************
191 * UNLV Text Renderer interface implementation
192 **********************************************************************/
194 : TessResultRenderer(outputbase, "unlv") {}
195
197 const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
198 if (unlv == nullptr) {
199 return false;
200 }
201
202 AppendString(unlv.get());
203
204 return true;
205}
206
207/**********************************************************************
208 * BoxText Renderer interface implementation
209 **********************************************************************/
211 : TessResultRenderer(outputbase, "box") {}
212
214 const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
215 if (text == nullptr) {
216 return false;
217 }
218
219 AppendString(text.get());
220
221 return true;
222}
223
224#ifndef DISABLED_LEGACY_ENGINE
225
226/**********************************************************************
227 * Osd Text Renderer interface implementation
228 **********************************************************************/
229TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {}
230
232 const std::unique_ptr<const char[]> osd(api->GetOsdText(imagenum()));
233 if (osd == nullptr) {
234 return false;
235 }
236
237 AppendString(osd.get());
238
239 return true;
240}
241
242#endif // ndef DISABLED_LEGACY_ENGINE
243
244} // namespace tesseract
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:236
def next(obj)
Definition: ast.py:56
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1412
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1744
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1552
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:314
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:125
virtual bool AddImageHandler(TessBaseAPI *api)=0
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:88
bool BeginDocument(const char *title)
Definition: renderer.cpp:75
void AppendString(const char *s)
Definition: renderer.cpp:111
const char * title() const
Definition: renderer.h:87
TessResultRenderer * next()
Definition: renderer.h:57
virtual bool EndDocumentHandler()
Definition: renderer.cpp:129
void AppendData(const char *s, int len)
Definition: renderer.cpp:118
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:33
void insert(TessResultRenderer *next)
Definition: renderer.cpp:60
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:139
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:136
TessTsvRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:162
bool EndDocumentHandler() override
Definition: renderer.cpp:175
bool BeginDocumentHandler() override
Definition: renderer.cpp:167
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:179
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:193
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:196
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:213
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:210
TessOsdRenderer(const char *outputbase)
Definition: renderer.cpp:229
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:231