tesseract v5.3.3.20231005
renderer.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// File: renderer.h
3// Description: Rendering interface to inject into TessBaseAPI
4//
5// (C) Copyright 2011, Google Inc.
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9// http://www.apache.org/licenses/LICENSE-2.0
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef TESSERACT_API_RENDERER_H_
17#define TESSERACT_API_RENDERER_H_
18
19#include "export.h"
20
21// To avoid collision with other typenames include the ABSOLUTE MINIMUM
22// complexity of includes here. Use forward declarations wherever possible
23// and hide includes of complex types in baseapi.cpp.
24#include <cstdint>
25#include <string> // for std::string
26#include <vector> // for std::vector
27
28struct Pix;
29
30namespace tesseract {
31
32class TessBaseAPI;
33
48public:
49 virtual ~TessResultRenderer();
50
51 // Takes ownership of the pointer, so it must be a new'd instance.
52 // Renderers aren't ordered, but this appends the sequences of the next parameter
53 // and the existing next(). The renderers should be unique across both lists.
54 void insert(TessResultRenderer *next);
55
56 // Returns the next renderer or nullptr.
58 return next_;
59 }
60
66 bool BeginDocument(const char *title);
67
76 bool AddImage(TessBaseAPI *api);
77
82 bool EndDocument();
83
84 const char *file_extension() const { // Read-only accessor; does not modify the renderer.
85 return file_extension_; // the stored "standard extension for generated output"
86 }
87 const char *title() const { // Title of the document being rendered, as a C string.
88 return title_.c_str(); // points into title_'s own buffer; not a copy
89 }
90
91 // Is everything fine? Returns the happy_ flag; false means something went wrong.
92 bool happy() const {
93 return happy_;
94 }
95
105 int imagenum() const { // Read-only accessor for the image counter.
106 return imagenum_; // index of the last image added
107 }
108
109protected:
120 TessResultRenderer(const char *outputbase, const char *extension);
121
122 // Hook for specialized handling in BeginDocument()
123 virtual bool BeginDocumentHandler();
124
125 // This must be overridden to render the OCR'd results
126 virtual bool AddImageHandler(TessBaseAPI *api) = 0;
127
128 // Hook for specialized handling in EndDocument()
129 virtual bool EndDocumentHandler();
130
131 // Renderers can call this to append '\0' terminated strings into
132 // the output string returned by GetOutput.
133 // This method will grow the output buffer if needed.
134 void AppendString(const char *s);
135
136 // Renderers can call this to append binary byte sequences into
137 // the output string returned by GetOutput. Note that s is not necessarily
138 // '\0' terminated (and can contain '\0' within it).
139 // This method will grow the output buffer if needed.
140 void AppendData(const char *s, int len);
141
142private:
143 TessResultRenderer *next_; // Can link multiple renderers together
144 FILE *fout_; // output file pointer
145 const char *file_extension_; // standard extension for generated output
146 std::string title_; // title of document being rendered
147 int imagenum_; // index of last image added
148 bool happy_; // I get grumpy when the disk fills up, etc.
149};
150
155public:
156 explicit TessTextRenderer(const char *outputbase);
157
158protected:
159 bool AddImageHandler(TessBaseAPI *api) override;
160};
161
166public:
167 explicit TessHOcrRenderer(const char *outputbase, bool font_info);
168 explicit TessHOcrRenderer(const char *outputbase);
169
170protected:
171 bool BeginDocumentHandler() override;
172 bool AddImageHandler(TessBaseAPI *api) override;
173 bool EndDocumentHandler() override;
174
175private:
176 bool font_info_; // whether to print font information
177};
178
183public:
184 explicit TessAltoRenderer(const char *outputbase);
185
186protected:
187 bool BeginDocumentHandler() override;
188 bool AddImageHandler(TessBaseAPI *api) override;
189 bool EndDocumentHandler() override;
190
191private:
192 bool begin_document;
193};
194
199public:
200 explicit TessTsvRenderer(const char *outputbase, bool font_info);
201 explicit TessTsvRenderer(const char *outputbase);
202
203protected:
204 bool BeginDocumentHandler() override;
205 bool AddImageHandler(TessBaseAPI *api) override;
206 bool EndDocumentHandler() override;
207
208private:
209 bool font_info_; // whether to print font information
210};
211
216public:
217 // datadir is the location of the TESSDATA. We need it because
218 // we load a custom PDF font from this location.
219 TessPDFRenderer(const char *outputbase, const char *datadir,
220 bool textonly = false);
221
222protected:
223 bool BeginDocumentHandler() override;
224 bool AddImageHandler(TessBaseAPI *api) override;
225 bool EndDocumentHandler() override;
226
227private:
228 // We don't want to have every image in memory at once,
229 // so we store some metadata as we go along producing
230 // PDFs one page at a time. At the end, that metadata is
231 // used to make everything that isn't easily handled in a
232 // streaming fashion.
233 long int obj_; // counter for PDF objects
234 std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
235 std::vector<long int> pages_; // object number for every /Page object
236 std::string datadir_; // where to find the custom font
237 bool textonly_; // skip images if set
238 // Bookkeeping only. DIY = Do It Yourself.
239 void AppendPDFObjectDIY(size_t objectsize);
240 // Bookkeeping + emit data.
241 void AppendPDFObject(const char *data);
242 // Create the /Contents object for an entire page.
243 char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
244 // Turn an image into a PDF object. Only transcode if we have to.
245 static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
246 char **pdf_object, long int *pdf_object_size,
247 int jpg_quality);
248};
249
254public:
255 explicit TessUnlvRenderer(const char *outputbase);
256
257protected:
258 bool AddImageHandler(TessBaseAPI *api) override;
259};
260
265public:
266 explicit TessLSTMBoxRenderer(const char *outputbase);
267
268protected:
269 bool AddImageHandler(TessBaseAPI *api) override;
270};
271
276public:
277 explicit TessBoxTextRenderer(const char *outputbase);
278
279protected:
280 bool AddImageHandler(TessBaseAPI *api) override;
281};
282
287public:
288 explicit TessWordStrBoxRenderer(const char *outputbase);
289
290protected:
291 bool AddImageHandler(TessBaseAPI *api) override;
292};
293
294#ifndef DISABLED_LEGACY_ENGINE
295
300public:
301 explicit TessOsdRenderer(const char *outputbase);
302
303protected:
304 bool AddImageHandler(TessBaseAPI *api) override;
305};
306
307#endif // ndef DISABLED_LEGACY_ENGINE
308
309} // namespace tesseract.
310
311#endif // TESSERACT_API_RENDERER_H_
struct TessBaseAPI TessBaseAPI
Definition: capi.h:60
struct TessResultRenderer TessResultRenderer
Definition: capi.h:59
def next(obj)
Definition: ast.py:56
virtual bool AddImageHandler(TessBaseAPI *api)=0
const char * file_extension() const
Definition: renderer.h:84
const char * title() const
Definition: renderer.h:87
TessResultRenderer * next()
Definition: renderer.h:57
#define TESS_API
Definition: export.h:32