All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H__
19 #define TESSERACT_API_RENDERER_H__
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include "genericvector.h"
25 #include "platform.h"
26 #include "publictypes.h"
27 
28 namespace tesseract {
29 
30 class TessBaseAPI;
31 
46  public:
47  virtual ~TessResultRenderer();
48 
49  // Takes ownership of pointer so must be new'd instance.
50  // Renderers aren't ordered, but insert() appends the sequences of the next
51  // parameter and the existing next(). The renderers should be unique across both lists.
52  void insert(TessResultRenderer* next);
53 
54  // Returns the next renderer or NULL.
55  TessResultRenderer* next() { return next_; }
56 
61  bool BeginDocument(const char* title);
62 
71  bool AddImage(TessBaseAPI* api);
72 
77  bool EndDocument();
78 
79  const char* file_extension() const { return file_extension_; }
80  const char* title() const { return title_; }
81 
91  int imagenum() const { return imagenum_; }
92 
93  protected:
104  TessResultRenderer(const char *outputbase,
105  const char* extension);
106 
107  // Hook for specialized handling in BeginDocument()
108  virtual bool BeginDocumentHandler();
109 
110  // This must be overridden to render the OCR'd results
111  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
112 
113  // Hook for specialized handling in EndDocument()
114  virtual bool EndDocumentHandler();
115 
116  // Renderers can call this to append '\0' terminated strings into
117  // the output string returned by GetOutput.
118  // This method will grow the output buffer if needed.
119  void AppendString(const char* s);
120 
121  // Renderers can call this to append binary byte sequences into
122  // the output string returned by GetOutput. Note that s is not necessarily
123  // '\0' terminated (and can contain '\0' within it).
124  // This method will grow the output buffer if needed.
125  void AppendData(const char* s, int len);
126 
127  private:
128  const char* file_extension_; // standard extension for generated output
129  const char* title_; // title of document being rendered
130  int imagenum_; // index of last image added
131 
132  FILE* fout_; // output file pointer
133  TessResultRenderer* next_; // Can link multiple renderers together
134  bool happy_; // I get grumpy when the disk fills up, etc.
135 };
136 
141  public:
142  explicit TessTextRenderer(const char *outputbase);
143 
144  protected:
145  virtual bool AddImageHandler(TessBaseAPI* api);
146 };
147 
152  public:
153  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
154  explicit TessHOcrRenderer(const char *outputbase);
155 
156 protected:
157  virtual bool BeginDocumentHandler();
158  virtual bool AddImageHandler(TessBaseAPI* api);
159  virtual bool EndDocumentHandler();
160 
161 private:
162  bool font_info_; // whether to print font information
163 };
164 
169  public:
170  // datadir is the location of the TESSDATA. We need it because
171  // we load a custom PDF font from this location.
172  TessPDFRenderer(const char *outputbase, const char *datadir);
173 
174 protected:
175  virtual bool BeginDocumentHandler();
176  virtual bool AddImageHandler(TessBaseAPI* api);
177  virtual bool EndDocumentHandler();
178 
179 private:
180  // We don't want to have every image in memory at once,
181  // so we store some metadata as we go along producing
182  // PDFs one page at a time. At the end that metadata is
183  // used to make everything that isn't easily handled in a
184  // streaming fashion.
185  long int obj_; // counter for PDF objects
186  GenericVector<long int> offsets_; // offset of every PDF object in bytes
187  GenericVector<long int> pages_; // object number for every /Page object
188  const char *datadir_; // where to find the custom font
189  // Bookkeeping only. DIY = Do It Yourself.
190  void AppendPDFObjectDIY(size_t objectsize);
191  // Bookkeeping + emit data.
192  void AppendPDFObject(const char *data);
193  // Create the /Contents object for an entire page.
194  static char* GetPDFTextObjects(TessBaseAPI* api,
195  double width, double height);
196  // Turn an image into a PDF object. Only transcode if we have to.
197  static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
198  char **pdf_object, long int *pdf_object_size);
199 };
200 
201 
206  public:
207  explicit TessUnlvRenderer(const char *outputbase);
208 
209  protected:
210  virtual bool AddImageHandler(TessBaseAPI* api);
211 };
212 
217  public:
218  explicit TessBoxTextRenderer(const char *outputbase);
219 
220  protected:
221  virtual bool AddImageHandler(TessBaseAPI* api);
222 };
223 
224 } // namespace tesseract.
225 
226 #endif // TESSERACT_API_RENDERER_H__
struct TessResultRenderer TessResultRenderer
Definition: capi.h:61
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:65
void insert(LIST list, void *node)
Definition: oldlist.cpp:221
struct TessTextRenderer TessTextRenderer
Definition: capi.h:62
struct TessBaseAPI TessBaseAPI
Definition: capi.h:67
const char * title() const
Definition: renderer.h:80
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:66
TessResultRenderer * next()
Definition: renderer.h:55
#define TESS_API
Definition: platform.h:73
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:63
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:64
const char * file_extension() const
Definition: renderer.h:79