All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
renderer.cpp
Go to the documentation of this file.
1 // Include automatically generated configuration file if running autoconf.
2 #ifdef HAVE_CONFIG_H
3 #include "config_auto.h"
4 #endif
5 
6 #include <string.h>
7 #include "baseapi.h"
8 #include "genericvector.h"
9 #include "renderer.h"
10 
11 namespace tesseract {
12 
13 /**********************************************************************
14  * Base Renderer interface implementation
15  **********************************************************************/
17  const char* extension)
18  : file_extension_(extension),
19  title_(""), imagenum_(-1),
20  fout_(stdout),
21  next_(NULL),
22  happy_(true) {
23  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
24  STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
25  fout_ = fopen(outfile.string(), "wb");
26  if (fout_ == NULL) {
27  happy_ = false;
28  }
29  }
30 }
31 
33  if (fout_ != stdout)
34  fclose(fout_);
35  else
36  clearerr(fout_);
37  delete next_;
38 }
39 
41  if (next == NULL) return;
42 
43  TessResultRenderer* remainder = next_;
44  next_ = next;
45  if (remainder) {
46  while (next->next_ != NULL) {
47  next = next->next_;
48  }
49  next->next_ = remainder;
50  }
51 }
52 
53 bool TessResultRenderer::BeginDocument(const char* title) {
54  if (!happy_) return false;
55  title_ = title;
56  imagenum_ = -1;
57  bool ok = BeginDocumentHandler();
58  if (next_) {
59  ok = next_->BeginDocument(title) && ok;
60  }
61  return ok;
62 }
63 
65  if (!happy_) return false;
66  ++imagenum_;
67  bool ok = AddImageHandler(api);
68  if (next_) {
69  ok = next_->AddImage(api) && ok;
70  }
71  return ok;
72 }
73 
75  if (!happy_) return false;
76  bool ok = EndDocumentHandler();
77  if (next_) {
78  ok = next_->EndDocument() && ok;
79  }
80  return ok;
81 }
82 
83 void TessResultRenderer::AppendString(const char* s) {
84  AppendData(s, strlen(s));
85 }
86 
87 void TessResultRenderer::AppendData(const char* s, int len) {
88  int n = fwrite(s, 1, len, fout_);
89  if (n != len) happy_ = false;
90 }
91 
93  return happy_;
94 }
95 
97  return happy_;
98 }
99 
100 
101 /**********************************************************************
102  * UTF8 Text Renderer interface implementation
103  **********************************************************************/
104 TessTextRenderer::TessTextRenderer(const char *outputbase)
105  : TessResultRenderer(outputbase, "txt") {
106 }
107 
109  char* utf8 = api->GetUTF8Text();
110  if (utf8 == NULL) {
111  return false;
112  }
113 
114  AppendString(utf8);
115  delete[] utf8;
116 
117  bool pageBreak = false;
118  api->GetBoolVariable("include_page_breaks", &pageBreak);
119  const char* pageSeparator = api->GetStringVariable("page_separator");
120  if (pageBreak) {
121  AppendString(pageSeparator);
122  }
123 
124  return true;
125 }
126 
127 /**********************************************************************
128  * HOcr Text Renderer interface implementation
129  **********************************************************************/
130 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
131  : TessResultRenderer(outputbase, "hocr") {
132  font_info_ = false;
133 }
134 
135 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
136  : TessResultRenderer(outputbase, "hocr") {
137  font_info_ = font_info;
138 }
139 
141  AppendString(
142  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
143  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
144  " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
145  "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
146  "lang=\"en\">\n <head>\n <title>\n");
147  AppendString(title());
148  AppendString(
149  "</title>\n"
150  "<meta http-equiv=\"Content-Type\" content=\"text/html;"
151  "charset=utf-8\" />\n"
152  " <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
153  "' />\n"
154  " <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
155  " ocr_line ocrx_word");
156  if (font_info_)
157  AppendString(
158  " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
159  AppendString(
160  "'/>\n"
161  "</head>\n<body>\n");
162 
163  return true;
164 }
165 
167  AppendString(" </body>\n</html>\n");
168 
169  return true;
170 }
171 
173  char* hocr = api->GetHOCRText(imagenum());
174  if (hocr == NULL) return false;
175 
176  AppendString(hocr);
177  delete[] hocr;
178 
179  return true;
180 }
181 
182 /**********************************************************************
183  * UNLV Text Renderer interface implementation
184  **********************************************************************/
185 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
186  : TessResultRenderer(outputbase, "unlv") {
187 }
188 
190  char* unlv = api->GetUNLVText();
191  if (unlv == NULL) return false;
192 
193  AppendString(unlv);
194  delete[] unlv;
195 
196  return true;
197 }
198 
199 /**********************************************************************
200  * BoxText Renderer interface implementation
201  **********************************************************************/
203  : TessResultRenderer(outputbase, "box") {
204 }
205 
207  char* text = api->GetBoxText(imagenum());
208  if (text == NULL) return false;
209 
210  AppendString(text);
211  delete[] text;
212 
213  return true;
214 }
215 
216 } // namespace tesseract
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:104
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:172
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:16
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:64
virtual bool EndDocumentHandler()
Definition: renderer.cpp:96
void insert(TessResultRenderer *next)
Definition: renderer.cpp:40
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:206
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:108
void AppendString(const char *s)
Definition: renderer.cpp:83
const char * title() const
Definition: renderer.h:80
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
virtual bool EndDocumentHandler()
Definition: renderer.cpp:166
virtual bool AddImageHandler(TessBaseAPI *api)=0
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:185
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1581
TessResultRenderer * next()
Definition: renderer.h:55
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:140
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:189
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:244
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:202
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23
TessHOcrRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:135
char * GetHOCRText(int page_number)
Definition: baseapi.cpp:1399
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:92
Definition: strngs.h:44
#define NULL
Definition: host.h:144
bool BeginDocument(const char *title)
Definition: renderer.cpp:53
const char * string() const
Definition: strngs.cpp:193
void AppendData(const char *s, int len)
Definition: renderer.cpp:87