28#ifndef TESSERACT_TRAINING_STRINGRENDERER_H_
29#define TESSERACT_TRAINING_STRINGRENDERER_H_
33#include "pango/pango-layout.h"
34#include "pango/pangocairo.h"
40#include <unordered_map>
// Constructor: takes a font description string plus the page width and
// height.
// NOTE(review): `font_desc` is presumably a Pango-style font description
// (this header includes pango-layout.h) and the dimensions are presumably in
// pixels — confirm against the implementation.
52 StringRenderer(
const std::string &font_desc,
int page_width,
int page_height);
// Takes a text buffer, its length, and an output Image pointer `pix`.
// NOTE(review): presumably renders (a page of) `text` into `*pix`; the
// meaning of the int return value is not visible in this declaration —
// confirm against the implementation before relying on it.
58 int RenderToImage(
const char *text,
int text_length,
Image *pix);
// Same parameter list and return type as RenderToImage.
// NOTE(review): by its name this presumably produces a grayscale result in
// `*pix`; return-value semantics are not visible here — confirm.
59 int RenderToGrayscaleImage(
const char *text,
int text_length,
Image *pix);
// Like RenderToImage, with an additional integer `threshold` parameter.
// NOTE(review): `threshold` is presumably the binarization cut-off applied
// to the rendered image; its valid range and the return-value semantics are
// not visible here — confirm.
60 int RenderToBinaryImage(
const char *text,
int text_length,
int threshold,
Image *pix);
// Takes a minimum coverage value, a text buffer with its length, and two
// output pointers: `font_used` (a string) and `pix` (an Image).
// NOTE(review): presumably renders `text` with successive available fonts,
// skipping fonts whose coverage of the text is below `min_coverage`, and
// reports the chosen font through `*font_used` — none of this is visible in
// the declaration; confirm against the implementation, including the meaning
// of the int return value.
64 int RenderAllFontsToImage(
double min_coverage,
const char *text,
int text_length,
65 std::string *font_used,
Image *pix);
// Sets the font from a description string.
// NOTE(review): the bool result presumably signals whether `desc` could be
// parsed/applied — confirm.
67 bool set_font(
const std::string &desc);
70 char_spacing_ = char_spacing;
// Sets the rendering resolution.
// NOTE(review): units are presumably dots per inch — confirm. Declared
// out-of-line, so it may do more than store the value.
75 void set_resolution(
const int resolution);
77 vertical_text_ = vertical_text;
80 gravity_hint_strong_ = gravity_hint_strong;
83 render_fullwidth_latin_ = render_fullwidth_latin;
// Sets the underline start probability from `frac`.
// NOTE(review): presumably stored in underline_start_prob_ and expected to
// lie in [0, 1]; whether out-of-range values are clamped or rejected is not
// visible here — confirm.
88 void set_underline_start_prob(
const double frac);
// Sets the underline continuation probability from `frac`.
// NOTE(review): presumably stored in underline_continuation_prob_ and
// expected to lie in [0, 1]; range handling is not visible here — confirm.
91 void set_underline_continuation_prob(
const double frac);
93 underline_style_ = style;
105 drop_uncovered_chars_ = val;
108 strip_unrenderable_words_ = val;
111 output_word_boxes_ = val;
117 add_ligatures_ = add_ligatures;
126 h_margin_ = h_margin;
129 v_margin_ = v_margin;
// Returns a const reference to a vector of BoxChar pointers; does not modify
// the renderer (const member).
// NOTE(review): presumably refers to the boxchars_ member, in which case the
// reference is only valid while this object lives and the boxes are not
// rebuilt — confirm.
143 const std::vector<BoxChar *> &GetBoxes()
const;
// Returns a Boxa pointer; const member.
// NOTE(review): ownership of the returned Boxa (caller-frees vs. borrowed)
// is not evident from the declaration — confirm before use.
146 Boxa *GetPageBoxes()
const;
// Applies a rotation to the page boxes.
// NOTE(review): the unit (radians vs. degrees), the sign convention and the
// centre of rotation are not visible in this declaration — confirm.
149 void RotatePageBoxes(
float rotation);
// Returns the boxes as a string (by value).
// NOTE(review): the text format is not visible here. The method is
// non-const, so it may modify renderer state while building the string —
// confirm.
153 std::string GetBoxesStr();
// Writes the boxes to the file named by `filename`.
// NOTE(review): returns void, so I/O failures are not reported through the
// return value; how errors are handled is not visible here — confirm.
155 void WriteAllBoxes(
const std::string &filename);
// Takes a pointer to a (by name, UTF-8) string; const member, so the
// renderer itself is not modified.
// NOTE(review): presumably edits `*utf8_text` in place to remove words the
// current font cannot render and returns how many were removed — confirm.
158 int StripUnrenderableWords(std::string *utf8_text)
const;
// Static helper: takes `text` by const reference and returns a new string.
// NOTE(review): presumably inserts Unicode WORD JOINER characters at
// selected positions of `text`; the exact placement rule is not visible
// here — confirm.
164 static std::string InsertWordJoiners(
const std::string &text);
// Static helper: returns a converted copy of `text`; the input is not
// modified.
// NOTE(review): by name, maps Basic Latin characters to their fullwidth
// forms; which characters are converted or left alone is not visible here —
// confirm.
167 static std::string ConvertBasicLatinToFullwidthLatin(
const std::string &text);
// Static helper: returns a converted copy of `text`; the input is not
// modified.
// NOTE(review): by name, the inverse of ConvertBasicLatinToFullwidthLatin;
// whether the two round-trip exactly is not visible here — confirm.
168 static std::string ConvertFullwidthLatinToBasicLatin(
const std::string &text);
// Sets up the Pango/Cairo state used by the renderer.
// NOTE(review): presumably paired with FreePangoCairo; which members it
// creates is not visible here — confirm.
172 void InitPangoCairo();
// Releases the Pango/Cairo state.
// NOTE(review): presumably the counterpart of InitPangoCairo; whether it is
// safe to call twice or before initialization is not visible here — confirm.
173 void FreePangoCairo();
// Applies properties to the layout.
// NOTE(review): presumably pushes the configured options (font, spacing,
// margins, ...) onto the Pango layout; the exact set is not visible here —
// confirm.
175 void SetLayoutProperties();
// Sets word underline attributes for the given page text.
// NOTE(review): presumably chooses which words to underline using the
// underline start/continuation probabilities and underline_style_ —
// confirm against the implementation.
176 void SetWordUnderlineAttributes(
const std::string &page_text);
// Computes the cluster boxes.
// NOTE(review): takes no arguments and returns nothing, so results are
// presumably stored in member state (likely boxchars_) — confirm.
178 void ComputeClusterBoxes();
// Takes a pointer to a vector of BoxChar pointers.
// NOTE(review): presumably adjusts the box coordinates in `*boxchars` in
// place to match the page layout (e.g. margins); the exact correction is
// not visible here — confirm.
179 void CorrectBoxPositionsToLayout(std::vector<BoxChar *> *boxchars);
// Takes an output pointer to a vector of strings.
// NOTE(review): presumably fills `*cluster_text` with one string per
// cluster and returns false on failure — confirm, including whether the
// vector is cleared first.
180 bool GetClusterStrings(std::vector<std::string> *cluster_text);
// Takes a text buffer and its length; returns an int.
// NOTE(review): by name, the return value is an offset into `text` where
// the first page break falls; the result when all text fits on one page is
// not visible here — confirm.
181 int FindFirstPageBreakOffset(
const char *text,
int text_length);
// Pen color stored as three doubles.
// NOTE(review): presumably the r, g, b components passed to
// set_pen_color(double r, double g, double b), in that order — confirm.
187 double pen_color_[3];
double underline_continuation_prob_
double underline_start_prob_
void set_features(const char *features)
bool gravity_hint_strong_
void set_pen_color(double r, double g, double b)
void set_vertical_text(bool vertical_text)
void set_underline_style(const PangoUnderline style)
void set_render_fullwidth_latin(bool render_fullwidth_latin)
void set_drop_uncovered_chars(bool val)
void set_gravity_hint_strong(bool gravity_hint_strong)
const PangoFontInfo & font() const
bool render_fullwidth_latin_
void set_strip_unrenderable_words(bool val)
void set_add_ligatures(bool add_ligatures)
void set_h_margin(const int h_margin)
cairo_surface_t * surface_
std::vector< BoxChar * > boxchars_
bool strip_unrenderable_words_
void set_char_spacing(int char_spacing)
void set_output_word_boxes(bool val)
void set_v_margin(const int v_margin)
bool drop_uncovered_chars_
std::unordered_map< char32, int64_t > char_map_
PangoUnderline underline_style_
void set_leading(int leading)
void set_box_padding(int val)