All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::PangoFontInfo Class Reference

#include <pango_font_info.h>

Public Types

enum  FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE }
 

Public Member Functions

 PangoFontInfo ()
 
 PangoFontInfo (const string &name)
 
bool ParseFontDescriptionName (const string &name)
 
bool CoversUTF8Text (const char *utf8_text, int byte_length) const
 
int DropUncoveredChars (string *utf8_text) const
 
bool CanRenderString (const char *utf8_word, int len, vector< string > *graphemes) const
 
bool CanRenderString (const char *utf8_word, int len) const
 
bool GetSpacingProperties (const string &utf8_char, int *x_bearing, int *x_advance) const
 
string DescriptionName () const
 
const string & family_name () const
 
const int font_size () const
 
const bool is_bold () const
 
const bool is_italic () const
 
const bool is_smallcaps () const
 
const bool is_monospace () const
 
const bool is_fraktur () const
 
const FontTypeEnum font_type () const
 
const int resolution () const
 
void set_resolution (const int resolution)
 

Static Public Member Functions

static void InitFontConfig (bool force_clear, const string &fonts_dir)
 

Friends

class FontUtils
 

Detailed Description

Definition at line 38 of file pango_font_info.h.

Member Enumeration Documentation

Constructor & Destructor Documentation

tesseract::PangoFontInfo::PangoFontInfo ( )

Definition at line 78 of file pango_font_info.cpp.

78  : desc_(NULL), resolution_(kDefaultResolution) {
79  Clear();
80 }
const int kDefaultResolution
Default resolution used if input is not believable.
Definition: pagesegmain.cpp:60
#define NULL
Definition: host.h:144
tesseract::PangoFontInfo::PangoFontInfo ( const string &  name)
explicit

Definition at line 82 of file pango_font_info.cpp.

83  : desc_(NULL), resolution_(kDefaultResolution) {
84  if (!ParseFontDescriptionName(desc)) {
85  tprintf("ERROR: Could not parse %s\n", desc.c_str());
86  Clear();
87  }
88 }
#define tprintf(...)
Definition: tprintf.h:31
const int kDefaultResolution
Default resolution used if input is not believable.
Definition: pagesegmain.cpp:60
bool ParseFontDescriptionName(const string &name)
#define NULL
Definition: host.h:144

Member Function Documentation

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len,
vector< string > *  graphemes 
) const

Definition at line 377 of file pango_font_info.cpp.

378  {
379  if (graphemes) graphemes->clear();
380  // We check for font coverage of the text first, as otherwise Pango could
381  // (undesirably) fall back to another font that does have the required
382  // coverage.
383  if (!CoversUTF8Text(utf8_word, len)) {
384  return false;
385  }
386  // U+25CC dotted circle character that often (but not always) gets rendered
387  // when there is an illegal grapheme sequence.
388  const char32 kDottedCircleGlyph = 9676;
389  bool bad_glyph = false;
390  PangoFontMap* font_map = pango_cairo_font_map_get_default();
391  PangoContext* context = pango_context_new();
392  pango_context_set_font_map(context, font_map);
393  PangoLayout* layout;
394  {
395  // Pango is not releasing the cached layout.
397  layout = pango_layout_new(context);
398  }
399  if (desc_) {
400  pango_layout_set_font_description(layout, desc_);
401  } else {
402  PangoFontDescription *desc = pango_font_description_from_string(
403  DescriptionName().c_str());
404  pango_layout_set_font_description(layout, desc);
405  pango_font_description_free(desc);
406  }
407  pango_layout_set_text(layout, utf8_word, len);
408  PangoLayoutIter* run_iter = NULL;
409  { // Fontconfig caches some information here that is not freed before exit.
411  run_iter = pango_layout_get_iter(layout);
412  }
413  do {
414  PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
415  if (!run) {
416  tlog(2, "Found end of line NULL run marker\n");
417  continue;
418  }
419  PangoGlyph dotted_circle_glyph;
420  PangoFont* font = run->item->analysis.font;
421  dotted_circle_glyph = pango_fc_font_get_glyph(
422  reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
423  if (TLOG_IS_ON(2)) {
424  PangoFontDescription* desc = pango_font_describe(font);
425  char* desc_str = pango_font_description_to_string(desc);
426  tlog(2, "Desc of font in run: %s\n", desc_str);
427  g_free(desc_str);
428  pango_font_description_free(desc);
429  }
430 
431  PangoGlyphItemIter cluster_iter;
432  gboolean have_cluster;
433  for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
434  run, utf8_word);
435  have_cluster && !bad_glyph;
436  have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
437  const int start_byte_index = cluster_iter.start_index;
438  const int end_byte_index = cluster_iter.end_index;
439  int start_glyph_index = cluster_iter.start_glyph;
440  int end_glyph_index = cluster_iter.end_glyph;
441  string cluster_text = string(utf8_word + start_byte_index,
442  end_byte_index - start_byte_index);
443  if (graphemes) graphemes->push_back(cluster_text);
444  if (IsUTF8Whitespace(cluster_text.c_str())) {
445  tlog(2, "Skipping whitespace\n");
446  continue;
447  }
448  if (TLOG_IS_ON(2)) {
449  printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
450  start_byte_index, end_byte_index,
451  start_glyph_index, end_glyph_index);
452  }
453  for (int i = start_glyph_index,
454  step = (end_glyph_index > start_glyph_index) ? 1 : -1;
455  !bad_glyph && i != end_glyph_index; i+= step) {
456  const bool unknown_glyph =
457  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
458  PANGO_GLYPH_UNKNOWN_FLAG);
459  const bool illegal_glyph =
460  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
461  dotted_circle_glyph);
462  bad_glyph = unknown_glyph || illegal_glyph;
463  if (TLOG_IS_ON(2)) {
464  printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
465  bad_glyph ? 1 : 0);
466  }
467  }
468  if (TLOG_IS_ON(2)) {
469  printf(" '%s'\n", cluster_text.c_str());
470  }
471  if (bad_glyph)
472  tlog(1, "Found illegal glyph!\n");
473  }
474  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
475 
476  pango_layout_iter_free(run_iter);
477  g_object_unref(context);
478  g_object_unref(layout);
479  if (bad_glyph && graphemes) graphemes->clear();
480  return !bad_glyph;
481 }
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:63
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:182
string DescriptionName() const
#define TLOG_IS_ON(level)
Definition: tlog.h:39
signed int char32
Definition: normstrngs.h:27
#define tlog(level,...)
Definition: tlog.h:33
#define NULL
Definition: host.h:144
bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len 
) const

Definition at line 372 of file pango_font_info.cpp.

372  {
373  vector<string> graphemes;
374  return CanRenderString(utf8_word, len, &graphemes);
375 }
bool CanRenderString(const char *utf8_word, int len, vector< string > *graphemes) const
bool tesseract::PangoFontInfo::CoversUTF8Text ( const char *  utf8_text,
int  byte_length 
) const

Definition at line 252 of file pango_font_info.cpp.

252  {
253  PangoFont* font = ToPangoFont();
254  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
255  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
256  it != UNICHAR::end(utf8_text, byte_length);
257  ++it) {
258  if (IsWhitespace(*it) || pango_is_zero_width(*it))
259  continue;
260  if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
261  char tmp[5];
262  int len = it.get_utf8(tmp);
263  tmp[len] = '\0';
264  tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
265  return false;
266  }
267  }
268  return true;
269 }
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:200
#define tlog(level,...)
Definition: tlog.h:33
#define NULL
Definition: host.h:144
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:204
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:176
string tesseract::PangoFontInfo::DescriptionName ( ) const

Definition at line 104 of file pango_font_info.cpp.

104  {
105  if (!desc_) return "";
106  char* desc_str = pango_font_description_to_string(desc_);
107  string desc_name(desc_str);
108  g_free(desc_str);
109  return desc_name;
110 }
int tesseract::PangoFontInfo::DropUncoveredChars ( string *  utf8_text) const

Definition at line 293 of file pango_font_info.cpp.

293  {
294  PangoFont* font = ToPangoFont();
295  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
296  int num_dropped_chars = 0;
297  // Maintain two iterators that point into the string. For space efficiency, we
298  // will repeatedly copy one covered UTF8 character from one to the other, and
299  // at the end resize the string to the right length.
300  char* out = const_cast<char*>(utf8_text->c_str());
301  const UNICHAR::const_iterator it_begin =
302  UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
303  const UNICHAR::const_iterator it_end =
304  UNICHAR::end(utf8_text->c_str(), utf8_text->length());
305  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
306  // Skip bad utf-8.
307  if (!it.is_legal()) {
308  ++it; // One suitable error message will still be issued.
309  continue;
310  }
311  int unicode = *it;
312  int utf8_len = it.utf8_len();
313  const char* utf8_char = it.utf8_data();
314  // Move it forward before the data gets modified.
315  ++it;
316  if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
317  pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
318  if (TLOG_IS_ON(2)) {
319  UNICHAR unichar(unicode);
320  char* str = unichar.utf8_str();
321  tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
322  delete[] str;
323  }
324  ++num_dropped_chars;
325  continue;
326  }
327  my_strnmove(out, utf8_char, utf8_len);
328  out += utf8_len;
329  }
330  utf8_text->resize(out - utf8_text->c_str());
331  return num_dropped_chars;
332 }
#define TLOG_IS_ON(level)
Definition: tlog.h:39
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:200
#define tlog(level,...)
Definition: tlog.h:33
#define NULL
Definition: host.h:144
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:204
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:176
int utf8_len() const
Definition: unichar.cpp:186
const string& tesseract::PangoFontInfo::family_name ( ) const
inline

Definition at line 94 of file pango_font_info.h.

94 { return family_name_; }
const int tesseract::PangoFontInfo::font_size ( ) const
inline

Definition at line 96 of file pango_font_info.h.

96 { return font_size_; }
const FontTypeEnum tesseract::PangoFontInfo::font_type ( ) const
inline

Definition at line 102 of file pango_font_info.h.

102 { return font_type_; }
bool tesseract::PangoFontInfo::GetSpacingProperties ( const string &  utf8_char,
int *  x_bearing,
int *  x_advance 
) const

Definition at line 334 of file pango_font_info.cpp.

335  {
336  // Convert to equivalent PangoFont structure
337  PangoFont* font = ToPangoFont();
338  // Find the glyph index in the font for the supplied utf8 character.
339  int total_advance = 0;
340  int min_bearing = 0;
341  // Handle multi-unicode strings by reporting the left-most position of the
342  // x-bearing, and right-most position of the x-advance if the string were to
343  // be rendered.
344  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(),
345  utf8_char.length());
346  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(),
347  utf8_char.length());
348  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
349  PangoGlyph glyph_index = pango_fc_font_get_glyph(
350  reinterpret_cast<PangoFcFont*>(font), *it);
351  if (!glyph_index) {
352  // Glyph for given unicode character doesn't exist in font.
353  return false;
354  }
355  // Find the ink glyph extents for the glyph
356  PangoRectangle ink_rect, logical_rect;
357  pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
358  pango_extents_to_pixels(&ink_rect, NULL);
359  pango_extents_to_pixels(&logical_rect, NULL);
360 
361  int bearing = total_advance + PANGO_LBEARING(ink_rect);
362  if (it == it_begin || bearing < min_bearing) {
363  min_bearing = bearing;
364  }
365  total_advance += PANGO_RBEARING(logical_rect);
366  }
367  *x_bearing = min_bearing;
368  *x_advance = total_advance;
369  return true;
370 }
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:200
#define NULL
Definition: host.h:144
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:204
void tesseract::PangoFontInfo::InitFontConfig ( bool  force_clear,
const string &  fonts_dir 
)
static

Definition at line 117 of file pango_font_info.cpp.

117  {
118  if ((fontconfig_initialized_ && !force_clear) || fonts_dir.empty()) {
119  fontconfig_initialized_ = true;
120  return;
121  }
122  if (FLAGS_fontconfig_refresh_cache || force_clear) {
124  FLAGS_fontconfig_tmpdir.c_str(), "*cache-?").c_str());
125  }
126  if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache ||
127  force_clear) {
128  const int MAX_FONTCONF_FILESIZE = 1024;
129  char fonts_conf_template[MAX_FONTCONF_FILESIZE];
130  snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
131  "<?xml version=\"1.0\"?>\n"
132  "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
133  "<fontconfig>\n"
134  "<dir>%s</dir>\n"
135  "<cachedir>%s</cachedir>\n"
136  "<config></config>\n"
137  "</fontconfig>", fonts_dir.c_str(),
138  FLAGS_fontconfig_tmpdir.c_str());
139  string fonts_conf_file = File::JoinPath(FLAGS_fontconfig_tmpdir.c_str(),
140  "fonts.conf");
141  File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
142  }
143 #ifdef _WIN32
144  std::string env("FONTCONFIG_PATH=");
145  env.append(FLAGS_fontconfig_tmpdir.c_str());
146  putenv(env.c_str());
147  putenv("LANG=en_US.utf8");
148 #else
149  setenv("FONTCONFIG_PATH", FLAGS_fontconfig_tmpdir.c_str(), true);
150  // Fix the locale so that the reported font names are consistent.
151  setenv("LANG", "en_US.utf8", true);
152 #endif // _WIN32
153  if (!fontconfig_initialized_ || force_clear) {
154  if (FcInitReinitialize() != FcTrue) {
155  tprintf("FcInitiReinitialize failed!!\n");
156  }
157  }
158  fontconfig_initialized_ = true;
160 }
#define tprintf(...)
Definition: tprintf.h:31
static string JoinPath(const string &prefix, const string &suffix)
Definition: fileio.cpp:89
static void WriteStringToFileOrDie(const string &str, const string &filename)
Definition: fileio.cpp:53
static bool DeleteMatchingFiles(const char *pattern)
Definition: fileio.cpp:118
const bool tesseract::PangoFontInfo::is_bold ( ) const
inline

Definition at line 97 of file pango_font_info.h.

97 { return is_bold_; }
const bool tesseract::PangoFontInfo::is_fraktur ( ) const
inline

Definition at line 101 of file pango_font_info.h.

101 { return is_fraktur_; }
const bool tesseract::PangoFontInfo::is_italic ( ) const
inline

Definition at line 98 of file pango_font_info.h.

98 { return is_italic_; }
const bool tesseract::PangoFontInfo::is_monospace ( ) const
inline

Definition at line 100 of file pango_font_info.h.

100 { return is_monospace_; }
const bool tesseract::PangoFontInfo::is_smallcaps ( ) const
inline

Definition at line 99 of file pango_font_info.h.

99 { return is_smallcaps_; }
bool tesseract::PangoFontInfo::ParseFontDescriptionName ( const string &  name)

Definition at line 227 of file pango_font_info.cpp.

227  {
228  PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
229  bool success = ParseFontDescription(desc);
230  pango_font_description_free(desc);
231  return success;
232 }
name_table name
const int tesseract::PangoFontInfo::resolution ( ) const
inline

Definition at line 104 of file pango_font_info.h.

104 { return resolution_; }
void tesseract::PangoFontInfo::set_resolution ( const int  resolution)
inline

Definition at line 105 of file pango_font_info.h.

105  {
106  resolution_ = resolution;
107  }
const int resolution() const

Friends And Related Function Documentation

friend class FontUtils
friend

Definition at line 110 of file pango_font_info.h.


The documentation for this class was generated from the following files: