All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::FontUtils Class Reference

#include <pango_font_info.h>

Static Public Member Functions

static bool IsAvailableFont (const char *font_desc)
 
static bool IsAvailableFont (const char *font_desc, string *best_match)
 
static const vector< string > & ListAvailableFonts ()
 
static bool SelectFont (const char *utf8_word, const int utf8_len, string *font_name, vector< string > *graphemes)
 
static bool SelectFont (const char *utf8_word, const int utf8_len, const vector< string > &all_fonts, string *font_name, vector< string > *graphemes)
 
static void GetAllRenderableCharacters (vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const vector< string > &font_names, vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const string &font_name, vector< bool > *unichar_bitmap)
 
static string BestFonts (const unordered_map< char32, inT64 > &ch_map, vector< std::pair< const char *, vector< bool > > > *font_flag)
 
static int FontScore (const unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, vector< bool > *ch_flags)
 
static void ReInit ()
 

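Detailed Description

Collection of static helper functions for working with the fonts visible to Pango: listing the available fonts, checking whether a font description is available, scoring fonts against a character map, selecting a font that can render a given UTF-8 string, and computing which characters a font or set of fonts covers.

Definition at line 143 of file pango_font_info.h.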

Member Function Documentation

string tesseract::FontUtils::BestFonts ( const unordered_map< char32, inT64 > &  ch_map,
vector< std::pair< const char *, vector< bool > > > *  font_flag 
)
static

Definition at line 702 of file pango_font_info.cpp.

703  {
704  const double kMinOKFraction = 0.99;
705  // Weighted fraction of characters that must be renderable in a font to make
706  // it OK even if the raw count is not good.
707  const double kMinWeightedFraction = 0.99995;
708 
709  fonts->clear();
710  vector<vector<bool> > font_flags;
711  vector<int> font_scores;
712  vector<int> raw_scores;
713  int most_ok_chars = 0;
714  int best_raw_score = 0;
715  const vector<string>& font_names = FontUtils::ListAvailableFonts();
716  for (int i = 0; i < font_names.size(); ++i) {
717  vector<bool> ch_flags;
718  int raw_score = 0;
719  int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
720  most_ok_chars = MAX(ok_chars, most_ok_chars);
721  best_raw_score = MAX(raw_score, best_raw_score);
722 
723  font_flags.push_back(ch_flags);
724  font_scores.push_back(ok_chars);
725  raw_scores.push_back(raw_score);
726  }
727 
728  // Now select the fonts with a score above a threshold fraction
729  // of both the raw and weighted best scores. To prevent bogus fonts being
730  // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
731  // BOTH weighted and raw scores.
732  // In low character-count scripts, the issue is more getting enough fonts,
733  // when only 1 or 2 might have all those rare dingbats etc in them, so we
734  // allow a font with a very high weighted (coverage) score
735  // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
736  int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
737  int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
738  int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
739 
740  string font_list;
741  for (int i = 0; i < font_names.size(); ++i) {
742  int score = font_scores[i];
743  int raw_score = raw_scores[i];
744  if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
745  score >= override_enough) {
746  fonts->push_back(make_pair(font_names[i].c_str(), font_flags[i]));
747  tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
748  font_names[i].c_str(),
749  100.0 * score / most_ok_chars,
750  raw_score, 100.0 * raw_score / best_raw_score);
751  font_list += font_names[i];
752  font_list += "\n";
753  } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
754  tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
755  font_names[i].c_str(),
756  100.0 * score / most_ok_chars,
757  raw_score, 100.0 * raw_score / best_raw_score);
758  }
759  }
760  return font_list;
761 }
#define MAX(x, y)
Definition: ndminx.h:24
static int FontScore(const unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, vector< bool > *ch_flags)
#define tlog(level,...)
Definition: tlog.h:33
static const vector< string > & ListAvailableFonts()
int tesseract::FontUtils::FontScore ( const unordered_map< char32, inT64 > &  ch_map,
const string &  fontname,
int *  raw_score,
vector< bool > *  ch_flags 
)
static

Definition at line 667 of file pango_font_info.cpp.

670  {
671  PangoFontInfo font_info;
672  if (!font_info.ParseFontDescriptionName(fontname)) {
673  tprintf("ERROR: Could not parse %s\n", fontname.c_str());
674  }
675  PangoFont* font = font_info.ToPangoFont();
676  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
677 
678  if (ch_flags) {
679  ch_flags->clear();
680  ch_flags->reserve(ch_map.size());
681  }
682  *raw_score = 0;
683  int ok_chars = 0;
684  for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
685  it != ch_map.end(); ++it) {
686  bool covered = (IsWhitespace(it->first) ||
687  (pango_coverage_get(coverage, it->first)
688  == PANGO_COVERAGE_EXACT));
689  if (covered) {
690  ++(*raw_score);
691  ok_chars += it->second;
692  }
693  if (ch_flags) {
694  ch_flags->push_back(covered);
695  }
696  }
697  return ok_chars;
698 }
#define tprintf(...)
Definition: tprintf.h:31
#define NULL
Definition: host.h:144
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:176
void tesseract::FontUtils::GetAllRenderableCharacters ( vector< bool > *  unichar_bitmap)
static

Definition at line 632 of file pango_font_info.cpp.

632  {
633  const vector<string>& all_fonts = ListAvailableFonts();
634  return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
635 }
static void GetAllRenderableCharacters(vector< bool > *unichar_bitmap)
static const vector< string > & ListAvailableFonts()
void tesseract::FontUtils::GetAllRenderableCharacters ( const vector< string > &  font_names,
vector< bool > *  unichar_bitmap 
)
static

Definition at line 647 of file pango_font_info.cpp.

648  {
649  // Form the union of coverage maps from the fonts
650  PangoCoverage* all_coverage = pango_coverage_new();
651  tlog(1, "Processing %d fonts\n", fonts.size());
652  for (int i = 0; i < fonts.size(); ++i) {
653  PangoFontInfo font_info(fonts[i]);
654  PangoCoverage* coverage = pango_font_get_coverage(
655  font_info.ToPangoFont(), NULL);
656  // Mark off characters that any font can render.
657  pango_coverage_max(all_coverage, coverage);
658  }
659  CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
660  pango_coverage_unref(all_coverage);
661 }
#define tlog(level,...)
Definition: tlog.h:33
#define NULL
Definition: host.h:144
void tesseract::FontUtils::GetAllRenderableCharacters ( const string &  font_name,
vector< bool > *  unichar_bitmap 
)
static

Definition at line 638 of file pango_font_info.cpp.

639  {
640  PangoFontInfo font_info(font_name);
641  PangoCoverage* coverage = pango_font_get_coverage(
642  font_info.ToPangoFont(), NULL);
643  CharCoverageMapToBitmap(coverage, unichar_bitmap);
644 }
#define NULL
Definition: host.h:144
static bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc)
inline static

Definition at line 147 of file pango_font_info.h.

147  {
148  return IsAvailableFont(font_desc, NULL);
149  }
static bool IsAvailableFont(const char *font_desc)
#define NULL
Definition: host.h:144
bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc,
string *  best_match 
)
static

Definition at line 497 of file pango_font_info.cpp.

498  {
499  string query_desc(input_query_desc);
500  if (PANGO_VERSION <= 12005) {
501  // Strip commas and any ' Medium' substring in the name.
502  query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','),
503  query_desc.end());
504  const string kMediumStr = " Medium";
505  std::size_t found = query_desc.find(kMediumStr);
506  if (found != std::string::npos) {
507  query_desc.erase(found, kMediumStr.length());
508  }
509  }
510 
511  PangoFontDescription *desc = pango_font_description_from_string(
512  query_desc.c_str());
513  PangoFont* selected_font = NULL;
514  {
515  PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
516  PangoFontMap* font_map = pango_cairo_font_map_get_default();
517  PangoContext* context = pango_context_new();
518  pango_context_set_font_map(context, font_map);
519  {
521  selected_font = pango_font_map_load_font(font_map, context, desc);
522  }
523  g_object_unref(context);
524  }
525  if (selected_font == NULL) {
526  pango_font_description_free(desc);
527  return false;
528  }
529  PangoFontDescription* selected_desc = pango_font_describe(selected_font);
530 
531  bool equal = pango_font_description_equal(desc, selected_desc);
532  tlog(3, "query weight = %d \t selected weight =%d\n",
533  pango_font_description_get_weight(desc),
534  pango_font_description_get_weight(selected_desc));
535 
536  char* selected_desc_str = pango_font_description_to_string(selected_desc);
537  tlog(2, "query_desc: '%s' Selected: 's'\n", query_desc.c_str(),
538  selected_desc_str);
539  if (!equal && best_match != NULL) {
540  *best_match = selected_desc_str;
541  // Clip the ending ' 0' if there is one. It seems that, if there is no
542  // point size on the end of the fontname, then Pango always appends ' 0'.
543  int len = best_match->size();
544  if (len > 2 && best_match->at(len - 1) == '0' &&
545  best_match->at(len - 2) == ' ') {
546  *best_match = best_match->substr(0, len - 2);
547  }
548  }
549  g_free(selected_desc_str);
550  pango_font_description_free(selected_desc);
551  g_object_unref(selected_font);
552  pango_font_description_free(desc);
553  return equal;
554 }
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:63
static void InitFontConfig(bool force_clear, const string &fonts_dir)
#define tlog(level,...)
Definition: tlog.h:33
#define NULL
Definition: host.h:144
const vector< string > & tesseract::FontUtils::ListAvailableFonts ( )
static

Definition at line 569 of file pango_font_info.cpp.

569  {
570  if (available_fonts_.size()) {
571  return available_fonts_;
572  }
573 #ifndef USE_STD_NAMESPACE
574  if (FLAGS_use_only_legacy_fonts) {
575  // Restrict view to list of fonts in legacy_fonts.h
576  tprintf("Using list of legacy fonts only\n");
577  const int kNumFontLists = 4;
578  for (int i = 0; i < kNumFontLists; ++i) {
579  for (int j = 0; kFontlists[i][j] != NULL; ++j) {
580  available_fonts_.push_back(kFontlists[i][j]);
581  }
582  }
583  return available_fonts_;
584  }
585 #endif
586 
587  PangoFontFamily** families = 0;
588  int n_families = 0;
589  ListFontFamilies(&families, &n_families);
590  for (int i = 0; i < n_families; ++i) {
591  const char* family_name = pango_font_family_get_name(families[i]);
592  tlog(2, "Listing family %s\n", family_name);
593  if (ShouldIgnoreFontFamilyName(family_name)) {
594  continue;
595  }
596 
597  int n_faces;
598  PangoFontFace** faces = NULL;
599  pango_font_family_list_faces(families[i], &faces, &n_faces);
600  for (int j = 0; j < n_faces; ++j) {
601  PangoFontDescription* desc = pango_font_face_describe(faces[j]);
602  char* desc_str = pango_font_description_to_string(desc);
603  if (IsAvailableFont(desc_str)) {
604  available_fonts_.push_back(desc_str);
605  }
606  pango_font_description_free(desc);
607  g_free(desc_str);
608  }
609  g_free(faces);
610  }
611  g_free(families);
612  sort(available_fonts_.begin(), available_fonts_.end());
613  return available_fonts_;
614 }
#define tprintf(...)
Definition: tprintf.h:31
static bool IsAvailableFont(const char *font_desc)
#define tlog(level,...)
Definition: tlog.h:33
#define NULL
Definition: host.h:144
void tesseract::FontUtils::ReInit ( )
static

Definition at line 793 of file pango_font_info.cpp.

793 { available_fonts_.clear(); }
bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
string *  font_name,
vector< string > *  graphemes 
)
static

Definition at line 764 of file pango_font_info.cpp.

765  {
766  return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
767  graphemes);
768 }
static bool SelectFont(const char *utf8_word, const int utf8_len, string *font_name, vector< string > *graphemes)
static const vector< string > & ListAvailableFonts()
bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
const vector< string > &  all_fonts,
string *  font_name,
vector< string > *  graphemes 
)
static

Definition at line 771 of file pango_font_info.cpp.

773  {
774  if (font_name) font_name->clear();
775  if (graphemes) graphemes->clear();
776  for (int i = 0; i < all_fonts.size(); ++i) {
777  PangoFontInfo font;
778  vector<string> found_graphemes;
779  ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
780  "Could not parse font desc name %s\n",
781  all_fonts[i].c_str());
782  if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
783  if (graphemes) graphemes->swap(found_graphemes);
784  if (font_name) *font_name = all_fonts[i];
785  return true;
786  }
787  }
788  return false;
789 }
#define ASSERT_HOST_MSG(x, msg...)
Definition: errcode.h:98

The documentation for this class was generated from the following files: