tesseract  4.00.00dev
tesseract::FontUtils Class Reference

#include <pango_font_info.h>

Static Public Member Functions

static bool IsAvailableFont (const char *font_desc)
 
static bool IsAvailableFont (const char *font_desc, string *best_match)
 
static const std::vector< string > & ListAvailableFonts ()
 
static bool SelectFont (const char *utf8_word, const int utf8_len, string *font_name, std::vector< string > *graphemes)
 
static bool SelectFont (const char *utf8_word, const int utf8_len, const std::vector< string > &all_fonts, string *font_name, std::vector< string > *graphemes)
 
static void GetAllRenderableCharacters (std::vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const std::vector< string > &font_names, std::vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const string &font_name, std::vector< bool > *unichar_bitmap)
 
static string BestFonts (const std::unordered_map< char32, inT64 > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
 
static int FontScore (const std::unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, std::vector< bool > *ch_flags)
 
static void ReInit ()
 

Detailed Description

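Static utility functions for checking whether a font is available, listing the available fonts, and scoring or selecting fonts by the characters they can render (using Pango font coverage).

Definition at line 150 of file pango_font_info.h.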

Member Function Documentation

◆ BestFonts()

string tesseract::FontUtils::BestFonts ( const std::unordered_map< char32, inT64 > &  ch_map,
std::vector< std::pair< const char *, std::vector< bool > > > *  font_flag 
)
static

Definition at line 687 of file pango_font_info.cpp.

689  {
690  const double kMinOKFraction = 0.99;
691  // Weighted fraction of characters that must be renderable in a font to make
692  // it OK even if the raw count is not good.
693  const double kMinWeightedFraction = 0.99995;
694 
695  fonts->clear();
696  std::vector<std::vector<bool> > font_flags;
697  std::vector<int> font_scores;
698  std::vector<int> raw_scores;
699  int most_ok_chars = 0;
700  int best_raw_score = 0;
701  const std::vector<string>& font_names = FontUtils::ListAvailableFonts();
702  for (unsigned i = 0; i < font_names.size(); ++i) {
703  std::vector<bool> ch_flags;
704  int raw_score = 0;
705  int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
706  most_ok_chars = MAX(ok_chars, most_ok_chars);
707  best_raw_score = MAX(raw_score, best_raw_score);
708 
709  font_flags.push_back(ch_flags);
710  font_scores.push_back(ok_chars);
711  raw_scores.push_back(raw_score);
712  }
713 
714  // Now select the fonts with a score above a threshold fraction
715  // of both the raw and weighted best scores. To prevent bogus fonts being
716  // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
717  // BOTH weighted and raw scores.
718  // In low character-count scripts, the issue is more getting enough fonts,
719  // when only 1 or 2 might have all those rare dingbats etc in them, so we
720  // allow a font with a very high weighted (coverage) score
721  // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
722  int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
723  int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
724  int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
725 
726  string font_list;
727  for (unsigned i = 0; i < font_names.size(); ++i) {
728  int score = font_scores[i];
729  int raw_score = raw_scores[i];
730  if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
731  score >= override_enough) {
732  fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
733  tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
734  font_names[i].c_str(),
735  100.0 * score / most_ok_chars,
736  raw_score, 100.0 * raw_score / best_raw_score);
737  font_list += font_names[i];
738  font_list += "\n";
739  } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
740  tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
741  font_names[i].c_str(),
742  100.0 * score / most_ok_chars,
743  raw_score, 100.0 * raw_score / best_raw_score);
744  }
745  }
746  return font_list;
747 }
static const std::vector< string > & ListAvailableFonts()
#define MAX(x, y)
Definition: ndminx.h:24
static int FontScore(const std::unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, std::vector< bool > *ch_flags)
#define tlog(level,...)
Definition: tlog.h:33

◆ FontScore()

int tesseract::FontUtils::FontScore ( const std::unordered_map< char32, inT64 > &  ch_map,
const string &  fontname,
int *  raw_score,
std::vector< bool > *  ch_flags 
)
static

Definition at line 651 of file pango_font_info.cpp.

653  {
654  PangoFontInfo font_info;
655  if (!font_info.ParseFontDescriptionName(fontname)) {
656  tprintf("ERROR: Could not parse %s\n", fontname.c_str());
657  }
658  PangoFont* font = font_info.ToPangoFont();
659  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
660 
661  if (ch_flags) {
662  ch_flags->clear();
663  ch_flags->reserve(ch_map.size());
664  }
665  *raw_score = 0;
666  int ok_chars = 0;
667  for (std::unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
668  it != ch_map.end(); ++it) {
669  bool covered = (IsWhitespace(it->first) ||
670  (pango_coverage_get(coverage, it->first)
671  == PANGO_COVERAGE_EXACT));
672  if (covered) {
673  ++(*raw_score);
674  ok_chars += it->second;
675  }
676  if (ch_flags) {
677  ch_flags->push_back(covered);
678  }
679  }
680  pango_coverage_unref(coverage);
681  g_object_unref(font);
682  return ok_chars;
683 }
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:223
#define tprintf(...)
Definition: tprintf.h:31

◆ GetAllRenderableCharacters() [1/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( std::vector< bool > *  unichar_bitmap)
static

Definition at line 612 of file pango_font_info.cpp.

612  {
613  const std::vector<string>& all_fonts = ListAvailableFonts();
614  return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
615 }
static const std::vector< string > & ListAvailableFonts()
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)

◆ GetAllRenderableCharacters() [2/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( const std::vector< string > &  font_names,
std::vector< bool > *  unichar_bitmap 
)
static

Definition at line 629 of file pango_font_info.cpp.

630  {
631  // Form the union of coverage maps from the fonts
632  PangoCoverage* all_coverage = pango_coverage_new();
633  tlog(1, "Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
634  for (unsigned i = 0; i < fonts.size(); ++i) {
635  PangoFontInfo font_info(fonts[i]);
636  PangoFont* font = font_info.ToPangoFont();
637  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
638  // Mark off characters that any font can render.
639  pango_coverage_max(all_coverage, coverage);
640  pango_coverage_unref(coverage);
641  g_object_unref(font);
642  }
643  CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
644  pango_coverage_unref(all_coverage);
645 }
#define tlog(level,...)
Definition: tlog.h:33

◆ GetAllRenderableCharacters() [3/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( const string &  font_name,
std::vector< bool > *  unichar_bitmap 
)
static

Definition at line 618 of file pango_font_info.cpp.

619  {
620  PangoFontInfo font_info(font_name);
621  PangoFont* font = font_info.ToPangoFont();
622  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
623  CharCoverageMapToBitmap(coverage, unichar_bitmap);
624  pango_coverage_unref(coverage);
625  g_object_unref(font);
626 }

◆ IsAvailableFont() [1/2]

static bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc)
inline static

Definition at line 154 of file pango_font_info.h.

154  {
155  return IsAvailableFont(font_desc, nullptr);
156  }
static bool IsAvailableFont(const char *font_desc)

◆ IsAvailableFont() [2/2]

bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc,
string *  best_match 
)
static

Definition at line 478 of file pango_font_info.cpp.

479  {
480  string query_desc(input_query_desc);
481 #if (PANGO_VERSION <= 12005)
482  // Strip commas and any ' Medium' substring in the name.
483  query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','),
484  query_desc.end());
485  const string kMediumStr = " Medium";
486  std::size_t found = query_desc.find(kMediumStr);
487  if (found != std::string::npos) {
488  query_desc.erase(found, kMediumStr.length());
489  }
490 #endif
491  PangoFontDescription *desc = pango_font_description_from_string(
492  query_desc.c_str());
493  PangoFont* selected_font = nullptr;
494  {
496  PangoFontMap* font_map = pango_cairo_font_map_get_default();
497  PangoContext* context = pango_context_new();
498  pango_context_set_font_map(context, font_map);
499  {
501  selected_font = pango_font_map_load_font(font_map, context, desc);
502  }
503  g_object_unref(context);
504  }
505  if (selected_font == nullptr) {
506  pango_font_description_free(desc);
507  return false;
508  }
509  PangoFontDescription* selected_desc = pango_font_describe(selected_font);
510 
511  bool equal = pango_font_description_equal(desc, selected_desc);
512  tlog(3, "query weight = %d \t selected weight =%d\n",
513  pango_font_description_get_weight(desc),
514  pango_font_description_get_weight(selected_desc));
515 
516  char* selected_desc_str = pango_font_description_to_string(selected_desc);
517  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
518  selected_desc_str);
519  if (!equal && best_match != nullptr) {
520  *best_match = selected_desc_str;
521  // Clip the ending ' 0' if there is one. It seems that, if there is no
522  // point size on the end of the fontname, then Pango always appends ' 0'.
523  int len = best_match->size();
524  if (len > 2 && best_match->at(len - 1) == '0' &&
525  best_match->at(len - 2) == ' ') {
526  *best_match = best_match->substr(0, len - 2);
527  }
528  }
529  g_free(selected_desc_str);
530  pango_font_description_free(selected_desc);
531  g_object_unref(selected_font);
532  pango_font_description_free(desc);
533  return equal;
534 }
#define tlog(level,...)
Definition: tlog.h:33
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:60

◆ ListAvailableFonts()

const std::vector< string > & tesseract::FontUtils::ListAvailableFonts ( )
static

Definition at line 549 of file pango_font_info.cpp.

549  {
550  if (!available_fonts_.empty()) {
551  return available_fonts_;
552  }
553 #ifdef GOOGLE_TESSERACT
554  if (FLAGS_use_only_legacy_fonts) {
555  // Restrict view to list of fonts in legacy_fonts.h
556  tprintf("Using list of legacy fonts only\n");
557  const int kNumFontLists = 4;
558  for (int i = 0; i < kNumFontLists; ++i) {
559  for (int j = 0; kFontlists[i][j] != nullptr; ++j) {
560  available_fonts_.push_back(kFontlists[i][j]);
561  }
562  }
563  return available_fonts_;
564  }
565 #endif
566 
567  PangoFontFamily** families = 0;
568  int n_families = 0;
569  ListFontFamilies(&families, &n_families);
570  for (int i = 0; i < n_families; ++i) {
571  const char* family_name = pango_font_family_get_name(families[i]);
572  tlog(2, "Listing family %s\n", family_name);
573  if (ShouldIgnoreFontFamilyName(family_name)) {
574  continue;
575  }
576 
577  int n_faces;
578  PangoFontFace** faces = nullptr;
579  pango_font_family_list_faces(families[i], &faces, &n_faces);
580  for (int j = 0; j < n_faces; ++j) {
581  PangoFontDescription* desc = pango_font_face_describe(faces[j]);
582  char* desc_str = pango_font_description_to_string(desc);
583  if (IsAvailableFont(desc_str)) {
584  available_fonts_.push_back(desc_str);
585  }
586  pango_font_description_free(desc);
587  g_free(desc_str);
588  }
589  g_free(faces);
590  }
591  g_free(families);
592  std::sort(available_fonts_.begin(), available_fonts_.end());
593  return available_fonts_;
594 }
#define tprintf(...)
Definition: tprintf.h:31
#define tlog(level,...)
Definition: tlog.h:33
static bool IsAvailableFont(const char *font_desc)

◆ ReInit()

void tesseract::FontUtils::ReInit ( )
static

Definition at line 779 of file pango_font_info.cpp.

779 { available_fonts_.clear(); }

◆ SelectFont() [1/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
string *  font_name,
std::vector< string > *  graphemes 
)
static

Definition at line 750 of file pango_font_info.cpp.

751  {
752  return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
753  graphemes);
754 }
static const std::vector< string > & ListAvailableFonts()
static bool SelectFont(const char *utf8_word, const int utf8_len, string *font_name, std::vector< string > *graphemes)

◆ SelectFont() [2/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
const std::vector< string > &  all_fonts,
string *  font_name,
std::vector< string > *  graphemes 
)
static

Definition at line 757 of file pango_font_info.cpp.

759  {
760  if (font_name) font_name->clear();
761  if (graphemes) graphemes->clear();
762  for (unsigned i = 0; i < all_fonts.size(); ++i) {
763  PangoFontInfo font;
764  std::vector<string> found_graphemes;
765  ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
766  "Could not parse font desc name %s\n",
767  all_fonts[i].c_str());
768  if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
769  if (graphemes) graphemes->swap(found_graphemes);
770  if (font_name) *font_name = all_fonts[i];
771  return true;
772  }
773  }
774  return false;
775 }
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:90

The documentation for this class was generated from the following files: