All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
pango_font_info.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pango_font_info.h
3  * Description: Font-related objects and helper functions
4  * Author: Ranjith Unnikrishnan
5  * Created: Mon Nov 18 2013
6  *
7  * (C) Copyright 2013, Google Inc.
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  * http://www.apache.org/licenses/LICENSE-2.0
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
21 #define TESSERACT_TRAINING_PANGO_FONT_INFO_H_
22 
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "hashfn.h"
28 #include "host.h"
29 #include "util.h"
30 #include "pango/pango-font.h"
31 
32 typedef signed int char32;
33 
34 namespace tesseract {
35 
36 // Data holder class for a font, intended to avoid having to work with Pango or
37 // FontConfig-specific objects directly.
39  public:
40  enum FontTypeEnum {
45  };
46  PangoFontInfo();
47  // Initialize from parsing a font description name, defined as a string of the
48  // format:
49  // "FamilyName [FaceName] [PointSize]"
50  // where a missing FaceName implies the default regular face.
51  // eg. "Arial Italic 12", "Verdana"
52  //
53  // FaceName is a combination of:
54  // [StyleName] [Variant] [Weight] [Stretch]
55  // with (all optional) Pango-defined values of:
56  // StyleName: Oblique, Italic
57  // Variant : Small-Caps
58  // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
59  // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
60  // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
61  explicit PangoFontInfo(const string& name);
62  bool ParseFontDescriptionName(const string& name);
63 
64  // Returns true if the font has codepoint coverage for the specified text.
65  bool CoversUTF8Text(const char* utf8_text, int byte_length) const;
66  // Modifies string to remove unicode points that are not covered by the
67  // font. Returns the number of characters dropped.
68  int DropUncoveredChars(string* utf8_text) const;
69 
70  // Returns true if the entire string can be rendered by the font with full
71  // character coverage and no unknown glyph or dotted-circle glyph
72  // substitutions on encountering a badly formed unicode sequence.
73  // If true, returns individual graphemes. Any whitespace characters in the
74  // original string are also included in the list.
75  bool CanRenderString(const char* utf8_word, int len,
76  vector<string>* graphemes) const;
77  bool CanRenderString(const char* utf8_word, int len) const;
78 
79  // Retrieves the x_bearing and x_advance for the given utf8 character in the
80  // font. Returns false if the glyph for the character could not be found in
81  // the font.
82  // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
83  bool GetSpacingProperties(const string& utf8_char,
84  int* x_bearing, int* x_advance) const;
85 
86  // Initializes FontConfig by setting its environment variable and creating
87  // a fonts.conf file that points to the given fonts_dir. Once initialized,
88  // it is not re-initialized unless force_clear is true.
89  static void InitFontConfig(bool force_clear, const string& fonts_dir);
90 
91  // Accessors
92  string DescriptionName() const;
93  // Font Family name eg. "Arial"
94  const string& family_name() const { return family_name_; }
95  // Size in points (1/72"), rounded to the nearest integer.
96  const int font_size() const { return font_size_; }
97  const bool is_bold() const { return is_bold_; }
98  const bool is_italic() const { return is_italic_; }
99  const bool is_smallcaps() const { return is_smallcaps_; }
100  const bool is_monospace() const { return is_monospace_; }
101  const bool is_fraktur() const { return is_fraktur_; }
102  const FontTypeEnum font_type() const { return font_type_; }
103 
104  const int resolution() const { return resolution_; }
105  void set_resolution(const int resolution) {
106  resolution_ = resolution;
107  }
108 
109  private:
110  friend class FontUtils;
111  void Clear();
112  bool ParseFontDescription(const PangoFontDescription* desc);
113  // Returns the PangoFont structure corresponding to the closest available font
114  // in the font map.
115  PangoFont* ToPangoFont() const;
116 
117  // Font properties set automatically from parsing the font description name.
118  string family_name_;
119  int font_size_;
120  bool is_bold_;
121  bool is_italic_;
122  bool is_smallcaps_;
123  bool is_monospace_;
124  bool is_fraktur_;
125  FontTypeEnum font_type_;
126  // The Pango description that was used to initialize the instance.
127  PangoFontDescription* desc_;
128  // Default output resolution to assume for GetSpacingProperties() and any
129  // other methods that return pixel values.
130  int resolution_;
131  // Fontconfig operates through an environment variable, so it intrinsically
132  // cannot be thread-friendly, but you can serialize multiple independent
133  // font configurations by calling InitFontConfig(true, path).
134  static bool fontconfig_initialized_;
135 
136  private:
138  void operator=(const PangoFontInfo&);
139 };
140 
141 // Static utility methods for querying font availability and font-selection
142 // based on codepoint coverage.
143 class FontUtils {
144  public:
145  // Returns true if the font of the given description name is available in the
146  // target directory specified by --fonts_dir (forwards with a NULL best_match).
147  static bool IsAvailableFont(const char* font_desc) {
148  return IsAvailableFont(font_desc, NULL);
149  }
150  // Returns true if the font of the given description name is available in the
151  // target directory specified by --fonts_dir. If false is returned, and
152  // best_match is not NULL, the closest matching font is returned there.
153  static bool IsAvailableFont(const char* font_desc, string* best_match);
154  // Returns a reference to the list of description names of available fonts.
155  static const vector<string>& ListAvailableFonts();
156 
157  // Picks a font among the available fonts that covers and can render the given
158  // word, and returns the font description name (in font_name) and the
159  // decomposition of the word to graphemes. Returns false if no suitable font was found.
160  static bool SelectFont(const char* utf8_word, const int utf8_len,
161  string* font_name, vector<string>* graphemes);
162 
163  // Picks a font among all_fonts that covers and can render the given word,
164  // and returns the font description name (in font_name) and the decomposition
165  // of the word to graphemes. Returns false if no suitable font was found.
166  static bool SelectFont(const char* utf8_word, const int utf8_len,
167  const vector<string>& all_fonts,
168  string* font_name, vector<string>* graphemes);
169 
170  // Fills unichar_bitmap with a bitmask where the value of true at index 'n'
171  // implies that unicode value 'n' is renderable by at least one available font.
172  static void GetAllRenderableCharacters(vector<bool>* unichar_bitmap);
173  // Variants of the above function that inspect only the provided font name(s).
174  static void GetAllRenderableCharacters(const vector<string>& font_names,
175  vector<bool>* unichar_bitmap);
176  static void GetAllRenderableCharacters(const string& font_name,
177  vector<bool>* unichar_bitmap);
178 
179  // NOTE: The following utilities were written to be backward compatible with
180  // StringRender.
181 
182  // BestFonts returns a font name and a bit vector of the characters it
183  // can render for the fonts that score within some fraction of the best
184  // font on the characters in the given hash map (results go to font_flag).
185  // In the flags vector, each flag is set according to whether the
186  // corresponding character (in order of iterating ch_map) can be rendered.
187  // The return string is a list of the acceptable fonts that were used.
188  static string BestFonts(const unordered_map<char32, inT64>& ch_map,
189  vector<std::pair<const char*, vector<bool> > >* font_flag);
190 
191  // FontScore returns the weighted renderability score of the given
192  // hash map character table in the given font. The unweighted score
193  // is also returned in raw_score.
194  // The values in the bool vector ch_flags correspond to whether the
195  // corresponding character (in order of iterating ch_map) can be rendered.
196  static int FontScore(const unordered_map<char32, inT64>& ch_map,
197  const string& fontname, int* raw_score,
198  vector<bool>* ch_flags);
199 
200  // PangoFontInfo is reinitialized, so clear the static list of fonts.
201  static void ReInit();
202 
203  private:
204  static vector<string> available_fonts_; // cache list
205 };
206 } // namespace tesseract
207 
208 #endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_
const FontTypeEnum font_type() const
const bool is_italic() const
static string BestFonts(const unordered_map< char32, inT64 > &ch_map, vector< std::pair< const char *, vector< bool > > > *font_flag)
signed int char32
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
bool GetSpacingProperties(const string &utf8_char, int *x_bearing, int *x_advance) const
const bool is_bold() const
static bool SelectFont(const char *utf8_word, const int utf8_len, string *font_name, vector< string > *graphemes)
static bool IsAvailableFont(const char *font_desc)
const bool is_fraktur() const
int DropUncoveredChars(string *utf8_text) const
const bool is_monospace() const
string DescriptionName() const
bool ParseFontDescriptionName(const string &name)
name_table name
const int font_size() const
static void InitFontConfig(bool force_clear, const string &fonts_dir)
static void GetAllRenderableCharacters(vector< bool > *unichar_bitmap)
static int FontScore(const unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, vector< bool > *ch_flags)
bool CanRenderString(const char *utf8_word, int len, vector< string > *graphemes) const
#define NULL
Definition: host.h:144
const bool is_smallcaps() const
static const vector< string > & ListAvailableFonts()
void set_resolution(const int resolution)
const string & family_name() const
const int resolution() const