tesseract v5.3.3.20231005
pango_font_info.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: pango_font_info.h
3 * Description: Font-related objects and helper functions
4 * Author: Ranjith Unnikrishnan
5 * Created: Mon Nov 18 2013
6 *
7 * (C) Copyright 2013, Google Inc.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 **********************************************************************/
19
20#ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
21#define TESSERACT_TRAINING_PANGO_FONT_INFO_H_
22
23#include "export.h"
24
25#include "commandlineflags.h"
26
27#include "pango/pango-font.h"
28#include "pango/pango.h"
29#include "pango/pangocairo.h"
30
31#include <string>
32#include <unordered_map>
33#include <utility>
34#include <vector>
35
// Integer type used for Unicode code points, e.g. as the key type of the
// character maps passed to FontUtils::BestFonts() and FontUtils::FontScore().
using char32 = int;
37
38namespace tesseract {
39
40// Data holder class for a font, intended to avoid having to work with Pango or
41// FontConfig-specific objects directly.
42class TESS_PANGO_TRAINING_API PangoFontInfo {
43public:
49 };
52 // Initialize from parsing a font description name, defined as a string of the
53 // format:
54 // "FamilyName [FaceName] [PointSize]"
55 // where a missing FaceName implies the default regular face.
56 // eg. "Arial Italic 12", "Verdana"
57 //
58 // FaceName is a combination of:
59 // [StyleName] [Variant] [Weight] [Stretch]
60 // with (all optional) Pango-defined values of:
61 // StyleName: Oblique, Italic
62 // Variant : Small-Caps
63 // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
64 // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
65 // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
66 explicit PangoFontInfo(const std::string &name);
67 bool ParseFontDescriptionName(const std::string &name);
68
// Returns true if the font has codepoint coverage for the specified text.
70 bool CoversUTF8Text(const char *utf8_text, int byte_length) const;
71 // Modifies string to remove unicode points that are not covered by the
72 // font. Returns the number of characters dropped.
73 int DropUncoveredChars(std::string *utf8_text) const;
74
75 // Returns true if the entire string can be rendered by the font with full
76 // character coverage and no unknown glyph or dotted-circle glyph
77 // substitutions on encountering a badly formed unicode sequence.
78 // If true, returns individual graphemes. Any whitespace characters in the
79 // original string are also included in the list.
80 bool CanRenderString(const char *utf8_word, int len, std::vector<std::string> *graphemes) const;
81 bool CanRenderString(const char *utf8_word, int len) const;
82
83 // Retrieves the x_bearing and x_advance for the given utf8 character in the
84 // font. Returns false if the glyph for the character could not be found in
85 // the font.
86 // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
87 bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const;
88
89 // If not already initialized, initializes FontConfig by setting its
90 // environment variable and creating a fonts.conf file that points to the
91 // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
92 static void SoftInitFontConfig();
93 // Re-initializes font config, whether or not already initialized.
94 // If already initialized, any existing cache is deleted, just to be sure.
95 static void HardInitFontConfig(const char *fonts_dir, const char *cache_dir);
96
97 // Accessors
98 std::string DescriptionName() const;
99 // Font Family name eg. "Arial"
100 const std::string &family_name() const {
101 return family_name_;
102 }
103 // Size in points (1/72"), rounded to the nearest integer.
104 int font_size() const {
105 return font_size_;
106 }
108 return font_type_;
109 }
110
111 int resolution() const {
112 return resolution_;
113 }
114 void set_resolution(const int resolution) {
115 resolution_ = resolution;
116 }
117
118private:
119 friend class FontUtils;
120 void Clear();
121 bool ParseFontDescription(const PangoFontDescription *desc);
122 // Returns the PangoFont structure corresponding to the closest available font
123 // in the font map.
124 PangoFont *ToPangoFont() const;
125
126 // Font properties set automatically from parsing the font description name.
127 std::string family_name_;
128 int font_size_;
129 FontTypeEnum font_type_;
130 // The Pango description that was used to initialize the instance.
131 PangoFontDescription *desc_;
// Default output resolution to assume for GetSpacingProperties() and any
// other methods that return pixel values.
134 int resolution_;
135 // Fontconfig operates through an environment variable, so it intrinsically
136 // cannot be thread-friendly, but you can serialize multiple independent
137 // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
138 // These hold the last initialized values set by HardInitFontConfig or
139 // the first call to SoftInitFontConfig.
140 // Directory to be scanned for font files.
141 static std::string fonts_dir_;
142 // Directory to store the cache of font information. (Can be the same as
143 // fonts_dir_)
144 static std::string cache_dir_;
145
146private:
147 PangoFontInfo(const PangoFontInfo &) = delete;
148 void operator=(const PangoFontInfo &) = delete;
149};
150
151// Static utility methods for querying font availability and font-selection
152// based on codepoint coverage.
153class TESS_PANGO_TRAINING_API FontUtils {
154public:
155 // Returns true if the font of the given description name is available in the
156 // target directory specified by --fonts_dir
157 static bool IsAvailableFont(const char *font_desc) {
158 return IsAvailableFont(font_desc, nullptr);
159 }
160 // Returns true if the font of the given description name is available in the
161 // target directory specified by --fonts_dir. If false is returned, and
162 // best_match is not nullptr, the closest matching font is returned there.
163 static bool IsAvailableFont(const char *font_desc, std::string *best_match);
164 // Outputs description names of available fonts.
165 static const std::vector<std::string> &ListAvailableFonts();
166
167 // Picks font among available fonts that covers and can render the given word,
168 // and returns the font description name and the decomposition of the word to
169 // graphemes. Returns false if no suitable font was found.
170 static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name,
171 std::vector<std::string> *graphemes);
172
173 // Picks font among all_fonts that covers and can render the given word,
174 // and returns the font description name and the decomposition of the word to
175 // graphemes. Returns false if no suitable font was found.
176 static bool SelectFont(const char *utf8_word, const int utf8_len,
177 const std::vector<std::string> &all_fonts, std::string *font_name,
178 std::vector<std::string> *graphemes);
179
180 // NOTE: The following utilities were written to be backward compatible with
181 // StringRender.
182
183 // BestFonts returns a font name and a bit vector of the characters it
184 // can render for the fonts that score within some fraction of the best
185 // font on the characters in the given hash map.
186 // In the flags vector, each flag is set according to whether the
187 // corresponding character (in order of iterating ch_map) can be rendered.
188 // The return string is a list of the acceptable fonts that were used.
189 static std::string BestFonts(const std::unordered_map<char32, int64_t> &ch_map,
190 std::vector<std::pair<const char *, std::vector<bool>>> *font_flag);
191
192 // FontScore returns the weighted renderability score of the given
193 // hash map character table in the given font. The unweighted score
194 // is also returned in raw_score.
195 // The values in the bool vector ch_flags correspond to whether the
196 // corresponding character (in order of iterating ch_map) can be rendered.
197 static int FontScore(const std::unordered_map<char32, int64_t> &ch_map,
198 const std::string &fontname, int *raw_score, std::vector<bool> *ch_flags);
199
200 // PangoFontInfo is reinitialized, so clear the static list of fonts.
201 static void ReInit();
202 static void PangoFontTypeInfo();
203
204private:
205 static std::vector<std::string> available_fonts_; // cache list
206};
207} // namespace tesseract
208
209#endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_
signed int char32
void set_resolution(const int resolution)
FontTypeEnum font_type() const
const std::string & family_name() const
static bool IsAvailableFont(const char *font_desc)