21#ifndef TESSERACT_CCUTIL_NORMSTRNGS_H_
22#define TESSERACT_CCUTIL_NORMSTRNGS_H_
59TESS_UNICHARSET_TRAINING_API
62 std::string *normalized);
68TESS_UNICHARSET_TRAINING_API
71 std::vector<std::string> *graphemes);
83TESS_UNICHARSET_TRAINING_API
88TESS_UNICHARSET_TRAINING_API
93TESS_UNICHARSET_TRAINING_API
98TESS_UNICHARSET_TRAINING_API
103TESS_UNICHARSET_TRAINING_API
107TESS_UNICHARSET_TRAINING_API
111TESS_UNICHARSET_TRAINING_API
bool IsOCREquivalent(char32 ch1, char32 ch2)
bool IsWhitespace(const char32 ch)
char32 OCRNormalize(char32 ch)
unsigned int SpanUTF8Whitespace(const char *text)
bool IsInterchangeValid(const char32 ch)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
bool IsInterchangeValid7BitAscii(const char32 ch)
char32 FullwidthToHalfwidth(const char32 ch)
unsigned int SpanUTF8NotWhitespace(const char *text)
bool IsValidCodepoint(const char32 ch)
bool IsUTF8Whitespace(const char *text)