tesseract v5.3.3.20231005
normstrngs.h File Reference
#include "export.h"
#include "validator.h"
#include <string>
#include <vector>

Go to the source code of this file.

Namespaces

namespace  tesseract
 

Enumerations

enum class  tesseract::UnicodeNormMode { tesseract::UnicodeNormMode::kNFD, tesseract::UnicodeNormMode::kNFC, tesseract::UnicodeNormMode::kNFKD, tesseract::UnicodeNormMode::kNFKC }
 
enum class  tesseract::OCRNorm { tesseract::OCRNorm::kNone, tesseract::OCRNorm::kNormalize }
 
enum class  tesseract::GraphemeNorm { tesseract::GraphemeNorm::kNone, tesseract::GraphemeNorm::kNormalize }
 

Functions

bool tesseract::NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool tesseract::NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 tesseract::OCRNormalize (char32 ch)
 
bool tesseract::IsOCREquivalent (char32 ch1, char32 ch2)
 
bool tesseract::IsValidCodepoint (const char32 ch)
 
bool tesseract::IsWhitespace (const char32 ch)
 
bool tesseract::IsUTF8Whitespace (const char *text)
 
unsigned int tesseract::SpanUTF8Whitespace (const char *text)
 
unsigned int tesseract::SpanUTF8NotWhitespace (const char *text)
 
bool tesseract::IsInterchangeValid (const char32 ch)
 
bool tesseract::IsInterchangeValid7BitAscii (const char32 ch)
 
char32 tesseract::FullwidthToHalfwidth (const char32 ch)