|
tesseract v5.3.3.20231005
|
#include <validate_javanese.h>
Public Member Functions | |
| ValidateJavanese (ViramaScript script, bool report_errors) | |
| ~ValidateJavanese () override=default | |
Public Member Functions inherited from tesseract::Validator | |
| virtual | ~Validator () |
Protected Member Functions | |
| bool | ConsumeGraphemeIfValid () override |
| Validator::CharClass | UnicodeToCharClass (char32 ch) const override |
Protected Member Functions inherited from tesseract::Validator | |
| Validator (ViramaScript script, bool report_errors) | |
| bool | ValidateCleanAndSegmentInternal (GraphemeNormMode g_mode, const std::vector< char32 > &src, std::vector< std::vector< char32 > > *dest) |
| void | MoveResultsToDest (GraphemeNormMode g_mode, std::vector< std::vector< char32 > > *dest) |
| bool | IsSubscriptScript () const |
| bool | CodeOnlyToOutput () |
| void | MultiCodePart (unsigned length) |
| bool | UseMultiCode (unsigned length) |
| virtual bool | ConsumeGraphemeIfValid ()=0 |
| void | ComputeClassCodes (const std::vector< char32 > &text) |
| virtual CharClass | UnicodeToCharClass (char32 ch) const =0 |
| void | Clear () |
Additional Inherited Members | |
Static Public Member Functions inherited from tesseract::Validator | |
| static bool | ValidateCleanAndSegment (GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 > > *dest) |
| static bool | IsZeroWidthMark (char32 ch) |
Static Public Attributes inherited from tesseract::Validator | |
| static const char32 | kZeroWidthSpace = 0x200B |
| static const char32 | kZeroWidthNonJoiner = 0x200C |
| static const char32 | kZeroWidthJoiner = 0x200D |
| static const char32 | kLeftToRightMark = 0x200E |
| static const char32 | kRightToLeftMark = 0x200F |
| static const char32 | kInvalid = 0xfffd |
Protected Types inherited from tesseract::Validator | |
| enum class | CharClass { kConsonant = 'C' , kVowel = 'V' , kVirama = 'H' , kMatra = 'M' , kMatraPiece = 'P' , kVowelModifier = 'D' , kZeroWidthNonJoiner = 'z' , kZeroWidthJoiner = 'Z' , kVedicMark = 'v' , kNukta = 'N' , kRobat = 'R' , kOther = 'O' , kWhitespace = ' ' , kCombiner = 'c' } |
| using | IndicPair = std::pair< CharClass, char32 > |
Static Protected Member Functions inherited from tesseract::Validator | |
| static std::unique_ptr< Validator > | ScriptValidator (ViramaScript script, bool report_errors) |
| static ViramaScript | MostFrequentViramaScript (const std::vector< char32 > &utf32) |
| static bool | IsVirama (char32 unicode) |
| static bool | IsVedicAccent (char32 unicode) |
Protected Attributes inherited from tesseract::Validator | |
| ViramaScript | script_ |
| std::vector< IndicPair > | codes_ |
| std::vector< std::vector< char32 > > | parts_ |
| std::vector< char32 > | output_ |
| unsigned | codes_used_ |
| unsigned | output_used_ |
| bool | report_errors_ |
Static Protected Attributes inherited from tesseract::Validator | |
| static const int | kIndicCodePageSize = 128 |
| static const char32 | kMinIndicUnicode = 0x900 |
| static const char32 | kMaxSinhalaUnicode = 0xdff |
| static const char32 | kMaxViramaScriptUnicode = 0x17ff |
| static const char32 | kSinhalaVirama = 0xdca |
| static const char32 | kMyanmarVirama = 0x1039 |
| static const char32 | kKhmerVirama = 0x17d2 |
| static const char32 | kJavaneseVirama = 0xa9c0 |
| static const char32 | kMaxJavaneseUnicode = 0xa9df |
Definition at line 28 of file validate_javanese.h.
|
inline |
Definition at line 30 of file validate_javanese.h.
|
override default |
|
override protected virtual |
Implements tesseract::Validator.
Definition at line 39 of file validate_javanese.cpp.
|
override protected virtual |
Implements tesseract::Validator.
Definition at line 280 of file validate_javanese.cpp.