Classes
struct	ADAPT_CLASS_STRUCT

struct	ADAPT_RESULTS

class	ADAPT_TEMPLATES_STRUCT

union	ADAPTED_CONFIG

class	AlignedBlob

struct	AlignedBlobParams

class	AmbigSpec

class	ApplyBoxTest

struct	AssociateStats

class	AssociateUtils

class	BAND

class	BandTriMatrix

class	BaseapiThreadTest

class	BaselineBlock

class	BaselineDetect

class	BaselineRow

class	BBGrid

struct	BestChoiceBundle
	Bundle together all the things pertaining to the best choice/state. More...

class	BitVector

class	BitVectorTest

struct	BlamerBundle

class	BlnEventHandler

class	BLOB_CHOICE

struct	BlobData

class	BlobGrid

class	BLOBNBOX

class	BLOCK

class	BLOCK_LINE_IT
	rectangle iterator More...

class	BLOCK_RECT_IT

class	BLOCK_RES

struct	BlockGroup

class	BoolParam

class	BoxChar

struct	BoxCharPtrSort

class	BoxWord

struct	BUCKETS

class	C_BLOB

class	C_OUTLINE

class	C_OUTLINE_FRAG

class	CCNonTextDetect

class	CCStruct

class	CCUtil

struct	CHAR_DESC_STRUCT

class	CHAR_FRAGMENT

struct	CHAR_FRAGMENT_INFO

struct	CHISTRUCT

class	ChoiceIterator

struct	CLASS_PRUNER_STRUCT

struct	CLASS_STRUCT

class	ClassicMockProgressSink

class	Classify

class	ClassPruner

struct	ClipFFunc

struct	ClipFPrime

struct	ClipGFunc

struct	ClipGPrime

class	CLIST

class	CLIST_ITERATOR

class	CLIST_LINK

class	Clst

struct	CLUSTER

struct	Cluster

struct	CLUSTERCONFIG

struct	CLUSTERER

struct	ClusteringContext

class	ColPartition

class	ColPartitionGrid

class	ColPartitionSet

class	ColPartitionTest

class	ColSegment

class	ColumnFinder

class	CommandlineflagsTest

class	Convolve

struct	CP_RESULT_STRUCT

class	CRACKEDGE

struct	CrackPos

class	CTC

struct	DANGERR_INFO

class	Dawg

struct	DawgArgs

class	DawgCache

struct	DawgLoader

struct	DawgPosition

class	DawgPositionVector

class	DawgTest

class	DebugPixa

class	DENORM

class	DENORMTest

class	DetLineFit

class	Dict

class	DIR128

class	DocumentCache

class	DocumentData

class	DoubleParam

class	DoublePtr

class	DPPoint

struct	EANYCODE_CHAR

struct	EdgeOffset

struct	EDGEPT

class	ELIST

class	ELIST2

class	ELIST2_ITERATOR

class	ELIST2_LINK

class	ELIST_ITERATOR

class	ELIST_LINK

class	Elst

class	Elst2

class	EquationDetect

class	EquationDetectBase

class	EquationFinderTest

class	ERRCODE

class	ErrorCounter

class	ETEXT_DESC

class	EuroText

class	FCOORD

struct	FEATURE_DEFS_STRUCT

struct	FEATURE_DESC_STRUCT

struct	FEATURE_SET_STRUCT

struct	FEATURE_STRUCT

struct	FFunc

class	File

struct	FILL_SPEC

struct	FILL_SWITCH

union	FLOATUNION

struct	FontInfo

class	FontInfoTable

struct	FontSpacingInfo

class	FontUtils

class	FontUtilsTest

class	FPAnalyzer

class	FPChar

class	FPCUTPT

struct	FPrime

class	FPRow

class	FPSEGPT

class	FRAGMENT

class	FriendlyTessBaseAPI

class	FullyConnected

class	GAPMAP

class	GENERIC_2D_ARRAY

class	GenericHeap

class	GenericVector

struct	GeometricClassifierState

struct	GFunc

struct	GPrime

struct	greater_than

class	GridBase

class	GridSearch

class	HeapTest

struct	HFunc

struct	HPrime

class	ICOORD
	integer coordinate More...

class	ICOORDELT

class	IcuErrorCode

struct	IdentityFunc

class	Image

class	ImageData

class	ImagedataTest

class	ImageFind

class	ImageThresholder

class	IndexMap

class	IndexMapBiDi

class	IndexMapBiDiTest

class	Input

class	InputBuffer

struct	INT_CLASS_STRUCT

struct	INT_FEATURE_STRUCT

struct	INT_FX_RESULT_STRUCT

struct	INT_PROTO_STRUCT

struct	INT_TEMPLATES_STRUCT

class	IntegerMatcher

struct	Interval

class	IntFeatureDist

class	IntFeatureMap

class	IntFeatureMapTest

class	IntFeatureSpace

class	IntGrid

class	IntParam

struct	IntSimdMatrix

class	IntSimdMatrixTest

struct	KDNODE

struct	KDPair

struct	KDPairDec

struct	KDPairInc

class	KDPtrPair

struct	KDPtrPairDec

struct	KDPtrPairInc

struct	KDTREE

class	KDTreeSearch

class	KDVector

struct	LABELEDLISTNODE

class	LanguageModel

struct	LanguageModelDawgInfo

struct	LanguageModelNgramInfo

struct	LanguageModelState
	Struct to store information maintained by various language model components. More...

class	LayoutTest

class	LigatureTable

class	LigatureTableTest

class	LineFinder

struct	LineHypothesis

struct	list_rec

class	ListTest

class	LLSQ

class	LLSQTest

struct	LMConsistencyInfo

class	LMPainPoints

class	LoadLang

class	LoadLanguage

class	LoadScript

class	LocalCorrelation

class	LocalFilePointer

class	LSTM

class	LSTMRecognizer

class	LSTMTester

class	LSTMTrainer

class	LSTMTrainerTest

class	LTRResultIterator

class	MasterTrainer

class	MatchGroundTruth

class	MATRIX

struct	MATRIX_COORD

class	MatrixTest

class	Maxpool

struct	MERGE_CLASS_NODE

struct	MFEDGEPT

class	MinK

class	MutableIterator

class	Network

class	NetworkBuilder

class	NetworkIO

class	NetworkioTest

class	NetworkScratch

class	NewMockProgressSink

struct	NodeChild

struct	NORM_PROTOS

class	NthItemTest

class	ObjectCache

class	OL_BUCKETS

class	OrientationDetector

struct	OSBestResult

class	OSDTest

struct	OSResults

class	OutputBuffer

class	PAGE_RES

class	PAGE_RES_IT

class	PageIterator

class	PageSegModeTest

class	PangoFontInfo

class	PangoFontInfoTest

struct	PARA

class	ParagraphModel

class	ParagraphModelSmearer

class	ParagraphTheory

class	Parallel

class	Param

struct	PARAM_DESC

class	ParamContent

class	ParamsEditor

class	ParamsModel

class	ParamsModelTest

class	ParamsTrainingBundle

struct	ParamsTrainingHypothesis

struct	ParamsVectors

class	ParamUtils

class	PB_LINE_IT

class	PDBLK
	page block More...

struct	PERM_CONFIG_STRUCT

class	PGEventHandler

class	PixelHistogram

class	Plumbing

class	PointerVector

class	POLY_BLOCK

struct	PROTO_KEY

struct	PROTO_SET_STRUCT

struct	PROTO_STRUCT

struct	PROTOTYPE

class	QLSQ

class	QRSequenceGeneratorTest

class	QSPLINE

class	QUAD_COEFFS

class	QuickTest

class	RecodeBeamSearch

class	RecodeBeamTest

class	RecodedCharID

struct	RecodeNode

class	Reconfig

class	REGION_OCC

class	REJ

class	REJMAP

struct	Relu

struct	ReluPrime

class	ResultIterator

class	ResultIteratorTest

class	Reversed

class	ROW

class	ROW_RES

class	RowInfo

class	RowScratchRegisters

class	SampleIterator

struct	SAMPLELIST

class	ScanutilsTest

struct	ScoredFont

struct	ScratchEvidence

class	ScriptDetector

class	ScrollView

class	SEAM

class	SegSearchPending

class	Series

class	Shape

class	ShapeClassifier

struct	ShapeDist

struct	ShapeQueueEntry

struct	ShapeRating

class	ShapeTable

class	ShapeTableTest

class	ShapeTest

class	SharedTest

class	ShiroRekhaSplitter

class	SIMDDetect

class	SimpleClusterer

class	SimpleStats

class	SORTED_FLOAT

class	SORTED_FLOATS

class	SortHelper

struct	SpacingProperties

struct	SPLIT

class	SquishedDawg

class	StaticShape

struct	STATISTICS

class	STATS

class	STATSTest

class	StrideMap

class	StridemapTest

class	StringParam

class	StringRenderer

class	StringRendererTest

class	StrokeWidth

class	StructuredTable

class	StructuredTableTest

struct	SVEvent

class	SVEventHandler

class	SVMenuNode

class	SVNetwork

class	SVPaint

struct	SVPolyLineBuffer

class	SVSemaphore

class	SVSync
	The SVSync class provides functionality for Thread & Process Creation. More...

class	TabConstraint

class	TabEventHandler

class	TabFind

struct	TABLE_FILLER

class	TableFinder

class	TableFinderTest

class	TableRecognizer

class	TableRecognizerTest

class	TabVector

class	TabVectorTest

class	TatweelTest

struct	TBLOB

class	TBOX

class	TBOXTest

struct	TEMP_CONFIG_STRUCT

struct	TEMP_PROTO_STRUCT

struct	TEMPCLUSTER

class	TessAltoRenderer

class	TessBaseAPI

class	TessBoxTextRenderer

class	TessClassifier

class	TessdataManager

class	Tesseract

struct	TesseractStats

class	TesseractTest

class	TessHOcrRenderer

struct	TESSLINE

class	TessLSTMBoxRenderer

class	TessOsdRenderer

class	TessPDFRenderer

class	TessResultRenderer

class	TessTextRenderer

class	TessTsvRenderer

class	TessUnlvRenderer

class	TessWordStrBoxRenderer

class	TestableColPartition

class	TestableEquationDetect

class	TestableQRSequenceGenerator

class	TestableStructuredTable

class	TestableTableFinder

class	TestableTableRecognizer

class	TestableValidator

class	TestClass

struct	TextAndModel

class	TextlineProjection

class	TextlineProjectionTest

class	Textord

class	TFile

class	TfileTest

class	TFNetworkModel

class	TFNetworkModelDefaultTypeInternal

class	TO_BLOCK

class	TO_ROW

struct	TPOINT

class	TrainingSample

class	TrainingSampleSet

class	TRand

class	TransposedArray

class	Trie

struct	TRIE_NODE_RECORD

struct	TWERD

class	UNICHAR

class	UnicharAmbigs

struct	UnicharAndFonts

class	UnicharCompress

class	UnicharcompressTest

class	UnicharIdArrayUtils

class	UNICHARMAP

struct	UnicharRating

class	UNICHARSET

class	UnicharsetTest

class	UnicityTable

class	UnicodeSpanSkipper

struct	UnityFunc

class	ValidateGrapheme

class	ValidateIndic

class	ValidateJavanese

class	ValidateKhmer

class	ValidateMyanmar

class	Validator

struct	ViterbiStateEntry

class	WeightMatrix

class	WERD

class	WERD_CHOICE

class	WERD_RES

struct	WordData

class	Wordrec

class	WordWithBox

class	WorkingPartSet

class	X_CLIST

class	X_ITER

class	X_LIST

Typedefs
using	FileReader = bool(*)(const char *filename, std::vector< char > *data)

using	DictFunc = int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const

using	ProbabilityInContextFunc = double(Dict::*)(const char *, const char *, int, const char *, int)

using	CANCEL_FUNC = bool(*)(void *, int)

using	PROGRESS_FUNC = bool(*)(int, int, int, int, int)

using	PROGRESS_FUNC2 = bool(*)(ETEXT_DESC *, int, int, int, int)

using	UNICHAR_ID = int

using	char32 = signed int

using	DotProductFunction = TFloat(*)(const TFloat *, const TFloat *, int)

using	SetOfModels = std::vector< const ParagraphModel * >

using	WordRecognizer = void(Tesseract::*)(const WordData &, WERD_RES **, PointerVector< WERD_RES > *)

using	VECTOR = TPOINT

using	FontSet = std::vector< int >

using	MatrixCoordPair = KDPairInc< float, MATRIX_COORD >

using	ParamsTrainingHypothesisList = std::vector< ParamsTrainingHypothesis >

using	BLOB_CHOICE_LIST_VECTOR = std::vector< BLOB_CHOICE_LIST * >

using	PRIORITY = float

using	UnicharIdVector = std::vector< UNICHAR_ID >

using	UnicharAmbigsVector = std::vector< AmbigSpec_LIST * >

using	IntKDPair = KDPairInc< int, int >

using	FileWriter = bool(*)(const std::vector< char > &data, const char *filename)

using	TDimension = int16_t

using	TFloat = double

using	RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > > >

using	RSCounts = std::unordered_map< int, int >

using	ClusterPair = tesseract::KDPairInc< float, TEMPCLUSTER * >

using	ClusterHeap = tesseract::GenericHeap< ClusterPair >

using	DENSITYFUNC = double(*)(int32_t)

using	SOLVEFUNC = double(*)(CHISTRUCT *, double)

using	SAMPLE = CLUSTER

using	FEATURE_DEFS = FEATURE_DEFS_STRUCT *

typedef uint32_t	PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]

typedef uint32_t	CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]

typedef INT_FEATURE_STRUCT	INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]

using	kdwalk_proc = void(*)(ClusteringContext *context, CLUSTER *Cluster, int32_t Level)

using	MicroFeature = std::array< float,(int) MicroFeatureParameter::MFCount >

using	MICROFEATURES = std::forward_list< MicroFeature >

using	MFOUTLINE = LIST

using	FEATURE_DESC = FEATURE_DESC_STRUCT *

using	FEATURE = FEATURE_STRUCT *

using	FEATURE_SET = FEATURE_SET_STRUCT *

using	CHAR_FEATURES = char *

using	CLASS_TYPE = CLASS_STRUCT *

using	CLASSES = CLASS_STRUCT *

using	ShapeQueue = GenericHeap< ShapeQueueEntry >

using	int_compare = int(*)(void *, void *)

using	void_dest = void(*)(void *)

using	LIST = list_rec *

using	EDGE_RECORD = uint64_t

using	EDGE_ARRAY = EDGE_RECORD *

using	EDGE_REF = int64_t

using	NODE_REF = int64_t

using	NODE_MAP = EDGE_REF *

using	NodeChildVector = std::vector< NodeChild >

using	SuccessorList = std::vector< int >

using	SuccessorListsVector = std::vector< SuccessorList * >

using	DawgVector = std::vector< Dawg * >

using	CLASS_ID = UNICHAR_ID

using	PROTO_ID = int16_t

using	FEATURE_ID = uint8_t

using	BLOB_WIDTH = uint8_t

using	DANGERR = std::vector< DANGERR_INFO >

using	EDGE_INDEX = int64_t

using	EDGE_VECTOR = std::vector< EDGE_RECORD >

using	TRIE_NODES = std::vector< TRIE_NODE_RECORD * >

using	RecodePair = KDPairInc< double, RecodeNode >

using	RecodeHeap = GenericHeap< RecodePair >

using	BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >

using	ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >

using	PartSetVector = std::vector< ColPartitionSet * >

using	WidthCallback = std::function< bool(int)>

using	ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >

using	ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >

using	WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >

using	WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >

using	LABELEDLIST = LABELEDLISTNODE *

using	MERGE_CLASS = MERGE_CLASS_NODE *

using	LigHash = std::unordered_map< std::string, std::string >

using	TestCallback = std::function< std::string(int, const double *, const TessdataManager &, int)>

using	PointPair = KDPairInc< float, EDGEPT * >

using	PointHeap = GenericHeap< PointPair >

using	SeamPair = KDPtrPairInc< float, SEAM >

using	SeamQueue = GenericHeap< SeamPair >

using	SeamDecPair = KDPtrPairDec< float, SEAM >

using	SeamPile = GenericHeap< SeamDecPair >

using	PainPointHeap = GenericHeap< MatrixCoordPair >

using	LanguageModelFlagsType = unsigned char
	Used for expressing various language model flags. More...

typedef ParagraphModel	PModel

Enumerations
enum	PolyBlockType { PT_UNKNOWN , PT_FLOWING_TEXT , PT_HEADING_TEXT , PT_PULLOUT_TEXT , PT_EQUATION , PT_INLINE_EQUATION , PT_TABLE , PT_VERTICAL_TEXT , PT_CAPTION_TEXT , PT_FLOWING_IMAGE , PT_HEADING_IMAGE , PT_PULLOUT_IMAGE , PT_HORZ_LINE , PT_VERT_LINE , PT_NOISE , PT_COUNT }

enum	Orientation { ORIENTATION_PAGE_UP = 0 , ORIENTATION_PAGE_RIGHT = 1 , ORIENTATION_PAGE_DOWN = 2 , ORIENTATION_PAGE_LEFT = 3 }

enum	WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0 , WRITING_DIRECTION_RIGHT_TO_LEFT = 1 , WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }

enum	TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0 , TEXTLINE_ORDER_RIGHT_TO_LEFT = 1 , TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }

enum	PageSegMode { PSM_OSD_ONLY = 0 , PSM_AUTO_OSD = 1 , PSM_AUTO_ONLY = 2 , PSM_AUTO = 3 , PSM_SINGLE_COLUMN = 4 , PSM_SINGLE_BLOCK_VERT_TEXT = 5 , PSM_SINGLE_BLOCK = 6 , PSM_SINGLE_LINE = 7 , PSM_SINGLE_WORD = 8 , PSM_CIRCLE_WORD = 9 , PSM_SINGLE_CHAR = 10 , PSM_SPARSE_TEXT , PSM_SPARSE_TEXT_OSD = 12 , PSM_RAW_LINE = 13 , PSM_COUNT }

enum	PageIteratorLevel { RIL_BLOCK , RIL_PARA , RIL_TEXTLINE , RIL_WORD , RIL_SYMBOL }

enum	ParagraphJustification { JUSTIFICATION_UNKNOWN , JUSTIFICATION_LEFT , JUSTIFICATION_CENTER , JUSTIFICATION_RIGHT }

enum	OcrEngineMode { OEM_TESSERACT_ONLY , OEM_LSTM_ONLY , OEM_TESSERACT_LSTM_COMBINED , OEM_DEFAULT , OEM_COUNT }

enum	StrongScriptDirection { DIR_NEUTRAL = 0 , DIR_LEFT_TO_RIGHT = 1 , DIR_RIGHT_TO_LEFT = 2 , DIR_MIX = 3 }

enum	GARBAGE_LEVEL { G_NEVER_CRUNCH , G_OK , G_DODGY , G_TERRIBLE }

enum	LineType { LT_START = 'S' , LT_BODY = 'C' , LT_UNKNOWN = 'U' , LT_MULTIPLE = 'M' }

enum	ParamType { VT_INTEGER , VT_BOOLEAN , VT_STRING , VT_DOUBLE }

enum	CMD_EVENTS { NULL_CMD_EVENT , CHANGE_DISP_CMD_EVENT , DUMP_WERD_CMD_EVENT , SHOW_POINT_CMD_EVENT , SHOW_BLN_WERD_CMD_EVENT , DEBUG_WERD_CMD_EVENT , BLAMER_CMD_EVENT , BOUNDING_BOX_CMD_EVENT , CORRECT_TEXT_CMD_EVENT , POLYGONAL_CMD_EVENT , BL_NORM_CMD_EVENT , BITMAP_CMD_EVENT , IMAGE_CMD_EVENT , BLOCKS_CMD_EVENT , BASELINES_CMD_EVENT , UNIFORM_DISP_CMD_EVENT , REFRESH_CMD_EVENT , QUIT_CMD_EVENT , RECOG_WERDS , RECOG_PSEUDO , SHOW_BLOB_FEATURES , SHOW_SUBSCRIPT_CMD_EVENT , SHOW_SUPERSCRIPT_CMD_EVENT , SHOW_ITALIC_CMD_EVENT , SHOW_BOLD_CMD_EVENT , SHOW_UNDERLINE_CMD_EVENT , SHOW_FIXEDPITCH_CMD_EVENT , SHOW_SERIF_CMD_EVENT , SHOW_SMALLCAPS_CMD_EVENT , SHOW_DROPCAPS_CMD_EVENT , ACTION_1_CMD_EVENT , RECOG_WERDS , RECOG_PSEUDO , ACTION_2_CMD_EVENT }

enum	ColorationMode { CM_RAINBOW , CM_SUBSCRIPT , CM_SUPERSCRIPT , CM_ITALIC , CM_BOLD , CM_UNDERLINE , CM_FIXEDPITCH , CM_SERIF , CM_SMALLCAPS , CM_DROPCAPS }

enum	CMD_EVENTS { NULL_CMD_EVENT , CHANGE_DISP_CMD_EVENT , DUMP_WERD_CMD_EVENT , SHOW_POINT_CMD_EVENT , SHOW_BLN_WERD_CMD_EVENT , DEBUG_WERD_CMD_EVENT , BLAMER_CMD_EVENT , BOUNDING_BOX_CMD_EVENT , CORRECT_TEXT_CMD_EVENT , POLYGONAL_CMD_EVENT , BL_NORM_CMD_EVENT , BITMAP_CMD_EVENT , IMAGE_CMD_EVENT , BLOCKS_CMD_EVENT , BASELINES_CMD_EVENT , UNIFORM_DISP_CMD_EVENT , REFRESH_CMD_EVENT , QUIT_CMD_EVENT , RECOG_WERDS , RECOG_PSEUDO , SHOW_BLOB_FEATURES , SHOW_SUBSCRIPT_CMD_EVENT , SHOW_SUPERSCRIPT_CMD_EVENT , SHOW_ITALIC_CMD_EVENT , SHOW_BOLD_CMD_EVENT , SHOW_UNDERLINE_CMD_EVENT , SHOW_FIXEDPITCH_CMD_EVENT , SHOW_SERIF_CMD_EVENT , SHOW_SMALLCAPS_CMD_EVENT , SHOW_DROPCAPS_CMD_EVENT , ACTION_1_CMD_EVENT , RECOG_WERDS , RECOG_PSEUDO , ACTION_2_CMD_EVENT }

enum class	ThresholdMethod { Otsu , LeptonicaOtsu , Sauvola , Max }

enum	IncorrectResultReason { IRR_CORRECT , IRR_CLASSIFIER , IRR_CHOPPER , IRR_CLASS_LM_TRADEOFF , IRR_PAGE_LAYOUT , IRR_SEGSEARCH_HEUR , IRR_SEGSEARCH_PP , IRR_CLASS_OLD_LM_TRADEOFF , IRR_ADAPTION , IRR_NO_TRUTH_SPLIT , IRR_NO_TRUTH , IRR_UNKNOWN , IRR_NUM_REASONS }

enum	PITCH_TYPE { PITCH_DUNNO , PITCH_DEF_FIXED , PITCH_MAYBE_FIXED , PITCH_DEF_PROP , PITCH_MAYBE_PROP , PITCH_CORR_FIXED , PITCH_CORR_PROP }

enum	TabType { TT_NONE , TT_DELETED , TT_MAYBE_RAGGED , TT_MAYBE_ALIGNED , TT_CONFIRMED , TT_VLINE }

enum	BlobRegionType { BRT_NOISE , BRT_HLINE , BRT_VLINE , BRT_RECTIMAGE , BRT_POLYIMAGE , BRT_UNKNOWN , BRT_VERT_TEXT , BRT_TEXT , BRT_COUNT }

enum	BlobNeighbourDir { BND_LEFT , BND_BELOW , BND_RIGHT , BND_ABOVE , BND_COUNT }

enum	BlobSpecialTextType { BSTT_NONE , BSTT_ITALIC , BSTT_DIGIT , BSTT_MATH , BSTT_UNCLEAR , BSTT_SKIP , BSTT_COUNT }

enum	BlobTextFlowType { BTFT_NONE , BTFT_NONTEXT , BTFT_NEIGHBOURS , BTFT_CHAIN , BTFT_STRONG_CHAIN , BTFT_TEXT_ON_IMAGE , BTFT_LEADER , BTFT_COUNT }

enum	C_OUTLINE_FLAGS { COUT_INVERSE }

enum	CachingStrategy { CS_SEQUENTIAL , CS_ROUND_ROBIN }

enum	NormalizationMode { NM_BASELINE = -3 , NM_CHAR_ISOTROPIC = -2 , NM_CHAR_ANISOTROPIC = -1 }

enum	CRUNCH_MODE { CR_NONE , CR_KEEP_SPACE , CR_LOOSE_SPACE , CR_DELETE }

enum	kParamsTrainingFeatureType { PTRAIN_DIGITS_SHORT , PTRAIN_DIGITS_MED , PTRAIN_DIGITS_LONG , PTRAIN_NUM_SHORT , PTRAIN_NUM_MED , PTRAIN_NUM_LONG , PTRAIN_DOC_SHORT , PTRAIN_DOC_MED , PTRAIN_DOC_LONG , PTRAIN_DICT_SHORT , PTRAIN_DICT_MED , PTRAIN_DICT_LONG , PTRAIN_FREQ_SHORT , PTRAIN_FREQ_MED , PTRAIN_FREQ_LONG , PTRAIN_SHAPE_COST_PER_CHAR , PTRAIN_NGRAM_COST_PER_CHAR , PTRAIN_NUM_BAD_PUNC , PTRAIN_NUM_BAD_CASE , PTRAIN_XHEIGHT_CONSISTENCY , PTRAIN_NUM_BAD_CHAR_TYPE , PTRAIN_NUM_BAD_SPACING , PTRAIN_NUM_BAD_FONT , PTRAIN_RATING_PER_CHAR , PTRAIN_NUM_FEATURE_TYPES }

enum	BlobChoiceClassifier { BCC_STATIC_CLASSIFIER , BCC_ADAPTED_CLASSIFIER , BCC_SPECKLE_CLASSIFIER , BCC_AMBIG , BCC_FAKE }

enum	PermuterType { NO_PERM , PUNC_PERM , TOP_CHOICE_PERM , LOWER_CASE_PERM , UPPER_CASE_PERM , NGRAM_PERM , NUMBER_PERM , USER_PATTERN_PERM , SYSTEM_DAWG_PERM , DOC_DAWG_PERM , USER_DAWG_PERM , FREQ_DAWG_PERM , COMPOUND_PERM , NUM_PERMUTER_TYPES }

enum	ScriptPos { SP_NORMAL , SP_SUBSCRIPT , SP_SUPERSCRIPT , SP_DROPCAP }

enum	REJ_FLAGS { R_TESS_FAILURE , R_SMALL_XHT , R_EDGE_CHAR , R_1IL_CONFLICT , R_POSTNN_1IL , R_REJ_CBLOB , R_MM_REJECT , R_BAD_REPETITION , R_POOR_MATCH , R_NOT_TESS_ACCEPTED , R_CONTAINS_BLANKS , R_BAD_PERMUTER , R_HYPHEN , R_DUBIOUS , R_NO_ALPHANUMS , R_MOSTLY_REJ , R_XHT_FIXUP , R_BAD_QUALITY , R_DOC_REJ , R_BLOCK_REJ , R_ROW_REJ , R_UNLV_REJ , R_NN_ACCEPT , R_HYPHEN_ACCEPT , R_MM_ACCEPT , R_QUALITY_ACCEPT , R_MINIMAL_REJ_ACCEPT }

enum	WERD_FLAGS { W_SEGMENTED , W_ITALIC , W_BOLD , W_BOL , W_EOL , W_NORMALIZED , W_SCRIPT_HAS_XHEIGHT , W_SCRIPT_IS_LATIN , W_DONT_CHOP , W_REP_CHAR , W_FUZZY_SP , W_FUZZY_NON , W_INVERSE }

enum	DISPLAY_FLAGS { DF_BOX , DF_TEXT , DF_POLYGONAL , DF_EDGE_STEP , DF_BN_POLYGONAL , DF_BLAMER }

enum	AmbigType { NOT_AMBIG , REPLACE_AMBIG , DEFINITE_AMBIG , SIMILAR_AMBIG , CASE_AMBIG , AMBIG_TYPE_COUNT }

enum	TessErrorLogCode { DBG = -1 , TESSLOG = 0 , TESSEXIT = 1 , ABORT = 2 }

enum	SetParamConstraint { SET_PARAM_CONSTRAINT_NONE , SET_PARAM_CONSTRAINT_DEBUG_ONLY , SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY , SET_PARAM_CONSTRAINT_NON_INIT_ONLY }

enum	TessdataType { TESSDATA_LANG_CONFIG , TESSDATA_UNICHARSET , TESSDATA_AMBIGS , TESSDATA_INTTEMP , TESSDATA_PFFMTABLE , TESSDATA_NORMPROTO , TESSDATA_PUNC_DAWG , TESSDATA_SYSTEM_DAWG , TESSDATA_NUMBER_DAWG , TESSDATA_FREQ_DAWG , TESSDATA_FIXED_LENGTH_DAWGS , TESSDATA_CUBE_UNICHARSET , TESSDATA_CUBE_SYSTEM_DAWG , TESSDATA_SHAPE_TABLE , TESSDATA_BIGRAM_DAWG , TESSDATA_UNAMBIG_DAWG , TESSDATA_PARAMS_MODEL , TESSDATA_LSTM , TESSDATA_LSTM_PUNC_DAWG , TESSDATA_LSTM_SYSTEM_DAWG , TESSDATA_LSTM_NUMBER_DAWG , TESSDATA_LSTM_UNICHARSET , TESSDATA_LSTM_RECODER , TESSDATA_VERSION , TESSDATA_NUM_ENTRIES }

enum	SpecialUnicharCodes { UNICHAR_SPACE , UNICHAR_JOINED , UNICHAR_BROKEN , SPECIAL_UNICHAR_CODES_COUNT }

enum class	OldUncleanUnichars { kFalse , kTrue }

enum	CharSegmentationType { CST_FRAGMENT , CST_WHOLE , CST_IMPROPER , CST_NGRAM }

enum	PROTOSTYLE { spherical , elliptical , mixed , automatic }

enum	DISTRIBUTION { normal , uniform , D_random , DISTRIBUTION_COUNT }

enum	SWITCH_TYPE { StartSwitch , EndSwitch , LastSwitch }

enum	IntmatcherDebugAction { IDA_ADAPTIVE , IDA_STATIC , IDA_SHAPE_INDEX , IDA_BOTH }

enum class	MicroFeatureParameter { MFXPosition , MFYPosition , MFLength , MFDirection , MFBulge1 , MFBulge2 , MFCount }

enum	DIRECTION : uint8_t { north , south , east , west , northeast , northwest , southeast , southwest }

enum	OUTLINETYPE { outer , hole }

enum	NORM_METHOD { baseline , character }

enum	NORM_PARAM_NAME { CharNormY , CharNormLength , CharNormRx , CharNormRy }

enum	OUTLINE_FEAT_PARAM_NAME { OutlineFeatX , OutlineFeatY , OutlineFeatLength , OutlineFeatDir }

enum	IntParams { IntX , IntY , IntDir }

enum	GeoParams { GeoBottom , GeoTop , GeoWidth , GeoCount }

enum	PICO_FEAT_PARAM_NAME { PicoFeatY , PicoFeatDir , PicoFeatX }

enum	DawgType { DAWG_TYPE_PUNCTUATION , DAWG_TYPE_WORD , DAWG_TYPE_NUMBER , DAWG_TYPE_PATTERN , DAWG_TYPE_COUNT }

enum	XHeightConsistencyEnum { XH_GOOD , XH_SUBNORMAL , XH_INCONSISTENT }

enum	TrainingFlags { TF_INT_MODE = 1 , TF_COMPRESS_UNICHARSET = 64 }

enum	NetworkType { NT_NONE , NT_INPUT , NT_CONVOLVE , NT_MAXPOOL , NT_PARALLEL , NT_REPLICATED , NT_PAR_RL_LSTM , NT_PAR_UD_LSTM , NT_PAR_2D_LSTM , NT_SERIES , NT_RECONFIG , NT_XREVERSED , NT_YREVERSED , NT_XYTRANSPOSE , NT_LSTM , NT_LSTM_SUMMARY , NT_LOGISTIC , NT_POSCLIP , NT_SYMCLIP , NT_TANH , NT_RELU , NT_LINEAR , NT_SOFTMAX , NT_SOFTMAX_NO_CTC , NT_LSTM_SOFTMAX , NT_LSTM_SOFTMAX_ENCODED , NT_TENSORFLOW , NT_COUNT }

enum	NetworkFlags { NF_LAYER_SPECIFIC_LR = 64 , NF_ADAM = 128 }

enum	TrainingState { TS_DISABLED , TS_ENABLED , TS_TEMP_DISABLE , TS_RE_ENABLE }

enum	NodeContinuation { NC_ANYTHING , NC_ONLY_DUP , NC_NO_DUP , NC_COUNT }

enum	TopNState { TN_TOP2 , TN_TOPN , TN_ALSO_RAN , TN_COUNT }

enum	LossType { LT_NONE , LT_CTC , LT_SOFTMAX , LT_LOGISTIC }

enum	FlexDimensions { FD_BATCH , FD_HEIGHT , FD_WIDTH , FD_DIMSIZE }

enum	SpacingNeighbourhood { PN_ABOVE2 , PN_ABOVE1 , PN_UPPER , PN_LOWER , PN_BELOW1 , PN_BELOW2 , PN_COUNT }

enum	ColumnSpanningType { CST_NOISE , CST_FLOWING , CST_HEADING , CST_PULLOUT , CST_COUNT }

enum	NeighbourPartitionType { NPT_HTEXT , NPT_VTEXT , NPT_WEAK_HTEXT , NPT_WEAK_VTEXT , NPT_IMAGE , NPT_COUNT }

enum	OVERLAP_STATE { ASSIGN , REJECT , NEW_ROW }

enum	ROW_CATEGORY { ROW_ASCENDERS_FOUND , ROW_DESCENDERS_FOUND , ROW_UNKNOWN , ROW_INVALID }

enum	LeftOrRight { LR_LEFT , LR_RIGHT }

enum	PartitionFindResult { PFR_OK , PFR_SKEW , PFR_NOISE }

enum	ColSegType { COL_UNKNOWN , COL_TEXT , COL_TABLE , COL_MIXED , COL_COUNT }

enum	TabAlignment { TA_LEFT_ALIGNED , TA_LEFT_RAGGED , TA_CENTER_JUSTIFIED , TA_RIGHT_ALIGNED , TA_RIGHT_RAGGED , TA_SEPARATOR , TA_COUNT }

enum	CountTypes { CT_UNICHAR_TOP_OK , CT_UNICHAR_TOP1_ERR , CT_UNICHAR_TOP2_ERR , CT_UNICHAR_TOPN_ERR , CT_UNICHAR_TOPTOP_ERR , CT_OK_MULTI_UNICHAR , CT_OK_JOINED , CT_OK_BROKEN , CT_REJECT , CT_FONT_ATTR_ERR , CT_OK_MULTI_FONT , CT_NUM_RESULTS , CT_RANK , CT_REJECTED_JUNK , CT_ACCEPTED_JUNK , CT_SIZE }

enum	FactorNames { FN_INCOLOR , FN_Y0 , FN_Y1 , FN_Y2 , FN_Y3 , FN_X0 , FN_X1 , FN_SHEAR , FN_NUM_FACTORS }

enum	ErrorTypes { ET_RMS , ET_DELTA , ET_WORD_RECERR , ET_CHAR_ERROR , ET_SKIP_RATIO , ET_COUNT }

enum	Trainability { TRAINABLE , PERFECT , UNENCODABLE , HI_PRECISION_ERR , NOT_BOXED }

enum	SerializeAmount { LIGHT , NO_BEST_TRAINER , FULL }

enum	SubTrainerResult { STR_NONE , STR_UPDATED , STR_REPLACED }

enum class	UnicodeNormMode { kNFD , kNFC , kNFKD , kNFKC }

enum class	OCRNorm { kNone , kNormalize }

enum class	GraphemeNorm { kNone , kNormalize }

enum class	GraphemeNormMode { kSingleString , kCombined , kGlyphSplit , kIndividualUnicodes }

enum class	ViramaScript : char32 { kNonVirama = 0 , kDevanagari = 0x900 , kBengali = 0x980 , kGurmukhi = 0xa00 , kGujarati = 0xa80 , kOriya = 0xb00 , kTamil = 0xb80 , kTelugu = 0xc00 , kKannada = 0xc80 , kMalayalam = 0xd00 , kSinhala = 0xd80 , kMyanmar = 0x1000 , kKhmer = 0x1780 , kJavanese = 0xa980 }

enum	SVEventType { SVET_DESTROY , SVET_EXIT , SVET_CLICK , SVET_SELECTION , SVET_INPUT , SVET_MOUSE , SVET_MOTION , SVET_HOVER , SVET_POPUP , SVET_MENU , SVET_ANY , SVET_COUNT }

enum	LMPainPointsType { LM_PPTYPE_BLAMER , LM_PPTYPE_AMBIG , LM_PPTYPE_PATH , LM_PPTYPE_SHAPE , LM_PPTYPE_NUM }

enum	TextModelInputType { PCONT = 0 , PSTART = 1 , PNONE = 2 }

Functions

std::string HOcrEscape (const char *text)

int orientation_and_script_detection (const char *filename, OSResults *, tesseract::Tesseract *)

int os_detect (TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess)

int os_detect_blobs (const std::vector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)

bool os_detect_blob (BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *, tesseract::Tesseract *tess)

TESS_API int OrientationIdToValue (const int &id)

bool PTIsLineType (PolyBlockType type)

bool PTIsImageType (PolyBlockType type)

bool PTIsTextType (PolyBlockType type)

bool PTIsPulloutType (PolyBlockType type)

bool PSM_OSD_ENABLED (int pageseg_mode)

bool PSM_ORIENTATION_ENABLED (int pageseg_mode)

bool PSM_COL_FIND_ENABLED (int pageseg_mode)

bool PSM_SPARSE (int pageseg_mode)

bool PSM_BLOCK_FIND_ENABLED (int pageseg_mode)

bool PSM_LINE_FIND_ENABLED (int pageseg_mode)

bool PSM_WORD_FIND_ENABLED (int pageseg_mode)

TFloat DotProductNative (const TFloat *u, const TFloat *v, int n)

TFloat DotProductAVX (const TFloat *u, const TFloat *v, int n)

TFloat DotProductAVX512F (const TFloat *u, const TFloat *v, int n)

TFloat DotProductFMA (const TFloat *u, const TFloat *v, int n)

TFloat DotProductSSE (const TFloat *u, const TFloat *v, int n)

TFloat DotProductNEON (const TFloat *u, const TFloat *v, int n)

void reject_whole_page (PAGE_RES_IT &page_res_it)

int16_t word_blob_quality (WERD_RES *word)

bool IsTextOrEquationType (PolyBlockType type)

bool IsLeftIndented (const EquationDetect::IndentType type)

bool IsRightIndented (const EquationDetect::IndentType type)

char determine_newline_type (WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)

bool AsciiLikelyListItem (const std::string &word)

template<class T >

void push_back_new (std::vector< T > &vector, const T &data)

void LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)

void RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)

bool ValidFirstLine (const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

bool ValidBodyLine (const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

bool CrownCompatible (const std::vector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)

void RecomputeMarginsAndClearHypotheses (std::vector< RowScratchRegisters > *rows, int start, int end, int percentile)

int InterwordSpace (const std::vector< RowScratchRegisters > &rows, int row_start, int row_end)

bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)

bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)

bool RowsFitModel (const std::vector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)

void CanonicalizeDetectionResults (std::vector< PARA * > *row_owners, PARA_LIST *paragraphs)

void DetectParagraphs (int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)

void DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, std::vector< ParagraphModel * > *models)

bool StrongModel (const ParagraphModel *model)

STRING_VAR_H (editor_image_win_name)

INT_VAR_H (editor_image_xpos)

INT_VAR_H (editor_image_ypos)

INT_VAR_H (editor_image_word_bb_color)

INT_VAR_H (editor_image_blob_bb_color)

STRING_VAR_H (editor_word_name)

INT_VAR_H (editor_word_xpos)

INT_VAR_H (editor_word_ypos)

INT_VAR_H (editor_word_height)

INT_VAR_H (editor_word_width)

void reject_blanks (WERD_RES *word)

void reject_poor_matches (WERD_RES *word)

float compute_reject_threshold (WERD_CHOICE *word)

bool word_contains_non_1_digit (const char *word, const char *word_lengths)

void dont_allow_1Il (WERD_RES *word)

void flip_hyphens (WERD_RES *word)

void flip_0O (WERD_RES *word)

bool non_0_digit (const char *str, int length)

PAGE_RES_IT * make_pseudo_word (PAGE_RES *page_res, const TBOX &selection_box)

void find_cblob_limits (C_BLOB *blob, float leftx, float rightx, FCOORD rotation, float &ymin, float &ymax)

void find_cblob_vlimits (C_BLOB *blob, float leftx, float rightx, float &ymin, float &ymax)

void find_cblob_hlimits (C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)

C_BLOB * crotate_cblob (C_BLOB *blob, FCOORD rotation)

TBOX box_next (BLOBNBOX_IT *it)

TBOX box_next_pre_chopped (BLOBNBOX_IT *it)

void vertical_cblob_projection (C_BLOB *blob, STATS *stats)

void vertical_coutline_projection (C_OUTLINE *outline, STATS *stats)

void plot_blob_list (ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)

BlobNeighbourDir DirOtherWay (BlobNeighbourDir dir)

bool DominatesInMerge (BlobTextFlowType type1, BlobTextFlowType type2)

bool divisible_blob (TBLOB *blob, bool italic_blob, TPOINT *location)

void divide_blobs (TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)

bool read_unlv_file (std::string &name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)

void FullPageBlock (int width, int height, BLOCK_LIST *blocks)

FILE * OpenBoxFile (const char *fname)

bool ReadAllBoxes (int target_page, bool skip_blanks, const char *filename, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)

bool ReadMemBoxes (int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)

bool ReadNextBox (int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box)

bool ReadNextBox (int target_page, int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box)

bool ParseBoxFileStr (const char *boxfile_str, int *page_number, std::string &utf8_str, TBOX *bounding_box)

void MakeBoxFileStr (const char *unichar_str, const TBOX &box, int page_num, std::string &box_str)

void FontInfoDeleteCallback (FontInfo f)

bool read_info (TFile *f, FontInfo *fi)

bool write_info (FILE *f, const FontInfo &fi)

bool read_spacing_info (TFile *f, FontInfo *fi)

bool write_spacing_info (FILE *f, const FontInfo &fi)

bool write_set (FILE *f, const FontSet &fs)

constexpr ERRCODE EMPTY_LLSQ ("Can't delete from an empty LLSQ")

template<typename T >

T MedianOfCircularValues (T modulus, std::vector< T > &v)

void PrintSegmentationStats (BLOCK_LIST *block_list)

void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)

void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)

int OtsuThreshold (Image src_pix, int left, int top, int width, int height, std::vector< int > &thresholds, std::vector< int > &hi_values)

void HistogramRect (Image src_pix, int channel, int left, int top, int width, int height, int *histogram)

int OtsuStats (const int *histogram, int *H_out, int *omega0_out)

int ParamsTrainingFeatureByName (const char *name)

constexpr ERRCODE BADBLOCKLINE ("Y coordinate in block out of bounds")

constexpr ERRCODE LOSTBLOCKLINE ("Can't find rectangle for line")

ICOORD operator! (const ICOORD &src)

ICOORD operator- (const ICOORD &src)

ICOORD operator+ (const ICOORD &op1, const ICOORD &op2)

ICOORD & operator+= (ICOORD &op1, const ICOORD &op2)

ICOORD operator- (const ICOORD &op1, const ICOORD &op2)

ICOORD & operator-= (ICOORD &op1, const ICOORD &op2)

int32_t operator% (const ICOORD &op1, const ICOORD &op2)

int32_t operator* (const ICOORD &op1, const ICOORD &op2)

ICOORD operator* (const ICOORD &op1, TDimension scale)

ICOORD operator* (TDimension scale, const ICOORD &op1)

ICOORD & operator*= (ICOORD &op1, TDimension scale)

ICOORD operator/ (const ICOORD &op1, TDimension scale)

ICOORD & operator/= (ICOORD &op1, TDimension scale)

FCOORD operator! (const FCOORD &src)

FCOORD operator- (const FCOORD &src)

FCOORD operator+ (const FCOORD &op1, const FCOORD &op2)

FCOORD & operator+= (FCOORD &op1, const FCOORD &op2)

FCOORD operator- (const FCOORD &op1, const FCOORD &op2)

FCOORD & operator-= (FCOORD &op1, const FCOORD &op2)

float operator% (const FCOORD &op1, const FCOORD &op2)

float operator* (const FCOORD &op1, const FCOORD &op2)

FCOORD operator* (const FCOORD &op1, float scale)

FCOORD operator* (float scale, const FCOORD &op1)

FCOORD & operator*= (FCOORD &op1, float scale)

FCOORD operator/ (const FCOORD &op1, float scale)

FCOORD & operator/= (FCOORD &op1, float scale)

TESSLINE * ApproximateOutline (bool allow_detailed_fx, C_OUTLINE *c_outline)

BLOB_CHOICE * FindMatchingChoice (UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)

const char * ScriptPosToString (enum ScriptPos script_pos)

bool EqualIgnoringCaseAndTerminalPunct (const WERD_CHOICE &word1, const WERD_CHOICE &word2)

void print_ratings_list (const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)

TBOX & operator+= (TBOX &op1, const TBOX &op2)

TBOX & operator&= (TBOX &op1, const TBOX &op2)

EDGEPT * make_edgept (TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev)

void remove_edgept (EDGEPT *point)

BOOL_VAR_H (wordrec_display_splits)

int word_comparator (const void *word1p, const void *word2p)

constexpr ERRCODE BADERRACTION ("Illegal error action")

constexpr ERRCODE ASSERT_FAILED ("Assert failed")

constexpr ERRCODE CANTOPENFILE ("Can't open file")

bool LoadDataFromFile (const char *filename, GenericVector< char > *data)

bool SaveDataToFile (const GenericVector< char > &data, const char *filename)

template<typename T >

int sort_cmp (const void *t1, const void *t2)

template<typename T >

int sort_ptr_cmp (const void *t1, const void *t2)

template<class T >

bool contains (const std::vector< T > &data, const T &value)

const std::vector< std::string > split (const std::string &s, char c)

void chomp_string (char *str)

int RoundUp (int n, int block_size)

template<typename T >

T ClipToRange (const T &x, const T &lower_bound, const T &upper_bound)

template<typename T1 , typename T2 >

void UpdateRange (const T1 &x, T2 *lower_bound, T2 *upper_bound)

template<typename T1 , typename T2 >

void UpdateRange (const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound)

template<typename T >

void IntersectRange (const T &lower1, const T &upper1, T *lower2, T *upper2)

int Modulo (int a, int b)

int DivRounded (int a, int b)

int IntCastRounded (double x)

int IntCastRounded (float x)

void ReverseN (void *ptr, int num_bytes)

void Reverse32 (void *ptr)

template<typename T >

bool DeSerialize (bool swap, FILE *fp, std::vector< T > &data)

template<typename T >

bool Serialize (FILE *fp, const std::vector< T > &data)

template<class T >

bool NearlyEqual (T x, T y, T tolerance)

constexpr ERRCODE NO_LIST ("Iterator not set to a list")

constexpr ERRCODE NULL_DATA ("List would have returned a nullptr data pointer")

constexpr ERRCODE NULL_CURRENT ("List current position is nullptr")

constexpr ERRCODE NULL_NEXT ("Next element on the list is nullptr")

constexpr ERRCODE NULL_PREV ("Previous element on the list is nullptr")

constexpr ERRCODE EMPTY_LIST ("List is empty")

constexpr ERRCODE BAD_PARAMETER ("List parameter error")

constexpr ERRCODE STILL_LINKED ("Attempting to add an element with non nullptr links, to a list")

tesseract::ParamsVectors * GlobalParams ()

bool LoadDataFromFile (const char *filename, std::vector< char > *data)

bool SaveDataToFile (const std::vector< char > &data, const char *filename)

template<typename T , size_t N>

constexpr size_t countof (T const (&)[N]) noexcept

template<typename T >

bool DeSerialize (FILE *fp, T *data, size_t n=1)

template<typename T >

bool Serialize (FILE *fp, const T *data, size_t n=1)

void tprintf (const char *format,...)

TESS_API INT_VAR_H (log_level)

void AddAdaptedClass (ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId)

ADAPT_CLASS_STRUCT * ReadAdaptedClass (TFile *fp)

PERM_CONFIG_STRUCT * ReadPermConfig (TFile *fp)

TEMP_CONFIG_STRUCT * ReadTempConfig (TFile *fp)

void WriteAdaptedClass (FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs)

void WritePermConfig (FILE *File, PERM_CONFIG_STRUCT *Config)

void WriteTempConfig (FILE *File, TEMP_CONFIG_STRUCT *Config)

bool MarginalMatch (float confidence, float matcher_great_threshold)

void InitMatcherRatings (float *Rating)

int MakeTempProtoPerm (void *item1, void *item2)

void SetAdaptiveThreshold (float Threshold)

CLUSTERER * MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[])

SAMPLE * MakeSample (CLUSTERER *Clusterer, const float *Feature, uint32_t CharID)

LIST ClusterSamples (CLUSTERER *Clusterer, CLUSTERCONFIG *Config)

void FreeClusterer (CLUSTERER *Clusterer)

void FreeProtoList (LIST *ProtoList)

void FreePrototype (void *arg)

CLUSTER * NextSample (LIST *SearchState)

float Mean (PROTOTYPE *Proto, uint16_t Dimension)

float StandardDeviation (PROTOTYPE *Proto, uint16_t Dimension)

int32_t MergeClusters (int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])

uint16_t ReadSampleSize (TFile *fp)

PARAM_DESC * ReadParamDesc (TFile *fp, uint16_t N)

PROTOTYPE * ReadPrototype (TFile *fp, uint16_t N)

void WriteParamDesc (FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])

void WritePrototype (FILE *File, uint16_t N, PROTOTYPE *Proto)

StartParamDesc (MicroFeatureParams) DefineParam(0

DefineParam (0, 0, -0.25, 0.75) DefineParam(0

DefineParam (1, 0, 0.0, 1.0) DefineParam(0

DefineParam (0, 1, -0.5, 0.5) EndParamDesc DefineFeature(MicroFeatureDesc

MicroFeatureParams StartParamDesc (CharNormParams) DefineParam(0

MicroFeatureParams DefineParam (0, 1, 0.0, 1.0) DefineParam(0

MicroFeatureParams DefineParam (0, 0, 0.0, 1.0) EndParamDesc DefineFeature(CharNormDesc

MicroFeatureParams CharNormParams StartParamDesc (IntFeatParams) DefineParam(0

MicroFeatureParams CharNormParams DefineParam (0, 0, 0.0, 255.0) DefineParam(1

MicroFeatureParams CharNormParams EndParamDesc DefineFeature (IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) StartParamDesc(GeoFeatParams) DefineParam(0

MicroFeatureParams CharNormParams EndParamDesc EndParamDesc DefineFeature (GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) float PicoFeatureLength

StartParamDesc (PicoFeatParams) DefineParam(0

EndParamDesc DefineFeature (PicoFeatDesc, 2, 1, "pf", PicoFeatParams) StartParamDesc(OutlineFeatParams) DefineParam(0

void InitFeatureDefs (FEATURE_DEFS_STRUCT *featuredefs)

void WriteCharDescription (const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str)

bool ValidCharDescription (const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc)

CHAR_DESC_STRUCT * ReadCharDescription (const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)

uint32_t ShortNameToFeatureType (const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)

void InitIntegerFX ()

FCOORD FeatureDirection (uint8_t theta)

TrainingSample * BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, std::vector< INT_FEATURE_STRUCT > *bl_features)

BOOL_VAR_H (disable_character_fragments)

INT_VAR_H (classify_integer_matcher_multiplier)

float BucketStart (int Bucket, float Offset, int NumBuckets)

float BucketEnd (int Bucket, float Offset, int NumBuckets)

void DoFill (FILL_SPEC *FillSpec, CLASS_PRUNER_STRUCT *Pruner, uint32_t ClassMask, uint32_t ClassCount, uint32_t WordIndex)

bool FillerDone (TABLE_FILLER *Filler)

void FillPPCircularBits (uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, float Center, float Spread, bool debug)

void FillPPLinearBits (uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, float Center, float Spread, bool debug)

void GetCPPadsForLevel (int Level, float *EndPad, float *SidePad, float *AnglePad)

ScrollView::Color GetMatchColorFor (float Evidence)

void GetNextFill (TABLE_FILLER *Filler, FILL_SPEC *Fill)

void InitTableFiller (float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *Proto, TABLE_FILLER *Filler)

void RenderIntFeature (ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)

void RenderIntProto (ScrollView *window, INT_CLASS_STRUCT *Class, PROTO_ID ProtoId, ScrollView::Color color)

void AddIntClass (INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_STRUCT *Class)

int AddIntConfig (INT_CLASS_STRUCT *Class)

int AddIntProto (INT_CLASS_STRUCT *Class)

void AddProtoToClassPruner (PROTO_STRUCT *Proto, CLASS_ID ClassId, INT_TEMPLATES_STRUCT *Templates)

void AddProtoToProtoPruner (PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class, bool debug)

uint8_t Bucket8For (float param, float offset, int num_buckets)

uint16_t Bucket16For (float param, float offset, int num_buckets)

uint8_t CircBucketFor (float param, float offset, int num_buckets)

void UpdateMatchDisplay ()

void ConvertConfig (BIT_VECTOR Config, int ConfigId, INT_CLASS_STRUCT *Class)

void DisplayIntFeature (const INT_FEATURE_STRUCT *Feature, float Evidence)

void DisplayIntProto (INT_CLASS_STRUCT *Class, PROTO_ID ProtoId, float Evidence)

void ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)

void InitIntMatchWindowIfReqd ()

void InitProtoDisplayWindowIfReqd ()

void InitFeatureDisplayWindowIfReqd ()

ScrollView * CreateFeatureSpaceWindow (const char *name, int xpos, int ypos)

void ShowMatchDisplay ()

KDTREE * MakeKDTree (int16_t KeySize, const PARAM_DESC KeyDesc[])

void KDStore (KDTREE *Tree, float *Key, CLUSTER *Data)

void KDDelete (KDTREE *Tree, float Key[], void *Data)

void KDNearestNeighborSearch (KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, int *NumberOfResults, void **NBuffer, float DBuffer[])

void KDWalk (KDTREE *Tree, kdwalk_proc action, ClusteringContext *context)

float DistanceSquared (int k, PARAM_DESC *dim, float p1[], float p2[])

float ComputeDistance (int k, PARAM_DESC *dim, float p1[], float p2[])

void Walk (KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level)

void InsertNodes (KDTREE *tree, KDNODE *nodes)

int QueryInSearch (KDTREE *tree)

FEATURE_SET ExtractMicros (TBLOB *Blob, const DENORM &cn_denorm)

LIST ConvertBlob (TBLOB *blob)

MFOUTLINE ConvertOutline (TESSLINE *outline)

LIST ConvertOutlines (TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type)

void FindDirectionChanges (MFOUTLINE Outline, float MinSlope, float MaxSlope)

void FreeMFOutline (void *arg)

void FreeOutlines (LIST Outlines)

void MarkDirectionChanges (MFOUTLINE Outline)

MFOUTLINE NextExtremity (MFOUTLINE EdgePoint)

void NormalizeOutline (MFOUTLINE Outline, float XOrigin)

void ChangeDirection (MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction)

void CharNormalizeOutline (MFOUTLINE Outline, const DENORM &cn_denorm)

void ComputeDirection (MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope)

MFOUTLINE NextDirectionChange (MFOUTLINE EdgePoint)

void ComputeBlobCenter (TBLOB *Blob, TPOINT *BlobCenter)

void FilterEdgeNoise (MFOUTLINE Outline, float NoiseSegmentLength)

MICROFEATURES ConvertToMicroFeatures (MFOUTLINE Outline, MICROFEATURES MicroFeatures)

MicroFeature ExtractMicroFeature (MFOUTLINE Start, MFOUTLINE End)

MICROFEATURES BlobMicroFeatures (TBLOB *Blob, const DENORM &cn_denorm)

double_VAR_H (classify_min_slope)

double_VAR_H (classify_max_slope)

float ActualOutlineLength (FEATURE Feature)

FEATURE_SET ExtractCharNormFeatures (const INT_FX_RESULT_STRUCT &fx_info)

double_VAR_H (classify_norm_adj_midpoint)

double_VAR_H (classify_norm_adj_curl)

bool AddFeature (FEATURE_SET FeatureSet, FEATURE Feature)

FEATURE_SET ReadFeatureSet (FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc)

void WriteFeatureSet (FEATURE_SET FeatureSet, std::string &str)

void AddOutlineFeatureToSet (FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)

void ConvertToOutlineFeatures (MFOUTLINE Outline, FEATURE_SET FeatureSet)

void NormalizeOutlineX (FEATURE_SET FeatureSet)

void ConvertSegmentToPicoFeat (FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)

void ConvertToPicoFeatures2 (MFOUTLINE Outline, FEATURE_SET FeatureSet)

void NormalizePicoX (FEATURE_SET FeatureSet)

double_VAR_H (classify_pico_feature_length)

void InitPrototypes ()

LIST delete_d (LIST list, void *key, int_compare is_equal)

LIST destroy (LIST list)

void destroy_nodes (LIST list, void_dest destructor)

LIST last (LIST var_list)

LIST pop (LIST list)

LIST push (LIST list, void *element)

LIST push_last (LIST list, void *item)

LIST search (LIST list, void *key, int_compare is_equal)

TFloat Tanh (TFloat x)

TFloat Logistic (TFloat x)

template<class Func >

void FuncInplace (int n, TFloat *inout)

template<class Func >

void FuncMultiply (const TFloat *u, const TFloat *v, int n, TFloat *out)

template<typename T >

void SoftmaxInPlace (int n, T *inout)

void CopyVector (unsigned n, const TFloat *src, TFloat *dest)

void AccumulateVector (int n, const TFloat *src, TFloat *dest)

void MultiplyVectorsInPlace (int n, const TFloat *src, TFloat *inout)

void MultiplyAccumulate (int n, const TFloat *u, const TFloat *v, TFloat *out)

void SumVectors (int n, const TFloat *v1, const TFloat *v2, const TFloat *v3, const TFloat *v4, const TFloat *v5, TFloat *sum)

template<typename T >

void ZeroVector (unsigned n, T *vec)

template<typename T >

void ClipVector (int n, T lower, T upper, T *vec)

void CodeInBinary (int n, int nf, TFloat *vec)

INT_VAR_H (textord_debug_bugs)

INT_VAR_H (textord_debug_tabfind)

BOOL_VAR_H (textord_debug_printable)

Image TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)

Image TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)

template<class BBC >

int SortByBoxLeft (const void *void1, const void *void2)

template<class BBC >

bool StdSortByBoxLeft (const void *void1, const void *void2)

template<class BBC >

int SortRightToLeft (const void *void1, const void *void2)

template<class BBC >

bool StdSortRightToLeft (const void *void1, const void *void2)

template<class BBC >

int SortByBoxBottom (const void *void1, const void *void2)

bool test_underline (bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)

double_VAR_H (textord_underline_threshold)

void compute_fixed_pitch_cjk (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)

INT_VAR_H (devanagari_split_debuglevel)

BOOL_VAR_H (devanagari_split_debugimage)

ScrollView * create_to_win (ICOORD page_tr)

void close_to_win ()

void plot_box_list (ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)

void plot_to_row (TO_ROW *row, ScrollView::Color colour, FCOORD rotation)

void plot_parallel_row (TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)

void draw_occupation (int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])

void draw_meanlines (TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)

void plot_word_decisions (ScrollView *win, int16_t pitch, TO_ROW *row)

void plot_fp_cells (ScrollView *win, ScrollView::Color colour, BLOBNBOX_IT *blob_it, int16_t pitch, int16_t blob_count, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale)

void plot_fp_cells2 (ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)

void plot_row_cells (ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)

BOOL_VAR_H (textord_show_fixed_cuts)

void create_todebug_win ()

void complete_edge (CRACKEDGE *start, C_OUTLINE_IT *outline_it)

ScrollView::Color check_path_legal (CRACKEDGE *start)

int16_t loop_bounding_box (CRACKEDGE *&start, ICOORD &botleft, ICOORD &topright)

ROW * fixed_pitch_words (TO_ROW *row, FCOORD rotation)

void split_to_blob (BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)

INT_VAR_H (textord_fp_chop_error)

BOOL_VAR_H (gapmap_debug)

BOOL_VAR_H (gapmap_use_ends)

BOOL_VAR_H (gapmap_no_isolated_quanta)

double_VAR_H (gapmap_big_gaps)

BOOL_VAR_H (textord_heavy_nr)

BOOL_VAR_H (textord_show_initial_rows)

BOOL_VAR_H (textord_show_parallel_rows)

BOOL_VAR_H (textord_show_expanded_rows)

BOOL_VAR_H (textord_show_final_rows)

BOOL_VAR_H (textord_show_final_blobs)

BOOL_VAR_H (textord_test_landscape)

BOOL_VAR_H (textord_parallel_baselines)

BOOL_VAR_H (textord_straight_baselines)

BOOL_VAR_H (textord_old_baselines)

BOOL_VAR_H (textord_old_xheight)

BOOL_VAR_H (textord_fix_xheight_bug)

BOOL_VAR_H (textord_fix_makerow_bug)

BOOL_VAR_H (textord_debug_xheights)

INT_VAR_H (textord_test_x)

INT_VAR_H (textord_test_y)

INT_VAR_H (textord_min_blobs_in_row)

INT_VAR_H (textord_spline_minblobs)

INT_VAR_H (textord_spline_medianwin)

INT_VAR_H (textord_min_xheight)

double_VAR_H (textord_spline_shift_fraction)

double_VAR_H (textord_skew_ile)

double_VAR_H (textord_skew_lag)

double_VAR_H (textord_linespace_iqrlimit)

double_VAR_H (textord_width_limit)

double_VAR_H (textord_chop_width)

double_VAR_H (textord_minxh)

double_VAR_H (textord_min_linesize)

double_VAR_H (textord_excess_blobsize)

double_VAR_H (textord_occupancy_threshold)

double_VAR_H (textord_underline_width)

double_VAR_H (textord_min_blob_height_fraction)

double_VAR_H (textord_xheight_mode_fraction)

double_VAR_H (textord_ascheight_mode_fraction)

double_VAR_H (textord_ascx_ratio_min)

double_VAR_H (textord_ascx_ratio_max)

double_VAR_H (textord_descx_ratio_min)

double_VAR_H (textord_descx_ratio_max)

double_VAR_H (textord_xheight_error_margin)

INT_VAR_H (textord_lms_line_trials)

BOOL_VAR_H (textord_new_initial_xheight)

BOOL_VAR_H (textord_debug_blob)

void get_min_max_xheight (int block_linesize, int *min_height, int *max_height)

ROW_CATEGORY get_row_category (const TO_ROW *row)

bool within_error_margin (float test, float num, float margin)

float median_block_xheight (TO_BLOCK *block, float gradient)

int get_blob_coords (TO_ROW *row, int32_t lineheight, TBOX *blobcoords, bool &holed_line, int &outcount)

void make_first_baseline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)

void make_holed_baseline (TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)

int partition_line (TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])

void merge_oldbl_parts (TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)

int get_ydiffs (TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])

int choose_partition (float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)

int partition_coords (TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])

int segment_spline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])

bool split_stepped_spline (QSPLINE *baseline, float jumplimit, int *xcoords, int *xstarts, int &segments)

void insert_spline_point (int xstarts[], int segment, int coord1, int coord2, int &segments)

void find_lesser_parts (TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)

void old_first_xheight (TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)

void make_first_xheight (TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)

void find_top_modes (STATS *stats, int statnum, int modelist[], int modenum)

void pick_x_height (TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)

BOOL_VAR_H (textord_oldbl_debug)

int * make_height_array (TBOX blobcoords[], int blobcount, QSPLINE *baseline)

double check_pitch_sync2 (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)

double check_pitch_sync3 (int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)

double check_pitch_sync (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)

void make_illegal_segment (FPSEGPT_LIST *prev_list, TBOX blob_box, BLOBNBOX_IT blob_it, int16_t region_index, int16_t pitch, int16_t pitch_error, FPSEGPT_LIST *seg_list)

INT_VAR_H (pitsync_linear_version)

double_VAR_H (pitsync_joined_edge)

double_VAR_H (pitsync_offset_freecut_fraction)

int16_t vertical_torow_projection (TO_ROW *row, STATS *projection)

void block_edges (Image t_pix, PDBLK *block, C_OUTLINE_IT *outline_it)

template<typename T >

void DeleteObject (T *object)

double_VAR_H (textord_tabvector_vertical_gap_fraction)

double_VAR_H (textord_tabvector_vertical_box_ratio)

void compute_fixed_pitch (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)

void fix_row_pitch (TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)

void compute_block_pitch (TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)

bool compute_rows_pitch (TO_BLOCK *block, int32_t block_index, bool testing_on)

bool try_doc_fixed (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)

bool try_block_fixed (TO_BLOCK *block, int32_t block_index)

bool try_rows_fixed (TO_BLOCK *block, int32_t block_index, bool testing_on)

void print_block_counts (TO_BLOCK *block, int32_t block_index)

void count_block_votes (TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)

bool row_pitch_stats (TO_ROW *row, int32_t maxwidth, bool testing_on)

bool find_row_pitch (TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)

bool fixed_pitch_row (TO_ROW *row, BLOCK *block, int32_t block_index)

bool count_pitch_stats (TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)

float tune_row_pitch (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)

float tune_row_pitch2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)

float compute_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)

float compute_pitch_sd2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)

void print_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)

void find_repeated_chars (TO_BLOCK *block, bool testing_on)

void plot_fp_word (TO_BLOCK *block, float pitch, float nonspace)

BOOL_VAR_H (textord_debug_pitch_test)

BOOL_VAR_H (textord_debug_pitch_metric)

BOOL_VAR_H (textord_show_row_cuts)

BOOL_VAR_H (textord_show_page_cuts)

BOOL_VAR_H (textord_blockndoc_fixed)

BOOL_VAR_H (textord_fast_pitch_test)

double_VAR_H (textord_projection_scale)

double_VAR_H (textord_balance_factor)

void SetBlobStrokeWidth (Image pix, BLOBNBOX *blob)

void assign_blobs_to_blocks2 (Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)

void tweak_row_baseline (ROW *row, double blshift_maxshift, double blshift_xfraction)

BOOL_VAR_H (textord_show_initial_words)

BOOL_VAR_H (textord_blocksall_fixed)

BOOL_VAR_H (textord_blocksall_prop)

INT_VAR_H (textord_dotmatrix_gap)

INT_VAR_H (textord_debug_block)

INT_VAR_H (textord_pitch_range)

double_VAR_H (textord_wordstats_smooth_factor)

double_VAR_H (textord_words_maxspace)

double_VAR_H (textord_words_default_maxspace)

double_VAR_H (textord_words_default_minspace)

double_VAR_H (textord_words_min_minspace)

double_VAR_H (textord_words_default_nonspace)

double_VAR_H (textord_words_initial_lower)

double_VAR_H (textord_words_initial_upper)

double_VAR_H (textord_words_minlarge)

double_VAR_H (textord_words_pitchsd_threshold)

double_VAR_H (textord_words_def_fixed)

double_VAR_H (textord_words_def_prop)

INT_VAR_H (textord_words_veto_power)

double_VAR_H (textord_pitch_rowsimilarity)

BOOL_VAR_H (textord_pitch_scalebigwords)

double_VAR_H (words_initial_lower)

double_VAR_H (words_initial_upper)

double_VAR_H (words_default_prop_nonspace)

double_VAR_H (words_default_fixed_space)

double_VAR_H (words_default_fixed_limit)

double_VAR_H (textord_words_definite_spread)

double_VAR_H (textord_spacesize_ratioprop)

double_VAR_H (textord_fpiqr_ratio)

double_VAR_H (textord_max_pitch_iqr)

void restore_underlined_blobs (TO_BLOCK *block)

TO_ROW * most_overlapping_row (TO_ROW_LIST *rows, BLOBNBOX *blob)

void find_underlined_blobs (BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)

void vertical_cunderline_projection (C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)

double_VAR_H (textord_underline_offset)

BOOL_VAR_H (textord_restore_underlines)

BOOL_VAR_H (textord_force_make_prop_words)

BOOL_VAR_H (textord_chopper_test)

void ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)

TESS_COMMON_TRAINING_API DECLARE_INT_PARAM_FLAG (debug_level)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (D)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (F)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (O)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (U)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (X)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (fonts_dir)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (fontconfig_tmpdir)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (output_trainer)

TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (test_ch)

INT_PARAM_FLAG (debug_level, 0, "Level of Trainer debugging")

STRING_PARAM_FLAG (D, "", "Directory to write output files to")

STRING_PARAM_FLAG (F, "font_properties", "File listing font properties")

STRING_PARAM_FLAG (X, "", "File listing font xheights")

STRING_PARAM_FLAG (U, "unicharset", "File to load unicharset from")

STRING_PARAM_FLAG (O, "", "File to write unicharset to")

STRING_PARAM_FLAG (output_trainer, "", "File to write trainer to")

STRING_PARAM_FLAG (test_ch, "", "UTF8 test character string")

STRING_PARAM_FLAG (fonts_dir, "", "")

STRING_PARAM_FLAG (fontconfig_tmpdir, "", "")

void ParseArguments (int *argc, char ***argv)

ShapeTable * LoadShapeTable (const std::string &file_prefix)

void WriteShapeTable (const std::string &file_prefix, const ShapeTable &shape_table)

std::unique_ptr< MasterTrainer > LoadTrainingData (const char *const *filelist, bool replication, ShapeTable **shape_table, std::string &file_prefix)

LABELEDLIST FindList (LIST List, const std::string &Label)

void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_definitions, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)

void FreeTrainingSamples (LIST CharList)

void FreeLabeledList (LABELEDLIST LabeledList)

CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST char_sample, const char *program_feature_type)

void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *clusterconfig)

void CleanUpUnusedData (LIST ProtoList)

LIST RemoveInsignificantProtos (LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N)

MERGE_CLASS FindClass (LIST List, const std::string &Label)

void FreeLabeledClassList (LIST ClassList)

CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)

void Normalize (float *Values)

void FreeNormProtoList (LIST CharList)

void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, const std::string &CharName)

int NumberOfProtos (LIST ProtoList, bool CountSigProtos, bool CountInsigProtos)

void WriteTrainingSamples (const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, tesseract::LIST CharList, const char *program_feature_type)

void allocNormProtos ()

Image DegradeImage (Image input, int exposure, TRand *randomizer, float *rotation)

Image PrepareDistortedPix (const Image pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, std::vector< TBOX > *boxes)

void GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Image *pix, std::vector< TBOX > *boxes)

int ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)

bool LoadFileLinesToStrings (const char *filename, std::vector< std::string > *lines)

bool WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const std::vector< char > &data, FileWriter writer)

std::string ReadFile (const std::string &filename, FileReader reader)

bool WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)

bool WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, std::string *radical_table_data, TessdataManager *traineddata)

int CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const std::vector< std::string > &words, const std::vector< std::string > &puncs, const std::vector< std::string > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)

bool NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)

bool NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)

char32 OCRNormalize (char32 ch)

bool IsOCREquivalent (char32 ch1, char32 ch2)

bool IsValidCodepoint (const char32 ch)

bool IsWhitespace (const char32 ch)

bool IsUTF8Whitespace (const char *text)

unsigned int SpanUTF8Whitespace (const char *text)

unsigned int SpanUTF8NotWhitespace (const char *text)

bool IsInterchangeValid (const char32 ch)

bool IsInterchangeValid7BitAscii (const char32 ch)

char32 FullwidthToHalfwidth (const char32 ch)

void SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)

void SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)

std::string GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)

void SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)

void SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)

void create_fx_win ()

void clear_fx_win ()

void create_fxdebug_win ()

template<class BLOB_CHOICE >

int SortByUnicharID (const void *void1, const void *void2)

template<class BLOB_CHOICE >

int SortByRating (const void *void1, const void *void2)

void display_edgepts (LIST outlines)

void draw_blob_edges (TBLOB *blob)

void mark_outline (EDGEPT *edgept)

void display_blob (TBLOB *blob, ScrollView::Color color)

void render_blob (ScrollView *window, TBLOB *blob, ScrollView::Color color)

void render_edgepts (ScrollView *window, EDGEPT *edgept, ScrollView::Color color)

void render_outline (ScrollView *window, TESSLINE *outline, ScrollView::Color color)

BOOL_VAR_H (wordrec_display_all_blobs)

BOOL_VAR_H (wordrec_blob_pause)

void OCRTester (const char *imgname, const char *groundtruth, const char *tessdatadir, const char *lang)

TEST_P (MatchGroundTruth, FastPhototestOCR)

TEST_P (MatchGroundTruth, BestPhototestOCR)

TEST_P (MatchGroundTruth, TessPhototestOCR)

INSTANTIATE_TEST_SUITE_P (Eng, MatchGroundTruth, ::testing::Values("eng"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Latin, MatchGroundTruth, ::testing::Values("script/Latin"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Deva, MatchGroundTruth, ::testing::Values("script/Devanagari"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Arabic, MatchGroundTruth, ::testing::Values("script/Arabic"))

TEST_F (EuroText, FastLatinOCR)

TEST_F (ApplyBoxTest, TimesCharLevel)

TEST_F (ApplyBoxTest, ItalicCharLevel)

TEST_F (ApplyBoxTest, TimesLineLevel)

TEST_F (ApplyBoxTest, ItalLineLevel)

std::string GetCleanedTextResult (tesseract::TessBaseAPI *tess, Image pix)

TEST_F (TesseractTest, StaticTessBaseAPI)

TEST_F (TesseractTest, BasicTesseractTest)

TEST_F (TesseractTest, IteratesParagraphsEvenIfNotDetected)

TEST_F (TesseractTest, HOCRWorksWithoutSetInputName)

TEST_F (TesseractTest, HOCRContainsBaseline)

TEST_F (TesseractTest, AdaptToWordStrTest)

TEST_F (TesseractTest, BasicLSTMTest)

TEST_F (TesseractTest, LSTMGeometryTest)

TEST_F (TesseractTest, InitConfigOnlyTest)

TEST (TesseractInstanceTest, TestMultipleTessInstances)

TEST (TesseractInstanceTest, TestMultipleTessInstanceVariables)

TEST_F (BaseapiThreadTest, TestBasicSanity)

TEST_F (BaseapiThreadTest, TestInit)

TEST_F (BaseapiThreadTest, TestRecognition)

TEST_F (BaseapiThreadTest, TestAll)

TEST_F (BitVectorTest, Primes)

TEST_F (BitVectorTest, SetAll)

TEST_F (BitVectorTest, TestNextSetBit)

TEST_F (BitVectorTest, TestNumSetBits)

TEST (CleanNamespaceTess, DummyTest)

TEST_F (ColPartitionTest, IsInSameColumnAsReflexive)

TEST_F (ColPartitionTest, IsInSameColumnAsBorders)

TEST_F (ColPartitionTest, IsInSameColumnAsSuperset)

TEST_F (ColPartitionTest, IsInSameColumnAsPartialOverlap)

TEST_F (CommandlineflagsTest, RemoveFlags)

TEST_F (CommandlineflagsTest, ExitsWithErrorOnInvalidFlag)

TEST_F (CommandlineflagsTest, ParseIntegerFlags)

TEST_F (CommandlineflagsTest, ParseDoubleFlags)

TEST_F (CommandlineflagsTest, ParseStringFlags)

TEST_F (CommandlineflagsTest, ParseBoolFlags)

TEST_F (CommandlineflagsTest, ParseOldFlags)

TEST_F (DawgTest, TestDawgConversion)

TEST_F (DawgTest, TestMatching)

TEST_F (DENORMTest, NoRotations)

TEST_F (DENORMTest, WithRotations)

TEST_F (DENORMTest, Multiple)

TEST_F (EquationFinderTest, IdentifySpecialText)

TEST_F (EquationFinderTest, EstimateTypeForUnichar)

TEST_F (EquationFinderTest, IsIndented)

TEST_F (EquationFinderTest, IsNearSmallNeighbor)

TEST_F (EquationFinderTest, CheckSeedBlobsCount)

TEST_F (EquationFinderTest, ComputeForegroundDensity)

TEST_F (EquationFinderTest, CountAlignment)

TEST_F (EquationFinderTest, ComputeCPsSuperBBox)

TEST_F (EquationFinderTest, SplitCPHorLite)

TEST_F (EquationFinderTest, SplitCPHor)

TEST (FileTest, JoinPath)

TEST (OutputBufferTest, WriteString)

TEST (InputBufferTest, Read)

TEST_F (HeapTest, SortTest)

TEST_F (HeapTest, MixedTest)

TEST_F (HeapTest, PopWorstTest)

TEST_F (HeapTest, RevalueTest)

TEST_F (HeapTest, DoublePtrTest)

TEST_F (ImagedataTest, CachesProperly)

TEST_F (ImagedataTest, CachesMultiDocs)

TEST_F (IndexMapBiDiTest, Primes)

TEST_F (IndexMapBiDiTest, ManyToOne)

TEST_F (IntFeatureMapTest, Exhaustive)

TEST_F (IntSimdMatrixTest, C)

TEST_F (IntSimdMatrixTest, SSE)

TEST_F (IntSimdMatrixTest, AVX2)

std::string TestDataNameToPath (const std::string &name)

TEST (LangModelTest, AddACharacter)

TEST (LangModelTest, AddACharacterHindi)

TEST_F (LayoutTest, ArraySizeTest)

TEST_F (LayoutTest, UNLV8087_054)

TEST_F (LayoutTest, HebrewOrderingAndSkew)

TEST_F (LigatureTableTest, DoesFillLigatureTables)

TEST_F (LigatureTableTest, TestCustomLigatures)

TEST_F (LLSQTest, BasicLines)

TEST_F (LLSQTest, Vectors)

TEST_F (LLSQTest, RmsOrthWorksAsIntended)

TEST_F (ListTest, TestCLIST)

TEST_F (ListTest, TestELIST)

TEST_F (ListTest, TestELIST2)

void LangLoader (const char *lang, const char *tessdatadir)

TEST_P (LoadLanguage, afr)

TEST_P (LoadLanguage, amh)

TEST_P (LoadLanguage, ara)

TEST_P (LoadLanguage, asm)

TEST_P (LoadLanguage, aze)

TEST_P (LoadLanguage, aze_cyrl)

TEST_P (LoadLanguage, bel)

TEST_P (LoadLanguage, ben)

TEST_P (LoadLanguage, bod)

TEST_P (LoadLanguage, bos)

TEST_P (LoadLanguage, bre)

TEST_P (LoadLanguage, bul)

TEST_P (LoadLanguage, cat)

TEST_P (LoadLanguage, ceb)

TEST_P (LoadLanguage, ces)

TEST_P (LoadLanguage, chi_sim)

TEST_P (LoadLanguage, chi_sim_vert)

TEST_P (LoadLanguage, chi_tra)

TEST_P (LoadLanguage, chi_tra_vert)

TEST_P (LoadLanguage, chr)

TEST_P (LoadLanguage, cos)

TEST_P (LoadLanguage, cym)

TEST_P (LoadLanguage, dan)

TEST_P (LoadLanguage, deu)

TEST_P (LoadLanguage, div)

TEST_P (LoadLanguage, dzo)

TEST_P (LoadLanguage, ell)

TEST_P (LoadLanguage, eng)

TEST_P (LoadLanguage, enm)

TEST_P (LoadLanguage, epo)

TEST_P (LoadLanguage, est)

TEST_P (LoadLanguage, eus)

TEST_P (LoadLanguage, fao)

TEST_P (LoadLanguage, fas)

TEST_P (LoadLanguage, fil)

TEST_P (LoadLanguage, fin)

TEST_P (LoadLanguage, fra)

TEST_P (LoadLanguage, frk)

TEST_P (LoadLanguage, frm)

TEST_P (LoadLanguage, fry)

TEST_P (LoadLanguage, gla)

TEST_P (LoadLanguage, gle)

TEST_P (LoadLanguage, glg)

TEST_P (LoadLanguage, grc)

TEST_P (LoadLanguage, guj)

TEST_P (LoadLanguage, hat)

TEST_P (LoadLanguage, heb)

TEST_P (LoadLanguage, hin)

TEST_P (LoadLanguage, hrv)

TEST_P (LoadLanguage, hun)

TEST_P (LoadLanguage, hye)

TEST_P (LoadLanguage, iku)

TEST_P (LoadLanguage, ind)

TEST_P (LoadLanguage, isl)

TEST_P (LoadLanguage, ita)

TEST_P (LoadLanguage, ita_old)

TEST_P (LoadLanguage, jav)

TEST_P (LoadLanguage, jpn)

TEST_P (LoadLanguage, jpn_vert)

TEST_P (LoadLanguage, kan)

TEST_P (LoadLanguage, kat)

TEST_P (LoadLanguage, kat_old)

TEST_P (LoadLanguage, kaz)

TEST_P (LoadLanguage, khm)

TEST_P (LoadLanguage, kir)

TEST_P (LoadLanguage, kor)

TEST_P (LoadLanguage, kor_vert)

TEST_P (LoadLanguage, lao)

TEST_P (LoadLanguage, lat)

TEST_P (LoadLanguage, lav)

TEST_P (LoadLanguage, lit)

TEST_P (LoadLanguage, ltz)

TEST_P (LoadLanguage, mal)

TEST_P (LoadLanguage, mar)

TEST_P (LoadLanguage, mkd)

TEST_P (LoadLanguage, mlt)

TEST_P (LoadLanguage, mon)

TEST_P (LoadLanguage, mri)

TEST_P (LoadLanguage, msa)

TEST_P (LoadLanguage, mya)

TEST_P (LoadLanguage, nep)

TEST_P (LoadLanguage, nld)

TEST_P (LoadLanguage, nor)

TEST_P (LoadLanguage, oci)

TEST_P (LoadLanguage, ori)

TEST_P (LoadLanguage, osd)

TEST_P (LoadLanguage, pan)

TEST_P (LoadLanguage, pol)

TEST_P (LoadLanguage, por)

TEST_P (LoadLanguage, pus)

TEST_P (LoadLanguage, que)

TEST_P (LoadLanguage, ron)

TEST_P (LoadLanguage, rus)

TEST_P (LoadLanguage, san)

TEST_P (LoadLanguage, sin)

TEST_P (LoadLanguage, slk)

TEST_P (LoadLanguage, slv)

TEST_P (LoadLanguage, snd)

TEST_P (LoadLanguage, spa)

TEST_P (LoadLanguage, spa_old)

TEST_P (LoadLanguage, sqi)

TEST_P (LoadLanguage, srp)

TEST_P (LoadLanguage, srp_latn)

TEST_P (LoadLanguage, sun)

TEST_P (LoadLanguage, swa)

TEST_P (LoadLanguage, swe)

TEST_P (LoadLanguage, syr)

TEST_P (LoadLanguage, tam)

TEST_P (LoadLanguage, tat)

TEST_P (LoadLanguage, tel)

TEST_P (LoadLanguage, tgk)

TEST_P (LoadLanguage, tha)

TEST_P (LoadLanguage, tir)

TEST_P (LoadLanguage, ton)

TEST_P (LoadLanguage, tur)

TEST_P (LoadLanguage, uig)

TEST_P (LoadLanguage, ukr)

TEST_P (LoadLanguage, urd)

TEST_P (LoadLanguage, uzb)

TEST_P (LoadLanguage, uzb_cyrl)

TEST_P (LoadLanguage, vie)

TEST_P (LoadLanguage, yid)

TEST_P (LoadLanguage, yor)

INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_fast, LoadLanguage, ::testing::Values(TESSDATA_DIR "_fast"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_best, LoadLanguage, ::testing::Values(TESSDATA_DIR "_best"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata, LoadLanguage, ::testing::Values(TESSDATA_DIR))

TEST_P (LoadScript, Arabic)

TEST_P (LoadScript, Armenian)

TEST_P (LoadScript, Bengali)

TEST_P (LoadScript, Canadian_Aboriginal)

TEST_P (LoadScript, Cherokee)

TEST_P (LoadScript, Cyrillic)

TEST_P (LoadScript, Devanagari)

TEST_P (LoadScript, Ethiopic)

TEST_P (LoadScript, Fraktur)

TEST_P (LoadScript, Georgian)

TEST_P (LoadScript, Greek)

TEST_P (LoadScript, Gujarati)

TEST_P (LoadScript, Gurmukhi)

TEST_P (LoadScript, HanS)

TEST_P (LoadScript, HanS_vert)

TEST_P (LoadScript, HanT)

TEST_P (LoadScript, HanT_vert)

TEST_P (LoadScript, Hangul)

TEST_P (LoadScript, Hangul_vert)

TEST_P (LoadScript, Hebrew)

TEST_P (LoadScript, Japanese)

TEST_P (LoadScript, Japanese_vert)

TEST_P (LoadScript, Kannada)

TEST_P (LoadScript, Khmer)

TEST_P (LoadScript, Lao)

TEST_P (LoadScript, Latin)

TEST_P (LoadScript, Malayalam)

TEST_P (LoadScript, Myanmar)

TEST_P (LoadScript, Oriya)

TEST_P (LoadScript, Sinhala)

TEST_P (LoadScript, Syriac)

TEST_P (LoadScript, Tamil)

TEST_P (LoadScript, Telugu)

TEST_P (LoadScript, Thaana)

TEST_P (LoadScript, Thai)

TEST_P (LoadScript, Tibetan)

TEST_P (LoadScript, Vietnamese)

INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_fast, LoadScript, ::testing::Values(TESSDATA_DIR "_fast"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_best, LoadScript, ::testing::Values(TESSDATA_DIR "_best"))

INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata, LoadScript, ::testing::Values(TESSDATA_DIR))

TEST_F (LoadLang, engFast)

TEST_F (LoadLang, engBest)

TEST_F (LoadLang, engBestInt)

TEST_F (LoadLang, kmrFast)

TEST_F (LoadLang, kmrBest)

TEST_F (LSTMTrainerTest, RecodeTestKorBase)

TEST_F (LSTMTrainerTest, RecodeTestKor)

TEST_F (LSTMTrainerTest, EncodeDecodeBothTestKor)

TEST_F (LSTMTrainerTest, TestSquashed)

TEST_F (LSTMTrainerTest, BasicTest)

TEST_F (LSTMTrainerTest, ColorTest)

TEST_F (LSTMTrainerTest, BidiTest)

TEST_F (LSTMTrainerTest, Test2D)

TEST_F (LSTMTrainerTest, TestAdam)

TEST_F (LSTMTrainerTest, SpeedTest)

TEST_F (LSTMTrainerTest, DeterminismTest)

TEST_F (LSTMTrainerTest, SoftmaxBaselineTest)

TEST_F (LSTMTrainerTest, SoftmaxTest)

TEST_F (LSTMTrainerTest, EncodedSoftmaxTest)

TEST_F (LSTMTrainerTest, TestLayerAccess)

TEST_F (LSTMTrainerTest, EncodesEng)

TEST_F (LSTMTrainerTest, EncodesKan)

TEST_F (LSTMTrainerTest, EncodesKor)

TEST_F (LSTMTrainerTest, MapCoder)

TEST_F (LSTMTrainerTest, ConvertModel)

TEST_F (MatrixTest, RotatingTranspose_3_1)

TEST_F (MatrixTest, RotatingTranspose_2_0)

TEST_F (MatrixTest, RotatingTranspose_1_3)

TEST_F (MatrixTest, RotatingTranspose_0_2)

TEST_F (NetworkioTest, InitWithZeroFill)

TEST_F (NetworkioTest, CopyWithYReversal)

TEST_F (NetworkioTest, CopyWithXReversal)

TEST_F (NetworkioTest, CopyWithXYTranspose)

TEST (NormstrngsTest, BasicText)

TEST (NormstrngsTest, LigatureText)

TEST (NormstrngsTest, OcrSpecificNormalization)

TEST (NormstrngsTest, DetectsCorrectText)

TEST (NormstrngsTest, DetectsIncorrectText)

TEST (NormstrngsTest, NonIndicTextDoesntBreakIndicRules)

TEST (NormstrngsTest, NoLonelyJoiners)

TEST (NormstrngsTest, NoLonelyJoinersPlus)

TEST (NormstrngsTest, NoLonelyJoinersNonAlpha)

TEST (NormstrngsTest, JoinersStayInArabic)

TEST (NormstrngsTest, DigitOK)

TEST (NormstrngsTest, DandaOK)

TEST (NormstrngsTest, AllScriptsRegtest)

TEST (NormstrngsTest, IsWhitespace)

TEST (NormstrngsTest, SpanUTF8Whitespace)

TEST (NormstrngsTest, SpanUTF8NotWhitespace)

TEST (NormstrngsTest, IsInterchangeValid)

TEST (NormstrngsTest, IsInterchangeValid7BitAscii)

TEST (NormstrngsTest, FullwidthToHalfwidth)

std::string CodepointList (const std::vector< char32 > &str32)

std::string PrintString32WithUnicodes (const std::string &str)

std::string PrintStringVectorWithUnicodes (const std::vector< std::string > &glyphs)

void ExpectGraphemeModeResults (const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)

TEST_F (NthItemTest, GeneralTest)

TEST_F (NthItemTest, BoringTest)

TEST_F (NthItemTest, UniqueTest)

TEST_F (NthItemTest, EqualTest)

TEST_P (OSDTest, MatchOrientationDegrees)

INSTANTIATE_TEST_SUITE_P (TessdataEngEuroHebrew, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR)))

INSTANTIATE_TEST_SUITE_P (TessdataBestEngEuroHebrew, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_best")))

INSTANTIATE_TEST_SUITE_P (TessdataFastEngEuroHebrew, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_fast")))

INSTANTIATE_TEST_SUITE_P (TessdataFastRotated90, OSDTest, ::testing::Combine(::testing::Values(90), ::testing::Values(TESTING_DIR "/phototest-rotated-R.png"), ::testing::Values(TESSDATA_DIR "_fast")))

INSTANTIATE_TEST_SUITE_P (TessdataFastRotated180, OSDTest, ::testing::Combine(::testing::Values(180), ::testing::Values(TESTING_DIR "/phototest-rotated-180.png"), ::testing::Values(TESSDATA_DIR "_fast")))

INSTANTIATE_TEST_SUITE_P (TessdataFastRotated270, OSDTest, ::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/phototest-rotated-L.png"), ::testing::Values(TESSDATA_DIR "_fast")))

INSTANTIATE_TEST_SUITE_P (TessdataFastDevaRotated270, OSDTest, ::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/devatest-rotated-270.png"), ::testing::Values(TESSDATA_DIR "_fast")))

INSTANTIATE_TEST_SUITE_P (TessdataFastDeva, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/devatest.png"), ::testing::Values(TESSDATA_DIR "_fast")))

TEST_F (PageSegModeTest, WordTest)

TEST_F (PangoFontInfoTest, TestNonDefaultConstructor)

TEST_F (PangoFontInfoTest, DoesParseFontDescriptionName)

TEST_F (PangoFontInfoTest, DoesParseMissingFonts)

TEST_F (PangoFontInfoTest, DoesGetSpacingProperties)

TEST_F (PangoFontInfoTest, CanRenderString)

TEST_F (PangoFontInfoTest, CanRenderLigature)

TEST_F (PangoFontInfoTest, CannotRenderUncoveredString)

TEST_F (PangoFontInfoTest, CannotRenderInvalidString)

TEST_F (PangoFontInfoTest, CanDropUncoveredChars)

TEST_F (FontUtilsTest, DoesFindAvailableFonts)

TEST_F (FontUtilsTest, DoesDetectMissingFonts)

TEST_F (FontUtilsTest, DoesListAvailableFonts)

TEST_F (FontUtilsTest, DoesSelectFont)

TEST_F (FontUtilsTest, DoesFailToSelectFont)

void AsciiToRowInfo (const char *text, int row_number, RowInfo *info)

void MakeAsciiRowInfos (const TextAndModel *row_infos, int n, std::vector< RowInfo > *output)

void EvaluateParagraphDetection (const TextAndModel *correct, int n, const std::vector< PARA * > &detector_output)

void TestParagraphDetection (const TextAndModel *correct, int num_rows)

TEST (ParagraphsTest, ListItemsIdentified)

TEST (ParagraphsTest, TestSimpleParagraphDetection)

TEST (ParagraphsTest, TestFewCluesWithCrown)

TEST (ParagraphsTest, TestCrownParagraphDetection)

TEST (ParagraphsText, TestRealFlushLeftParagraphs)

TEST (ParagraphsTest, TestSingleFullPageContinuation)

TEST (ParagraphsTest, TestRightAlignedParagraph)

TEST (ParagraphsTest, TestTinyParagraphs)

TEST (ParagraphsTest, TestComplexPage1)

TEST (ParagraphsTest, TestComplexPage2)

TEST (ParagraphsTest, TestSubtleCrown)

TEST (ParagraphsTest, TestStrayLineInBlock)

TEST (ParagraphsTest, TestUnlvInsurance)

TEST (ParagraphsTest, TestSplitsOutLeaderLines)

TEST (ParagraphsTest, NotDistractedBySourceCode)

TEST (ParagraphsTest, NotOverlyAggressiveWithBlockQuotes)

TEST (ParagraphsTest, IndexPageTest)

TEST_F (ParamsModelTest, TestEngParamsModelIO)

void ClassicProgressTester (const char *imgname, const char *tessdatadir, const char *lang)

void NewProgressTester (const char *imgname, const char *tessdatadir, const char *lang)

TEST (QuickTest, ClassicProgressReporting)

TEST (QuickTest, NewProgressReporting)

TEST (QRSequenceGenerator, GetBinaryReversedInteger)

TEST_P (QRSequenceGeneratorTest, GeneratesValidSequence)

INSTANTIATE_TEST_SUITE_P (RangeTest, QRSequenceGeneratorTest, ::testing::Values(2, 7, 8, 9, 16, 1e2, 1e4, 1e6))

TEST_F (RecodeBeamTest, DoesChinese)

TEST_F (RecodeBeamTest, DoesJapanese)

TEST_F (RecodeBeamTest, DoesKorean)

TEST_F (RecodeBeamTest, DoesKannada)

TEST_F (RecodeBeamTest, DoesMarathi)

TEST_F (RecodeBeamTest, DoesEnglish)

TEST_F (RecodeBeamTest, DISABLED_EngDictionary)

TEST_F (RecodeBeamTest, DISABLED_ChiDictionary)

TEST_F (RecodeBeamTest, DISABLED_MultiCodeSequences)

TEST_F (TBOXTest, OverlapInside)

TEST_F (TBOXTest, OverlapBoolCorners)

TEST_F (TBOXTest, OverlapFractionCorners)

TEST_F (TBOXTest, OverlapBoolSides)

TEST_F (TBOXTest, OverlapFractionSides)

TEST_F (TBOXTest, OverlapBoolSpan)

TEST_F (TBOXTest, OverlapFractionSpan)

TEST_F (TBOXTest, OverlapOutsideTests)

TEST_F (TBOXTest, OverlapXFraction)

TEST_F (TBOXTest, OverlapYFraction)

TEST_F (TBOXTest, OverlapXFractionZeroSize)

TEST_F (TBOXTest, OverlapYFractionZeroSize)

TEST_F (ResultIteratorTest, EasyTest)

TEST_F (ResultIteratorTest, ComplexTest)

TEST_F (ResultIteratorTest, GreyTest)

TEST_F (ResultIteratorTest, SmallCapDropCapTest)

TEST_F (ResultIteratorTest, DualStartTextlineOrderTest)

TEST_F (ResultIteratorTest, LeftwardTextlineOrderTest)

TEST_F (ResultIteratorTest, RightwardTextlineOrderTest)

TEST_F (ResultIteratorTest, TextlineOrderSanityCheck)

TEST_F (ResultIteratorTest, DISABLED_NonNullChoicesTest)

TEST_F (ResultIteratorTest, NonNullConfidencesTest)

TEST_F (ScanutilsTest, DoesScanf)

TEST_F (ShapeTest, BasicTest)

TEST_F (ShapeTest, AddShapeTest)

TEST_F (ShapeTableTest, FullTest)

TEST_F (STATSTest, BasicStats)

TEST_F (STATSTest, InitStats)

TEST_F (STATSTest, TopNModes)

TEST_F (StridemapTest, Indexing)

TEST_F (StridemapTest, Scaling)

TEST_F (StringRendererTest, DoesRenderToImage)

TEST_F (StringRendererTest, DoesRenderToImageWithUnderline)

TEST_F (StringRendererTest, DoesHandleNewlineCharacters)

TEST_F (StringRendererTest, DoesRenderLigatures)

TEST_F (StringRendererTest, ArabicBoxcharsInLTROrder)

TEST_F (StringRendererTest, DoesOutputBoxcharsInReadingOrder)

TEST_F (StringRendererTest, DoesRenderVerticalText)

TEST_F (StringRendererTest, DoesKeepAllImageBoxes)

TEST_F (StringRendererTest, DoesClearBoxes)

TEST_F (StringRendererTest, DoesLigatureTextForRendering)

TEST_F (StringRendererTest, DoesRetainInputLigatureForRendering)

TEST_F (StringRendererTest, DoesStripUnrenderableWords)

TEST_F (StringRendererTest, DoesRenderWordBoxes)

TEST_F (StringRendererTest, DoesRenderWordBoxesFromMultiLineText)

TEST_F (StringRendererTest, DoesRenderAllFontsToImage)

TEST_F (StringRendererTest, DoesNotRenderWordJoiner)

TEST_F (StringRendererTest, DISABLED_DoesDropUncoveredChars)

TEST (ConvertBasicLatinToFullwidthLatinTest, DoesConvertBasicLatin)

TEST (ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertFullwidthLatin)

TEST (ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertNonLatin)

TEST (ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertSpace)

TEST (ConvertFullwidthLatinToBasicLatinTest, DoesConvertFullwidthLatin)

TEST (ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertBasicLatin)

TEST (ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertNonLatin)

TEST (ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertSpace)

TEST_F (TableFinderTest, GapInXProjectionNoGap)

TEST_F (TableFinderTest, GapInXProjectionEdgeGap)

TEST_F (TableFinderTest, GapInXProjectionExists)

TEST_F (TableFinderTest, HasLeaderAdjacentOverlapping)

TEST_F (TableFinderTest, HasLeaderAdjacentNoOverlap)

TEST_F (TableFinderTest, HasLeaderAdjacentPreservesColumns)

TEST_F (TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass)

TEST_F (TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail)

TEST_F (TableRecognizerTest, HasSignificantLinesBasicPass)

TEST_F (TableRecognizerTest, HasSignificantLinesBasicFail)

TEST_F (TableRecognizerTest, HasSignificantLinesHorizontalOnlyFails)

TEST_F (TableRecognizerTest, FindLinesBoundingBoxBasic)

TEST_F (TableRecognizerTest, RecognizeLinedTableBasic)

TEST_F (TableRecognizerTest, RecognizeWhitespacedTableBasic)

TEST_F (StructuredTableTest, CountVerticalIntersectionsAll)

TEST_F (StructuredTableTest, CountHorizontalIntersectionsAll)

TEST_F (StructuredTableTest, VerifyLinedTableBasicPass)

TEST_F (StructuredTableTest, VerifyLinedTableHorizontalFail)

TEST_F (StructuredTableTest, VerifyLinedTableVerticalFail)

TEST_F (StructuredTableTest, FindWhitespacedColumnsBasic)

TEST_F (StructuredTableTest, FindWhitespacedColumnsSorted)

TEST_F (TabVectorTest, SetStartEndPointsMatch)

TEST_F (TabVectorTest, XAtY45DegreeSlopeInRangeExact)

TEST_F (TabVectorTest, XAtYVerticalInRangeExact)

TEST_F (TabVectorTest, XAtYHorizontal)

TEST_F (TabVectorTest, XAtYRoundingSimple)

TEST_F (TabVectorTest, XAtYLargeNumbers)

TEST_F (TabVectorTest, XAtYHorizontalInRangeExact)

TEST_F (TabVectorTest, VOverlapInRangeSimple)

TEST_F (TabVectorTest, VOverlapOutOfRange)

TEST_F (TabVectorTest, XYFlip)

TEST_F (TatweelTest, UnicharsetIgnoresTatweel)

TEST_F (TatweelTest, DictIgnoresTatweel)

TEST_F (TatweelTest, UnicharsetLoadKeepsTatweel)

TEST_F (TextlineProjectionTest, Unrotated)

TEST_F (TextlineProjectionTest, Rotated)

TEST_F (TfileTest, Serialize)

TEST_F (TfileTest, FGets)

TEST_F (TfileTest, BigEndian)

TEST (UnicharTest, Conversion)

TEST (UnicharTest, InvalidText)

TEST_F (UnicharcompressTest, DoesChinese)

TEST_F (UnicharcompressTest, DoesJapanese)

TEST_F (UnicharcompressTest, DoesKorean)

TEST_F (UnicharcompressTest, DoesKannada)

TEST_F (UnicharcompressTest, DoesMarathi)

TEST_F (UnicharcompressTest, DoesEnglish)

TEST_F (UnicharcompressTest, DoesLigaturesWithDoubles)

TEST_F (UnicharcompressTest, GetEncodingAsString)

TEST (UnicharsetTest, Basics)

TEST (UnicharsetTest, Multibyte)

TEST (UnicharsetTest, MultibyteBigrams)

TEST (UnicharsetTest, OldStyle)

TEST (ValidateGraphemeTest, MultipleSyllablesAreNotASingleGrapheme)

TEST (ValidateGraphemeTest, SingleConsonantOK)

TEST (ValidateGraphemeTest, SimpleCV)

TEST (ValidateGraphemeTest, SubscriptConjunct)

TEST (ValidateGraphemeTest, HalfFormJoiner)

TEST (ValidateGraphemeTest, TraditionalConjunctJoiner)

TEST (ValidateGraphemeTest, OpenConjunctNonJoiner)

TEST (ValidateGraphemeTest, ExplicitViramaNonJoiner)

TEST (ValidateGraphemeTest, ThaiGraphemes)

TEST (ValidateGraphemeTest, NoLonelyJoinersQuote)

TEST (ValidateIndicTest, AddsJoinerToTerminalVirama)

TEST (ValidateIndicTest, OnlyOneDependentVowel)

TEST (ValidateIndicTest, OnlyOneVowelModifier)

TEST (ValidateIndicTest, VowelModifierMustBeLast)

TEST (ValidateIndicTest, MatrasFollowConsonantsNotVowels)

TEST (ValidateIndicTest, SubGraphemes)

TEST (ValidateIndicTest, Nukta)

TEST (ValidateIndicTest, SinhalaRakaransaya)

TEST (ValidateIndicTest, SinhalaYansaya)

TEST (ValidateIndicTest, SinhalaRepaya)

TEST (ValidateIndicTest, SinhalaSpecials)

TEST (ValidateKhmerTest, GoodKhmerWords)

TEST (ValidateKhmerTest, BadKhmerWords)

TEST (ValidateMyanmarTest, GoodMyanmarWords)

TEST (ValidateMyanmarTest, BadMyanmarWords)

TEST (ValidatorTest, MostFrequentViramaScript)

TEST (ValidatorTest, Idempotency)

fix_fuzzy_spaces()

Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces then replace the sublist in the ROW_RES.

Parameters

	monitor	progress monitor
	word_count	count of words in doc
[out]	page_res

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)

transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list closing any gaps of this size by either inserting new combination words, or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)

fix_sp_fp_word()

Test the current word to see if it can be split by deleting noise blobs. If so, perform the split. Return with the iterator pointing to the same place if the word is unchanged, or to the last of the replacement words.

void fixspace_dbg (WERD_RES *word)

C_OUTLINE::move

Move C_OUTLINE by vector

Parameters

vec	vector to reposition OUTLINE by

POLY_BLOCK::reflect_in_y_axis

Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)

int lessthan (const void *first, const void *second)

start_seam_list

Initialize a list of seams that match the original number of blobs present in the starting segmentation. Each of the seams created by this routine has location information only.

void start_seam_list (TWERD *word, std::vector< SEAM * > *seam_array)

AddConfigToClass

Add a new config to this class. Malloc new space and copy the old configs if necessary. Return the config id for the new config.

Parameters

Class The class to add to

int AddConfigToClass (CLASS_TYPE Class)

AddProtoToClass

Add a new proto to this class. Malloc new space and copy the old protos if necessary. Return the proto id for the new proto.

Parameters

Class The class to add to

int AddProtoToClass (CLASS_TYPE Class)

void FillABC (PROTO_STRUCT *Proto)

void FreeClass (CLASS_TYPE Class)

void FreeClassFields (CLASS_TYPE Class)

CLASS_TYPE NewClass (int NumProtos, int NumConfigs)

extract_edges

void extract_edges (Image pix, BLOCK *block)

fill_buckets

capture_children

Find all neighbouring outlines that are children of this outline and either move them to the output list or declare this outline illegal and return false.

empty_buckets

Run the edge detector over the block and return a list of blobs.

outlines_to_blobs

Gather together outlines into blobs using the usual bucket sort.

void outlines_to_blobs (BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)

row_y_order

Sort function to sort rows in y from page top.

row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

make_single_row

Arrange the blobs into a single row. However, if there is only a single blob, it makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)

make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)

make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)

fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)

find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. Returns true if the row should be deleted.

bool find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)

deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)

compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)

void compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)

compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count)

expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)

void adjust_row_limits (TO_BLOCK *block)

compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, bool testing_on)

fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)

compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly; otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)

compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

int32_t compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)

compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

int32_t compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)

correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)

separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)

pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)

fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)

fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)

make_baseline_spline

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)

segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return true if enough blobs were far enough away to need a quadratic.

bool segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)

linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

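Returns: a double * (see the signature below). Note: the wording "true if enough blobs were far enough away to need a quadratic" matches segment_baseline, which returns bool, and appears to have been copied from there.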

double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])

assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)

most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)

blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)

mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)

make_single_word

For each row, arrange the blobs into one word. There is no fixed pitch detection.

void make_single_word (bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)

void make_words (tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)

set_row_spaces

Set the min_space and max_nonspace members of the row so that the blobs can be arranged into words.

void set_row_spaces (TO_BLOCK *block, FCOORD rotation, bool testing_on)

row_words

Compute the max nonspace and min space for the row.

int32_t row_words (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)

row_words2

Compute the max nonspace and min space for the row.

int32_t row_words2 (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)

make_real_words

Convert a TO_BLOCK to a BLOCK.

void make_real_words (tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)

make_rep_words

Fabricate a real row from only the repeated blob words. Get the xheight from the block as it may be more meaningful.

ROW * make_rep_words (TO_ROW *row, TO_BLOCK *block)

make_real_word

Construct a WERD from a given number of adjacent entries in a list of BLOBNBOXs.

WERD * make_real_word (BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)

check_blob

Returns: true if blob has a non whole outline.

any_shared_split_points

Return true if any of the splits share a point with this one.

preserve_outline_tree

Copy the list of outlines.

restore_outline_tree

Copy the list of outlines.

Variables
const int	kMaxNumberOfScripts = 116 + 1 + 2 + 1

constexpr int	kPointsPerInch = 72

constexpr int	kMinCredibleResolution = 70

constexpr int	kMaxCredibleResolution = 2400

constexpr int	kResolutionEstimationFactor = 10

const int	kMinRectSize = 10

const char	kTesseractReject = '~'

const char	kUNLVReject = '~'

const char	kUNLVSuspect = '^'

const int	kNumbersPerBlob = 5

const int	kBytesPerNumber = 5

const int	kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

const int	kBytesPer64BitNumber = 20

const int	kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + UNICHAR_LEN

const int	kUniChs [] = {0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0}

const int	kLatinChs [] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0}

DotProductFunction	DotProduct

const float	kMathDigitDensityTh1 = 0.25

const float	kMathDigitDensityTh2 = 0.1

const float	kMathItalicDensityTh = 0.5

const float	kUnclearDensityTh = 0.25

const int	kSeedBlobsCountTh = 10

const int	kLeftIndentAlignmentCountTh = 1

const int	kMaxCharTopRange = 48

const float	kCertaintyScale = 7.0f

const float	kWorstDictCertainty = -25.0f

const float	kSizeRatioToReject = 2.0

const int	kMinAcceptableBlobHeight = 10

const float	kScriptAcceptRatio = 1.3

const float	kHanRatioInKorean = 0.7

const float	kHanRatioInJapanese = 0.3

const float	kNonAmbiguousMargin = 1.0

const int	kMaxCircleErosions = 8

const ParagraphModel *	kCrownLeft

const ParagraphModel *	kCrownRight

char *	editor_image_win_name = "EditorImage"

int	editor_image_xpos = 590

int	editor_image_ypos = 10

int	editor_image_word_bb_color = ScrollView::BLUE

int	editor_image_blob_bb_color = ScrollView::YELLOW

char *	editor_word_name = "BlnWords"

int	editor_word_xpos = 60

int	editor_word_ypos = 510

int	editor_word_height = 240

int	editor_word_width = 655

BLOCK_LIST *	current_block_list

const int16_t	kMaxBoxEdgeDiff = 2

const char	kBlameCorrect [] = "corr"

const char	kBlameClassifier [] = "cl"

const char	kBlameChopper [] = "chop"

const char	kBlameClassLMTradeoff [] = "cl/LM"

const char	kBlamePageLayout [] = "pglt"

const char	kBlameSegsearchHeur [] = "ss_heur"

const char	kBlameSegsearchPP [] = "ss_pp"

const char	kBlameClassOldLMTradeoff [] = "cl/old_LM"

const char	kBlameAdaption [] = "adapt"

const char	kBlameNoTruthSplit [] = "no_tr_spl"

const char	kBlameNoTruth [] = "no_tr"

const char	kBlameUnknown [] = "unkn"

const char *const	kIncorrectResultReasonNames []

const double	kCosSmallAngle = 0.866

const double	kDefiniteAspectRatio = 2.0

const double	kComplexShapePerimeterRatio = 1.5

const double	kMinMediumSizeRatio = 0.25

const double	kMaxMediumSizeRatio = 4.0

const TPOINT	kDivisibleVerticalUpright (0, 1)

const TPOINT	kDivisibleVerticalItalic (1, 5)

const int	kBoxReadBufSize = 1024

const int	kBoxClipTolerance = 2

const int	kNumEndPoints = 3

const int	kMinPointsForErrorCount = 16

const int	kMaxRealDistance = 2.0

const int	kMaxReadAhead = 8

const int	kFeaturePadding = 2

const int	kImagePadding = 4

const int	kSloppyTolerance = 4

const float	kFinalPixelTolerance = 0.125f

const int	kBlnCellHeight = 256

const int	kBlnXHeight = 128

const int	kBlnBaselineOffset = 64

const int	kHistogramSize = 256

const int	kWordrecMaxNumJoinChunks = 4

const double	kMaxWordSizeRatio = 1.25

const double	kMaxLineSizeRatio = 1.25

const double	kMaxWordGapRatio = 2.0

const int	par1 = 4500 / (approx_dist * approx_dist)

const int	par2 = 6750 / (approx_dist * approx_dist)

const long double	kMinVariance = 1.0L / 1024

const int	kMinSubscriptOffset = 20

const int	kMinSuperscriptOffset = 20

const int	kMaxDropCapBottom = -128

const double	kMaxOverlapDenominator = 0.125

const double	kMinXHeightMatch = 0.5

const double	kMaxBaselineDrift = 0.0625

const int	kCenterGradeCap = 25

const double	kBadPriority = 999.0

bool	wordrec_display_splits = 0

const double	kMaxPerimeterWidthRatio = 8.0

const int	kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

int	log_level = INT_MAX

const int	kRadicalRadix = 29

const double	kMinXHeightFraction = 0.25

const double	kMinCapHeightFraction = 0.05

const char	kUniversalAmbigsFile []

const int	ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

const double	FTable [FTABLE_Y][FTABLE_X]

const char *const	kMicroFeatureType = "mf"

const char *const	kCNFeatureType = "cn"

const char *const	kIntFeatureType = "if"

const char *const	kGeoFeatureType = "tb"

EndParamDesc	of

const FEATURE_DESC_STRUCT	MicroFeatureDesc

TESS_API const FEATURE_DESC_STRUCT	PicoFeatDesc

const FEATURE_DESC_STRUCT	CharNormDesc

const FEATURE_DESC_STRUCT	OutlineFeatDesc

const FEATURE_DESC_STRUCT	IntFeatDesc

const FEATURE_DESC_STRUCT	GeoFeatDesc

const double	kStandardFeatureLength = 64.0 / 5

const float	MF_SCALE_FACTOR = 0.5f / kBlnXHeight

double	classify_min_slope = 0.414213562

double	classify_max_slope = 2.414213562

double	classify_pico_feature_length = 0.05

TESS_API float	PicoFeatureLength

const int	kRandomizingCenter = 128

const int	case_state_table [6][4]

const char	kDoNotReverse [] = "RRP_DO_NO_REVERSE"

const char	kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"

const char	kForceReverse [] = "RRP_FORCE_REVERSE"

const char *const	RTLReversePolicyNames [] = {kDoNotReverse, kReverseIfHasRTL, kForceReverse}

const TFloat	TanhTable []

const TFloat	LogisticTable []

constexpr int	kTableSize = 4096

constexpr TFloat	kScaleFactor = 256.0

const int	kMaxInputHeight = 48

const TFloat	kStateClip = 100.0

const TFloat	kErrClip = 1.0f

const double	kDictRatio = 2.25

const double	kCertOffset = -0.085

const int	kMinWinSize = 500

const int	kMaxWinSize = 2000

const int	kXWinFrameSize = 30

const int	kYWinFrameSize = 80

const float	kMinCertainty = -20.0f

const float	kMinProb = std::exp(kMinCertainty)

class tesseract::TFNetworkModelDefaultTypeInternal	_TFNetworkModel_default_instance_

const int	kAdamCorrectionIterations = 200000

const TFloat	kAdamEpsilon = 1e-8

const int	kInt8Flag = 1

const int	kAdamFlag = 4

const int	kDoubleFlag = 128

const int	kHistogramBuckets = 16

int	textord_debug_tabfind = 0

int	textord_debug_bugs = 0

bool	textord_debug_printable = false

const double	kAlignedFraction = 0.03125

const double	kRaggedFraction = 2.5

const double	kAlignedGapFraction = 0.75

const double	kRaggedGapFraction = 1.0

const int	kVLineAlignment = 3

const int	kVLineGutter = 1

const int	kVLineSearchSize = 150

const int	kMinRaggedTabs = 5

const int	kMinAlignedTabs = 4

const int	kVLineMinLength = 300

const double	kMinTabGradient = 4.0

const int	kMaxSkewFactor = 15

double	textord_underline_threshold = 0.5

const double	kMaxSmallNeighboursPerPix = 1.0 / 32

const int	kMaxLargeOverlapsWithSmall = 3

const int	kMaxMediumOverlapsWithSmall = 12

const int	kMaxLargeOverlapsWithMedium = 12

const int	kOriginalNoiseMultiple = 8

const int	kNoisePadding = 4

const double	kPhotoOffsetFraction = 0.375

const double	kMinGoodTextPARatio = 1.5

const int	kMaxIncompatibleColumnCount = 2

const double	kHorizontalGapMergeFraction = 0.5

const double	kMinGutterWidthGrid = 0.5

const double	kMaxDistToPartSizeRatio = 1.5

const double	kMaxSpacingDrift = 1.0 / 72

const double	kMaxTopSpacingFraction = 0.25

const double	kMaxSameBlockLineSpacing = 3

const double	kMaxSizeRatio = 1.5

const double	kMaxLeaderGapFractionOfMax = 0.25

const double	kMaxLeaderGapFractionOfMin = 0.5

const int	kMinLeaderCount = 5

const int	kMinStrongTextValue = 6

const int	kMinChainTextValue = 3

const int	kHorzStrongTextlineCount = 8

const int	kHorzStrongTextlineHeight = 10

const int	kHorzStrongTextlineAspect = 5

const double	kMaxBaselineError = 0.4375

const double	kMinBaselineCoverage = 0.5

const int	kMaxRMSColorNoise = 128

const int	kMaxColorDistance = 900

const int	kRGBRMSColors = 4

const int	kMaxPadFactor = 6

const int	kMaxNeighbourDistFactor = 4

const int	kMaxCaptionLines = 7

const double	kMinCaptionGapRatio = 2.0

const double	kMinCaptionGapHeightRatio = 0.5

const double	kMarginOverlapFraction = 0.25

const double	kBigPartSizeRatio = 1.75

const double	kTinyEnoughTextlineOverlapFraction = 0.25

const double	kMaxPartitionSpacing = 1.75

const int	kSmoothDecisionMargin = 4

const double	kMinColumnWidth = 2.0 / 3

int	devanagari_split_debuglevel = 0

bool	devanagari_split_debugimage = 0

bool	textord_show_fixed_cuts = false

ScrollView *	to_win = nullptr

FILE *	to_debug

int	textord_fp_chop_error = 2

bool	gapmap_debug = false

bool	gapmap_use_ends = false

bool	gapmap_no_isolated_quanta = false

double	gapmap_big_gaps = 1.75

const double	kMinRectangularFraction = 0.125

const double	kMaxRectangularFraction = 0.75

const double	kMaxRectangularGradient = 0.1

const int	kMinImageFindSize = 100

const int	kThinLineFraction = 20
	Denominator of resolution makes max pixel width to allow thin lines. More...

const int	kMinLineLengthFraction = 4
	Denominator of resolution makes min pixels to demand line lengths to be. More...

const int	kCrackSpacing = 100
	Spacing of cracks across the page to break up tall vertical lines. More...

const int	kLineFindGridSize = 50
	Grid size used by line finder. Not very critical. More...

const int	kMinThickLineWidth = 12

const int	kMaxLineResidue = 6

const double	kThickLengthMultiple = 0.75

const double	kMaxNonLineDensity = 0.25

const double	kMaxStaveHeight = 1.0

const double	kMinMusicPixelFraction = 0.75

bool	textord_heavy_nr = false

bool	textord_show_initial_rows = false

bool	textord_show_parallel_rows = false

bool	textord_show_expanded_rows = false

bool	textord_show_final_rows = false

bool	textord_show_final_blobs = false

bool	textord_test_landscape = false

bool	textord_parallel_baselines = true

bool	textord_straight_baselines = false

bool	textord_old_baselines = true

bool	textord_old_xheight = false

bool	textord_fix_xheight_bug = true

bool	textord_fix_makerow_bug = true

bool	textord_debug_xheights = false

int	textord_test_x = -INT32_MAX

int	textord_test_y = -INT32_MAX

int	textord_min_blobs_in_row = 4

int	textord_spline_minblobs = 8

int	textord_spline_medianwin = 6

int	textord_min_xheight = 10

double	textord_spline_shift_fraction = 0.02

double	textord_skew_ile = 0.5

double	textord_skew_lag = 0.02

double	textord_linespace_iqrlimit = 0.2

double	textord_width_limit = 8

double	textord_chop_width = 1.5

double	textord_minxh = 0.25

double	textord_min_linesize = 1.25

double	textord_excess_blobsize = 1.3

double	textord_occupancy_threshold = 0.4

double	textord_underline_width = 2.0

double	textord_min_blob_height_fraction = 0.75

double	textord_xheight_mode_fraction = 0.4

double	textord_ascheight_mode_fraction = 0.08

double	textord_ascx_ratio_min = 1.25

double	textord_ascx_ratio_max = 1.8

double	textord_descx_ratio_min = 0.25

double	textord_descx_ratio_max = 0.6

double	textord_xheight_error_margin = 0.1

int	textord_lms_line_trials = 12

bool	textord_new_initial_xheight = true

bool	textord_debug_blob = false

bool	textord_oldbl_debug = false

const int	kMinModeFactorOcropus = 32

const int	kMinModeFactor = 12

int	pitsync_linear_version = 6

double	pitsync_joined_edge = 0.75

double	pitsync_offset_freecut_fraction = 0.25

const double	kStrokeWidthFractionTolerance = 0.125

const double	kStrokeWidthTolerance = 1.5

const double	kStrokeWidthFractionCJK = 0.25

const double	kStrokeWidthCJK = 2.0

const int	kCJKRadius = 2

const double	kCJKBrokenDistanceFraction = 0.25

const int	kCJKMaxComponents = 8

const double	kCJKAspectRatio = 1.25

const double	kCJKAspectRatioIncrease = 1.0625

const int	kMaxCJKSizeRatio = 5

const double	kBrokenCJKIterationFraction = 0.125

const double	kDiacriticXPadRatio = 7.0

const double	kDiacriticYPadRatio = 1.75

const double	kMinDiacriticSizeRatio = 1.0625

const double	kMaxDiacriticDistanceRatio = 1.25

const double	kMaxDiacriticGapToBaseCharHeight = 1.0

const int	kLineTrapLongest = 4

const int	kLineTrapShortest = 2

const int	kMostlyOneDirRatio = 3

const double	kLineResidueAspectRatio = 8.0

const int	kLineResiduePadRatio = 3

const double	kLineResidueSizeRatio = 1.75

const double	kNeighbourSearchFactor = 2.5

const double	kNoiseOverlapGrowthFactor = 4.0

const double	kNoiseOverlapAreaFactor = 1.0 / 512

const int	kTabRadiusFactor = 5

const int	kMinVerticalSearch = 3

const int	kMaxVerticalSearch = 12

const int	kMaxRaggedSearch = 25

const int	kMinLinesInColumn = 10

const double	kMinFractionalLinesInColumn = 0.125

const double	kMaxGutterWidthAbsolute = 2.00

const int	kRaggedGutterMultiple = 5

const double	kLineFragmentAspectRatio = 10.0

const int	kMinEvaluatedTabs = 3

const double	kCosMaxSkewAngle = 0.866025

const int	kColumnWidthFactor = 20

const int	kMaxVerticalSpacing = 500

const int	kMaxBlobWidth = 500

const double	kSplitPartitionSize = 2.0

const double	kAllowTextHeight = 0.5

const double	kAllowTextWidth = 0.6

const double	kAllowTextArea = 0.8

const double	kAllowBlobHeight = 0.3

const double	kAllowBlobWidth = 0.4

const double	kAllowBlobArea = 0.05

const int	kMinBoxesInTextPartition = 10

const int	kMaxBoxesInDataPartition = 20

const double	kMaxGapInTextPartition = 4.0

const double	kMinMaxGapInTextPartition = 0.5

const double	kMaxBlobOverlapFactor = 4.0

const double	kMaxTableCellXheight = 2.0

const int	kMaxColumnHeaderDistance = 4

const double	kTableColumnThreshold = 3.0

const double	kMinOverlapWithTable = 0.6

const int	kSideSpaceMargin = 10

const double	kSmallTableProjectionThreshold = 0.35

const double	kLargeTableProjectionThreshold = 0.45

const int	kLargeTableRowCount = 6

const int	kMinRowsInTable = 3

const int	kAdjacentLeaderSearchPadding = 2

const double	kParagraphEndingPreviousLineRatio = 1.3

const double	kMaxParagraphEndingLeftSpaceMultiple = 3.0

const double	kMinParagraphEndingTextToWhitespaceRatio = 3.0

const double	kMaxXProjectionGapFactor = 2.0

const double	kStrokeWidthFractionalTolerance = 0.25

const double	kStrokeWidthConstantTolerance = 2.0

const double	kHorizontalSpacing = 0.30

const double	kVerticalSpacing = -0.2

const int	kCellSplitRowThreshold = 0

const int	kCellSplitColumnThreshold = 0

const int	kLinedTableMinVerticalLines = 3

const int	kLinedTableMinHorizontalLines = 3

const double	kRequiredColumns = 0.7

const double	kMarginFactor = 1.1

const double	kMaxRowSize = 2.5

const double	kGoodRowNumberOfColumnsSmall [] = {2, 2, 2, 2, 2, 3, 3}

const double	kGoodRowNumberOfColumnsLarge = 0.7

const double	kMinFilledArea = 0.35

const int	kGutterMultiple = 4

const int	kGutterToNeighbourRatio = 3

const int	kSimilarVectorDist = 10

const int	kSimilarRaggedDist = 50

const int	kMaxFillinMultiple = 11

const double	kMinGutterFraction = 0.5

const double	kLineCountReciprocal = 4.0

const double	kMinAlignedGutter = 0.25

const double	kMinRaggedGutter = 1.5

double	textord_tabvector_vertical_gap_fraction = 0.5

double	textord_tabvector_vertical_box_ratio = 0.5

bool	textord_debug_pitch_test = false

bool	textord_fast_pitch_test = false

bool	textord_debug_pitch_metric = false

bool	textord_show_row_cuts = false

bool	textord_show_page_cuts = false

bool	textord_blockndoc_fixed = false

double	textord_projection_scale = 0.200

double	textord_balance_factor = 1.0

bool	textord_show_initial_words = false

bool	textord_blocksall_fixed = false

bool	textord_blocksall_prop = false

int	textord_dotmatrix_gap = 3

int	textord_debug_block = 0

int	textord_pitch_range = 2

double	textord_wordstats_smooth_factor = 0.05

double	textord_words_maxspace = 4.0

double	textord_words_default_maxspace = 3.5

double	textord_words_default_minspace = 0.6

double	textord_words_min_minspace = 0.3

double	textord_words_default_nonspace = 0.2

double	textord_words_initial_lower = 0.25

double	textord_words_initial_upper = 0.15

double	textord_words_minlarge = 0.75

double	textord_words_pitchsd_threshold = 0.040

double	textord_words_def_fixed = 0.016

double	textord_words_def_prop = 0.090

int	textord_words_veto_power = 5

double	textord_pitch_rowsimilarity = 0.08

bool	textord_pitch_scalebigwords = false

double	words_initial_lower = 0.5

double	words_initial_upper = 0.15

double	words_default_prop_nonspace = 0.25

double	words_default_fixed_space = 0.75

double	words_default_fixed_limit = 0.6

double	textord_words_definite_spread = 0.30

double	textord_spacesize_ratioprop = 2.0

double	textord_fpiqr_ratio = 1.5

double	textord_max_pitch_iqr = 0.20

double	textord_underline_offset = 0.1

bool	textord_restore_underlines = true

bool	textord_force_make_prop_words = false

bool	textord_chopper_test = false

CLUSTERCONFIG	Config = {elliptical, 0.625, 0.05, 1.0, 1e-6, 0}

FEATURE_DEFS_STRUCT	feature_defs

const double	kRatingEpsilon = 1.0 / 32

const int	kMaxOffsetDist = 32

const int	kMinClusteredShapes = 1

const int	kMaxUnicharsPerCluster = 2000

const float	kFontMergeDistance = 0.025

const float	kInfiniteDist = 999.0f

const int	kTestChar = -1

const int	kSquareLimit = 25

const int	kPrime1 = 17

const int	kPrime2 = 13

const float	kRotationRange = 0.02f

const int	kExposureFactor = 16

const int	kSaltnPepper = 5

const int	kMinRampSize = 1000

const int	kMaxLineLength = 1024

const int	kMinLigature = 0xfb00

const int	kMaxLigature = 0xfb17

const int	kDefaultResolution = 300

const double	kMinDivergenceRate = 50.0

const int	kMinStallIterations = 10000

const double	kSubTrainerMarginFraction = 3.0 / 128

const double	kLearningRateDecay = M_SQRT1_2

const int	kNumAdjustmentIterations = 100

const int	kErrorGraphInterval = 1000

const int	kNumPagesPerBatch = 100

const int	kMinStartedErrorRate = 75

const double	kStageTransitionThreshold = 10.0

const double	kHighConfidence = 0.9375

const double	kImprovementFraction = 15.0 / 16.0

const double	kBestCheckpointFraction = 31.0 / 32.0

const int	kTargetXScale = 5

const int	kTargetYScale = 100

const int	kSvPort = 8461

const int	kMaxMsgSize = 4096

const int	kMaxIntPairSize = 45

ScrollView *	fx_win = nullptr

ScrollView *	edge_window = nullptr

ScrollView *	blob_window = nullptr

ScrollView::Color	color_list []

bool	wordrec_display_all_blobs = 0

bool	wordrec_blob_pause = 0

const char *	kTruthTextWords = "To simple burn running of goods lately.\n"

const char *	kTruthTextLine = "Tosimpleburnrunningofgoodslately.\n"

int	test_data [] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}

const char *	kStrings8087_054 [] = {"dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", nullptr}

const PolyBlockType	kBlocks8087_054 []

const int	kTrainerIterations = 600

const int	kBatchIterations = 100

const char	kEngText [] = "the quick brown fox jumps over the lazy dog"

const char	kHinText [] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा"

const char	kKorText [] = "이는 것으로"

const char *	kBadlyFormedHinWords [] = {"उपयोक्ताो", "नहीें", "प्रंात", "कहीअे", "पत्रिाका", "छह्णाीस"}

const char *	kBadlyFormedThaiWords [] = {"ฤิ", "กา้ํ", "กิำ", "นำ้", "เเก"}

const char *	kExpectedFontNames []

const char	kArabicText [] = "والفكر والصراع 1234,\nوالفكر والصراع"

const ParagraphJustification	kLeft = JUSTIFICATION_LEFT

const ParagraphJustification	kCenter = JUSTIFICATION_CENTER

const ParagraphJustification	kRight = JUSTIFICATION_RIGHT

const ParagraphJustification	kUnknown = JUSTIFICATION_UNKNOWN

const TextAndModel	kTwoSimpleParagraphs []

const TextAndModel	kFewCluesWithCrown []

const TextAndModel	kCrownedParagraph []

const TextAndModel	kFlushLeftParagraphs []

const TextAndModel	kSingleFullPageContinuation []

const TextAndModel	kRightAligned []

const TextAndModel	kTinyParagraphs []

const TextAndModel	kComplexPage1 []

const TextAndModel	kComplexPage2 []

const TextAndModel	kSubtleCrown []

const TextAndModel	kUnlvRep3AO []

const TextAndModel	kTableOfContents []

const TextAndModel	kTextWithSourceCode []

const TextAndModel	kOldManAndSea []

const TextAndModel	kNewZealandIndex []

const int	kNumChars = 100

const int	kPadding = 64

const char *	kGWRTops []

const float	kGWRTopScores []

const char *	kGWR2nds []

const float	kGWR2ndScores []

const char *	kZHTops [] = {"实", "学", "储", "啬", "投", "学", "生", nullptr}

const float	kZHTopScores [] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98}

const char *	kZH2nds [] = {"学", "储", "投", "生", "学", "生", "实", nullptr}

const float	kZH2ndScores [] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}

const char *	kViTops [] = {"v", "ậ", "y", " ", "t", "ộ", "i", nullptr}

const float	kViTopScores [] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.97}

const char *	kVi2nds [] = {"V", "a", "v", "", "l", "o", "", nullptr}

const float	kVi2ndScores [] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}

const int	kTestData [] = {2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1}

const char	kMixedText [] = "والفكر 123 والصراع abc"

const char	kEngNonLigatureText [] = "fidelity"

const char	kEngLigatureText [] = "ﬁdelity"

NormEvidenceOf
Return the new type of evidence number corresponding to this normalization adjustment. The equation that represents the transform is: 1 / (1 + (NormAdj / midpoint) ^ curl)
double	classify_norm_adj_midpoint = 32.0

double	classify_norm_adj_curl = 2.0

const double	kWidthErrorWeighting = 0.125

compute_page_skew
Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.
const double	kNoiseSize = 0.5

const int	kMinSize = 8

void	compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)

void	vigorous_noise_removal (TO_BLOCK *block)

void	cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)

void	delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

    <UTF8 str> <left> <bottom> <right> <top> <page id>

and for word/line-level boxes:

    WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. E.g.:

    word -> #w o r d
    multi word line -> #m u l t i w o r d l i n e

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, e.g. Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters

page_res	recognise blobs
selection_box	within this box

Include Files and Type Defines

Typedef Documentation

◆ BLOB_CHOICE_LIST_VECTOR

using tesseract::BLOB_CHOICE_LIST_VECTOR = typedef std::vector<BLOB_CHOICE_LIST *>

Definition at line 627 of file ratngs.h.

◆ BLOB_WIDTH

using tesseract::BLOB_WIDTH = typedef uint8_t

Definition at line 29 of file stopper.h.

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

Definition at line 30 of file blobgrid.h.

◆ CANCEL_FUNC

using tesseract::CANCEL_FUNC = typedef bool (*)(void *, int)

Definition at line 98 of file ocrclass.h.

◆ char32

using tesseract::char32 = typedef signed int

Definition at line 49 of file unichar.h.

◆ CHAR_FEATURES

using tesseract::CHAR_FEATURES = typedef char *

Definition at line 93 of file ocrfeatures.h.

◆ CLASS_ID

using tesseract::CLASS_ID = typedef UNICHAR_ID

A CLASS_ID is the ASCII character to be associated with a class.

Definition at line 34 of file matchdefs.h.

◆ CLASS_TYPE

using tesseract::CLASS_TYPE = typedef CLASS_STRUCT *

Definition at line 49 of file protos.h.

◆ CLASSES

using tesseract::CLASSES = typedef CLASS_STRUCT *

Definition at line 50 of file protos.h.

◆ ClusterHeap

using tesseract::ClusterHeap = typedef tesseract::GenericHeap<ClusterPair>

Definition at line 1263 of file cluster.cpp.

◆ ClusterPair

using tesseract::ClusterPair = typedef tesseract::KDPairInc<float, TEMPCLUSTER *>

Definition at line 1262 of file cluster.cpp.

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

Definition at line 918 of file colpartition.h.

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 109 of file tablefind.h.

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 110 of file tablefind.h.

◆ CONFIG_PRUNER

typedef uint32_t tesseract::CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]

Definition at line 91 of file intproto.h.

◆ DANGERR

using tesseract::DANGERR = typedef std::vector<DANGERR_INFO>

Definition at line 47 of file stopper.h.

◆ DawgVector

using tesseract::DawgVector = typedef std::vector<Dawg *>

Definition at line 57 of file dict.h.

◆ DENSITYFUNC

using tesseract::DENSITYFUNC = typedef double (*)(int32_t)

Definition at line 1311 of file cluster.cpp.

◆ DictFunc

using tesseract::DictFunc = typedef int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const

Definition at line 63 of file baseapi.h.

◆ DotProductFunction

using tesseract::DotProductFunction = typedef TFloat (*)(const TFloat *, const TFloat *, int)

Definition at line 26 of file simddetect.h.

◆ EDGE_ARRAY

using tesseract::EDGE_ARRAY = typedef EDGE_RECORD *

Definition at line 48 of file dawg.h.

◆ EDGE_INDEX

using tesseract::EDGE_INDEX = typedef int64_t

Definition at line 38 of file trie.h.

◆ EDGE_RECORD

using tesseract::EDGE_RECORD = typedef uint64_t

Definition at line 47 of file dawg.h.

◆ EDGE_REF

using tesseract::EDGE_REF = typedef int64_t

Definition at line 49 of file dawg.h.

◆ EDGE_VECTOR

using tesseract::EDGE_VECTOR = typedef std::vector<EDGE_RECORD>

Definition at line 39 of file trie.h.

◆ FEATURE

using tesseract::FEATURE = typedef FEATURE_STRUCT *

Definition at line 68 of file ocrfeatures.h.

◆ FEATURE_DEFS

using tesseract::FEATURE_DEFS = typedef FEATURE_DEFS_STRUCT *

Definition at line 45 of file featdefs.h.

◆ FEATURE_DESC

using tesseract::FEATURE_DESC = typedef FEATURE_DESC_STRUCT *

Definition at line 56 of file ocrfeatures.h.

◆ FEATURE_ID

using tesseract::FEATURE_ID = typedef uint8_t

FEATURE_ID is the index of a feature within a character description. The feature id ranges from 0 to N-1, where N is the number of features in a character description.

Definition at line 46 of file matchdefs.h.

◆ FEATURE_SET

using tesseract::FEATURE_SET = typedef FEATURE_SET_STRUCT *

Definition at line 87 of file ocrfeatures.h.

◆ FileReader

using tesseract::FileReader = typedef bool (*)(const char *filename, std::vector<char> *data)

Definition at line 61 of file baseapi.h.

◆ FileWriter

using tesseract::FileWriter = typedef bool (*)(const std::vector<char> &data, const char *filename)

Definition at line 40 of file serialis.h.

◆ FontSet

using tesseract::FontSet = typedef std::vector<int>

Definition at line 154 of file fontinfo.h.

◆ int_compare

using tesseract::int_compare = typedef int (*)(void *, void *)

Definition at line 77 of file oldlist.h.

◆ INT_FEATURE_ARRAY

typedef INT_FEATURE_STRUCT tesseract::INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]

Definition at line 137 of file intproto.h.

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

Definition at line 191 of file kdpair.h.

◆ kdwalk_proc

using tesseract::kdwalk_proc = typedef void (*)(ClusteringContext *context, CLUSTER *Cluster, int32_t Level)

Definition at line 39 of file kdtree.h.

◆ LABELEDLIST

using tesseract::LABELEDLIST = typedef LABELEDLISTNODE *

Definition at line 89 of file commontraining.h.

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

Definition at line 35 of file lm_state.h.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string>

Definition at line 36 of file ligature_table.h.

◆ LIST

using tesseract::LIST = typedef list_rec *

Definition at line 125 of file oldlist.h.

◆ MatrixCoordPair

using tesseract::MatrixCoordPair = typedef KDPairInc<float, MATRIX_COORD>

Definition at line 724 of file matrix.h.

◆ MERGE_CLASS

using tesseract::MERGE_CLASS = typedef MERGE_CLASS_NODE *

Definition at line 98 of file commontraining.h.

◆ MFOUTLINE

using tesseract::MFOUTLINE = typedef LIST

Definition at line 28 of file mfoutline.h.

◆ MicroFeature

using tesseract::MicroFeature = typedef std::array<float, (int)MicroFeatureParameter::MFCount>

Definition at line 36 of file mfdefs.h.

◆ MICROFEATURES

using tesseract::MICROFEATURES = typedef std::forward_list<MicroFeature>

Definition at line 37 of file mfdefs.h.

◆ NODE_MAP

using tesseract::NODE_MAP = typedef EDGE_REF *

Definition at line 51 of file dawg.h.

◆ NODE_REF

using tesseract::NODE_REF = typedef int64_t

Definition at line 50 of file dawg.h.

◆ NodeChildVector

using tesseract::NodeChildVector = typedef std::vector<NodeChild>

Definition at line 60 of file dawg.h.

◆ PainPointHeap

using tesseract::PainPointHeap = typedef GenericHeap<MatrixCoordPair>

Definition at line 36 of file lm_pain_points.h.

◆ ParamsTrainingHypothesisList

using tesseract::ParamsTrainingHypothesisList = typedef std::vector<ParamsTrainingHypothesis>

Definition at line 126 of file params_training_featdef.h.

◆ PartSetVector

using tesseract::PartSetVector = typedef std::vector<ColPartitionSet *>

Definition at line 32 of file colpartitionset.h.

◆ PModel

typedef ParagraphModel tesseract::PModel

Definition at line 229 of file paragraphs_test.cc.

◆ PointHeap

using tesseract::PointHeap = typedef GenericHeap<PointPair>

Definition at line 32 of file chop.h.

◆ PointPair

using tesseract::PointPair = typedef KDPairInc<float, EDGEPT *>

Definition at line 31 of file chop.h.

◆ PRIORITY

using tesseract::PRIORITY = typedef float

Definition at line 31 of file seam.h.

◆ ProbabilityInContextFunc

using tesseract::ProbabilityInContextFunc = typedef double (Dict::*)(const char *, const char *, int, const char *, int)

Definition at line 65 of file baseapi.h.

◆ PROGRESS_FUNC

using tesseract::PROGRESS_FUNC = typedef bool (*)(int, int, int, int, int)

Definition at line 99 of file ocrclass.h.

◆ PROGRESS_FUNC2

using tesseract::PROGRESS_FUNC2 = typedef bool (*)(ETEXT_DESC *, int, int, int, int)

Definition at line 100 of file ocrclass.h.

◆ PROTO_ID

using tesseract::PROTO_ID = typedef int16_t

A PROTO_ID is the index of a prototype within its class. Valid proto ids are 0 to N-1, where N is the number of prototypes that make up the class.

Definition at line 40 of file matchdefs.h.

◆ PROTO_PRUNER

typedef uint32_t tesseract::PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]

Definition at line 84 of file intproto.h.

◆ RecodeHeap

using tesseract::RecodeHeap = typedef GenericHeap<RecodePair>

Definition at line 178 of file recodebeam.h.

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

Definition at line 177 of file recodebeam.h.

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

Definition at line 48 of file unicharcompress.cpp.

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

Definition at line 46 of file unicharcompress.cpp.

◆ SAMPLE

using tesseract::SAMPLE = typedef CLUSTER

Definition at line 51 of file cluster.h.

◆ SeamDecPair

using tesseract::SeamDecPair = typedef KDPtrPairDec<float, SEAM>

Definition at line 33 of file findseam.h.

◆ SeamPair

using tesseract::SeamPair = typedef KDPtrPairInc<float, SEAM>

Definition at line 30 of file findseam.h.

◆ SeamPile

using tesseract::SeamPile = typedef GenericHeap<SeamDecPair>

Definition at line 34 of file findseam.h.

◆ SeamQueue

using tesseract::SeamQueue = typedef GenericHeap<SeamPair>

Definition at line 31 of file findseam.h.

◆ SetOfModels

using tesseract::SetOfModels = typedef std::vector<const ParagraphModel *>

Definition at line 91 of file paragraphs_internal.h.

◆ ShapeQueue

using tesseract::ShapeQueue = typedef GenericHeap<ShapeQueueEntry>

Definition at line 125 of file shapetable.h.

◆ SOLVEFUNC

using tesseract::SOLVEFUNC = typedef double (*)(CHISTRUCT *, double)

Definition at line 1312 of file cluster.cpp.

◆ SuccessorList

using tesseract::SuccessorList = typedef std::vector<int>

Definition at line 61 of file dawg.h.

◆ SuccessorListsVector

using tesseract::SuccessorListsVector = typedef std::vector<SuccessorList *>

Definition at line 62 of file dawg.h.

◆ TDimension

using tesseract::TDimension = typedef int16_t

Definition at line 32 of file tesstypes.h.

◆ TestCallback

using tesseract::TestCallback = typedef std::function<std::string(int, const double *, const TessdataManager &, int)>

Definition at line 77 of file lstmtrainer.h.

◆ TFloat

using tesseract::TFloat = typedef double

Definition at line 39 of file tesstypes.h.

◆ TRIE_NODES

using tesseract::TRIE_NODES = typedef std::vector<TRIE_NODE_RECORD *>

Definition at line 45 of file trie.h.

◆ UNICHAR_ID

using tesseract::UNICHAR_ID = typedef int

Definition at line 34 of file unichar.h.

◆ UnicharAmbigsVector

using tesseract::UnicharAmbigsVector = typedef std::vector<AmbigSpec_LIST *>

Definition at line 140 of file ambigs.h.

◆ UnicharIdVector

using tesseract::UnicharIdVector = typedef std::vector<UNICHAR_ID>

Definition at line 38 of file ambigs.h.

◆ VECTOR

using tesseract::VECTOR = typedef TPOINT

Definition at line 93 of file blobs.h.

◆ void_dest

using tesseract::void_dest = typedef void (*)(void *)

Definition at line 78 of file oldlist.h.

◆ WidthCallback

using tesseract::WidthCallback = typedef std::function<bool(int)>

Definition at line 35 of file tabfind.h.

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 73 of file textord.h.

◆ WordRecognizer

using tesseract::WordRecognizer = typedef void (Tesseract::*)(const WordData &, WERD_RES **, PointerVector<WERD_RES> *)

Definition at line 175 of file tesseractclass.h.

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 74 of file textord.h.

Enumeration Type Documentation

◆ AmbigType

enum tesseract::AmbigType

Enumerator
NOT_AMBIG
REPLACE_AMBIG
DEFINITE_AMBIG
SIMILAR_AMBIG
CASE_AMBIG
AMBIG_TYPE_COUNT

Definition at line 40 of file ambigs.h.

               {
  NOT_AMBIG,      // the ngram pair is not ambiguous
  REPLACE_AMBIG,  // ocred ngram should always be substituted with correct
  DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1)
  SIMILAR_AMBIG,  // use pairwise classifier for ocred/correct pair (1-1)
  CASE_AMBIG,     // this is a case ambiguity (1-1)
 
  AMBIG_TYPE_COUNT // number of enum entries
};

◆ BlobChoiceClassifier

enum tesseract::BlobChoiceClassifier

Enumerator
BCC_STATIC_CLASSIFIER
BCC_ADAPTED_CLASSIFIER
BCC_SPECKLE_CLASSIFIER
BCC_AMBIG
BCC_FAKE

Definition at line 48 of file ratngs.h.

                          {
  BCC_STATIC_CLASSIFIER,  // From the char_norm classifier.
  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
  BCC_AMBIG,              // Generated by ambiguity detection.
  BCC_FAKE,               // From some other process.
};

◆ BlobNeighbourDir

enum tesseract::BlobNeighbourDir

Enumerator
BND_LEFT
BND_BELOW
BND_RIGHT
BND_ABOVE
BND_COUNT

Definition at line 89 of file blobbox.h.

89{ BND_LEFT, BND_BELOW, BND_RIGHT, BND_ABOVE, BND_COUNT };

tesseract::BND_LEFT

@ BND_LEFT

Definition: blobbox.h:89

tesseract::BND_RIGHT

@ BND_RIGHT

Definition: blobbox.h:89

tesseract::BND_BELOW

@ BND_BELOW

Definition: blobbox.h:89

tesseract::BND_ABOVE

@ BND_ABOVE

Definition: blobbox.h:89

tesseract::BND_COUNT

@ BND_COUNT

Definition: blobbox.h:89

◆ BlobRegionType

enum tesseract::BlobRegionType

Enumerator
BRT_NOISE
BRT_HLINE
BRT_VLINE
BRT_RECTIMAGE
BRT_POLYIMAGE
BRT_UNKNOWN
BRT_VERT_TEXT
BRT_TEXT
BRT_COUNT

Definition at line 74 of file blobbox.h.

                    {
  BRT_NOISE,     // Neither text nor image.
  BRT_HLINE,     // Horizontal separator line.
  BRT_VLINE,     // Vertical separator line.
  BRT_RECTIMAGE, // Rectangular image.
  BRT_POLYIMAGE, // Non-rectangular image.
  BRT_UNKNOWN,   // Not determined yet.
  BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
  BRT_TEXT,      // Convincing text.
 
  BRT_COUNT // Number of possibilities.
};

◆ BlobSpecialTextType

enum tesseract::BlobSpecialTextType

Enumerator
BSTT_NONE
BSTT_ITALIC
BSTT_DIGIT
BSTT_MATH
BSTT_UNCLEAR
BSTT_SKIP
BSTT_COUNT

Definition at line 92 of file blobbox.h.

                         {
  BSTT_NONE,    // No special.
  BSTT_ITALIC,  // Italic style.
  BSTT_DIGIT,   // Digit symbols.
  BSTT_MATH,    // Mathematical symbols (not including digit).
  BSTT_UNCLEAR, // Characters with low recognition rate.
  BSTT_SKIP,    // Characters that we skip labeling (usually too small).
  BSTT_COUNT
};

◆ BlobTextFlowType

enum tesseract::BlobTextFlowType

Enumerator
BTFT_NONE
BTFT_NONTEXT
BTFT_NEIGHBOURS
BTFT_CHAIN
BTFT_STRONG_CHAIN
BTFT_TEXT_ON_IMAGE
BTFT_LEADER
BTFT_COUNT

Definition at line 110 of file blobbox.h.

                      {
  BTFT_NONE,          // No text flow set yet.
  BTFT_NONTEXT,       // Flow too poor to be likely text.
  BTFT_NEIGHBOURS,    // Neighbours support flow in this direction.
  BTFT_CHAIN,         // There is a weak chain of text in this direction.
  BTFT_STRONG_CHAIN,  // There is a strong chain of text in this direction.
  BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
  BTFT_LEADER,        // Leader dots/dashes etc.
  BTFT_COUNT
};

◆ C_OUTLINE_FLAGS

enum tesseract::C_OUTLINE_FLAGS

Enumerator
COUT_INVERSE

Definition at line 45 of file coutln.h.

                     {
  COUT_INVERSE // White on black blob
};

◆ CachingStrategy

enum tesseract::CachingStrategy

Enumerator
CS_SEQUENTIAL
CS_ROUND_ROBIN

Definition at line 42 of file imagedata.h.

                     {
  // Reads all of one file before moving on to the next. Requires samples to be
  // shuffled across files. Uses the count of samples in the first file as
  // the count in all the files to achieve high-speed random access. As a
  // consequence, if subsequent files are smaller, they get entries used more
  // than once, and if subsequent files are larger, some entries are not used.
  // Best for larger data sets that don't fit in memory.
  CS_SEQUENTIAL,
  // Reads one sample from each file in rotation. Does not require shuffled
  // samples, but is extremely disk-intensive. Samples in smaller files also
  // get used more often than samples in larger files.
  // Best for smaller data sets that mostly fit in memory.
  CS_ROUND_ROBIN,
};

◆ CharSegmentationType

enum tesseract::CharSegmentationType

Enumerator
CST_FRAGMENT
CST_WHOLE
CST_IMPROPER
CST_NGRAM

Definition at line 87 of file classify.h.

                          {
  CST_FRAGMENT, // A partial character.
  CST_WHOLE,    // A correctly segmented character.
  CST_IMPROPER, // More than one but less than 2 characters.
  CST_NGRAM     // Multiple characters.
};

◆ CMD_EVENTS [1/2]

enum tesseract::CMD_EVENTS

Enumerator
NULL_CMD_EVENT
CHANGE_DISP_CMD_EVENT
DUMP_WERD_CMD_EVENT
SHOW_POINT_CMD_EVENT
SHOW_BLN_WERD_CMD_EVENT
DEBUG_WERD_CMD_EVENT
BLAMER_CMD_EVENT
BOUNDING_BOX_CMD_EVENT
CORRECT_TEXT_CMD_EVENT
POLYGONAL_CMD_EVENT
BL_NORM_CMD_EVENT
BITMAP_CMD_EVENT
IMAGE_CMD_EVENT
BLOCKS_CMD_EVENT
BASELINES_CMD_EVENT
UNIFORM_DISP_CMD_EVENT
REFRESH_CMD_EVENT
QUIT_CMD_EVENT
RECOG_WERDS
RECOG_PSEUDO
SHOW_BLOB_FEATURES
SHOW_SUBSCRIPT_CMD_EVENT
SHOW_SUPERSCRIPT_CMD_EVENT
SHOW_ITALIC_CMD_EVENT
SHOW_BOLD_CMD_EVENT
SHOW_UNDERLINE_CMD_EVENT
SHOW_FIXEDPITCH_CMD_EVENT
SHOW_SERIF_CMD_EVENT
SHOW_SMALLCAPS_CMD_EVENT
SHOW_DROPCAPS_CMD_EVENT
ACTION_1_CMD_EVENT
RECOG_WERDS
RECOG_PSEUDO
ACTION_2_CMD_EVENT

Definition at line 50 of file pgedit.cpp.

                {
  NULL_CMD_EVENT,
  CHANGE_DISP_CMD_EVENT,
  DUMP_WERD_CMD_EVENT,
  SHOW_POINT_CMD_EVENT,
  SHOW_BLN_WERD_CMD_EVENT,
  DEBUG_WERD_CMD_EVENT,
  BLAMER_CMD_EVENT,
  BOUNDING_BOX_CMD_EVENT,
  CORRECT_TEXT_CMD_EVENT,
  POLYGONAL_CMD_EVENT,
  BL_NORM_CMD_EVENT,
  BITMAP_CMD_EVENT,
  IMAGE_CMD_EVENT,
  BLOCKS_CMD_EVENT,
  BASELINES_CMD_EVENT,
  UNIFORM_DISP_CMD_EVENT,
  REFRESH_CMD_EVENT,
  QUIT_CMD_EVENT,
  RECOG_WERDS,
  RECOG_PSEUDO,
  SHOW_BLOB_FEATURES,
  SHOW_SUBSCRIPT_CMD_EVENT,
  SHOW_SUPERSCRIPT_CMD_EVENT,
  SHOW_ITALIC_CMD_EVENT,
  SHOW_BOLD_CMD_EVENT,
  SHOW_UNDERLINE_CMD_EVENT,
  SHOW_FIXEDPITCH_CMD_EVENT,
  SHOW_SERIF_CMD_EVENT,
  SHOW_SMALLCAPS_CMD_EVENT,
  SHOW_DROPCAPS_CMD_EVENT,
};

◆ CMD_EVENTS [2/2]

enum tesseract::CMD_EVENTS

Enumerator
NULL_CMD_EVENT
CHANGE_DISP_CMD_EVENT
DUMP_WERD_CMD_EVENT
SHOW_POINT_CMD_EVENT
SHOW_BLN_WERD_CMD_EVENT
DEBUG_WERD_CMD_EVENT
BLAMER_CMD_EVENT
BOUNDING_BOX_CMD_EVENT
CORRECT_TEXT_CMD_EVENT
POLYGONAL_CMD_EVENT
BL_NORM_CMD_EVENT
BITMAP_CMD_EVENT
IMAGE_CMD_EVENT
BLOCKS_CMD_EVENT
BASELINES_CMD_EVENT
UNIFORM_DISP_CMD_EVENT
REFRESH_CMD_EVENT
QUIT_CMD_EVENT
RECOG_WERDS
RECOG_PSEUDO
SHOW_BLOB_FEATURES
SHOW_SUBSCRIPT_CMD_EVENT
SHOW_SUPERSCRIPT_CMD_EVENT
SHOW_ITALIC_CMD_EVENT
SHOW_BOLD_CMD_EVENT
SHOW_UNDERLINE_CMD_EVENT
SHOW_FIXEDPITCH_CMD_EVENT
SHOW_SERIF_CMD_EVENT
SHOW_SMALLCAPS_CMD_EVENT
SHOW_DROPCAPS_CMD_EVENT
ACTION_1_CMD_EVENT
RECOG_WERDS
RECOG_PSEUDO
ACTION_2_CMD_EVENT

Definition at line 463 of file tessedit.cpp.

463{ ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT };

tesseract::ACTION_2_CMD_EVENT

@ ACTION_2_CMD_EVENT

Definition: tessedit.cpp:463

tesseract::ACTION_1_CMD_EVENT

@ ACTION_1_CMD_EVENT

Definition: tessedit.cpp:463

◆ ColorationMode

enum tesseract::ColorationMode

Enumerator
CM_RAINBOW
CM_SUBSCRIPT
CM_SUPERSCRIPT
CM_ITALIC
CM_BOLD
CM_UNDERLINE
CM_FIXEDPITCH
CM_SERIF
CM_SMALLCAPS
CM_DROPCAPS

Definition at line 83 of file pgedit.cpp.

                    {
  CM_RAINBOW,
  CM_SUBSCRIPT,
  CM_SUPERSCRIPT,
  CM_ITALIC,
  CM_BOLD,
  CM_UNDERLINE,
  CM_FIXEDPITCH,
  CM_SERIF,
  CM_SMALLCAPS,
  CM_DROPCAPS
};

◆ ColSegType

enum tesseract::ColSegType

Enumerator
COL_UNKNOWN
COL_TEXT
COL_TABLE
COL_MIXED
COL_COUNT

Definition at line 29 of file tablefind.h.

29{ COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED, COL_COUNT };

tesseract::COL_TEXT

@ COL_TEXT

Definition: tablefind.h:29

tesseract::COL_MIXED

@ COL_MIXED

Definition: tablefind.h:29

tesseract::COL_TABLE

@ COL_TABLE

Definition: tablefind.h:29

tesseract::COL_UNKNOWN

@ COL_UNKNOWN

Definition: tablefind.h:29

tesseract::COL_COUNT

@ COL_COUNT

Definition: tablefind.h:29

◆ ColumnSpanningType

enum tesseract::ColumnSpanningType

Enumerator
CST_NOISE
CST_FLOWING
CST_HEADING
CST_PULLOUT
CST_COUNT

Definition at line 47 of file colpartition.h.

                        {
  CST_NOISE,   // Strictly between columns.
  CST_FLOWING, // Strictly within a single column.
  CST_HEADING, // Spans multiple columns.
  CST_PULLOUT, // Touches multiple columns, but doesn't span them.
  CST_COUNT    // Number of entries.
};

◆ CountTypes

enum tesseract::CountTypes

Enumerator
CT_UNICHAR_TOP_OK
CT_UNICHAR_TOP1_ERR
CT_UNICHAR_TOP2_ERR
CT_UNICHAR_TOPN_ERR
CT_UNICHAR_TOPTOP_ERR
CT_OK_MULTI_UNICHAR
CT_OK_JOINED
CT_OK_BROKEN
CT_REJECT
CT_FONT_ATTR_ERR
CT_OK_MULTI_FONT
CT_NUM_RESULTS
CT_RANK
CT_REJECTED_JUNK
CT_ACCEPTED_JUNK
CT_SIZE

Definition at line 69 of file errorcounter.h.

                {
  CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id.
  // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of
  // kRatingEpsilon from the first result in each group. The real top choice
  // is measured using TOPTOP.
  CT_UNICHAR_TOP1_ERR,   // Top shape does not contain correct unichar id.
  CT_UNICHAR_TOP2_ERR,   // Top 2 shapes don't contain correct unichar id.
  CT_UNICHAR_TOPN_ERR,   // No output shape contains correct unichar id.
  CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct.
  CT_OK_MULTI_UNICHAR,   // Top shape id has correct unichar id, and others.
  CT_OK_JOINED,          // Top shape id is correct but marked joined.
  CT_OK_BROKEN,          // Top shape id is correct but marked broken.
  CT_REJECT,             // Classifier hates this.
  CT_FONT_ATTR_ERR,      // Top unichar OK, but font attributes incorrect.
  CT_OK_MULTI_FONT,      // CT_FONT_ATTR_OK but there are multiple font attrs.
  CT_NUM_RESULTS,        // Number of answers produced.
  CT_RANK,               // Rank of correct answer.
  CT_REJECTED_JUNK,      // Junk that was correctly rejected.
  CT_ACCEPTED_JUNK,      // Junk that was incorrectly classified otherwise.
 
  CT_SIZE // Number of types for array sizing.
};

◆ CRUNCH_MODE

enum tesseract::CRUNCH_MODE

Enumerator
CR_NONE
CR_KEEP_SPACE
CR_LOOSE_SPACE
CR_DELETE

Definition at line 160 of file pageres.h.

160{ CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE };

tesseract::CR_NONE

@ CR_NONE

Definition: pageres.h:160

tesseract::CR_KEEP_SPACE

@ CR_KEEP_SPACE

Definition: pageres.h:160

tesseract::CR_LOOSE_SPACE

@ CR_LOOSE_SPACE

Definition: pageres.h:160

tesseract::CR_DELETE

@ CR_DELETE

Definition: pageres.h:160

◆ DawgType

enum tesseract::DawgType

Enumerator
DAWG_TYPE_PUNCTUATION
DAWG_TYPE_WORD
DAWG_TYPE_NUMBER
DAWG_TYPE_PATTERN
DAWG_TYPE_COUNT

Definition at line 64 of file dawg.h.

              {
  DAWG_TYPE_PUNCTUATION,
  DAWG_TYPE_WORD,
  DAWG_TYPE_NUMBER,
  DAWG_TYPE_PATTERN,
 
  DAWG_TYPE_COUNT // number of enum entries
};

◆ DIRECTION

enum tesseract::DIRECTION : uint8_t

Enumerator
north
south
east
west
northeast
northwest
southeast
southwest

Definition at line 30 of file mfoutline.h.

30: uint8_t { north, south, east, west, northeast, northwest, southeast, southwest };

tesseract::northeast

@ northeast

Definition: mfoutline.h:30

tesseract::south

@ south

Definition: mfoutline.h:30

tesseract::east

@ east

Definition: mfoutline.h:30

tesseract::southeast

@ southeast

Definition: mfoutline.h:30

tesseract::northwest

@ northwest

Definition: mfoutline.h:30

tesseract::west

@ west

Definition: mfoutline.h:30

tesseract::southwest

@ southwest

Definition: mfoutline.h:30

tesseract::north

@ north

Definition: mfoutline.h:30

◆ DISPLAY_FLAGS

enum tesseract::DISPLAY_FLAGS

Enumerator
DF_BOX	Bounding box.
DF_TEXT	Correct ascii.
DF_POLYGONAL	Polyg approx.
DF_EDGE_STEP	Edge steps.
DF_BN_POLYGONAL	BL normalised polyapx.
DF_BLAMER	Blamer information.

Definition at line 46 of file werd.h.

                   {
  /* Display flags bit number allocations */
  DF_BOX,          
  DF_TEXT,         
  DF_POLYGONAL,    
  DF_EDGE_STEP,    
  DF_BN_POLYGONAL, 
  DF_BLAMER        
};

◆ DISTRIBUTION

enum tesseract::DISTRIBUTION

Enumerator
normal
uniform
D_random
DISTRIBUTION_COUNT

Definition at line 65 of file cluster.h.

65{ normal, uniform, D_random, DISTRIBUTION_COUNT } DISTRIBUTION;

tesseract::DISTRIBUTION

DISTRIBUTION

Definition: cluster.h:65

tesseract::D_random

@ D_random

Definition: cluster.h:65

tesseract::DISTRIBUTION_COUNT

@ DISTRIBUTION_COUNT

Definition: cluster.h:65

tesseract::uniform

@ uniform

Definition: cluster.h:65

tesseract::normal

@ normal

Definition: cluster.h:65

◆ ErrorTypes

enum tesseract::ErrorTypes

Enumerator
ET_RMS
ET_DELTA
ET_WORD_RECERR
ET_CHAR_ERROR
ET_SKIP_RATIO
ET_COUNT

Definition at line 41 of file lstmtrainer.h.

                {
  ET_RMS,         // RMS activation error.
  ET_DELTA,       // Number of big errors in deltas.
  ET_WORD_RECERR, // Output text string word recall error.
  ET_CHAR_ERROR,  // Output text string total char error.
  ET_SKIP_RATIO,  // Fraction of samples skipped.
  ET_COUNT        // For array sizing.
};

◆ FactorNames

enum tesseract::FactorNames

Enumerator
FN_INCOLOR
FN_Y0
FN_Y1
FN_Y2
FN_Y3
FN_X0
FN_X1
FN_SHEAR
FN_NUM_FACTORS

Definition at line 39 of file degradeimage.cpp.

                 {
  FN_INCOLOR,
  FN_Y0,
  FN_Y1,
  FN_Y2,
  FN_Y3,
  FN_X0,
  FN_X1,
  FN_SHEAR,
  // x2 = x1 - shear
  // x3 = x0 + shear
  FN_NUM_FACTORS
};

◆ FlexDimensions

enum tesseract::FlexDimensions

Enumerator
FD_BATCH
FD_HEIGHT
FD_WIDTH
FD_DIMSIZE

Definition at line 32 of file stridemap.h.

                    {
  FD_BATCH,   // Index of multiple images.
  FD_HEIGHT,  // y-coordinate in image.
  FD_WIDTH,   // x-coordinate in image.
  FD_DIMSIZE, // Number of flexible non-depth dimensions.
};

◆ GARBAGE_LEVEL

enum tesseract::GARBAGE_LEVEL

Enumerator
G_NEVER_CRUNCH
G_OK
G_DODGY
G_TERRIBLE

Definition at line 30 of file docqual.h.

30{ G_NEVER_CRUNCH, G_OK, G_DODGY, G_TERRIBLE };

tesseract::G_TERRIBLE

@ G_TERRIBLE

Definition: docqual.h:30

tesseract::G_NEVER_CRUNCH

@ G_NEVER_CRUNCH

Definition: docqual.h:30

tesseract::G_OK

@ G_OK

Definition: docqual.h:30

tesseract::G_DODGY

@ G_DODGY

Definition: docqual.h:30

◆ GeoParams

enum tesseract::GeoParams

Enumerator
GeoBottom
GeoTop
GeoWidth
GeoCount

Definition at line 35 of file picofeat.h.

               {
  GeoBottom, // Bounding box bottom in baseline space (0-255).
  GeoTop,    // Bounding box top in baseline space (0-255).
  GeoWidth,  // Bounding box width in baseline space (0-255).
 
  GeoCount // Number of geo features.
};

◆ GraphemeNorm

enum class tesseract::GraphemeNorm

strong

Enumerator
kNone
kNormalize

Definition at line 51 of file normstrngs.h.

                        {
  kNone,
  kNormalize,
};

◆ GraphemeNormMode

enum class tesseract::GraphemeNormMode

strong

Enumerator
kSingleString
kCombined
kGlyphSplit
kIndividualUnicodes

Definition at line 36 of file validator.h.

                            {
  // Validation result is a single string, even if input is multi-word.
  kSingleString,
  // Standard unicode graphemes are validated and output as grapheme units.
  kCombined,
  // Graphemes are validated and sub-divided. For virama-using scripts, units
  // that correspond to repeatable glyphs are generated. (Mostly single unicodes
  // but viramas and joiners are paired with the most sensible neighbor.)
  // For non-virama scripts, this means that base/accent pairs are separated,
  // ie the output is individual unicodes.
  kGlyphSplit,
  // The output is always single unicodes, regardless of the script.
  kIndividualUnicodes,
};

◆ IncorrectResultReason

enum tesseract::IncorrectResultReason

Enumerator
IRR_CORRECT
IRR_CLASSIFIER
IRR_CHOPPER
IRR_CLASS_LM_TRADEOFF
IRR_PAGE_LAYOUT
IRR_SEGSEARCH_HEUR
IRR_SEGSEARCH_PP
IRR_CLASS_OLD_LM_TRADEOFF
IRR_ADAPTION
IRR_NO_TRUTH_SPLIT
IRR_NO_TRUTH
IRR_UNKNOWN
IRR_NUM_REASONS

Definition at line 56 of file blamer.h.

                           {
  // The text recorded in best choice == truth text
  IRR_CORRECT,
  // Either: Top choice is incorrect and is a dictionary word (language model
  // is unlikely to help correct such errors, so blame the classifier).
  // Or: the correct unichar was not included in shortlist produced by the
  // classifier at all.
  IRR_CLASSIFIER,
  // Chopper has not found one or more splits that correspond to the correct
  // character bounding boxes recorded in BlamerBundle::truth_word.
  IRR_CHOPPER,
  // Classifier did include correct unichars for each blob in the correct
  // segmentation, however its rating could have been too bad to allow the
  // language model to pull out the correct choice. On the other hand the
  // strength of the language model might have been too weak to favor the
  // correct answer, thus we call this case a classifier-language model
  // tradeoff error.
  IRR_CLASS_LM_TRADEOFF,
  // Page layout failed to produce the correct bounding box. Blame page layout
  // if the truth was not found for the word, which implies that the bounding
  // box of the word was incorrect (no truth word had a similar bounding box).
  IRR_PAGE_LAYOUT,
  // SegSearch heuristic prevented one or more blobs from the correct
  // segmentation state to be classified (e.g. the blob was too wide).
  IRR_SEGSEARCH_HEUR,
  // The correct segmentation state was not explored because of poor SegSearch
  // pain point prioritization. We blame SegSearch pain point prioritization
  // if the best rating of a choice constructed from correct segmentation is
  // better than that of the best choice (i.e. if we got to explore the correct
  // segmentation state, language model would have picked the correct choice).
  IRR_SEGSEARCH_PP,
  // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
  // and thus use the old language model (permuters).
  // TODO(antonova): integrate the new language model with chopper
  IRR_CLASS_OLD_LM_TRADEOFF,
  // If there is an incorrect adaptive template match with a better score than
  // a correct one (either pre-trained or adapted), mark this as adaption error.
  IRR_ADAPTION,
  // split_and_recog_word() failed to find a suitable split in truth.
  IRR_NO_TRUTH_SPLIT,
  // Truth is not available for this word (e.g. when words in corrected content
  // file are turned into ~~~~ because an appropriate alignment was not found).
  IRR_NO_TRUTH,
  // The text recorded in best choice != truth text, but none of the above
  // reasons are set.
  IRR_UNKNOWN,
 
  IRR_NUM_REASONS
};

◆ IntmatcherDebugAction

enum tesseract::IntmatcherDebugAction

Enumerator
IDA_ADAPTIVE
IDA_STATIC
IDA_SHAPE_INDEX
IDA_BOTH

Definition at line 139 of file intproto.h.

139{ IDA_ADAPTIVE, IDA_STATIC, IDA_SHAPE_INDEX, IDA_BOTH };

tesseract::IDA_BOTH

@ IDA_BOTH

Definition: intproto.h:139

tesseract::IDA_SHAPE_INDEX

@ IDA_SHAPE_INDEX

Definition: intproto.h:139

tesseract::IDA_ADAPTIVE

@ IDA_ADAPTIVE

Definition: intproto.h:139

tesseract::IDA_STATIC

@ IDA_STATIC

Definition: intproto.h:139

◆ IntParams

enum tesseract::IntParams

Enumerator
IntX
IntY
IntDir

Definition at line 28 of file picofeat.h.

               {
  IntX,  // x-position (0-255).
  IntY,  // y-position (0-255).
  IntDir // Direction (0-255, circular).
};

◆ kParamsTrainingFeatureType

enum tesseract::kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT
PTRAIN_DIGITS_MED
PTRAIN_DIGITS_LONG
PTRAIN_NUM_SHORT
PTRAIN_NUM_MED
PTRAIN_NUM_LONG
PTRAIN_DOC_SHORT
PTRAIN_DOC_MED
PTRAIN_DOC_LONG
PTRAIN_DICT_SHORT
PTRAIN_DICT_MED
PTRAIN_DICT_LONG
PTRAIN_FREQ_SHORT
PTRAIN_FREQ_MED
PTRAIN_FREQ_LONG
PTRAIN_SHAPE_COST_PER_CHAR
PTRAIN_NGRAM_COST_PER_CHAR
PTRAIN_NUM_BAD_PUNC
PTRAIN_NUM_BAD_CASE
PTRAIN_XHEIGHT_CONSISTENCY
PTRAIN_NUM_BAD_CHAR_TYPE
PTRAIN_NUM_BAD_SPACING
PTRAIN_NUM_BAD_FONT
PTRAIN_RATING_PER_CHAR
PTRAIN_NUM_FEATURE_TYPES

Definition at line 39 of file params_training_featdef.h.

                                {
  // Digits
  PTRAIN_DIGITS_SHORT, // 0
  PTRAIN_DIGITS_MED,   // 1
  PTRAIN_DIGITS_LONG,  // 2
  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
  PTRAIN_NUM_SHORT, // 3
  PTRAIN_NUM_MED,   // 4
  PTRAIN_NUM_LONG,  // 5
  // Document word (DOC_DAWG_PERM)
  PTRAIN_DOC_SHORT, // 6
  PTRAIN_DOC_MED,   // 7
  PTRAIN_DOC_LONG,  // 8
  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
  PTRAIN_DICT_SHORT, // 9
  PTRAIN_DICT_MED,   // 10
  PTRAIN_DICT_LONG,  // 11
  // Frequent word (FREQ_DAWG_PERM)
  PTRAIN_FREQ_SHORT,          // 12
  PTRAIN_FREQ_MED,            // 13
  PTRAIN_FREQ_LONG,           // 14
  PTRAIN_SHAPE_COST_PER_CHAR, // 15
  PTRAIN_NGRAM_COST_PER_CHAR, // 16
  PTRAIN_NUM_BAD_PUNC,        // 17
  PTRAIN_NUM_BAD_CASE,        // 18
  PTRAIN_XHEIGHT_CONSISTENCY, // 19
  PTRAIN_NUM_BAD_CHAR_TYPE,   // 20
  PTRAIN_NUM_BAD_SPACING,     // 21
  PTRAIN_NUM_BAD_FONT,        // 22
  PTRAIN_RATING_PER_CHAR,     // 23
 
  PTRAIN_NUM_FEATURE_TYPES
};

◆ LeftOrRight

enum tesseract::LeftOrRight

Enumerator
LR_LEFT
LR_RIGHT

Definition at line 38 of file strokewidth.h.

38{ LR_LEFT, LR_RIGHT };

tesseract::LR_LEFT

@ LR_LEFT

Definition: strokewidth.h:38

tesseract::LR_RIGHT

@ LR_RIGHT

Definition: strokewidth.h:38

◆ LineType

enum tesseract::LineType

Enumerator
LT_START
LT_BODY
LT_UNKNOWN
LT_MULTIPLE

Definition at line 48 of file paragraphs_internal.h.

              {
  LT_START = 'S',    // First line of a paragraph.
  LT_BODY = 'C',     // Continuation line of a paragraph.
  LT_UNKNOWN = 'U',  // No clues.
  LT_MULTIPLE = 'M', // Matches for both LT_START and LT_BODY.
};

◆ LMPainPointsType

enum tesseract::LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER
LM_PPTYPE_AMBIG
LM_PPTYPE_PATH
LM_PPTYPE_SHAPE
LM_PPTYPE_NUM

Definition at line 39 of file lm_pain_points.h.

                      {
  LM_PPTYPE_BLAMER,
  LM_PPTYPE_AMBIG,
  LM_PPTYPE_PATH,
  LM_PPTYPE_SHAPE,
 
  LM_PPTYPE_NUM
};

◆ LossType

enum tesseract::LossType

Enumerator
LT_NONE
LT_CTC
LT_SOFTMAX
LT_LOGISTIC

Definition at line 29 of file static_shape.h.

              {
  LT_NONE,     // Undefined.
  LT_CTC,      // Softmax with standard CTC for training/decoding.
  LT_SOFTMAX,  // Outputs sum to 1 in fixed positions.
  LT_LOGISTIC, // Logistic outputs with independent values.
};

◆ MicroFeatureParameter

enum class tesseract::MicroFeatureParameter

strong

Enumerator
MFXPosition
MFYPosition
MFLength
MFDirection
MFBulge1
MFBulge2
MFCount

Definition at line 25 of file mfdefs.h.

                                 {
  MFXPosition,
  MFYPosition,
  MFLength,
  MFDirection,
  MFBulge1,
  MFBulge2,
 
  MFCount // For array sizes.
};

◆ NeighbourPartitionType

enum tesseract::NeighbourPartitionType

Enumerator
NPT_HTEXT
NPT_VTEXT
NPT_WEAK_HTEXT
NPT_WEAK_VTEXT
NPT_IMAGE
NPT_COUNT

Definition at line 1548 of file colpartitiongrid.cpp.

                            {
  NPT_HTEXT,      // Definite horizontal text.
  NPT_VTEXT,      // Definite vertical text.
  NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
                  // image for image and VTEXT.
  NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
                  // image for image and HTEXT.
  NPT_IMAGE,      // Definite non-text.
  NPT_COUNT       // Number of array elements.
};

◆ NetworkFlags

enum tesseract::NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR
NF_ADAM

Definition at line 83 of file network.h.

                  {
  // Network forward/backprop behavior.
  NF_LAYER_SPECIFIC_LR = 64, // Separate learning rate for each layer.
  NF_ADAM = 128,             // Weight-specific learning rate.
};

◆ NetworkType

enum tesseract::NetworkType

Enumerator
NT_NONE
NT_INPUT
NT_CONVOLVE
NT_MAXPOOL
NT_PARALLEL
NT_REPLICATED
NT_PAR_RL_LSTM
NT_PAR_UD_LSTM
NT_PAR_2D_LSTM
NT_SERIES
NT_RECONFIG
NT_XREVERSED
NT_YREVERSED
NT_XYTRANSPOSE
NT_LSTM
NT_LSTM_SUMMARY
NT_LOGISTIC
NT_POSCLIP
NT_SYMCLIP
NT_TANH
NT_RELU
NT_LINEAR
NT_SOFTMAX
NT_SOFTMAX_NO_CTC
NT_LSTM_SOFTMAX
NT_LSTM_SOFTMAX_ENCODED
NT_TENSORFLOW
NT_COUNT

Definition at line 41 of file network.h.

                 {
  NT_NONE,  // The naked base class.
  NT_INPUT, // Inputs from an image.
  // Plumbing networks combine other networks or rearrange the inputs.
  NT_CONVOLVE,    // Duplicates inputs in a sliding window neighborhood.
  NT_MAXPOOL,     // Chooses the max result from a rectangle.
  NT_PARALLEL,    // Runs networks in parallel.
  NT_REPLICATED,  // Runs identical networks in parallel.
  NT_PAR_RL_LSTM, // Runs LTR and RTL LSTMs in parallel.
  NT_PAR_UD_LSTM, // Runs Up and Down LSTMs in parallel.
  NT_PAR_2D_LSTM, // Runs 4 LSTMs in parallel.
  NT_SERIES,      // Executes a sequence of layers.
  NT_RECONFIG,    // Scales the time/y size but makes the output deeper.
  NT_XREVERSED,   // Reverses the x direction of the inputs/outputs.
  NT_YREVERSED,   // Reverses the y-direction of the inputs/outputs.
  NT_XYTRANSPOSE, // Transposes x and y (for just a single op).
  // Functional networks actually calculate stuff.
  NT_LSTM,           // Long-Short-Term-Memory block.
  NT_LSTM_SUMMARY,   // LSTM that only keeps its last output.
  NT_LOGISTIC,       // Fully connected logistic nonlinearity.
  NT_POSCLIP,        // Fully connected rect lin version of logistic.
  NT_SYMCLIP,        // Fully connected rect lin version of tanh.
  NT_TANH,           // Fully connected with tanh nonlinearity.
  NT_RELU,           // Fully connected with rectifier nonlinearity.
  NT_LINEAR,         // Fully connected with no nonlinearity.
  NT_SOFTMAX,        // Softmax uses exponential normalization, with CTC.
  NT_SOFTMAX_NO_CTC, // Softmax uses exponential normalization, no CTC.
  // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with
  // the outputs fed back to the input of the LSTM at the next timestep.
  // The ENCODED version binary encodes the softmax outputs, providing log2 of
  // the number of outputs as additional inputs, and the other version just
  // provides all the softmax outputs as additional inputs.
  NT_LSTM_SOFTMAX,         // 1-d LSTM with built-in fully connected softmax.
  NT_LSTM_SOFTMAX_ENCODED, // 1-d LSTM with built-in binary encoded softmax.
  // A TensorFlow graph encapsulated as a Tesseract network.
  NT_TENSORFLOW,
 
  NT_COUNT // Array size.
};

◆ NodeContinuation

enum tesseract::NodeContinuation

Enumerator
NC_ANYTHING
NC_ONLY_DUP
NC_NO_DUP
NC_COUNT

Definition at line 72 of file recodebeam.h.

                      {
  NC_ANYTHING, // This node used just its own score, so anything can follow.
  NC_ONLY_DUP, // The current node combined another score with the score for
               // itself, without a stand-alone duplicate before, so must be
               // followed by a stand-alone duplicate.
  NC_NO_DUP,   // The current node combined another score with the score for
               // itself, after a stand-alone, so can only be followed by
               // something other than a duplicate of the current node.
  NC_COUNT
};

◆ NORM_METHOD

enum tesseract::NORM_METHOD

Enumerator
baseline
character

Definition at line 53 of file mfoutline.h.

53{ baseline, character };

tesseract::character

@ character

Definition: mfoutline.h:53

tesseract::baseline

@ baseline

Definition: mfoutline.h:53

◆ NORM_PARAM_NAME

enum tesseract::NORM_PARAM_NAME

Enumerator
CharNormY
CharNormLength
CharNormRx
CharNormRy

Definition at line 30 of file normfeat.h.

30{ CharNormY, CharNormLength, CharNormRx, CharNormRy } NORM_PARAM_NAME;

tesseract::NORM_PARAM_NAME

NORM_PARAM_NAME

Definition: normfeat.h:30

tesseract::CharNormLength

@ CharNormLength

Definition: normfeat.h:30

tesseract::CharNormRy

@ CharNormRy

Definition: normfeat.h:30

tesseract::CharNormY

@ CharNormY

Definition: normfeat.h:30

tesseract::CharNormRx

@ CharNormRx

Definition: normfeat.h:30

◆ NormalizationMode

enum tesseract::NormalizationMode

Enumerator
NM_BASELINE
NM_CHAR_ISOTROPIC
NM_CHAR_ANISOTROPIC

Definition at line 46 of file normalis.h.

                       {
  NM_BASELINE = -3,        // The original BL normalization mode.
  NM_CHAR_ISOTROPIC = -2,  // Character normalization but isotropic.
  NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
};

◆ OcrEngineMode

enum tesseract::OcrEngineMode

When Tesseract is initialized we can choose to instantiate/load/run only the legacy Tesseract engine, only the LSTM line recognizer, or both combined (the LSTM recognizer with fallback to Tesseract). The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY
OEM_LSTM_ONLY
OEM_TESSERACT_LSTM_COMBINED
OEM_DEFAULT
OEM_COUNT

Definition at line 263 of file publictypes.h.

                   {
  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
                               // to Tesseract when things get difficult.
                               // deprecated
  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
                               // to indicate that any of the above modes
                               // should be automatically inferred from the
                               // variables in the language-specific config,
                               // command-line configs, or if not specified
                               // in any of the above should be set to the
                               // default OEM_TESSERACT_ONLY.
  OEM_COUNT                    // Number of OEMs
};

◆ OCRNorm

enum class tesseract::OCRNorm

strong

Enumerator
kNone
kNormalize

Definition at line 43 of file normstrngs.h.

                   {
  kNone,
  kNormalize,
};

◆ OldUncleanUnichars

enum class tesseract::OldUncleanUnichars

strong

Enumerator
kFalse
kTrue

Definition at line 45 of file unicharset.h.

                              {
  kFalse,
  kTrue,
};

◆ Orientation

enum tesseract::Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP
ORIENTATION_PAGE_RIGHT
ORIENTATION_PAGE_DOWN
ORIENTATION_PAGE_LEFT

Definition at line 114 of file publictypes.h.

                 {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3,
};

◆ OUTLINE_FEAT_PARAM_NAME

enum tesseract::OUTLINE_FEAT_PARAM_NAME

Enumerator
OutlineFeatX
OutlineFeatY
OutlineFeatLength
OutlineFeatDir

Definition at line 27 of file outfeat.h.

             {
  OutlineFeatX,
  OutlineFeatY,
  OutlineFeatLength,
  OutlineFeatDir
} OUTLINE_FEAT_PARAM_NAME;

◆ OUTLINETYPE

enum tesseract::OUTLINETYPE

Enumerator
outer
hole

Definition at line 51 of file mfoutline.h.

51{ outer, hole };

tesseract::outer

@ outer

Definition: mfoutline.h:51

tesseract::hole

@ hole

Definition: mfoutline.h:51

◆ OVERLAP_STATE

enum tesseract::OVERLAP_STATE

Enumerator
ASSIGN
REJECT
NEW_ROW

Definition at line 30 of file makerow.h.

                   {
  ASSIGN, // assign it to row
  REJECT, // reject it - dual overlap
  NEW_ROW
};

◆ PageIteratorLevel

enum tesseract::PageIteratorLevel

Enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK
RIL_PARA
RIL_TEXTLINE
RIL_WORD
RIL_SYMBOL

Definition at line 214 of file publictypes.h.

                       {
  RIL_BLOCK,    // Block of text/image/separator line.
  RIL_PARA,     // Paragraph within a block.
  RIL_TEXTLINE, // Line within a paragraph.
  RIL_WORD,     // Word within a textline.
  RIL_SYMBOL    // Symbol/character within a word.
};

◆ PageSegMode

enum tesseract::PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY	Orientation and script detection only.
PSM_AUTO_OSD	Automatic page segmentation with orientation and script detection (OSD).
PSM_AUTO_ONLY	Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO	Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN	Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT	Assume a single uniform block of vertically aligned text.
PSM_SINGLE_BLOCK	Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE	Treat the image as a single text line.
PSM_SINGLE_WORD	Treat the image as a single word.
PSM_CIRCLE_WORD	Treat the image as a single word in a circle.
PSM_SINGLE_CHAR	Treat the image as a single character.
PSM_SPARSE_TEXT	Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD	Sparse text with orientation and script detection.
PSM_RAW_LINE	Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
PSM_COUNT	Number of enum entries.

Definition at line 157 of file publictypes.h.

                 {
  PSM_OSD_ONLY = 0,      
  PSM_AUTO_OSD = 1,      
  PSM_AUTO_ONLY = 2,     
  PSM_AUTO = 3,          
  PSM_SINGLE_COLUMN = 4, 
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, 
  PSM_SINGLE_BLOCK = 6, 
  PSM_SINGLE_LINE = 7,  
  PSM_SINGLE_WORD = 8,  
  PSM_CIRCLE_WORD = 9,  
  PSM_SINGLE_CHAR = 10, 
  PSM_SPARSE_TEXT =
      11, 
  PSM_SPARSE_TEXT_OSD = 12, 
  PSM_RAW_LINE = 13, 
 
  PSM_COUNT 
};

◆ ParagraphJustification

enum tesseract::ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN
JUSTIFICATION_LEFT
JUSTIFICATION_CENTER
JUSTIFICATION_RIGHT

Definition at line 246 of file publictypes.h.

                            {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT,
};

◆ ParamType

enum tesseract::ParamType

Enumerator
VT_INTEGER
VT_BOOLEAN
VT_STRING
VT_DOUBLE

Definition at line 40 of file paramsd.h.

40{ VT_INTEGER, VT_BOOLEAN, VT_STRING, VT_DOUBLE };

tesseract::VT_INTEGER

@ VT_INTEGER

Definition: paramsd.h:40

tesseract::VT_STRING

@ VT_STRING

Definition: paramsd.h:40

tesseract::VT_BOOLEAN

@ VT_BOOLEAN

Definition: paramsd.h:40

tesseract::VT_DOUBLE

@ VT_DOUBLE

Definition: paramsd.h:40

◆ PartitionFindResult

enum tesseract::PartitionFindResult

Enumerator
PFR_OK
PFR_SKEW
PFR_NOISE

Definition at line 42 of file strokewidth.h.

                         {
  PFR_OK,   // Everything is OK.
  PFR_SKEW, // Skew was detected and rotated.
  PFR_NOISE // Noise was detected and removed.
};

◆ PermuterType

enum tesseract::PermuterType

Enumerator
NO_PERM
PUNC_PERM
TOP_CHOICE_PERM
LOWER_CASE_PERM
UPPER_CASE_PERM
NGRAM_PERM
NUMBER_PERM
USER_PATTERN_PERM
SYSTEM_DAWG_PERM
DOC_DAWG_PERM
USER_DAWG_PERM
FREQ_DAWG_PERM
COMPOUND_PERM
NUM_PERMUTER_TYPES

Definition at line 235 of file ratngs.h.

                  {
  NO_PERM,           // 0
  PUNC_PERM,         // 1
  TOP_CHOICE_PERM,   // 2
  LOWER_CASE_PERM,   // 3
  UPPER_CASE_PERM,   // 4
  NGRAM_PERM,        // 5
  NUMBER_PERM,       // 6
  USER_PATTERN_PERM, // 7
  SYSTEM_DAWG_PERM,  // 8
  DOC_DAWG_PERM,     // 9
  USER_DAWG_PERM,    // 10
  FREQ_DAWG_PERM,    // 11
  COMPOUND_PERM,     // 12
 
  NUM_PERMUTER_TYPES
};

◆ PICO_FEAT_PARAM_NAME

enum tesseract::PICO_FEAT_PARAM_NAME

Enumerator
PicoFeatY
PicoFeatDir
PicoFeatX

Definition at line 43 of file picofeat.h.

43{ PicoFeatY, PicoFeatDir, PicoFeatX } PICO_FEAT_PARAM_NAME;

tesseract::PICO_FEAT_PARAM_NAME

PICO_FEAT_PARAM_NAME

Definition: picofeat.h:43

tesseract::PicoFeatDir

@ PicoFeatDir

Definition: picofeat.h:43

tesseract::PicoFeatX

@ PicoFeatX

Definition: picofeat.h:43

tesseract::PicoFeatY

@ PicoFeatY

Definition: picofeat.h:43

◆ PITCH_TYPE

enum tesseract::PITCH_TYPE

Enumerator
PITCH_DUNNO
PITCH_DEF_FIXED
PITCH_MAYBE_FIXED
PITCH_DEF_PROP
PITCH_MAYBE_PROP
PITCH_CORR_FIXED
PITCH_CORR_PROP

Definition at line 47 of file blobbox.h.

                {
  PITCH_DUNNO,       // insufficient data
  PITCH_DEF_FIXED,   // definitely fixed
  PITCH_MAYBE_FIXED, // could be
  PITCH_DEF_PROP,
  PITCH_MAYBE_PROP,
  PITCH_CORR_FIXED,
  PITCH_CORR_PROP
};

◆ PolyBlockType

enum tesseract::PolyBlockType

Possible types for a POLY_BLOCK or ColPartition. Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions below, as well as kPolyBlockNames in layout_test.cc. Used extensively by ColPartition, and POLY_BLOCK.

Enumerator
PT_UNKNOWN
PT_FLOWING_TEXT
PT_HEADING_TEXT
PT_PULLOUT_TEXT
PT_EQUATION
PT_INLINE_EQUATION
PT_TABLE
PT_VERTICAL_TEXT
PT_CAPTION_TEXT
PT_FLOWING_IMAGE
PT_HEADING_IMAGE
PT_PULLOUT_IMAGE
PT_HORZ_LINE
PT_VERT_LINE
PT_NOISE
PT_COUNT

Definition at line 51 of file publictypes.h.

                   {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT
};

◆ PROTOSTYLE

enum tesseract::PROTOSTYLE

Enumerator
spherical
elliptical
mixed
automatic

Definition at line 53 of file cluster.h.

53{ spherical, elliptical, mixed, automatic } PROTOSTYLE;

tesseract::PROTOSTYLE

PROTOSTYLE

Definition: cluster.h:53

tesseract::spherical

@ spherical

Definition: cluster.h:53

tesseract::mixed

@ mixed

Definition: cluster.h:53

tesseract::elliptical

@ elliptical

Definition: cluster.h:53

tesseract::automatic

@ automatic

Definition: cluster.h:53

◆ REJ_FLAGS

enum tesseract::REJ_FLAGS

Enumerator
R_TESS_FAILURE
R_SMALL_XHT
R_EDGE_CHAR
R_1IL_CONFLICT
R_POSTNN_1IL
R_REJ_CBLOB
R_MM_REJECT
R_BAD_REPETITION
R_POOR_MATCH
R_NOT_TESS_ACCEPTED
R_CONTAINS_BLANKS
R_BAD_PERMUTER
R_HYPHEN
R_DUBIOUS
R_NO_ALPHANUMS
R_MOSTLY_REJ
R_XHT_FIXUP
R_BAD_QUALITY
R_DOC_REJ
R_BLOCK_REJ
R_ROW_REJ
R_UNLV_REJ
R_NN_ACCEPT
R_HYPHEN_ACCEPT
R_MM_ACCEPT
R_QUALITY_ACCEPT
R_MINIMAL_REJ_ACCEPT

Definition at line 51 of file rejctmap.h.

               {
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,   // PERM Tess didn't classify
  R_SMALL_XHT,      // PERM Xht too small
  R_EDGE_CHAR,      // PERM Too close to edge of image
  R_1IL_CONFLICT,   // PERM 1Il confusion
  R_POSTNN_1IL,     // PERM 1Il unrejected by NN
  R_REJ_CBLOB,      // PERM Odd blob
  R_MM_REJECT,      // PERM Matrix match rejection (m's)
  R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend
 
  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,        // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS,   // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,      // POTENTIAL Bad permuter for WERD
 
  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,       // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,      // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,   // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,    // TEMP Xht tests unsure
 
  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY, // TEMP Quality metrics bad for WERD
 
  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
  R_DOC_REJ,   // TEMP Document rejection
  R_BLOCK_REJ, // TEMP Block rejection
  R_ROW_REJ,   // TEMP Row rejection
  R_UNLV_REJ,  // TEMP ~ turned to - or ^ turned to space
 
  /* Accept modes which occur between the above rejection groups */
  R_NN_ACCEPT,         // NN acceptance
  R_HYPHEN_ACCEPT,     // Hyphen acceptance
  R_MM_ACCEPT,         // Matrix match acceptance
  R_QUALITY_ACCEPT,    // Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures
};

◆ ROW_CATEGORY

enum tesseract::ROW_CATEGORY

Enumerator
ROW_ASCENDERS_FOUND
ROW_DESCENDERS_FOUND
ROW_UNKNOWN
ROW_INVALID

Definition at line 36 of file makerow.h.

                  {
  ROW_ASCENDERS_FOUND,
  ROW_DESCENDERS_FOUND,
  ROW_UNKNOWN,
  ROW_INVALID,
};

◆ ScriptPos

enum tesseract::ScriptPos

Enumerator
SP_NORMAL
SP_SUBSCRIPT
SP_SUPERSCRIPT
SP_DROPCAP

Definition at line 254 of file ratngs.h.

254{ SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP };

tesseract::SP_SUBSCRIPT

@ SP_SUBSCRIPT

Definition: ratngs.h:254

tesseract::SP_DROPCAP

@ SP_DROPCAP

Definition: ratngs.h:254

tesseract::SP_NORMAL

@ SP_NORMAL

Definition: ratngs.h:254

tesseract::SP_SUPERSCRIPT

@ SP_SUPERSCRIPT

Definition: ratngs.h:254

◆ SerializeAmount

enum tesseract::SerializeAmount

Enumerator
LIGHT
NO_BEST_TRAINER
FULL

Definition at line 60 of file lstmtrainer.h.

                     {
  LIGHT,           // Minimal data for remote training.
  NO_BEST_TRAINER, // Save an empty vector in place of best_trainer_.
  FULL,            // All data including best_trainer_.
};

◆ SetParamConstraint

enum tesseract::SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE
SET_PARAM_CONSTRAINT_DEBUG_ONLY
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
SET_PARAM_CONSTRAINT_NON_INIT_ONLY

Definition at line 39 of file params.h.

                        {
  SET_PARAM_CONSTRAINT_NONE,
  SET_PARAM_CONSTRAINT_DEBUG_ONLY,
  SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY,
  SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
};

◆ SpacingNeighbourhood

enum tesseract::SpacingNeighbourhood

Enumerator
PN_ABOVE2
PN_ABOVE1
PN_UPPER
PN_LOWER
PN_BELOW1
PN_BELOW2
PN_COUNT

Definition at line 42 of file colpartition.cpp.

                          {
  PN_ABOVE2,
  PN_ABOVE1,
  PN_UPPER,
  PN_LOWER,
  PN_BELOW1,
  PN_BELOW2,
  PN_COUNT
};

◆ SpecialUnicharCodes

enum tesseract::SpecialUnicharCodes

Enumerator
UNICHAR_SPACE
UNICHAR_JOINED
UNICHAR_BROKEN
SPECIAL_UNICHAR_CODES_COUNT

Definition at line 35 of file unicharset.h.

                         {
  UNICHAR_SPACE,
  UNICHAR_JOINED,
  UNICHAR_BROKEN,
 
  SPECIAL_UNICHAR_CODES_COUNT
};

◆ StrongScriptDirection

enum tesseract::StrongScriptDirection

Enumerator
DIR_NEUTRAL
DIR_LEFT_TO_RIGHT
DIR_RIGHT_TO_LEFT
DIR_MIX

Definition at line 41 of file unichar.h.

                           {
  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
  DIR_MIX = 3,           // Text contains a mixture of left-to-right
                         // and right-to-left characters.
};

◆ SubTrainerResult

enum tesseract::SubTrainerResult

Enumerator
STR_NONE
STR_UPDATED
STR_REPLACED

Definition at line 67 of file lstmtrainer.h.

                      {
  STR_NONE,    // Did nothing as not good enough.
  STR_UPDATED, // Subtrainer was updated, but didn't replace *this.
  STR_REPLACED // Subtrainer replaced *this.
};

◆ SVEventType

enum tesseract::SVEventType

Enumerator
SVET_DESTROY
SVET_EXIT
SVET_CLICK
SVET_SELECTION
SVET_INPUT
SVET_MOUSE
SVET_MOTION
SVET_HOVER
SVET_POPUP
SVET_MENU
SVET_ANY
SVET_COUNT

Definition at line 53 of file scrollview.h.

                 {
  SVET_DESTROY,   // Window has been destroyed by user.
  SVET_EXIT,      // User has destroyed the last window by clicking on the 'X'.
  SVET_CLICK,     // Left button pressed.
  SVET_SELECTION, // Left button selection.
  SVET_INPUT,     // There is some input (single key or a whole string).
  SVET_MOUSE,     // The mouse has moved with a button pressed.
  SVET_MOTION,    // The mouse has moved with no button pressed.
  SVET_HOVER,     // The mouse has stayed still for a second.
  SVET_POPUP,     // A command selected through a popup menu.
  SVET_MENU,      // A command selected through the menubar.
  SVET_ANY,       // Any of the above.
 
  SVET_COUNT // Array sizing.
};

◆ SWITCH_TYPE

enum tesseract::SWITCH_TYPE

Enumerator
StartSwitch
EndSwitch
LastSwitch

Definition at line 69 of file intproto.cpp.

69{ StartSwitch, EndSwitch, LastSwitch } SWITCH_TYPE;

tesseract::SWITCH_TYPE

SWITCH_TYPE

Definition: intproto.cpp:69

tesseract::LastSwitch

@ LastSwitch

Definition: intproto.cpp:69

tesseract::EndSwitch

@ EndSwitch

Definition: intproto.cpp:69

tesseract::StartSwitch

@ StartSwitch

Definition: intproto.cpp:69

◆ TabAlignment

enum tesseract::TabAlignment

Enumerator
TA_LEFT_ALIGNED
TA_LEFT_RAGGED
TA_CENTER_JUSTIFIED
TA_RIGHT_ALIGNED
TA_RIGHT_RAGGED
TA_SEPARATOR
TA_COUNT

Definition at line 41 of file tabvector.h.

                  {
  TA_LEFT_ALIGNED,
  TA_LEFT_RAGGED,
  TA_CENTER_JUSTIFIED,
  TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED,
  TA_SEPARATOR,
  TA_COUNT
};

◆ TabType

enum tesseract::TabType

Enumerator
TT_NONE
TT_DELETED
TT_MAYBE_RAGGED
TT_MAYBE_ALIGNED
TT_CONFIRMED
TT_VLINE

Definition at line 61 of file blobbox.h.

             {
  TT_NONE,          // Not a tab.
  TT_DELETED,       // Not a tab after detailed analysis.
  TT_MAYBE_RAGGED,  // Initial designation of a tab-stop candidate.
  TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
  TT_CONFIRMED,     // Aligned with neighbours.
  TT_VLINE          // Detected as a vertical line.
};

◆ TessdataType

enum tesseract::TessdataType

Enumerator
TESSDATA_LANG_CONFIG
TESSDATA_UNICHARSET
TESSDATA_AMBIGS
TESSDATA_INTTEMP
TESSDATA_PFFMTABLE
TESSDATA_NORMPROTO
TESSDATA_PUNC_DAWG
TESSDATA_SYSTEM_DAWG
TESSDATA_NUMBER_DAWG
TESSDATA_FREQ_DAWG
TESSDATA_FIXED_LENGTH_DAWGS
TESSDATA_CUBE_UNICHARSET
TESSDATA_CUBE_SYSTEM_DAWG
TESSDATA_SHAPE_TABLE
TESSDATA_BIGRAM_DAWG
TESSDATA_UNAMBIG_DAWG
TESSDATA_PARAMS_MODEL
TESSDATA_LSTM
TESSDATA_LSTM_PUNC_DAWG
TESSDATA_LSTM_SYSTEM_DAWG
TESSDATA_LSTM_NUMBER_DAWG
TESSDATA_LSTM_UNICHARSET
TESSDATA_LSTM_RECODER
TESSDATA_VERSION
TESSDATA_NUM_ENTRIES

Definition at line 58 of file tessdatamanager.h.

                  {
  TESSDATA_LANG_CONFIG,        // 0
  TESSDATA_UNICHARSET,         // 1
  TESSDATA_AMBIGS,             // 2
  TESSDATA_INTTEMP,            // 3
  TESSDATA_PFFMTABLE,          // 4
  TESSDATA_NORMPROTO,          // 5
  TESSDATA_PUNC_DAWG,          // 6
  TESSDATA_SYSTEM_DAWG,        // 7
  TESSDATA_NUMBER_DAWG,        // 8
  TESSDATA_FREQ_DAWG,          // 9
  TESSDATA_FIXED_LENGTH_DAWGS, // 10  // deprecated
  TESSDATA_CUBE_UNICHARSET,    // 11  // deprecated
  TESSDATA_CUBE_SYSTEM_DAWG,   // 12  // deprecated
  TESSDATA_SHAPE_TABLE,        // 13
  TESSDATA_BIGRAM_DAWG,        // 14
  TESSDATA_UNAMBIG_DAWG,       // 15
  TESSDATA_PARAMS_MODEL,       // 16
  TESSDATA_LSTM,               // 17
  TESSDATA_LSTM_PUNC_DAWG,     // 18
  TESSDATA_LSTM_SYSTEM_DAWG,   // 19
  TESSDATA_LSTM_NUMBER_DAWG,   // 20
  TESSDATA_LSTM_UNICHARSET,    // 21
  TESSDATA_LSTM_RECODER,       // 22
  TESSDATA_VERSION,            // 23
 
  TESSDATA_NUM_ENTRIES
};

◆ TessErrorLogCode

enum tesseract::TessErrorLogCode

Enumerator
DBG
TESSLOG
TESSEXIT
ABORT

Definition at line 27 of file errcode.h.

                      {
  DBG = -1,     /*log without alert */
  TESSLOG = 0,  /*alert user */
  TESSEXIT = 1, /*exit after error */
  ABORT = 2     /*abort after error */
};

◆ TextlineOrder

enum tesseract::TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT
TEXTLINE_ORDER_RIGHT_TO_LEFT
TEXTLINE_ORDER_TOP_TO_BOTTOM

Definition at line 146 of file publictypes.h.

                   {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};

◆ TextModelInputType

enum tesseract::TextModelInputType

Enumerator
PCONT
PSTART
PNONE

Definition at line 31 of file paragraphs_test.cc.

                        {
  PCONT = 0,  // Continuation line of a paragraph (default).
  PSTART = 1, // First line of a paragraph.
  PNONE = 2,  // Not a paragraph line.
};

◆ ThresholdMethod

enum class tesseract::ThresholdMethod

strong

Enumerator
Otsu
LeptonicaOtsu
Sauvola
Max

Definition at line 30 of file thresholder.h.

                           {
  Otsu,          // Tesseract's legacy Otsu
  LeptonicaOtsu, // Leptonica's Otsu
  Sauvola,       // Leptonica's Sauvola
  Max,           // Number of Thresholding methods
};

◆ TopNState

enum tesseract::TopNState

Enumerator
TN_TOP2
TN_TOPN
TN_ALSO_RAN
TN_COUNT

Definition at line 84 of file recodebeam.h.

               {
  TN_TOP2,     // Winner or 2nd.
  TN_TOPN,     // Runner up in top-n, but not 1st or 2nd.
  TN_ALSO_RAN, // Not in the top-n.
  TN_COUNT
};

◆ Trainability

enum tesseract::Trainability

Enumerator
TRAINABLE
PERFECT
UNENCODABLE
HI_PRECISION_ERR
NOT_BOXED

Definition at line 51 of file lstmtrainer.h.

                  {
  TRAINABLE,        // Non-zero delta error.
  PERFECT,          // Zero delta error.
  UNENCODABLE,      // Not trainable due to coding/alignment trouble.
  HI_PRECISION_ERR, // Hi confidence disagreement.
  NOT_BOXED,        // Early in training and has no character boxes.
};

◆ TrainingFlags

enum tesseract::TrainingFlags

Enumerator
TF_INT_MODE
TF_COMPRESS_UNICHARSET

Definition at line 44 of file lstmrecognizer.h.

                   {
  TF_INT_MODE = 1,
  TF_COMPRESS_UNICHARSET = 64,
};

◆ TrainingState

enum tesseract::TrainingState

Enumerator
TS_DISABLED
TS_ENABLED
TS_TEMP_DISABLE
TS_RE_ENABLE

Definition at line 90 of file network.h.

                   {
  // Valid states of training_.
  TS_DISABLED,     // Disabled permanently.
  TS_ENABLED,      // Enabled for backprop and to write a training dump.
                   // Re-enable from ANY disabled state.
  TS_TEMP_DISABLE, // Temporarily disabled to write a recognition dump.
  // Valid only for SetEnableTraining.
  TS_RE_ENABLE, // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED.
};

◆ UnicodeNormMode

enum class tesseract::UnicodeNormMode

strong

Enumerator
kNFD
kNFC
kNFKD
kNFKC

Definition at line 34 of file normstrngs.h.

                           {
  kNFD,
  kNFC,
  kNFKD,
  kNFKC,
};

◆ ViramaScript

enum class tesseract::ViramaScript : char32

strong

Enumerator
kNonVirama
kDevanagari
kBengali
kGurmukhi
kGujarati
kOriya
kTamil
kTelugu
kKannada
kMalayalam
kSinhala
kMyanmar
kKhmer
kJavanese

Definition at line 55 of file validator.h.

                        : char32 {
  kNonVirama = 0,
  kDevanagari = 0x900,
  kBengali = 0x980,
  kGurmukhi = 0xa00,
  kGujarati = 0xa80,
  kOriya = 0xb00,
  kTamil = 0xb80,
  kTelugu = 0xc00,
  kKannada = 0xc80,
  kMalayalam = 0xd00,
  kSinhala = 0xd80,
  kMyanmar = 0x1000,
  kKhmer = 0x1780,
  kJavanese = 0xa980,
};

◆ WERD_FLAGS

enum tesseract::WERD_FLAGS

Enumerator
W_SEGMENTED	correctly segmented
W_ITALIC	italic text
W_BOLD	bold text
W_BOL	start of line
W_EOL	end of line
W_NORMALIZED	flags
W_SCRIPT_HAS_XHEIGHT	x-height concept makes sense.
W_SCRIPT_IS_LATIN	Special case latin for y. splitting.
W_DONT_CHOP	fixed pitch chopped
W_REP_CHAR	repeated character
W_FUZZY_SP	fuzzy space
W_FUZZY_NON	fuzzy nonspace
W_INVERSE	white on black

Definition at line 30 of file werd.h.

                {
  W_SEGMENTED,          
  W_ITALIC,             
  W_BOLD,               
  W_BOL,                
  W_EOL,                
  W_NORMALIZED,         
  W_SCRIPT_HAS_XHEIGHT, 
  W_SCRIPT_IS_LATIN,    
  W_DONT_CHOP,          
  W_REP_CHAR,           
  W_FUZZY_SP,           
  W_FUZZY_NON,          
  W_INVERSE             
};

◆ WritingDirection

enum tesseract::WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT
WRITING_DIRECTION_RIGHT_TO_LEFT
WRITING_DIRECTION_TOP_TO_BOTTOM

Definition at line 129 of file publictypes.h.

                      {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};

◆ XHeightConsistencyEnum

enum tesseract::XHeightConsistencyEnum

Enumerator
XH_GOOD
XH_SUBNORMAL
XH_INCONSISTENT

Definition at line 81 of file dict.h.

81{ XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT };

tesseract::XH_GOOD

@ XH_GOOD

Definition: dict.h:81

tesseract::XH_SUBNORMAL

@ XH_SUBNORMAL

Definition: dict.h:81

tesseract::XH_INCONSISTENT

@ XH_INCONSISTENT

Definition: dict.h:81

Function Documentation

◆ AccumulateVector()

void tesseract::AccumulateVector	(	int	n,
		const TFloat *	src,
		TFloat *	dest
	)

inline

Definition at line 215 of file functions.h.

                                                                     {
  for (int i = 0; i < n; ++i) {
    dest[i] += src[i];
  }
}

◆ ActualOutlineLength()

float tesseract::ActualOutlineLength ( FEATURE Feature )

Return the length of the outline in baseline normalized form.

Definition at line 27 of file normfeat.cpp.

                                           {
  return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
}

◆ AddAdaptedClass()

void tesseract::AddAdaptedClass	(	ADAPT_TEMPLATES_STRUCT *	Templates,
		ADAPT_CLASS_STRUCT *	Class,
		CLASS_ID	ClassId
	)

This routine adds a new adapted class to an existing set of adapted templates.

Parameters

Templates	set of templates to add new class to
Class	new class to add to templates
ClassId	class id to associate with new class

Note: Globals: none

Definition at line 41 of file adaptive.cpp.

                                                                                                     {
  // Preconditions: valid arguments, a legal class id that is not yet used in
  // the integer templates, and a class that has no permanent configs so far.
  assert(Templates != nullptr);
  assert(Class != nullptr);
  assert(LegalClassId(ClassId));
  assert(UnusedClassIdIn(Templates->Templates, ClassId));
  assert(Class->NumPermConfigs == 0);

  // Register a fresh integer class (constructed with arguments 1, 1) under
  // the same id in the underlying integer templates.
  AddIntClass(Templates->Templates, ClassId, new INT_CLASS_STRUCT(1, 1));

  // The adapted-class slot must still be empty; store the caller's class in it.
  assert(Templates->Class[ClassId] == nullptr);
  Templates->Class[ClassId] = Class;
} /* AddAdaptedClass */

◆ AddConfigToClass()

TESS_API int tesseract::AddConfigToClass ( CLASS_TYPE Class )

Definition at line 49 of file protos.cpp.

                                       {
  // Appends one all-zero configuration bit vector to Class and returns the
  // index of the new configuration.
  const int MaxNumProtos = Class->MaxNumProtos;
  ASSERT_HOST(MaxNumProtos <= MAX_NUM_PROTOS);

  // No free slot left: grow the table to the next multiple of
  // CONFIG_INCREMENT above the current capacity.
  if (Class->NumConfigs >= Class->MaxNumConfigs) {
    const int grown_capacity =
        (((Class->MaxNumConfigs + CONFIG_INCREMENT) / CONFIG_INCREMENT) * CONFIG_INCREMENT);
    Class->Configurations.resize(grown_capacity);
    Class->MaxNumConfigs = grown_capacity;
  }

  // Claim the next slot and install a cleared bit vector sized for
  // MAX_NUM_PROTOS bits.
  const int slot = Class->NumConfigs++;
  BIT_VECTOR fresh_config = NewBitVector(MAX_NUM_PROTOS);
  Class->Configurations[slot] = fresh_config;
  zero_all_bits(fresh_config, WordsInVectorOfSize(MAX_NUM_PROTOS));

  return (slot);
}

◆ AddFeature()

bool tesseract::AddFeature	(	FEATURE_SET	FeatureSet,
		FEATURE	Feature
	)

Add a feature to a feature set. If the feature set is already full, the feature is deleted and false is returned to indicate that it could not be added to the set; otherwise, true is returned.

Parameters

FeatureSet	set of features to add Feature to
Feature	feature to be added to FeatureSet

Returns: true if feature added to set, false if set is already full.

Definition at line 40 of file ocrfeatures.cpp.

                                                         {
  // Store the feature in the next free slot when there is room.
  const bool has_room = FeatureSet->NumFeatures < FeatureSet->MaxNumFeatures;
  if (has_room) {
    FeatureSet->Features[FeatureSet->NumFeatures++] = Feature;
    return true;
  }

  // The set is full: the rejected feature is destroyed here, so the caller
  // must not use it after a false return.
  delete Feature;
  return false;
} /* AddFeature */

◆ AddIntClass()

void tesseract::AddIntClass	(	INT_TEMPLATES_STRUCT *	Templates,
		CLASS_ID	ClassId,
		INT_CLASS_STRUCT *	Class
	)

This routine adds a new class structure to a set of templates. Classes have to be added to Templates in the order of increasing ClassIds.

Parameters

Templates	templates to add new class to
ClassId	class id to associate new class with
Class	class data structure to add to templates

Globals: none

Public Function Prototypes

Definition at line 220 of file intproto.cpp.

                                                                                             {
  assert(LegalClassId(ClassId));

  // Classes may only be appended: the new id has to equal the current class
  // count, otherwise the process is terminated.
  const bool appended_in_order = static_cast<unsigned>(ClassId) == Templates->NumClasses;
  if (!appended_in_order) {
    fprintf(stderr,
            "Please make sure that classes are added to templates"
            " in increasing order of ClassIds\n");
    exit(1);
  }

  ClassForClassId(Templates, ClassId) = Class;
  Templates->NumClasses++;

  // Once the class count exceeds what the existing pruners cover, append a
  // zero-filled class pruner.
  if (Templates->NumClasses > MaxNumClassesIn(Templates)) {
    const int new_pruner_index = Templates->NumClassPruners++;
    auto *fresh_pruner = new CLASS_PRUNER_STRUCT;
    memset(fresh_pruner, 0, sizeof(CLASS_PRUNER_STRUCT));
    Templates->ClassPruners[new_pruner_index] = fresh_pruner;
  }
} /* AddIntClass */

◆ AddIntConfig()

int tesseract::AddIntConfig ( INT_CLASS_STRUCT * Class )

This routine returns the index of the next free config in Class.

Parameters

Class class to add new configuration to

Globals: none

Returns: Index of next free config.

Definition at line 250 of file intproto.cpp.

                                          {
  // The class must still have room for another config (checked by assert only).
  assert(Class->NumConfigs < MAX_NUM_CONFIGS);

  // Claim the next config slot and start it with a length of zero.
  const int next_free = Class->NumConfigs++;
  Class->ConfigLengths[next_free] = 0;
  return next_free;
} /* AddIntConfig */

◆ AddIntProto()

int tesseract::AddIntProto ( INT_CLASS_STRUCT * Class )

This routine allocates the next free proto in Class and returns its index.

Parameters

Class class to add new proto to

Globals: none

Returns: Proto index of new proto.

Definition at line 270 of file intproto.cpp.

                                         {
  // A class that already holds MAX_NUM_PROTOS protos cannot take another one.
  if (Class->NumProtos >= MAX_NUM_PROTOS) {
    return (NO_PROTO);
  }

  const int new_id = Class->NumProtos++;

  // The existing proto sets are exhausted: append a zero-filled set and grow
  // the proto-length table to the new capacity.
  if (Class->NumProtos > MaxNumIntProtosIn(Class)) {
    const int set_slot = Class->NumProtoSets++;
    auto *fresh_set = new PROTO_SET_STRUCT;
    memset(fresh_set, 0, sizeof(*fresh_set));
    Class->ProtoSets[set_slot] = fresh_set;
    Class->ProtoLengths.resize(MaxNumIntProtosIn(Class));
  }

  // Start the new proto with zero length and with no config bits set.
  Class->ProtoLengths[new_id] = 0;
  auto Proto = ProtoForProtoId(Class, new_id);
  uint32_t *word = Proto->Configs;
  while (word < Proto->Configs + WERDS_PER_CONFIG_VEC) {
    *word++ = 0;
  }

  return (new_id);
}

◆ AddOutlineFeatureToSet()

void tesseract::AddOutlineFeatureToSet	(	FPOINT *	Start,
		FPOINT *	End,
		FEATURE_SET	FeatureSet
	)

This routine computes the midpoint between Start and End to obtain the x,y position of the outline-feature. It also computes the direction from Start to End as the direction of the outline-feature and the distance from Start to End as the length of the outline-feature. This feature is then inserted into the next feature slot in FeatureSet.

Parameters

Start	starting point of outline-feature
End	ending point of outline-feature
FeatureSet	set to add outline-feature to

Definition at line 78 of file outfeat.cpp.

                                                                                {
  // Build one outline feature describing the segment Start -> End.
  auto *outline_feature = new FEATURE_STRUCT(&OutlineFeatDesc);
  auto &params = outline_feature->Params;
  params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0);
  params[OutlineFeatX] = AverageOf(Start->x, End->x); // midpoint x
  params[OutlineFeatY] = AverageOf(Start->y, End->y); // midpoint y
  params[OutlineFeatLength] = DistanceBetween(*Start, *End);

  // The result of AddFeature is not checked here.
  AddFeature(FeatureSet, outline_feature);
} /* AddOutlineFeatureToSet */

◆ AddProtoToClass()

TESS_API int tesseract::AddProtoToClass ( CLASS_TYPE Class )

Definition at line 82 of file protos.cpp.

                                      {
  // Reserves the next prototype slot in Class and returns its index.
  const bool table_full = Class->NumProtos >= Class->MaxNumProtos;
  if (table_full) {
    // Grow to the next multiple of PROTO_INCREMENT above the current capacity.
    const int NewNumProtos =
        (((Class->MaxNumProtos + PROTO_INCREMENT) / PROTO_INCREMENT) * PROTO_INCREMENT);
    Class->Prototypes.resize(NewNumProtos);
    Class->MaxNumProtos = NewNumProtos;
    ASSERT_HOST(NewNumProtos <= MAX_NUM_PROTOS);
  }

  const int assigned_index = Class->NumProtos++;
  ASSERT_HOST(Class->NumProtos <= MAX_NUM_PROTOS);
  return (assigned_index);
}

◆ AddProtoToClassPruner()

void tesseract::AddProtoToClassPruner	(	PROTO_STRUCT *	Proto,
		CLASS_ID	ClassId,
		INT_TEMPLATES_STRUCT *	Templates
	)

This routine adds Proto to the class pruning tables for the specified class in Templates.

Globals:

classify_num_cp_levels number of levels used in the class pruner
Parameters

Proto floating-pt proto to add to class pruner

ClassId class id corresponding to Proto

Templates set of templates containing class pruner

Definition at line 306 of file intproto.cpp.

{
  TABLE_FILLER TableFiller;
  FILL_SPEC FillSpec;

  // Pruner, word index and mask that belong to ClassId.
  CLASS_PRUNER_STRUCT *class_pruner = CPrunerFor(Templates, ClassId);
  const uint32_t word_index = CPrunerWordIndexFor(ClassId);
  const uint32_t class_mask = CPrunerMaskFor(MAX_LEVEL, ClassId);

  // Process the levels from the highest (classify_num_cp_levels - 1) down to 0.
  int level = classify_num_cp_levels - 1;
  while (level >= 0) {
    float end_pad, side_pad, angle_pad;
    GetCPPadsForLevel(level, &end_pad, &side_pad, &angle_pad);
    const uint32_t class_count = CPrunerMaskFor(level, ClassId);
    InitTableFiller(end_pad, side_pad, angle_pad, Proto, &TableFiller);

    // Apply every fill spec the table filler produces for this level.
    while (!FillerDone(&TableFiller)) {
      GetNextFill(&TableFiller, &FillSpec);
      DoFill(&FillSpec, class_pruner, class_mask, class_count, word_index);
    }
    level--;
  }
} /* AddProtoToClassPruner */

◆ AddProtoToProtoPruner()

void tesseract::AddProtoToProtoPruner	(	PROTO_STRUCT *	Proto,
		int	ProtoId,
		INT_CLASS_STRUCT *	Class,
		bool	debug
	)

This routine updates the proto pruner lookup tables for Class to include a new proto identified by ProtoId and described by Proto.

Parameters

Proto	floating-pt proto to be added to proto pruner
ProtoId	id of proto
Class	integer class that contains desired proto pruner
debug	debug flag

Note: Globals: none

Definition at line 344 of file intproto.cpp.

                                                                                                  {
  // Marks the proto in the angle, X and Y proto-pruner tables of the proto
  // set that contains ProtoId.
  float X, Y, Length;
  float Pad;
 
  // ProtoId must name a proto that already exists in Class. The offending
  // values are logged (newline-terminated, so the message does not run into
  // the assert output) before the assert below fires.
  // NOTE(review): when asserts are compiled out, execution continues past
  // this point with an out-of-range ProtoId - confirm callers guarantee it.
  if (ProtoId >= Class->NumProtos) {
    tprintf("AddProtoToProtoPruner:assert failed: %d < %d\n", ProtoId, Class->NumProtos);
  }
  assert(ProtoId < Class->NumProtos);
 
  // Position of the proto inside its proto set, and the set itself.
  int Index = IndexForProto(ProtoId);
  auto ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
 
  float Angle = Proto->Angle;
#ifndef _WIN32
  assert(!std::isnan(Angle));
#endif
 
  // Angle table: circular fill around the shifted angle; the pad parameter is
  // divided by 360 (presumably degrees -> fraction of a turn; TODO confirm).
  FillPPCircularBits(ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, Angle + ANGLE_SHIFT,
                     classify_pp_angle_pad / 360.0, debug);
 
  // Scale the angle by 2*pi for the trigonometric calls below.
  Angle *= 2.0 * M_PI;
  Length = Proto->Length;
 
  // X table: pad is the larger of the |cos|-weighted half length plus end pad
  // and the |sin|-weighted side pad.
  X = Proto->X + X_SHIFT;
  Pad = std::max(fabs(std::cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
                 fabs(std::sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
 
  FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug);
 
  // Y table: same computation with the roles of sin and cos exchanged.
  Y = Proto->Y + Y_SHIFT;
  Pad = std::max(fabs(std::sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
                 fabs(std::cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
 
  FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
} /* AddProtoToProtoPruner */

◆ AddToNormProtosList()

TESS_COMMON_TRAINING_API void tesseract::AddToNormProtosList	(	LIST *	NormProtoList,
		LIST	ProtoList,
		const std::string &	CharName
	)

Definition at line 722 of file commontraining.cpp.

                                                                                         {
  // Collect every prototype of ProtoList under a new node labelled with
  // CharName, then push that node onto *NormProtoList.
  auto *labeled_node = new LABELEDLISTNODE(CharName.c_str());
  iterate(ProtoList) {
    auto *proto = reinterpret_cast<PROTOTYPE *>(ProtoList->first_node());
    labeled_node->List = push(labeled_node->List, proto);
  }
  *NormProtoList = push(*NormProtoList, labeled_node);
}

◆ adjust_row_limits()

void tesseract::adjust_row_limits ( TO_BLOCK * block )

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1129 of file makerow.cpp.

  {
  // Re-derives each row's limits from its current height, split according to
  // the x-height, ascender and descender fractions of CCStruct.
  TO_ROW_IT row_it = block->get_rows();

  if (textord_show_expanded_rows) {
    tprintf("Adjusting row limits for block(%d,%d)\n", block->block->pdblk.bounding_box().left(),
            block->block->pdblk.bounding_box().top());
  }

  row_it.mark_cycle_pt();
  while (!row_it.cycled_list()) {
    TO_ROW *row = row_it.data();
    float size = row->max_y() - row->min_y(); // current height of the row
    if (textord_show_expanded_rows) {
      tprintf("Row at %f has min %f, max %f, size %f\n", row->intercept(), row->min_y(),
              row->max_y(), size);
    }
    // Normalise the height by the sum of the three fractions, then place the
    // new limits relative to the row's intercept.
    size /= tesseract::CCStruct::kXHeightFraction + tesseract::CCStruct::kAscenderFraction +
            tesseract::CCStruct::kDescenderFraction;
    const float ymax =
        size * (tesseract::CCStruct::kXHeightFraction + tesseract::CCStruct::kAscenderFraction);
    const float ymin = -size * tesseract::CCStruct::kDescenderFraction;
    row->set_limits(row->intercept() + ymin, row->intercept() + ymax);
    row->merged = false;
    row_it.forward();
  }
}

◆ allocNormProtos()

void tesseract::allocNormProtos ( )

◆ ApproximateOutline()

TESSLINE * tesseract::ApproximateOutline	(	bool	allow_detailed_fx,
		C_OUTLINE *	c_outline
	)

Definition at line 529 of file polyaprx.cpp.

                                                                           {
  // Working storage for the converted path: a stack buffer for outlines of up
  // to FASTEDGELENGTH steps, a heap array for longer ones.
  EDGEPT stack_edgepts[FASTEDGELENGTH];
  const bool needs_heap = c_outline->pathlength() > FASTEDGELENGTH;
  EDGEPT *edgepts = needs_heap ? new EDGEPT[c_outline->pathlength()] : stack_edgepts;

  // "area" is the squared box height, or the squared larger box dimension
  // when poly_wide_objects_better is off.
  const auto &loop_box = c_outline->bounding_box();
  int32_t area = loop_box.height();
  if (!poly_wide_objects_better && loop_box.width() > area) {
    area = loop_box.width();
  }
  area *= area;

  edgesteps_to_edgepts(c_outline, edgepts);
  fix2(edgepts, area);
  EDGEPT *const first = poly2(edgepts, area); // 2nd approximation.

  // Copy the approximated loop into freshly allocated points, linking them
  // into a doubly linked list as we go.
  EDGEPT *head = nullptr;
  EDGEPT *tail = nullptr;
  EDGEPT *cursor = first;
  do {
    auto *copy = new EDGEPT;
    copy->pos = cursor->pos;
    copy->prev = tail;
    if (tail != nullptr) {
      tail->next = copy;
    } else {
      head = copy;
    }
    // Source-outline bookkeeping is carried over only on request.
    if (allow_detailed_fx) {
      copy->src_outline = cursor->src_outline;
      copy->start_step = cursor->start_step;
      copy->step_count = cursor->step_count;
    }
    tail = copy;
    cursor = cursor->next;
  } while (cursor != first);

  // Close the ring.
  tail->next = head;
  head->prev = tail;

  if (edgepts != stack_edgepts) {
    delete[] edgepts;
  }
  return TESSLINE::BuildFromOutlineList(head);
}

◆ AsciiLikelyListItem()

TESS_API bool tesseract::AsciiLikelyListItem ( const std::string & word )

Definition at line 282 of file paragraphs.cpp.

                                                {
  // A word counts as a likely list item when it looks like a list mark or,
  // failing that, like a list numeral.
  if (LikelyListMark(word)) {
    return true;
  }
  return LikelyListNumeral(word);
}

◆ AsciiToRowInfo()

void tesseract::AsciiToRowInfo	(	const char *	text,
		int	row_number,
		RowInfo *	info
	)

Definition at line 49 of file paragraphs_test.cc.

                                                                     {
  // Test helper: fills *info from an ASCII row, treating every character as
  // kCharWidth wide and every row as kLineSpace tall.
  const int kCharWidth = 10;
  const int kLineSpace = 30;

  // Values that also hold for a row without any word.
  info->text = text;
  const bool dotted_leader = strstr(text, "...") != nullptr;
  info->has_leaders = dotted_leader || strstr(text, ". . .") != nullptr;
  info->has_drop_cap = false;
  info->pix_rdistance = 0;
  info->pix_ldistance = info->pix_rdistance;
  info->average_interword_space = kCharWidth;
  info->pix_xheight = kCharWidth;
  info->rword_text = "";
  info->lword_text = info->rword_text;
  info->ltr = true;

  std::vector<std::string> words = split(text, ' ');
  info->num_words = words.size();
  if (info->num_words < 1) {
    return;
  }

  // First and last word of the row.
  info->lword_text = words.front().c_str();
  info->rword_text = words.back().c_str();

  // Count the spaces at the start and at the end of the row.
  int lspace = 0;
  for (; lspace < info->text.size() && text[lspace] == ' '; lspace++) {
  }
  int rspace = 0;
  for (; rspace < info->text.size() && text[info->text.size() - rspace - 1] == ' '; rspace++) {
  }

  // Rows are stacked downwards: row r has its top at -kLineSpace * r.
  const int row_top = -kLineSpace * row_number;
  const int row_bottom = row_top - kLineSpace;
  const int row_right = kCharWidth * info->text.size();
  const int lword_width = kCharWidth * info->lword_text.size();
  const int rword_width = kCharWidth * info->rword_text.size();
  info->pix_ldistance = lspace * kCharWidth;
  info->pix_rdistance = rspace * kCharWidth;
  info->lword_box =
      TBOX(info->pix_ldistance, row_bottom, info->pix_ldistance + lword_width, row_top);
  info->rword_box = TBOX(row_right - info->pix_rdistance - rword_width, row_bottom,
                         row_right - info->pix_rdistance, row_top);

  // Derive the list-item / starts-idea / ends-idea flags of both edge words.
  LeftWordAttributes(nullptr, nullptr, info->lword_text, &info->lword_indicates_list_item,
                     &info->lword_likely_starts_idea, &info->lword_likely_ends_idea);
  RightWordAttributes(nullptr, nullptr, info->rword_text, &info->rword_indicates_list_item,
                      &info->rword_likely_starts_idea, &info->rword_likely_ends_idea);
}

◆ ASSERT_FAILED()

constexpr ERRCODE tesseract::ASSERT_FAILED ( "Assert failed" )

constexpr

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2	(	Image	pix,
		BLOCK_LIST *	blocks,
		TO_BLOCK_LIST *	port_blocks
	)

Definition at line 162 of file tordmain.cpp.

                                                         { // output list
  BLOCK_IT block_it = blocks;
  TO_BLOCK_IT port_block_it = port_blocks; // destination iterator
  C_BLOB_IT blob_it;                       // source blobs of the current list
  BLOBNBOX_IT port_box_it;                 // destination of the converted blobs

  // Drains the list blob_it is attached to: every C_BLOB is extracted, wrapped
  // in a BLOBNBOX that owns it, given a stroke width, and appended at
  // port_box_it.
  auto convert_attached_list = [&]() {
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      auto *wrapped = new BLOBNBOX(blob_it.extract());
      wrapped->set_owns_cblob(true);
      SetBlobStrokeWidth(pix, wrapped);
      port_box_it.add_after_then_move(wrapped);
    }
  };

  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    auto *source_block = block_it.data();
    auto *port_block = new TO_BLOCK(source_block);

    // Good outlines go to port_block->blobs.
    port_box_it.set_to_list(&port_block->blobs);
    blob_it.set_to_list(source_block->blob_list());
    convert_attached_list();

    // Rejected outlines go to port_block->noise_blobs, so they can be
    // reconsidered, sorted back into rows, and recovered if they were
    // rejected by mistake.
    port_box_it.set_to_list(&port_block->noise_blobs);
    blob_it.set_to_list(source_block->reject_blobs());
    convert_attached_list();

    port_block_it.add_after_then_move(port_block);
  }
}

◆ assign_blobs_to_rows()

void tesseract::assign_blobs_to_rows	(	TO_BLOCK *	block,
		float *	gradient,
		int	pass,
		bool	reject_misses,
		bool	make_new_rows,
		bool	drawing_skew
	)

Definition at line 2272 of file makerow.cpp.

  {
  // Walks the blobs of the block in x order and, for each one, either adds it
  // to an existing row (ASSIGN), starts a new row with it (NEW_ROW), or leaves
  // it in the block's blob list (REJECT). Blob positions are corrected by a
  // skew estimate: computed from *gradient when gradient is non-null,
  // otherwise tracked in block_skew and smoothed with smooth_factor.
  // Rows left without blobs are deleted at the end.
  OVERLAP_STATE overlap_result; // what to do with it
  float ycoord;                 // current y
  float top, bottom;            // of blob
  float g_length = 1.0f;        // from gradient
  int16_t row_count;            // no of rows
  int16_t left_x;               // left edge
  int16_t last_x;               // previous edge
  float block_skew;             // y delta
  float smooth_factor;          // for new coords
  float near_dist;              // dist to nearest row
  ICOORD testpt;                // testing only
  BLOBNBOX *blob;               // current blob
  TO_ROW *row;                  // current row
  TO_ROW *dest_row = nullptr;   // row to put blob in
                                // iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows();
 
  // Vertical midpoint of the block's bounding box (used for drawing only).
  ycoord =
      (block->block->pdblk.bounding_box().bottom() + block->block->pdblk.bounding_box().top()) /
      2.0f;
  if (gradient != nullptr) {
    g_length = std::sqrt(1 + *gradient * *gradient);
  }
#ifndef GRAPHICS_DISABLED
  if (drawing_skew) {
    to_win->SetCursor(block->block->pdblk.bounding_box().left(), ycoord);
  }
#endif
  testpt = ICOORD(textord_test_x, textord_test_y);
  blob_it.sort(blob_x_order);
  smooth_factor = 1.0;
  block_skew = 0.0f;
  row_count = row_it.length(); // might have rows
  // Start tracking from the leftmost blob, or from the block's left edge when
  // there are no blobs.
  if (!blob_it.empty()) {
    left_x = blob_it.data()->bounding_box().left();
  } else {
    left_x = block->block->pdblk.bounding_box().left();
  }
  last_x = left_x;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    if (gradient != nullptr) {
      // Skew derived directly from the supplied gradient.
      block_skew = (1 - 1 / g_length) * blob->bounding_box().bottom() +
                   *gradient / g_length * blob->bounding_box().left();
    } else if (blob->bounding_box().left() - last_x > block->line_size / 2 &&
               last_x - left_x > block->line_size * 2 && textord_interpolating_skew) {
      // Large horizontal jump since the previous blob: scale the running skew
      // by the ratio of the travelled x distances.
      //                      tprintf("Interpolating skew from %g",block_skew);
      block_skew *= static_cast<float>(blob->bounding_box().left() - left_x) / (last_x - left_x);
      //                      tprintf("to %g\n",block_skew);
    }
    last_x = blob->bounding_box().left();
    // Skew-corrected vertical extent of the blob.
    top = blob->bounding_box().top() - block_skew;
    bottom = blob->bounding_box().bottom() - block_skew;
#ifndef GRAPHICS_DISABLED
    if (drawing_skew) {
      to_win->DrawTo(blob->bounding_box().left(), ycoord + block_skew);
    }
#endif
    if (!row_it.empty()) {
      // Advance to the first row whose min_y is not above the blob's top (or
      // to the last row).
      for (row_it.move_to_first(); !row_it.at_last() && row_it.data()->min_y() > top;
           row_it.forward()) {
      }
      row = row_it.data();
      if (row->min_y() <= top && row->max_y() >= bottom) {
        // any overlap
        dest_row = row;
        overlap_result = most_overlapping_row(&row_it, dest_row, top, bottom, block->line_size,
                                              blob->bounding_box().contains(testpt));
        if (overlap_result == NEW_ROW && !reject_misses) {
          overlap_result = ASSIGN;
        }
      } else {
        // No overlap with the candidate row. Unless new rows may be created,
        // try to attach the blob to a nearby row within a tolerance derived
        // from the line spacing.
        overlap_result = NEW_ROW;
        if (!make_new_rows) {
          near_dist = row_it.data_relative(-1)->min_y() - top;
          // below bottom
          if (bottom < row->min_y()) {
            if (row->min_y() - bottom <= (block->line_spacing - block->line_size) *
                                             tesseract::CCStruct::kDescenderFraction) {
              // done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          } else if (near_dist > 0 && near_dist < bottom - row->max_y()) {
            // The previous row is closer: consider it instead.
            row_it.backward();
            dest_row = row_it.data();
            if (dest_row->min_y() - bottom <= (block->line_spacing - block->line_size) *
                                                  tesseract::CCStruct::kDescenderFraction) {
              // done it
              overlap_result = ASSIGN;
            }
          } else {
            if (top - row->max_y() <=
                (block->line_spacing - block->line_size) *
                    (textord_overlap_x + tesseract::CCStruct::kAscenderFraction)) {
              // done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
        }
      }
      if (overlap_result == ASSIGN) {
        dest_row->add_blob(blob_it.extract(), top, bottom, block->line_size);
      }
      if (overlap_result == NEW_ROW) {
        // A new row is only made when allowed and the blob is not too tall.
        if (make_new_rows && top - bottom < block->max_blob_size) {
          dest_row = new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
          row_count++;
          if (bottom > row_it.data()->min_y()) {
            row_it.add_before_then_move(dest_row);
          // insert in right place
          } else {
            row_it.add_after_then_move(dest_row);
          }
          smooth_factor = 1.0 / (row_count * textord_skew_lag + textord_skewsmooth_offset);
        } else {
          overlap_result = REJECT;
        }
      }
    } else if (make_new_rows && top - bottom < block->max_blob_size) {
      // No rows exist yet: this blob starts the first one.
      overlap_result = NEW_ROW;
      dest_row = new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
      row_count++;
      row_it.add_after_then_move(dest_row);
      smooth_factor = 1.0 / (row_count * textord_skew_lag + textord_skewsmooth_offset2);
    } else {
      overlap_result = REJECT;
    }
    // Debug trace for the blob that contains the configured test point.
    if (blob->bounding_box().contains(testpt) && textord_debug_blob) {
      if (overlap_result != REJECT) {
        tprintf("Test blob assigned to row at (%g,%g) on pass %d\n", dest_row->min_y(),
                dest_row->max_y(), pass);
      } else {
        tprintf("Test blob assigned to no row on pass %d\n", pass);
      }
    }
    if (overlap_result != REJECT) {
      // The row at row_it may have changed extent: move it backwards or
      // forwards until the list is again ordered by descending min_y.
      while (!row_it.at_first() && row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
        row = row_it.extract();
        row_it.backward();
        row_it.add_before_then_move(row);
      }
      while (!row_it.at_last() && row_it.data()->min_y() < row_it.data_relative(1)->min_y()) {
        row = row_it.extract();
        row_it.forward();
        // Keep rows in order.
        row_it.add_after_then_move(row);
      }
      // Update the running skew unless the blob largely overlaps in x the
      // blob that precedes the row's last blob.
      BLOBNBOX_IT added_blob_it(dest_row->blob_list());
      added_blob_it.move_to_last();
      TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
      if (dest_row->blob_list()->singleton() || !prev_box.major_x_overlap(blob->bounding_box())) {
        block_skew = (1 - smooth_factor) * block_skew +
                     smooth_factor * (blob->bounding_box().bottom() - dest_row->initial_min_y());
      }
    }
  }
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    if (row_it.data()->blob_list()->empty()) {
      delete row_it.extract(); // Discard empty rows.
    }
  }
}

◆ BAD_PARAMETER()

constexpr ERRCODE tesseract::BAD_PARAMETER ( "List parameter error" )

constexpr

◆ BADBLOCKLINE()

constexpr ERRCODE tesseract::BADBLOCKLINE ( "Y coordinate in block out of bounds" )

constexpr

◆ BADERRACTION()

constexpr ERRCODE tesseract::BADERRACTION ( "Illegal error action" )

constexpr

◆ blob_x_order()

int tesseract::blob_x_order	(	const void *	item1,
		const void *	item2
	)

Definition at line 2542 of file makerow.cpp.

                       {
  // Each item points at a BLOBNBOX pointer; blobs are ordered by the left
  // edge of their bounding boxes.
  const BLOBNBOX *lhs = *reinterpret_cast<const BLOBNBOX *const *>(item1);
  const BLOBNBOX *rhs = *reinterpret_cast<const BLOBNBOX *const *>(item2);

  const auto lhs_left = lhs->bounding_box().left();
  const auto rhs_left = rhs->bounding_box().left();

  // -1 / 0 / 1 for less / equal / greater.
  if (lhs_left < rhs_left) {
    return -1;
  }
  return lhs_left > rhs_left ? 1 : 0;
}

◆ BlobMicroFeatures()

MICROFEATURES tesseract::BlobMicroFeatures	(	TBLOB *	Blob,
		const DENORM &	cn_denorm
	)

This routine extracts micro-features from the specified blob and returns a list of the micro-features. All micro-features are normalized according to the specified line statistics.

Parameters

Blob	blob to extract micro-features from
cn_denorm	control parameter to feature extractor

Returns: List of micro-features extracted from the blob.

Definition at line 54 of file mfx.cpp.

                                                                      {
  MICROFEATURES MicroFeatures;

  // Without a blob there is nothing to extract.
  if (Blob == nullptr) {
    return MicroFeatures;
  }

  LIST Outlines = ConvertBlob(Blob);

  // Pass 1: character-normalize every outline.
  LIST cursor = Outlines;
  iterate(cursor) {
    auto Outline = static_cast<MFOUTLINE>(cursor->first_node());
    CharNormalizeOutline(Outline, cn_denorm);
  }

  // Pass 2: find and mark direction changes, then accumulate the
  // micro-features of each outline.
  cursor = Outlines;
  iterate(cursor) {
    auto Outline = static_cast<MFOUTLINE>(cursor->first_node());
    FindDirectionChanges(Outline, classify_min_slope, classify_max_slope);
    MarkDirectionChanges(Outline);
    MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures);
  }

  FreeOutlines(Outlines);
  return MicroFeatures;
} /* BlobMicroFeatures */

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample	(	const TBLOB &	blob,
		bool	nonlinear_norm,
		INT_FX_RESULT_STRUCT *	fx_info,
		std::vector< INT_FEATURE_STRUCT > *	bl_features
	)

Definition at line 79 of file intfx.cpp.

                                                                                 {
  // Extract the features; the baseline-normalized ones go to the caller's
  // bl_features, the other set is kept locally for building the sample.
  std::vector<INT_FEATURE_STRUCT> cn_features;
  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, fx_info, nullptr);
  // TODO(rays) Use blob->PreciseBoundingBox() instead.
  TBOX box = blob.bounding_box();

  // No features means no sample.
  const int num_features = fx_info->NumCN;
  if (num_features <= 0) {
    return nullptr;
  }
  TrainingSample *sample =
      TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], num_features);
  if (sample == nullptr) {
    return nullptr;
  }

  // Map two opposite corners of the blob box through the blob's denorm and
  // store the resulting box on the sample.
  TPOINT topleft, botright;
  topleft.x = box.left();
  topleft.y = box.top();
  botright.x = box.right();
  botright.y = box.bottom();
  TPOINT original_topleft, original_botright;
  blob.denorm().DenormTransform(nullptr, topleft, &original_topleft);
  blob.denorm().DenormTransform(nullptr, botright, &original_botright);
  sample->set_bounding_box(
      TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y));
  return sample;
}

◆ block_edges()

void tesseract::block_edges	(	Image	t_pix,
		PDBLK *	block,
		C_OUTLINE_IT *	outline_it
	)

Definition at line 62 of file scanedg.cpp.

                                           {
  // Scans the part of the image covered by the block row by row, from the top
  // of the block's bounding box down to one row below its bottom, and passes
  // each row of pixels to line_edges() together with outline_it.
  ICOORD bleft; // bounding box
  ICOORD tright;
  BLOCK_LINE_IT line_it = block; // line iterator
 
  int width = pixGetWidth(t_pix);
  int height = pixGetHeight(t_pix);
  int wpl = pixGetWpl(t_pix);
  // lines in progress
  std::unique_ptr<CRACKEDGE *[]> ptrline(new CRACKEDGE *[width + 1]);
  CRACKEDGE *free_cracks = nullptr;
 
  block->bounding_box(bleft, tright); // block box
  // The block must lie inside the image on its top/right side.
  ASSERT_HOST(tright.x() <= width);
  ASSERT_HOST(tright.y() <= height);
  int block_width = tright.x() - bleft.x();
  // Clear the block_width + 1 entries of ptrline that are used.
  for (int x = block_width; x >= 0; x--) {
    ptrline[x] = nullptr; //  no lines in progress
  }
 
  // One byte per pixel of the current row.
  std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]);
 
  const uint8_t margin = WHITE_PIX;
 
  // y runs one row past the bottom of the box; that extra row is filled with
  // margin pixels only.
  for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) {
    if (y >= bleft.y() && y < tright.y()) {
      // Get the binary pixels from the image.
      // Image rows are addressed from the top, hence height - 1 - y; each bit
      // is inverted (^ 1) when copied into bwline.
      l_uint32 *line = pixGetData(t_pix) + wpl * (height - 1 - y);
      for (int x = 0; x < block_width; ++x) {
        bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1;
      }
      make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y);
    } else {
      memset(bwline.get(), margin, block_width * sizeof(bwline[0]));
    }
    line_edges(bleft.x(), y, block_width, margin, bwline.get(), ptrline.get(), &free_cracks,
               outline_it);
  }
 
  free_crackedges(free_cracks); // really free them
}

◆ BOOL_VAR_H() [1/40]

tesseract::BOOL_VAR_H ( devanagari_split_debugimage )

◆ BOOL_VAR_H() [2/40]

tesseract::BOOL_VAR_H ( disable_character_fragments )

◆ BOOL_VAR_H() [3/40]

tesseract::BOOL_VAR_H ( gapmap_debug )

◆ BOOL_VAR_H() [4/40]

tesseract::BOOL_VAR_H ( gapmap_no_isolated_quanta )

◆ BOOL_VAR_H() [5/40]

tesseract::BOOL_VAR_H ( gapmap_use_ends )

◆ BOOL_VAR_H() [6/40]

tesseract::BOOL_VAR_H ( textord_blockndoc_fixed )

◆ BOOL_VAR_H() [7/40]

tesseract::BOOL_VAR_H ( textord_blocksall_fixed )

◆ BOOL_VAR_H() [8/40]

tesseract::BOOL_VAR_H ( textord_blocksall_prop )

◆ BOOL_VAR_H() [9/40]

tesseract::BOOL_VAR_H ( textord_chopper_test )

◆ BOOL_VAR_H() [10/40]

tesseract::BOOL_VAR_H ( textord_debug_blob )

◆ BOOL_VAR_H() [11/40]

tesseract::BOOL_VAR_H ( textord_debug_pitch_metric )

◆ BOOL_VAR_H() [12/40]

tesseract::BOOL_VAR_H ( textord_debug_pitch_test )

◆ BOOL_VAR_H() [13/40]

tesseract::BOOL_VAR_H ( textord_debug_printable )

◆ BOOL_VAR_H() [14/40]

tesseract::BOOL_VAR_H ( textord_debug_xheights )

◆ BOOL_VAR_H() [15/40]

tesseract::BOOL_VAR_H ( textord_fast_pitch_test )

◆ BOOL_VAR_H() [16/40]

tesseract::BOOL_VAR_H ( textord_fix_makerow_bug )

◆ BOOL_VAR_H() [17/40]

tesseract::BOOL_VAR_H ( textord_fix_xheight_bug )

◆ BOOL_VAR_H() [18/40]

tesseract::BOOL_VAR_H ( textord_force_make_prop_words )

◆ BOOL_VAR_H() [19/40]

tesseract::BOOL_VAR_H ( textord_heavy_nr )

◆ BOOL_VAR_H() [20/40]

tesseract::BOOL_VAR_H ( textord_new_initial_xheight )

◆ BOOL_VAR_H() [21/40]

tesseract::BOOL_VAR_H ( textord_old_baselines )

◆ BOOL_VAR_H() [22/40]

tesseract::BOOL_VAR_H ( textord_old_xheight )

◆ BOOL_VAR_H() [23/40]

tesseract::BOOL_VAR_H ( textord_oldbl_debug )

◆ BOOL_VAR_H() [24/40]

tesseract::BOOL_VAR_H ( textord_parallel_baselines )

◆ BOOL_VAR_H() [25/40]

tesseract::BOOL_VAR_H ( textord_pitch_scalebigwords )

◆ BOOL_VAR_H() [26/40]

tesseract::BOOL_VAR_H ( textord_restore_underlines )

◆ BOOL_VAR_H() [27/40]

tesseract::BOOL_VAR_H ( textord_show_expanded_rows )

◆ BOOL_VAR_H() [28/40]

tesseract::BOOL_VAR_H ( textord_show_final_blobs )

◆ BOOL_VAR_H() [29/40]

tesseract::BOOL_VAR_H ( textord_show_final_rows )

◆ BOOL_VAR_H() [30/40]

tesseract::BOOL_VAR_H ( textord_show_fixed_cuts )

◆ BOOL_VAR_H() [31/40]

tesseract::BOOL_VAR_H ( textord_show_initial_rows )

◆ BOOL_VAR_H() [32/40]

tesseract::BOOL_VAR_H ( textord_show_initial_words )

◆ BOOL_VAR_H() [33/40]

tesseract::BOOL_VAR_H ( textord_show_page_cuts )

◆ BOOL_VAR_H() [34/40]

tesseract::BOOL_VAR_H ( textord_show_parallel_rows )

◆ BOOL_VAR_H() [35/40]

tesseract::BOOL_VAR_H ( textord_show_row_cuts )

◆ BOOL_VAR_H() [36/40]

tesseract::BOOL_VAR_H ( textord_straight_baselines )

◆ BOOL_VAR_H() [37/40]

tesseract::BOOL_VAR_H ( textord_test_landscape )

◆ BOOL_VAR_H() [38/40]

tesseract::BOOL_VAR_H ( wordrec_blob_pause )

◆ BOOL_VAR_H() [39/40]

tesseract::BOOL_VAR_H ( wordrec_display_all_blobs )

◆ BOOL_VAR_H() [40/40]

tesseract::BOOL_VAR_H ( wordrec_display_splits )

◆ box_next()

TBOX tesseract::box_next ( BLOBNBOX_IT * it )

Definition at line 638 of file blobbox.cpp.

  {
  // Start with the box of the blob the iterator is on.
  BLOBNBOX *blob = it->data();
  TBOX result = blob->bounding_box();

  // Advance until the iterator rests on a blob that has a cblob and is not
  // joined to its predecessor. Boxes of blobs without a cblob (pre-chopped)
  // met on the way are merged into the result.
  for (;;) {
    it->forward();
    blob = it->data();
    const bool pre_chopped = blob->cblob() == nullptr;
    if (pre_chopped) {
      result += blob->bounding_box();
    }
    if (!pre_chopped && !blob->joined_to_prev()) {
      break;
    }
  }
  return result;
}

◆ box_next_pre_chopped()

TBOX tesseract::box_next_pre_chopped ( BLOBNBOX_IT * it )

Definition at line 667 of file blobbox.cpp.

  {
  // The result is simply the box of the blob the iterator starts on.
  TBOX result = it->data()->bounding_box();

  // Step forward at least once, and keep going while the blob under the
  // iterator is joined to its predecessor.
  do {
    it->forward();
  } while (it->data()->joined_to_prev());

  return result;
}

◆ Bucket16For()

uint16_t tesseract::Bucket16For	(	float	param,
		float	offset,
		int	num_buckets
	)

Definition at line 389 of file intproto.cpp.

                                                                 {
  // Round the mapped parameter to the nearest integer, then clip it into the
  // valid bucket range [0, num_buckets - 1].
  const int last_bucket = num_buckets - 1;
  const int rounded = IntCastRounded(MapParam(param, offset, num_buckets));
  return static_cast<uint16_t>(ClipToRange<int>(rounded, 0, last_bucket));
}

◆ Bucket8For()

uint8_t tesseract::Bucket8For	(	float	param,
		float	offset,
		int	num_buckets
	)

Returns a quantized bucket for the given param shifted by offset, notionally (param + offset) * num_buckets, but clipped and cast to the appropriate type.

Definition at line 385 of file intproto.cpp.

                                                               {
  // Round the mapped parameter to the nearest integer, then clip it into the
  // valid bucket range [0, num_buckets - 1].
  const int last_bucket = num_buckets - 1;
  const int rounded = IntCastRounded(MapParam(param, offset, num_buckets));
  return static_cast<uint8_t>(ClipToRange<int>(rounded, 0, last_bucket));
}

◆ BucketEnd()

float tesseract::BucketEnd	(	int	Bucket,
		float	Offset,
		int	NumBuckets
	)

This routine returns the parameter value which corresponds to the end of the specified bucket. The bucket number should have been generated using the BucketFor() function with parameters Offset and NumBuckets.

Parameters

Bucket	bucket whose end is to be computed
Offset	offset used to map params to buckets
NumBuckets	total number of buckets

Returns: Param value corresponding to end position of Bucket.

Note: Globals: none

Definition at line 1007 of file intproto.cpp.

                                                          {
  // The end of bucket b is the start of bucket b + 1: convert that index to
  // float first so the division by NumBuckets is done in floating point.
  const float next_bucket = static_cast<float>(Bucket + 1);
  return next_bucket / NumBuckets - Offset;
} /* BucketEnd */

◆ BucketStart()

float tesseract::BucketStart	(	int	Bucket,
		float	Offset,
		int	NumBuckets
	)

This routine returns the parameter value which corresponds to the beginning of the specified bucket. The bucket number should have been generated using the BucketFor() function with parameters Offset and NumBuckets.

Parameters

Bucket	bucket whose start is to be computed
Offset	offset used to map params to buckets
NumBuckets	total number of buckets

Returns: Param value corresponding to start position of Bucket.

Note: Globals: none

Definition at line 991 of file intproto.cpp.

                                                            {
  // Convert the bucket index to float first so the division by NumBuckets
  // is done in floating point, then undo the offset.
  const float this_bucket = static_cast<float>(Bucket);
  return this_bucket / NumBuckets - Offset;
} /* BucketStart */

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults	(	std::vector< PARA * > *	row_owners,
		PARA_LIST *	paragraphs
	)

Definition at line 2288 of file paragraphs.cpp.

                                                                                        {
  std::vector<PARA *> &rows = *row_owners;
  paragraphs->clear();
  PARA_IT out(paragraphs);
  PARA *formerly_null = nullptr;
  for (unsigned i = 0; i < rows.size(); i++) {
    if (rows[i] == nullptr) {
      if (i == 0 || rows[i - 1] != formerly_null) {
        rows[i] = formerly_null = new PARA();
      } else {
        rows[i] = formerly_null;
        continue;
      }
    } else if (i > 0 && rows[i - 1] == rows[i]) {
      continue;
    }
    out.add_after_then_move(rows[i]);
  }
}

◆ CANTOPENFILE()

constexpr ERRCODE tesseract::CANTOPENFILE ( "Can't open file" )

constexpr

◆ ChangeDirection()

void tesseract::ChangeDirection	(	MFOUTLINE	Start,
		MFOUTLINE	End,
		DIRECTION	Direction
	)

Change the direction of every vector in the specified outline segment to Direction. The segment to be changed starts at Start and ends at End. Note that the previous direction of End must also be changed to reflect the change in direction of the point before it.

Parameters

Start	defines start of segment of outline to be modified
End	defines end of segment of outline to be modified
Direction	new direction to assign to segment

Definition at line 280 of file mfoutline.cpp.

                                                                          {
  // Assign the new direction to every point from Start up to, but not
  // including, End.
  MFOUTLINE Walker = Start;
  while (Walker != End) {
    PointAt(Walker)->Direction = Direction;
    Walker = NextPointAfter(Walker);
  }

  // End keeps its own Direction; only its record of the incoming direction
  // is updated to match the segment that now leads into it.
  PointAt(End)->PreviousDirection = Direction;
} /* ChangeDirection */

◆ CharNormalizeOutline()

void tesseract::CharNormalizeOutline	(	MFOUTLINE	Outline,
		const DENORM &	cn_denorm
	)

This routine normalizes each point in Outline by translating it to the specified center and scaling it anisotropically according to the given scale factors.

Parameters

Outline	outline to be character normalized
cn_denorm	DENORM whose LocalNormTransform() is applied to each point of the outline

Definition at line 298 of file mfoutline.cpp.

                                                                      {
  // Nothing to do for an empty outline.
  if (Outline == NIL_LIST) {
    return;
  }

  // Visit every point once; the walk stops when NextPointAfter() brings us
  // back to the point we started from.
  MFOUTLINE Walker = Outline;
  do {
    MFEDGEPT *Pt = PointAt(Walker);
    FCOORD pos(Pt->Point.x, Pt->Point.y);
    cn_denorm.LocalNormTransform(pos, &pos);
    // UINT8_MAX / 2 is an integer division; the re-centred coordinate is
    // then scaled by MF_SCALE_FACTOR.
    Pt->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
    Pt->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR;

    Walker = NextPointAfter(Walker);
  } while (Walker != Outline);
} /* CharNormalizeOutline */

◆ check_path_legal()

ScrollView::Color tesseract::check_path_legal ( CRACKEDGE * start )

Definition at line 67 of file edgloop.cpp.

  {
  constexpr ERRCODE ED_ILLEGAL_SUM("Illegal sum of chain codes");

  int32_t length = 0;                    // edges visited so far
  int32_t chainsum = 0;                  // running sum of wrapped direction changes
  CRACKEDGE *edgept = start;             // edge currently being examined
  int lastchain = edgept->prev->stepdir; // direction of the edge before start

  // Walk the loop, giving up once kMaxOutlineLength edges have been seen.
  do {
    ++length;
    if (edgept->stepdir != lastchain) {
      int chaindiff = edgept->stepdir - lastchain;
      // Wrap the difference into [-2, 2].
      if (chaindiff > 2) {
        chaindiff -= 4;
      } else if (chaindiff < -2) {
        chaindiff += 4;
      }
      chainsum += chaindiff;
      lastchain = edgept->stepdir;
    }
    edgept = edgept->next;
  } while (edgept != start && length < C_OUTLINE::kMaxOutlineLength);

  // Classify failures in priority order: walk did not return to start,
  // loop too short, then a direction-change sum other than +4 or -4.
  if (edgept != start) {
    return ScrollView::YELLOW;
  }
  if (length < MINEDGELENGTH) {
    return ScrollView::MAGENTA;
  }
  if (chainsum != 4 && chainsum != -4) {
    ED_ILLEGAL_SUM.error("check_path_legal", TESSLOG, "chainsum=%d", chainsum);
    return ScrollView::GREEN;
  }

  // Legal loop: the sign of the sum selects the colour.
  return chainsum < 0 ? ScrollView::BLUE : ScrollView::RED;
}

◆ check_pitch_sync()

double tesseract::check_pitch_sync	(	BLOBNBOX_IT *	blob_it,
		int16_t	blob_count,
		int16_t	pitch,
		int16_t	pitch_error,
		STATS *	projection,
		FPSEGPT_LIST *	seg_list
	)

Definition at line 138 of file pitsync1.cpp.

  {
  // Builds a lattice of candidate segmentation points (FPSEGPT), one list
  // per pitch-wide region, working left to right across the word. While
  // building it remembers the cheapest terminal point (best_end). It then
  // follows previous() links back from best_end, extracting each chosen
  // point from the lattice into seg_list, and frees the rest of the lattice.
  // The return value is squares() - sum()^2 / best_region_index, taken from
  // the last point placed in seg_list.
  int16_t x;          // current coord
  int16_t min_index;  // blob number
  int16_t max_index;  // blob number
  int16_t left_edge;  // of word
  int16_t right_edge; // of word
  int16_t right_max;  // max allowed x
  int16_t min_x;      // in this region
  int16_t max_x;
  int16_t region_index;
  int16_t best_region_index = 0; // for best result
  int16_t offset;                // dist to legal area
  int16_t left_best_x;           // edge of good region
  int16_t right_best_x;          // right edge
  TBOX min_box;                  // bounding box
  TBOX max_box;                  // bounding box
  TBOX next_box;                 // box of next blob
  FPSEGPT *segpt;                // segment point
  FPSEGPT_LIST *segpts;          // points in a segment
  double best_cost;              // best path
  double mean_sum;               // computes result
  FPSEGPT *best_end;             // end of best path
  BLOBNBOX_IT min_it;            // copy iterator
  BLOBNBOX_IT max_it;            // copy iterator
  FPSEGPT_IT segpt_it;           // iterator
                                 // output segments
  FPSEGPT_IT outseg_it = seg_list;
  FPSEGPT_LIST_CLIST lattice; // list of lists
                              // region iterator
  FPSEGPT_LIST_C_IT lattice_it = &lattice;
 
  //      tprintf("Computing sync on word of %d blobs with pitch %d\n",
  //              blob_count, pitch);
  //      if (blob_count==8 && pitch==27)
  //              projection->print(stdout,true);
  if (pitch < 3) {
    pitch = 3; // nothing ludicrous
  }
  // Cap pitch_error at (pitch - 3) / 2.
  if ((pitch - 3) / 2 < pitch_error) {
    pitch_error = (pitch - 3) / 2;
  }
  min_it = *blob_it;
  min_box = box_next(&min_it); // get box
  //      if (blob_count==8 && pitch==27)
  //              tprintf("1st box at (%d,%d)->(%d,%d)\n",
  //                      min_box.left(),min_box.bottom(),
  //                      min_box.right(),min_box.top());
  // left of word
  left_edge = min_box.left() + pitch_error;
  // Step through the remaining blob_count - 1 boxes so that min_box ends up
  // holding the last one, whose right side becomes right_edge.
  for (min_index = 1; min_index < blob_count; min_index++) {
    min_box = box_next(&min_it);
    //              if (blob_count==8 && pitch==27)
    //                      tprintf("Box at (%d,%d)->(%d,%d)\n",
    //                              min_box.left(),min_box.bottom(),
    //                              min_box.right(),min_box.top());
  }
  right_edge = min_box.right(); // end of word
  max_x = left_edge;
  // min permissible
  min_x = max_x - pitch + pitch_error * 2 + 1;
  right_max = right_edge + pitch - pitch_error - 1;
  // Region 0: one FPSEGPT for every x in [min_x, max_x], built with the
  // single-argument constructor.
  segpts = new FPSEGPT_LIST; // list of points
  segpt_it.set_to_list(segpts);
  for (x = min_x; x <= max_x; x++) {
    segpt = new FPSEGPT(x); // make a new one
                            // put in list
    segpt_it.add_after_then_move(segpt);
  }
  // first segment
  lattice_it.add_before_then_move(segpts);
  min_index = 0;
  region_index = 1;
  best_cost = FLT_MAX;
  best_end = nullptr;
  // Restart from the first blob for the main pass.
  min_it = *blob_it;
  min_box = box_next(&min_it); // first box
  // One iteration per region: slide the [min_x, max_x] window right and
  // create the points of this region, each linked to the previous region's
  // list (lattice_it.data()).
  do {
    left_best_x = -1;
    right_best_x = -1;
    segpts = new FPSEGPT_LIST; // list of points
    segpt_it.set_to_list(segpts);
    min_x += pitch - pitch_error; // next limits
    max_x += pitch + pitch_error;
    // Skip blobs that lie entirely to the left of this region.
    while (min_box.right() < min_x && min_index < blob_count) {
      min_index++;
      min_box = box_next(&min_it);
    }
    max_it = min_it;
    max_index = min_index;
    max_box = min_box;
    next_box = box_next(&max_it);
    for (x = min_x; x <= max_x && x <= right_max; x++) {
      // Keep max_box as the blob containing or just left of x.
      while (x < right_edge && max_index < blob_count && x > max_box.right()) {
        max_index++;
        max_box = next_box;
        next_box = box_next(&max_it);
      }
      // First branch: x is near a blob edge, at or beyond the word end, at
      // or beyond the next blob's left, or more than pitch * pitsync_joined_edge
      // from both sides of max_box. The point is built with 'false' as its
      // second argument. Otherwise the point is built with 'true' and the
      // projection pile count as its offset.
      if (x <= max_box.left() + pitch_error || x >= max_box.right() - pitch_error ||
          x >= right_edge || (max_index < blob_count - 1 && x >= next_box.left()) ||
          (x - max_box.left() > pitch * pitsync_joined_edge &&
           max_box.right() - x > pitch * pitsync_joined_edge)) {
        //                      || projection->local_min(x))
        if (x - max_box.left() > 0 && x - max_box.left() <= pitch_error) {
          // dist to real break
          offset = x - max_box.left();
        } else if (max_box.right() - x > 0 && max_box.right() - x <= pitch_error &&
                   (max_index >= blob_count - 1 || x < next_box.left())) {
          offset = max_box.right() - x;
        } else {
          offset = 0;
        }
        //                              offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
        segpt = new FPSEGPT(x, false, offset, region_index, pitch, pitch_error, lattice_it.data());
      } else {
        offset = projection->pile_count(x);
        segpt = new FPSEGPT(x, true, offset, region_index, pitch, pitch_error, lattice_it.data());
      }
      // Only points that found a predecessor are kept.
      if (segpt->previous() != nullptr) {
        segpt_it.add_after_then_move(segpt);
        if (x >= right_edge - pitch_error) {
          segpt->terminal = true; // no more wanted
          if (segpt->cost_function() < best_cost) {
            best_cost = segpt->cost_function();
            // find least
            best_end = segpt;
            best_region_index = region_index;
            left_best_x = x;
            right_best_x = x;
          } else if (segpt->cost_function() == best_cost && right_best_x == x - 1) {
            // Extend a run of adjacent x positions that tie on cost.
            right_best_x = x;
          }
        }
      } else {
        delete segpt; // no good
      }
    }
    if (segpts->empty()) {
      if (best_end != nullptr) {
        break; // already found one
      }
      // No usable point in this region and no answer yet: have
      // make_illegal_segment fill segpts instead.
      make_illegal_segment(lattice_it.data(), min_box, min_it, region_index, pitch, pitch_error,
                           segpts);
    } else {
      // If several adjacent positions tied for best, move best_end to the
      // point at the middle of that run.
      if (right_best_x > left_best_x + 1) {
        left_best_x = (left_best_x + right_best_x + 1) / 2;
        for (segpt_it.mark_cycle_pt();
             !segpt_it.cycled_list() && segpt_it.data()->position() != left_best_x;
             segpt_it.forward()) {
          ;
        }
        if (segpt_it.data()->position() == left_best_x) {
          // middle of region
          best_end = segpt_it.data();
        }
      }
    }
    // new segment
    lattice_it.add_before_then_move(segpts);
    region_index++;
  } while (min_x < right_edge);
  ASSERT_HOST(best_end != nullptr); // must always find some
 
  // Trace the best path back through the lattice: in each region list find
  // the current best_end, move it to the output list, then continue from
  // its previous().
  for (lattice_it.mark_cycle_pt(); !lattice_it.cycled_list(); lattice_it.forward()) {
    segpts = lattice_it.data();
    segpt_it.set_to_list(segpts);
    //              if (blob_count==8 && pitch==27)
    //              {
    //                      for
    //                      (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
    //                      {
    //                              segpt=segpt_it.data();
    //                              tprintf("At %d, (%x) cost=%g, m=%g, sq=%g,
    //                              pred=%x\n",
    //                                      segpt->position(),segpt,segpt->cost_function(),
    //                                      segpt->sum(),segpt->squares(),segpt->previous());
    //                      }
    //                      tprintf("\n");
    //              }
    for (segpt_it.mark_cycle_pt(); !segpt_it.cycled_list() && segpt_it.data() != best_end;
         segpt_it.forward()) {
      ;
    }
    if (segpt_it.data() == best_end) {
      // save good one
      segpt = segpt_it.extract();
      outseg_it.add_before_then_move(segpt);
      best_end = segpt->previous();
    }
  }
  // The trace must have consumed the whole chain and produced output.
  ASSERT_HOST(best_end == nullptr);
  ASSERT_HOST(!outseg_it.empty());
  outseg_it.move_to_last();
  mean_sum = outseg_it.data()->sum();
  // best_region_index was set alongside best_end, which is asserted non-null
  // above.
  mean_sum = mean_sum * mean_sum / best_region_index;
  if (outseg_it.data()->squares() - mean_sum < 0) {
    tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", outseg_it.data()->squares(),
            outseg_it.data()->sum(), best_region_index);
  }
  lattice.deep_clear(); // shift the lot
  return outseg_it.data()->squares() - mean_sum;
}

◆ check_pitch_sync2()

double tesseract::check_pitch_sync2	(	BLOBNBOX_IT *	blob_it,
		int16_t	blob_count,
		int16_t	pitch,
		int16_t	pitch_error,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	projection_scale,
		int16_t &	occupation_count,
		FPSEGPT_LIST *	seg_list,
		int16_t	start,
		int16_t	end
	)

Definition at line 292 of file pithsync.cpp.

  {
  // Trims [projection_left, projection_right] inwards over columns whose
  // pile_count is zero to get left_edge and right_edge. When
  // pitsync_linear_version >= 4 the work is handed to check_pitch_sync3.
  // Otherwise an array of cut points covering
  // [left_edge - pitch, right_edge + pitch] is filled, the best terminal cut
  // is chosen, and the chain of previous() cuts from it is copied into
  // seg_list. occupation_count starts at -1 and is incremented for each cut
  // in the chain whose preceding window holds a non-zero pile_count.
  // The return value is squares() - sum()^2 / best_count, taken from the
  // last element of seg_list.
  bool faking;                  // illegal cut pt
  bool mid_cut;                 // cheap cut pt.
  int16_t x;                    // current coord
  int16_t blob_index;           // blob number
  int16_t left_edge;            // of word
  int16_t right_edge;           // of word
  int16_t array_origin;         // x coord of array
  int16_t offset;               // dist to legal area
  int16_t zero_count;           // projection zero
  int16_t best_left_x = 0;      // for equals
  int16_t best_right_x = 0;     // right edge
  TBOX this_box;                // bounding box
  TBOX next_box;                // box of next blob
  FPSEGPT *segpt;               // segment point
  double best_cost;             // best path
  double mean_sum;              // computes result
  FPCUTPT *best_end;            // end of best path
  int16_t best_fake;            // best fake level
  int16_t best_count;           // no of cuts
  BLOBNBOX_IT this_it;          // copy iterator
  FPSEGPT_IT seg_it = seg_list; // output iterator
 
  //      tprintf("Computing sync on word of %d blobs with pitch %d\n",
  //              blob_count, pitch);
  //      if (blob_count==8 && pitch==27)
  //              projection->print(stdout,true);
  zero_count = 0;
  if (pitch < 3) {
    pitch = 3; // nothing ludicrous
  }
  // Cap pitch_error at (pitch - 3) / 2.
  if ((pitch - 3) / 2 < pitch_error) {
    pitch_error = (pitch - 3) / 2;
  }
  // this_it and this_box are assigned again before the main scan below; the
  // values set here are not read in between.
  this_it = *blob_it;
  this_box = box_next(&this_it); // get box
  //      left_edge=this_box.left(); //left of word right_edge=this_box.right();
  //      for (blob_index=1;blob_index<blob_count;blob_index++)
  //      {
  //              this_box=box_next(&this_it);
  //              if (this_box.right()>right_edge)
  //                      right_edge=this_box.right();
  //      }
  // Move left_edge right past empty projection columns.
  for (left_edge = projection_left;
       projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
    ;
  }
  // Move right_edge left past empty projection columns.
  for (right_edge = projection_right;
       projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
    ;
  }
  ASSERT_HOST(right_edge >= left_edge);
  if (pitsync_linear_version >= 4) {
    // Note: the untrimmed projection_left/projection_right are passed on,
    // together with the clamped pitch and pitch_error.
    return check_pitch_sync3(projection_left, projection_right, zero_count, pitch, pitch_error,
                             projection, projection_scale, occupation_count, seg_list, start, end);
  }
  // cutpts[i] describes x == array_origin + i.
  array_origin = left_edge - pitch;
  // array of points
  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
  for (x = array_origin; x < left_edge; x++) {
    // free cuts
    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
  }
  // x carries on from left_edge here; offset grows from 0 to pitch_error.
  for (offset = 0; offset <= pitch_error; offset++, x++) {
    // not quite free
    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
                                   offset);
  }
 
  this_it = *blob_it;
  best_cost = FLT_MAX;
  best_end = nullptr;
  this_box = box_next(&this_it); // first box
  next_box = box_next(&this_it); // second box
  blob_index = 1;
  // Main scan: classify each x relative to this_box/next_box to pick its
  // offset and its faking/mid_cut flags, then let assign() evaluate the cut.
  while (x < right_edge - pitch_error) {
    // Advance to the next blob once x is more than pitch_error past this one.
    if (x > this_box.right() + pitch_error && blob_index < blob_count) {
      this_box = next_box;
      next_box = box_next(&this_it);
      blob_index++;
    }
    faking = false;
    mid_cut = false;
    if (x <= this_box.left()) {
      offset = 0;
    } else if (x <= this_box.left() + pitch_error) {
      offset = x - this_box.left();
    } else if (x >= this_box.right()) {
      offset = 0;
    } else if (x >= next_box.left() && blob_index < blob_count) {
      // x is inside both boxes: use the smaller of the two distances.
      offset = x - next_box.left();
      if (this_box.right() - x < offset) {
        offset = this_box.right() - x;
      }
    } else if (x >= this_box.right() - pitch_error) {
      offset = this_box.right() - x;
    } else if (x - this_box.left() > pitch * pitsync_joined_edge &&
               this_box.right() - x > pitch * pitsync_joined_edge) {
      mid_cut = true;
      offset = 0;
    } else {
      faking = true;
      offset = projection->pile_count(x);
    }
    cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
                                    projection, projection_scale, zero_count, pitch, pitch_error);
    x++;
  }
 
  best_fake = INT16_MAX;
  best_cost = INT32_MAX;
  best_count = INT16_MAX;
  // Tail scan: every remaining x up to right_edge + pitch is a terminal cut.
  // A candidate is considered when index() + fake_count does not exceed the
  // best so far; it wins on lower fake_count, then on lower cost_function().
  // A run of adjacent x that tie exactly is tracked in
  // best_left_x..best_right_x.
  while (x < right_edge + pitch) {
    offset = x < right_edge ? right_edge - x : 0;
    cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
                                    projection_scale, zero_count, pitch, pitch_error);
    cutpts[x - array_origin].terminal = true;
    if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
        best_count + best_fake) {
      if (cutpts[x - array_origin].fake_count < best_fake ||
          (cutpts[x - array_origin].fake_count == best_fake &&
           cutpts[x - array_origin].cost_function() < best_cost)) {
        best_fake = cutpts[x - array_origin].fake_count;
        best_cost = cutpts[x - array_origin].cost_function();
        best_left_x = x;
        best_right_x = x;
        best_count = cutpts[x - array_origin].index();
      } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
                 cutpts[x - array_origin].cost_function() == best_cost) {
        // exactly equal
        best_right_x = x;
      }
    }
    x++;
  }
  ASSERT_HOST(best_fake < INT16_MAX);
 
  // Use the cut at the middle of the tied run.
  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
  // Debug dump, printed only when this_box matches the textord_test_x/y
  // coordinates.
  if (this_box.right() == textord_test_x && this_box.top() == textord_test_y) {
    for (x = left_edge - pitch; x < right_edge + pitch; x++) {
      tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", x, cutpts[x - array_origin].cost_function(),
              cutpts[x - array_origin].sum(), cutpts[x - array_origin].squares(),
              cutpts[x - array_origin].previous()->position());
    }
  }
  occupation_count = -1;
  // Follow previous() links back from best_end, copying each cut into
  // seg_list.
  do {
    // Look for a non-empty projection column in the window
    // [position - pitch + pitch_error, position - pitch_error).
    for (x = best_end->position() - pitch + pitch_error;
         x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
      ;
    }
    if (x < best_end->position() - pitch_error) {
      occupation_count++;
    }
    // copy it
    segpt = new FPSEGPT(best_end);
    seg_it.add_before_then_move(segpt);
    best_end = best_end->previous();
  } while (best_end != nullptr);
  seg_it.move_to_last();
  mean_sum = seg_it.data()->sum();
  mean_sum = mean_sum * mean_sum / best_count;
  if (seg_it.data()->squares() - mean_sum < 0) {
    tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
            seg_it.data()->sum(), best_count);
  }
  //      tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
  //              blob_count,pitch,seg_it.data()->squares()-mean_sum,
  //              occupation_count);
  return seg_it.data()->squares() - mean_sum;
}

◆ check_pitch_sync3()

double tesseract::check_pitch_sync3	(	int16_t	projection_left,
		int16_t	projection_right,
		int16_t	zero_count,
		int16_t	pitch,
		int16_t	pitch_error,
		STATS *	projection,
		float	projection_scale,
		int16_t &	occupation_count,
		FPSEGPT_LIST *	seg_list,
		int16_t	start,
		int16_t	end
	)

Definition at line 484 of file pithsync.cpp.

  {
  // Projection-only variant: no blob boxes are consulted. Cut points are
  // classified from the projection alone, using distance to the nearest
  // column with pile_count <= zero_count and a sliding window of local_min()
  // results. As in check_pitch_sync2, the best terminal cut is chosen and
  // its previous() chain is copied into seg_list. occupation_count starts at
  // -1 and is incremented per chained cut whose preceding window is not
  // empty.
  // The return value is squares() - sum()^2 / best_count, taken from the
  // last element of seg_list.
  bool faking;                  // illegal cut pt
  bool mid_cut;                 // cheap cut pt.
  int16_t left_edge;            // of word
  int16_t right_edge;           // of word
  int16_t x;                    // current coord
  int16_t array_origin;         // x coord of array
  int16_t offset;               // dist to legal area
  int16_t projection_offset;    // from scaled projection
  int16_t prev_zero;            // previous zero dist
  int16_t next_zero;            // next zero dist
  int16_t zero_offset;          // scan window
  int16_t best_left_x = 0;      // for equals
  int16_t best_right_x = 0;     // right edge
  FPSEGPT *segpt;               // segment point
  int minindex;                 // next input position
  int test_index;               // index to mins
  double best_cost;             // best path
  double mean_sum;              // computes result
  FPCUTPT *best_end;            // end of best path
  int16_t best_fake;            // best fake level
  int16_t best_count;           // no of cuts
  FPSEGPT_IT seg_it = seg_list; // output iterator
 
  // From here on 'end' holds (end - start) modulo pitch; it is compared
  // against (x - projection_left - start) % pitch further down.
  // NOTE(review): this modulo runs before pitch is clamped to >= 3 below, so
  // pitch == 0 would be a modulo by zero here. check_pitch_sync2 clamps
  // pitch before calling; confirm no other caller can pass 0.
  end = (end - start) % pitch;
  if (pitch < 3) {
    pitch = 3; // nothing ludicrous
  }
  // Cap pitch_error at (pitch - 3) / 2.
  if ((pitch - 3) / 2 < pitch_error) {
    pitch_error = (pitch - 3) / 2;
  }
  // min dist of zero
  zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
  // Move left_edge right past empty projection columns.
  for (left_edge = projection_left;
       projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
    ;
  }
  // Move right_edge left past empty projection columns.
  for (right_edge = projection_right;
       projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
    ;
  }
  // cutpts[i] describes x == array_origin + i.
  array_origin = left_edge - pitch;
  // array of points
  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
  // local min results
  // Used as a circular buffer of 2 * pitch_error + 1 entries; minindex is
  // the next slot to write.
  std::vector<bool> mins(pitch_error * 2 + 1);
  for (x = array_origin; x < left_edge; x++) {
    // free cuts
    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
  }
  prev_zero = left_edge - 1;
  // x carries on from left_edge here; offset grows from 0 to pitch_error.
  for (offset = 0; offset <= pitch_error; offset++, x++) {
    // not quite free
    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
                                   offset);
  }
 
  best_cost = FLT_MAX;
  best_end = nullptr;
  // Pre-fill the first 2 * pitch_error slots of mins with local_min() for
  // x - pitch_error .. x + pitch_error - 1; the main loop adds the
  // x + pitch_error entry at the top of each iteration.
  for (offset = -pitch_error, minindex = 0; offset < pitch_error; offset++, minindex++) {
    mins[minindex] = projection->local_min(x + offset);
  }
  // Find the nearest column at or right of x (within zero_offset) whose pile
  // count is <= zero_count; if none, next_zero stays at x + zero_offset + 1.
  next_zero = x + zero_offset + 1;
  for (offset = next_zero - 1; offset >= x; offset--) {
    if (projection->pile_count(offset) <= zero_count) {
      next_zero = offset;
      break;
    }
  }
  while (x < right_edge - pitch_error) {
    mins[minindex] = projection->local_min(x + pitch_error);
    minindex++;
    if (minindex > pitch_error * 2) {
      minindex = 0; // wrap the circular buffer
    }
    faking = false;
    mid_cut = false;
    offset = 0;
    if (projection->pile_count(x) <= zero_count) {
      prev_zero = x;
    } else {
      // Distance to the nearest low column on either side; offset ends at
      // pitch_error + 1 when there is none within pitch_error.
      for (offset = 1; offset <= pitch_error; offset++) {
        if (projection->pile_count(x + offset) <= zero_count ||
            projection->pile_count(x - offset) <= zero_count) {
          break;
        }
      }
    }
    if (offset > pitch_error) {
      // No low column nearby. If x is more than zero_offset from the low
      // columns tracked on both sides, search the mins window outwards for
      // the nearest recorded local minimum.
      if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
        for (offset = 0; offset <= pitch_error; offset++) {
          test_index = minindex + pitch_error + offset;
          if (test_index > pitch_error * 2) {
            test_index -= pitch_error * 2 + 1;
          }
          if (mins[test_index]) {
            break;
          }
          test_index = minindex + pitch_error - offset;
          if (test_index > pitch_error * 2) {
            test_index -= pitch_error * 2 + 1;
          }
          if (mins[test_index]) {
            break;
          }
        }
      }
      if (offset > pitch_error) {
        // Nothing suitable found: fall back to the pile count and flag the
        // cut as faked.
        offset = projection->pile_count(x);
        faking = true;
      } else {
        // Use the larger of the distance to the local minimum and the
        // scaled pile count.
        projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale);
        if (projection_offset > offset) {
          offset = projection_offset;
        }
        mid_cut = true;
      }
    }
    // Call assign() when start and end are both 0, when
    // textord_fast_pitch_test is off, or when x's phase is within 'end';
    // otherwise call assign_cheap().
    if ((start == 0 && end == 0) || !textord_fast_pitch_test ||
        (x - projection_left - start) % pitch <= end) {
      cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
                                      projection, projection_scale, zero_count, pitch, pitch_error);
    } else {
      cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, faking, mid_cut, offset,
                                            projection, projection_scale, zero_count, pitch,
                                            pitch_error);
    }
    x++;
    // Update next_zero for the new x.
    if (next_zero < x || next_zero == x + zero_offset) {
      next_zero = x + zero_offset + 1;
    }
    if (projection->pile_count(x + zero_offset) <= zero_count) {
      next_zero = x + zero_offset;
    }
  }
 
  best_fake = INT16_MAX;
  best_cost = INT32_MAX;
  best_count = INT16_MAX;
  // Tail scan: every remaining x up to right_edge + pitch is a terminal cut.
  // A candidate is considered when index() + fake_count does not exceed the
  // best so far; it wins on lower fake_count, then on lower cost_function().
  // A run of adjacent x that tie exactly is tracked in
  // best_left_x..best_right_x.
  while (x < right_edge + pitch) {
    offset = x < right_edge ? right_edge - x : 0;
    cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
                                    projection_scale, zero_count, pitch, pitch_error);
    cutpts[x - array_origin].terminal = true;
    if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
        best_count + best_fake) {
      if (cutpts[x - array_origin].fake_count < best_fake ||
          (cutpts[x - array_origin].fake_count == best_fake &&
           cutpts[x - array_origin].cost_function() < best_cost)) {
        best_fake = cutpts[x - array_origin].fake_count;
        best_cost = cutpts[x - array_origin].cost_function();
        best_left_x = x;
        best_right_x = x;
        best_count = cutpts[x - array_origin].index();
      } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
                 cutpts[x - array_origin].cost_function() == best_cost) {
        // exactly equal
        best_right_x = x;
      }
    }
    x++;
  }
  ASSERT_HOST(best_fake < INT16_MAX);
 
  // Use the cut at the middle of the tied run.
  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
  //      for (x=left_edge-pitch;x<right_edge+pitch;x++)
  //      {
  //              tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
  //                      x,cutpts[x-array_origin].cost_function(),
  //                      cutpts[x-array_origin].sum(),
  //                      cutpts[x-array_origin].squares(),
  //                      cutpts[x-array_origin].previous()->position());
  //      }
  occupation_count = -1;
  // Follow previous() links back from best_end, copying each cut into
  // seg_list.
  do {
    // Look for a non-empty projection column in the window
    // [position - pitch + pitch_error, position - pitch_error).
    for (x = best_end->position() - pitch + pitch_error;
         x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
    }
    if (x < best_end->position() - pitch_error) {
      occupation_count++;
    }
    // copy it
    segpt = new FPSEGPT(best_end);
    seg_it.add_before_then_move(segpt);
    best_end = best_end->previous();
  } while (best_end != nullptr);
  seg_it.move_to_last();
  mean_sum = seg_it.data()->sum();
  mean_sum = mean_sum * mean_sum / best_count;
  if (seg_it.data()->squares() - mean_sum < 0) {
    tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
            seg_it.data()->sum(), best_count);
  }
  return seg_it.data()->squares() - mean_sum;
}

◆ chomp_string()

void tesseract::chomp_string ( char * str )

inline

Definition at line 91 of file helpers.h.

                                    {
  // Scan backwards from the final character, overwriting each trailing
  // '\n' or '\r' with a terminator. Stop at the first other character or
  // when the start of the string is passed (empty strings are left alone).
  for (int idx = static_cast<int>(strlen(str)) - 1; idx >= 0; --idx) {
    const char ch = str[idx];
    if (ch != '\n' && ch != '\r') {
      break;
    }
    str[idx] = '\0';
  }
}

◆ choose_partition()

int tesseract::choose_partition	(	float	diff,
		float	partdiffs[],
		int	lastpart,
		float	jumplimit,
		float *	drift,
		float *	lastdelta,
		int *	partcount
	)

Definition at line 910 of file oldbasel.cpp.

  {
  // Picks the partition for a point whose offset is 'diff'. The caller's
  // drift estimate, last delta and partition count are updated through the
  // pointer arguments. Returns the chosen partition index.

  // First point: seed partition 0 with this diff and reset the tracking.
  if (lastpart < 0) {
    partdiffs[0] = diff;
    lastpart = 0;
    *drift = 0.0f;
    *lastdelta = 0.0f;
  }

  // Distance from the previously used partition, after removing drift.
  float delta = diff - partdiffs[lastpart] - *drift;
  if (textord_oldbl_debug) {
    tprintf("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
  }

  int bestpart = lastpart; // stay on the last partition unless the jump is large
  if (ABS(delta) > jumplimit / 2) {
    // Large jump: find the existing partition closest to this point.
    bestpart = 0;
    float bestdelta = diff - partdiffs[0] - *drift;
    for (int part = 1; part < *partcount; ++part) {
      const float candidate = diff - partdiffs[part] - *drift;
      if (ABS(candidate) < ABS(bestdelta)) {
        bestdelta = candidate;
        bestpart = part;
      }
    }
    delta = bestdelta;
    // Even the closest one is too far away: open a new partition if there
    // is still room under MAXPARTS.
    if (ABS(bestdelta) > jumplimit && *partcount < MAXPARTS) {
      bestpart = (*partcount)++;
      partdiffs[bestpart] = diff - *drift;
      delta = 0.0f;
    }
  }

  // Update the drift only when staying on the same partition and either the
  // change in delta or delta itself is below half the jump limit.
  if (bestpart == lastpart &&
      (ABS(delta - *lastdelta) < jumplimit / 2 || ABS(delta) < jumplimit / 2)) {
    // NOTE(review): the divisor is 3 although the weights sum to 4 — this
    // adds delta / 3 to the drift rather than averaging; confirm intent.
    *drift = (3 * *drift + delta) / 3;
  }
  *lastdelta = delta;

  if (textord_oldbl_debug) {
    tprintf("P=%d\n", bestpart);
  }

  return bestpart;
}

◆ CircBucketFor()

uint8_t tesseract::CircBucketFor	(	float	param,
		float	offset,
		int	num_buckets
	)

Returns a quantized bucket for the given circular param shifted by offset, notionally (param + offset) * num_buckets, but modded and casted to the appropriate type.

Definition at line 399 of file intproto.cpp.

                                                                  {
  // Map and round like the clipping variants, but reduce the result with
  // Modulo(..., num_buckets) instead of clipping it.
  const int rounded = IntCastRounded(MapParam(param, offset, num_buckets));
  const int wrapped = Modulo(rounded, num_buckets);
  return static_cast<uint8_t>(wrapped);
} /* CircBucketFor */

◆ ClassicProgressTester()

void tesseract::ClassicProgressTester	(	const char *	imgname,
		const char *	tessdatadir,
		const char *	lang
	)

Definition at line 85 of file progress_test.cc.

                                                                                           {
  // Test helper: initialises the API for `lang` from `tessdatadir`, runs
  // Recognize() on `imgname` with a mocked progress sink, and checks the
  // progress/cancel callbacks that the recognizer makes.
  using ::testing::_;
  using ::testing::AllOf;
  using ::testing::AtLeast;
  using ::testing::DoAll;
  using ::testing::Gt;
  using ::testing::Le;
  using ::testing::Return;
  using ::testing::SaveArg;
 
  auto api = std::make_unique<tesseract::TessBaseAPI>();
  // The assertion requires Init() to return a false-like value.
  ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
  Image image = pixRead(imgname);
  ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
  api->SetImage(image);
 
  ClassicMockProgressSink progressSink;
 
  // Last progress value reported so far; -1 means "nothing reported yet".
  int currentProgress = -1;
  // Gt<int &> holds currentProgress by reference, so every call must report a
  // value strictly greater than the previously saved one and at most 100.
  // SaveArg then records the new value for the next comparison.
  EXPECT_CALL(progressSink, classicProgress(AllOf(Gt<int &>(currentProgress), Le(100))))
      .Times(AtLeast(5))
      .WillRepeatedly(DoAll(SaveArg<0>(&currentProgress), Return(false)));
  // The cancel callback must be polled at least five times and always answers false.
  EXPECT_CALL(progressSink, cancel(_)).Times(AtLeast(5)).WillRepeatedly(Return(false));
 
  EXPECT_EQ(api->Recognize(&progressSink.monitor), false);
  EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
 
  // Release the API state and the image read above.
  api->End();
  image.destroy();
}

◆ cleanup_rows_making()

void tesseract::cleanup_rows_making	(	ICOORD	page_tr,
		TO_BLOCK *	block,
		float	gradient,
		FCOORD	rotation,
		int32_t	block_edge,
		bool	testing_on
	)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 563 of file makerow.cpp.

  {
  // Refits the rows of `block`, then re-assigns blobs to the surviving rows
  // in three passes (1, 2, 3), adding further blob lists before each later pass.
  // The order of the list operations and passes below is significant.
  // iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows();
 
#ifndef GRAPHICS_DISABLED
  // Create the debug window on demand when row display is requested.
  if (textord_show_parallel_rows && testing_on) {
    if (to_win == nullptr) {
      create_to_win(page_tr);
    }
  }
#endif
  // get row coords
  fit_parallel_rows(block, gradient, rotation, block_edge,
                    textord_show_parallel_rows && testing_on);
  delete_non_dropout_rows(block, gradient, rotation, block_edge,
                          textord_show_parallel_rows && testing_on);
  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
  // Re-seat both iterators, then add each row's blob list to block->blobs.
  blob_it.set_to_list(&block->blobs);
  row_it.set_to_list(block->get_rows());
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    blob_it.add_list_after(row_it.data()->blob_list());
  }
  // give blobs back
  assign_blobs_to_rows(block, &gradient, 1, false, false, false);
  // now new rows must be genuine
  blob_it.set_to_list(&block->blobs);
  blob_it.add_list_after(&block->large_blobs);
  assign_blobs_to_rows(block, &gradient, 2, true, true, false);
  // safe to use big ones now
  blob_it.set_to_list(&block->blobs);
  // throw all blobs in
  blob_it.add_list_after(&block->noise_blobs);
  blob_it.add_list_after(&block->small_blobs);
  assign_blobs_to_rows(block, &gradient, 3, false, false, false);
}

◆ CleanUpUnusedData()

TESS_COMMON_TRAINING_API void tesseract::CleanUpUnusedData ( LIST ProtoList )

Definition at line 529 of file commontraining.cpp.

                                       {
  // For every prototype in the list, free the three Elliptical arrays and
  // null the pointers so that they are not left dangling.
  iterate(ProtoList) {
    auto *proto = reinterpret_cast<PROTOTYPE *>(ProtoList->first_node());

    delete[] proto->Variance.Elliptical;
    proto->Variance.Elliptical = nullptr;

    delete[] proto->Magnitude.Elliptical;
    proto->Magnitude.Elliptical = nullptr;

    delete[] proto->Weight.Elliptical;
    proto->Weight.Elliptical = nullptr;
  }
}

◆ clear_fx_win()

void tesseract::clear_fx_win ( )

Definition at line 61 of file drawfx.cpp.

                    { // make features win
  // Wipe the feature window and select the pen used for the guide lines.
  fx_win->Clear();
  fx_win->Pen(64, 64, 64);

  // Two horizontal guides spanning -WERDWIDTH..WERDWIDTH: one at the baseline
  // offset and one kBlnXHeight above it.
  const auto lower_y = kBlnBaselineOffset;
  const auto upper_y = kBlnXHeight + kBlnBaselineOffset;
  fx_win->Line(-WERDWIDTH, lower_y, WERDWIDTH, lower_y);
  fx_win->Line(-WERDWIDTH, upper_y, WERDWIDTH, upper_y);
}

◆ ClearFeatureSpaceWindow()

TESS_API void tesseract::ClearFeatureSpaceWindow	(	NORM_METHOD	norm_method,
		ScrollView *	window
	)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

Definition at line 889 of file intproto.cpp.

                                                                          {
  window->Clear();
  window->Pen(ScrollView::GREY);

  // Draw the feature space limit rectangle.
  window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);

  // Any method other than baseline normalization gets a single box centred on
  // (INT_XCENTER, INT_YCENTER) with the configured radii.
  if (norm_method != baseline) {
    window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS,
                      INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
    return;
  }

  // Baseline normalization: four full-width horizontal guide lines.
  window->SetCursor(0, INT_DESCENDER);
  window->DrawTo(INT_MAX_X, INT_DESCENDER);

  window->SetCursor(0, INT_BASELINE);
  window->DrawTo(INT_MAX_X, INT_BASELINE);

  window->SetCursor(0, INT_XHEIGHT);
  window->DrawTo(INT_MAX_X, INT_XHEIGHT);

  window->SetCursor(0, INT_CAPHEIGHT);
  window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
}

◆ ClipToRange()

template<typename T >

T tesseract::ClipToRange	(	const T &	x,
		const T &	lower_bound,
		const T &	upper_bound
	)

inline

Definition at line 105 of file helpers.h.

                                                                             {
  // Below the range -> lower_bound; above the range -> upper_bound;
  // otherwise x itself. The lower-bound test is made first.
  return (x < lower_bound) ? lower_bound : ((x > upper_bound) ? upper_bound : x);
}

◆ ClipVector()

template<typename T >

void tesseract::ClipVector	(	int	n,
		T	lower,
		T	upper,
		T *	vec
	)

inline

Definition at line 251 of file functions.h.

                                                        {
  // Clamp each of the first n elements of vec into [lower, upper], in place.
  int pos = 0;
  while (pos < n) {
    T &element = vec[pos];
    element = ClipToRange(element, lower, upper);
    ++pos;
  }
}

◆ close_to_win()

void tesseract::close_to_win ( )

Definition at line 56 of file drawtord.cpp.

                    {
  // Nothing to refresh if the window was never created.
  if (to_win == nullptr) {
    return;
  }
  // to_win is leaked, but this enables the user to view the contents.
  to_win->Update();
}

◆ ClusterSamples()

TESS_API LIST tesseract::ClusterSamples	(	CLUSTERER *	Clusterer,
		CLUSTERCONFIG *	Config
	)

This routine first checks to see if the samples in this clusterer have already been clustered before; if so, it does not bother to recreate the cluster tree. It simply recomputes the prototypes based on the new Config info.

If the samples have not been clustered before, the samples in the KD tree are formed into a cluster tree and then the prototypes are computed from the cluster tree.

In either case this routine returns a pointer to a list of prototypes that best represent the samples given the constraints specified in Config.

Parameters

Clusterer	data struct containing samples to be clustered
Config	parameters which control clustering process

Returns: Pointer to a list of prototypes

Definition at line 1543 of file cluster.cpp.

                                                                 {
  // The cluster tree is built only once; a non-null Root means the samples
  // were clustered on an earlier call and the tree is reused.
  const bool tree_exists = Clusterer->Root != nullptr;
  if (!tree_exists) {
    CreateClusterTree(Clusterer);
  }

  // Drop any prototype list left over from a previous call.
  FreeProtoList(&Clusterer->ProtoList);
  Clusterer->ProtoList = NIL_LIST;

  // Compute prototypes for the given Config.
  ComputePrototypes(Clusterer, Config);

  // The protos no longer need their cluster pointers; clearing them makes it
  // safe to delete the clusterer afterwards. Iterate over a copy of the list
  // handle so that Clusterer->ProtoList itself is left untouched.
  LIST cursor = Clusterer->ProtoList;
  iterate(cursor) {
    auto *prototype = reinterpret_cast<PROTOTYPE *>(cursor->first_node());
    prototype->Cluster = nullptr;
  }

  return Clusterer->ProtoList;
} // ClusterSamples

◆ CodeInBinary()

void tesseract::CodeInBinary	(	int	n,
		int	nf,
		TFloat *	vec
	)

inline

Definition at line 259 of file functions.h.

                                                     {
  // Only act when there is at least one output bit and nf does not exceed n.
  if (!(nf > 0 && n >= nf)) {
    return;
  }

  // Locate the first position holding the largest value in vec[0..n).
  int best_index = 0;
  for (int pos = 1; pos < n; ++pos) {
    if (vec[pos] > vec[best_index]) {
      best_index = pos;
    }
  }

  // Overwrite vec[0..nf) with the binary digits of best_index, least
  // significant bit first.
  int bit = 1;
  for (int pos = 0; pos < nf; ++pos) {
    if (best_index & bit) {
      vec[pos] = 1.0;
    } else {
      vec[pos] = 0.0;
    }
    bit *= 2;
  }
}

◆ CodepointList()

std::string tesseract::CodepointList ( const std::vector< char32 > & str32 )

inline

Definition at line 22 of file normstrngs_test.h.

                                                               {
  std::stringstream result;
  int total_chars = str32.size();
  result << std::hex;
  for (int i = 0; i < total_chars; ++i) {
    result << "[" << str32[i] << "]";
  }
  return result.str();
}

◆ CombineLangModel()

TESS_UNICHARSET_TRAINING_API int tesseract::CombineLangModel	(	const UNICHARSET &	unicharset,
		const std::string &	script_dir,
		const std::string &	version_str,
		const std::string &	output_dir,
		const std::string &	lang,
		bool	pass_through_recoder,
		const std::vector< std::string > &	words,
		const std::vector< std::string > &	puncs,
		const std::vector< std::string > &	numbers,
		bool	lang_is_rtl,
		FileReader	reader,
		FileWriter	writer
	)

Definition at line 194 of file lang_model_helpers.cpp.

                                        {
  // Assembles a traineddata file in memory (unicharset, optional config,
  // recoder, optional DAWGs) and writes it out with the supplied writer.
  // Returns EXIT_SUCCESS, or EXIT_FAILURE on the error paths that return below.
  // Build the traineddata file.
  TessdataManager traineddata;
  if (!version_str.empty()) {
    // Append the caller's version text to the existing version string.
    traineddata.SetVersionString(traineddata.VersionString() + ":" + version_str);
  }
  // Unicharset and recoder.
  if (!WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
    tprintf("Error writing unicharset!!\n");
    return EXIT_FAILURE;
  } else {
    // NOTE(review): this message is printed whenever the unicharset was
    // written successfully, before the config file is even looked for -
    // its placement looks accidental; confirm the intent.
    tprintf("Config file is optional, continuing...\n");
  }
  // If there is a config file, read it and add to traineddata.
  std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config";
  std::string config_file = ReadFile(config_filename, reader);
  if (config_file.length() > 0) {
    traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0], config_file.length());
  }
  // Unlike the config file, the radical-stroke table is mandatory: empty data is fatal.
  std::string radical_filename = script_dir + "/radical-stroke.txt";
  std::string radical_data = ReadFile(radical_filename, reader);
  if (radical_data.empty()) {
    tprintf("Error reading radical code table %s\n", radical_filename.c_str());
    return EXIT_FAILURE;
  }
  if (!WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer, &radical_data,
                    &traineddata)) {
    // NOTE(review): a recoder failure is only logged; execution continues and
    // the function can still return EXIT_SUCCESS. Confirm this is deliberate.
    tprintf("Error writing recoder!!\n");
  }
  // DAWGs are only built when at least one of the word lists is non-empty.
  if (!words.empty() || !puncs.empty() || !numbers.empty()) {
    if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset, &traineddata)) {
      tprintf("Error during conversion of wordlists to DAWGs!!\n");
      return EXIT_FAILURE;
    }
  }
 
  // Traineddata file.
  std::vector<char> traineddata_data;
  traineddata.Serialize(&traineddata_data);
  if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
    tprintf("Error writing output traineddata file!!\n");
    return EXIT_FAILURE;
  }
  // Note: this message has no trailing newline.
  tprintf("Created %s/%s/%s.traineddata", output_dir.c_str(), lang.c_str(), lang.c_str());
  return EXIT_SUCCESS;
}

◆ complete_edge()

void tesseract::complete_edge	(	CRACKEDGE *	start,
		C_OUTLINE_IT *	outline_it
	)

Definition at line 38 of file edgloop.cpp.

                                             {
  // check length etc.
  const ScrollView::Color path_colour = check_path_legal(start);

  // An outline is only built when the check reports RED or BLUE.
  if (path_colour != ScrollView::RED && path_colour != ScrollView::BLUE) {
    return;
  }

  // Bounding box corners, filled in by loop_bounding_box.
  ICOORD botleft;
  ICOORD topright;
  const int16_t looplength = loop_bounding_box(start, botleft, topright);

  // Build the new outline and append it, leaving the iterator on it.
  C_OUTLINE *new_outline = new C_OUTLINE(start, botleft, topright, looplength);
  outline_it->add_after_then_move(new_outline);
}

◆ compute_block_pitch()

void tesseract::compute_block_pitch	(	TO_BLOCK *	block,
		FCOORD	rotation,
		int32_t	block_index,
		bool	testing_on
	)

Definition at line 293 of file topitch.cpp.

                                          {   // correct orientation
  // Bounding box of the underlying block, used only for the debug print.
  const TBOX block_box = block->block->pdblk.bounding_box();
  if (testing_on && textord_debug_pitch_test) {
    tprintf("Block %d at (%d,%d)->(%d,%d)\n", block_index, block_box.left(), block_box.bottom(),
            block_box.right(), block_box.top());
  }

  // Default spacing estimates, all derived from the block's xheight.
  block->min_space = static_cast<int32_t>(floor(block->xheight * textord_words_default_minspace));
  block->max_nonspace = static_cast<int32_t>(ceil(block->xheight * textord_words_default_nonspace));
  block->fixed_pitch = 0.0f;
  block->space_size = static_cast<float>(block->min_space);
  block->kern_size = static_cast<float>(block->max_nonspace);
  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
  block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;

  // A block without rows needs no per-row pitch work.
  if (block->get_rows()->empty()) {
    return;
  }

  ASSERT_HOST(block->xheight > 0);
  find_repeated_chars(block, textord_show_initial_words && testing_on);
#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_words && testing_on) {
    // overlap_picture_ops(true);
    ScrollView::Update();
  }
#endif
  compute_rows_pitch(block, block_index, textord_debug_pitch_test && testing_on);
}

◆ compute_dropout_distances()

void tesseract::compute_dropout_distances	(	int32_t *	occupation,
		int32_t *	thresholds,
		int32_t	line_count
	)

Definition at line 933 of file makerow.cpp.

  {
  // Overwrites thresholds[0..line_count) in place with signed distance values
  // derived from where occupation crosses the original thresholds.
  // Each original threshold is read (into prev_threshold, or in the loop
  // condition) before it is overwritten.
  int32_t line_index;     // of thresholds line
  int32_t distance;       // from prev dropout
  int32_t next_dist;      // to next dropout
  int32_t back_index;     // for back filling
  int32_t prev_threshold; // before overwrite
 
  // Start far negative: no crossing has been seen yet.
  distance = -line_count;
  line_index = 0;
  do {
    // Forward sweep: store a value that decreases by one per line, until a
    // line is reached whose occupation is at or above its threshold while
    // the line before it was below its (original) threshold.
    do {
      distance--;
      prev_threshold = thresholds[line_index];
      // distance from prev
      thresholds[line_index] = distance;
      line_index++;
    } while (line_index < line_count && (occupation[line_index] < thresholds[line_index] ||
                                         occupation[line_index - 1] >= prev_threshold));
    if (line_index < line_count) {
      // Back-fill: walking backwards from the line just before the crossing,
      // replace the stored values with 1, 2, 3, ... for as long as the new
      // value is smaller than the magnitude of the (shrinking) old one.
      back_index = line_index - 1;
      next_dist = 1;
      while (next_dist < -distance && back_index >= 0) {
        thresholds[back_index] = next_dist;
        back_index--;
        next_dist++;
        distance++;
      }
      // Restart the count for the next forward sweep.
      distance = 1;
    }
  } while (line_index < line_count);
}

◆ compute_fixed_pitch()

void tesseract::compute_fixed_pitch	(	ICOORD	page_tr,
		TO_BLOCK_LIST *	port_blocks,
		float	gradient,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 75 of file topitch.cpp.

                                          {          // correct orientation
  TO_BLOCK_IT block_it; // iterator over port_blocks
  int block_index;      // 1-based block number passed to the helpers

#ifndef GRAPHICS_DISABLED
  // Create the debug window on demand when word display is requested.
  if (textord_show_initial_words && testing_on && to_win == nullptr) {
    create_to_win(page_tr);
  }
#endif

  // Pass 1: per-block pitch defaults.
  block_it.set_to_list(port_blocks);
  block_index = 1;
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward(), block_index++) {
    compute_block_pitch(block_it.data(), rotation, block_index, testing_on);
  }

  // Pass 2: try the document-level test first; if that fails, try each block
  // and, where the block-level test fails too, its rows.
  if (!try_doc_fixed(page_tr, port_blocks, gradient)) {
    block_index = 1;
    for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward(), block_index++) {
      TO_BLOCK *current = block_it.data();
      if (!try_block_fixed(current, block_index)) {
        try_rows_fixed(current, block_index, testing_on);
      }
    }
  }

  // Pass 3: fix up the pitch of every row in the text blocks. Skipped blocks
  // do not consume a block number, so the increment stays inside the body.
  block_index = 1;
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    TO_BLOCK *current = block_it.data();
    POLY_BLOCK *pb = current->block->pdblk.poly_block();
    if (pb != nullptr && !pb->IsText()) {
      continue; // Non-text doesn't exist!
    }
    TO_ROW_IT row_it(current->get_rows());
    int row_index = 1; // 1-based row number within this block
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward(), row_index++) {
      fix_row_pitch(row_it.data(), current, port_blocks, row_index, block_index);
    }
    block_index++;
  }
#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_words && testing_on) {
    ScrollView::Update();
  }
#endif
}

◆ compute_fixed_pitch_cjk()

void tesseract::compute_fixed_pitch_cjk	(	ICOORD	page_tr,
		TO_BLOCK_LIST *	port_blocks
	)

Definition at line 1103 of file cjkpitch.cpp.

                                                                         {
  FPAnalyzer analyzer(page_tr, port_blocks);
  if (analyzer.num_rows() == 0) {
    return; // no rows to analyse
  }

  // Pass 1 is run twice: the second round starts from the row pitch
  // estimates produced by the first, for better estimation.
  for (int round = 0; round < 2; ++round) {
    analyzer.Pass1Analyze();
    analyzer.EstimatePitch(true);
  }

  // Early exit if the page doesn't seem to contain fixed pitch rows.
  if (!analyzer.maybe_fixed_pitch()) {
    if (textord_debug_pitch_test) {
      tprintf("Page doesn't seem to contain fixed pitch rows\n");
    }
    return;
  }

  // Pass 2: refine at least once, and keep going while Pass2Analyze() asks
  // for another round and the iteration limit has not been reached.
  unsigned iteration = 0;
  for (;;) {
    analyzer.MergeFragments();
    analyzer.FinalizeLargeChars();
    analyzer.EstimatePitch(false);
    ++iteration;
    if (!analyzer.Pass2Analyze() || iteration >= analyzer.max_iteration()) {
      break;
    }
  }

  if (textord_debug_pitch_test) {
    tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration,
            analyzer.max_iteration());
  }

  analyzer.OutputEstimations();
  if (textord_debug_pitch_test) {
    analyzer.DebugOutputResult();
  }
}

◆ compute_height_modes()

int32_t tesseract::compute_height_modes	(	STATS *	heights,
		int32_t	min_height,
		int32_t	max_height,
		int32_t *	modes,
		int32_t	maxmodes
	)

Definition at line 1629 of file makerow.cpp.

                                               { // size of modes
  // Scans heights min_height..max_height in increasing order and records in
  // modes[] up to maxmodes heights that have a non-zero pile count. Once
  // modes[] is full, a new height whose pile count is at least the smallest
  // recorded one evicts that smallest entry. Returns dest_count.
  int32_t pile_count;                            // no in source pile
  int32_t src_count;                             // no of source entries
  int32_t src_index;                             // current entry
  int32_t least_count;                           // height of smallest
  int32_t least_index;                           // index of least
  int32_t dest_count;                            // index in modes
 
  src_count = max_height + 1 - min_height;
  dest_count = 0;
  least_count = INT32_MAX;
  least_index = -1;
  for (src_index = 0; src_index < src_count; src_index++) {
    pile_count = heights->pile_count(min_height + src_index);
    if (pile_count > 0) {
      if (dest_count < maxmodes) {
        // Still room: append, remembering which entry is the smallest so far.
        if (pile_count < least_count) {
          // find smallest in array
          least_count = pile_count;
          least_index = dest_count;
        }
        modes[dest_count++] = min_height + src_index;
      } else if (pile_count >= least_count) {
        // Full: remove the smallest entry by shifting later entries down one
        // slot, which keeps modes[] in increasing height order.
        while (least_index < maxmodes - 1) {
          modes[least_index] = modes[least_index + 1];
          // shuffle up
          least_index++;
        }
        // new one on end
        modes[maxmodes - 1] = min_height + src_index;
        if (pile_count == least_count) {
          // new smallest
          least_index = maxmodes - 1;
        } else {
          // The smallest entry changed: rescan modes[] to find the new one.
          // This loop reuses dest_count as its index and leaves it equal to
          // maxmodes (modes[] is full on this path).
          least_count = heights->pile_count(modes[0]);
          least_index = 0;
          for (dest_count = 1; dest_count < maxmodes; dest_count++) {
            pile_count = heights->pile_count(modes[dest_count]);
            if (pile_count < least_count) {
              // find smallest
              least_count = pile_count;
              least_index = dest_count;
            }
          }
        }
      }
    }
  }
  return dest_count;
}

◆ compute_line_occupation()

void tesseract::compute_line_occupation	(	TO_BLOCK *	block,
		float	gradient,
		int32_t	min_y,
		int32_t	max_y,
		int32_t *	occupation,
		int32_t *	deltas
	)

Definition at line 799 of file makerow.cpp.

  {
  // Number of scan lines in [min_y, max_y].
  const int32_t line_count = max_y - min_y + 1;

  // Normalised vector built from the gradient; each blob box is rotated by it
  // to de-skew it.
  const float length = std::sqrt(gradient * gradient + 1);
  FCOORD rotation(1 / length, -gradient / length);

  // Start with no transitions recorded on any line.
  for (int32_t line = 0; line < line_count; ++line) {
    deltas[line] = 0;
  }

  TO_ROW_IT row_it = block->get_rows();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    BLOBNBOX_IT blob_it = row_it.data()->blob_list();
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      TBOX blob_box = blob_it.data()->bounding_box();
      blob_box.rotate(rotation); // de-skew it
      const int32_t width = blob_box.right() - blob_box.left();

      // count transitions: the blob's width is added on the line of its
      // bottom edge and subtracted again on the line of its top edge.
      int index = blob_box.bottom() - min_y;
      ASSERT_HOST(index >= 0 && index < line_count);
      deltas[index] += width;

      index = blob_box.top() - min_y;
      ASSERT_HOST(index >= 0 && index < line_count);
      deltas[index] -= width;
    }
  }

  // The occupation of a line is the running sum of the deltas up to that line.
  occupation[0] = deltas[0];
  for (int32_t line = 1; line < line_count; ++line) {
    occupation[line] = occupation[line - 1] + deltas[line];
  }
}

◆ compute_occupation_threshold()

void tesseract::compute_occupation_threshold	(	int32_t	low_window,
		int32_t	high_window,
		int32_t	line_count,
		int32_t *	occupation,
		int32_t *	thresholds
	)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 852 of file makerow.cpp.

  {
  // Fills thresholds[0..line_count) with (sum - min_occ) / divisor + min_occ,
  // where sum and min_occ are the sum and the minimum of occupation over a
  // window of lines. If the window (low_window + high_window) is shorter than
  // line_count it slides along the array; otherwise the whole array is used
  // as one window.
  int32_t line_index; // of thresholds line
  int32_t low_index;  // in occupation
  int32_t high_index; // in occupation
  int32_t sum;        // current average
  int32_t divisor;    // to get thresholds
  int32_t min_index;  // of min occ
  int32_t min_occ;    // min in locality
  int32_t test_index; // for finding min
 
  divisor = static_cast<int32_t>(ceil((low_window + high_window) / textord_occupancy_threshold));
  if (low_window + high_window < line_count) {
    // Sum the first low_window + high_window entries; high_index ends up one
    // past the last entry summed.
    for (sum = 0, high_index = 0; high_index < low_window; high_index++) {
      sum += occupation[high_index];
    }
    for (low_index = 0; low_index < high_window; low_index++, high_index++) {
      sum += occupation[high_index];
    }
    // Minimum over that initial window; ties resolve to the latest index.
    min_occ = occupation[0];
    min_index = 0;
    for (test_index = 1; test_index < high_index; test_index++) {
      if (occupation[test_index] <= min_occ) {
        min_occ = occupation[test_index];
        min_index = test_index; // find min in region
      }
    }
    // The first low_window lines all share the initial window's threshold.
    for (line_index = 0; line_index < low_window; line_index++) {
      thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
    }
    // same out to end
    // Slide the window one line at a time: drop occupation[low_index], add
    // occupation[high_index], and keep the running minimum up to date.
    for (low_index = 0; high_index < line_count; low_index++, high_index++) {
      sum -= occupation[low_index];
      sum += occupation[high_index];
      if (occupation[high_index] <= min_occ) {
        // find min in region
        min_occ = occupation[high_index];
        min_index = high_index;
      }
      // lost min from region
      if (min_index <= low_index) {
        // The old minimum just left the window: rescan the current window.
        min_occ = occupation[low_index + 1];
        min_index = low_index + 1;
        for (test_index = low_index + 2; test_index <= high_index; test_index++) {
          if (occupation[test_index] <= min_occ) {
            min_occ = occupation[test_index];
            // find min in region
            min_index = test_index;
          }
        }
      }
      thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
    }
  } else {
    // Window covers everything: one global sum and minimum.
    min_occ = occupation[0];
    min_index = 0;
    for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
      if (occupation[low_index] < min_occ) {
        min_occ = occupation[low_index];
        min_index = low_index;
      }
      sum += occupation[low_index];
    }
    line_index = 0;
  }
  // Fill every remaining line (all lines in the global case) with the last
  // computed threshold.
  for (; line_index < line_count; line_index++) {
    thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
  }
  // same out to end
}

◆ compute_page_skew()

void tesseract::compute_page_skew	(	TO_BLOCK_LIST *	blocks,
		float &	page_m,
		float &	page_err
	)

Definition at line 315 of file makerow.cpp.

  {
  // Collects the gradient (line_m) and fit error (line_error) of rows in the
  // text blocks, then sets page_m and page_err to the element of each
  // collection selected by textord_skew_ile. Both are set to 0 when there are
  // no rows.
  int32_t row_count;             // total rows
  int32_t blob_count;            // total_blobs
  int32_t row_err;               // integer error
  int32_t row_index;             // of total
  TO_ROW *row;                   // current row
  TO_BLOCK_IT block_it = blocks; // iterator
 
  // First pass: count the rows and blobs of all text blocks.
  row_count = 0;
  blob_count = 0;
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    POLY_BLOCK *pb = block_it.data()->block->pdblk.poly_block();
    if (pb != nullptr && !pb->IsText()) {
      continue; // Pretend non-text blocks don't exist.
    }
    row_count += block_it.data()->get_rows()->length();
    // count up rows
    TO_ROW_IT row_it(block_it.data()->get_rows());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      blob_count += row_it.data()->blob_list()->length();
    }
  }
  if (row_count == 0) {
    page_m = 0.0f;
    page_err = 0.0f;
    return;
  }
  // Both vectors are sized by the total blob count, not the row count: in the
  // biased mode below a row can contribute several entries.
  // of rows
  std::vector<float> gradients(blob_count);
  // of rows
  std::vector<float> errors(blob_count);
 
  // Second pass: collect the per-row values. From here on blob_count is
  // reused as a per-row scratch value.
  row_index = 0;
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    POLY_BLOCK *pb = block_it.data()->block->pdblk.poly_block();
    if (pb != nullptr && !pb->IsText()) {
      continue; // Pretend non-text blocks don't exist.
    }
    TO_ROW_IT row_it(block_it.data()->get_rows());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      row = row_it.data();
      blob_count = row->blob_list()->length();
      // Round the error up and clamp it to at least 1 so it is a safe divisor.
      row_err = static_cast<int32_t>(std::ceil(row->line_error()));
      if (row_err <= 0) {
        row_err = 1;
      }
      if (textord_biased_skewcalc) {
        // Biased mode: the row is entered once per remaining unit of blob_count.
        // NOTE(review): blob_count is divided by row_err twice (here and in
        // the for-initialiser), so the weight is blobs / row_err^2 with
        // integer truncation. Confirm whether the double division is intended.
        blob_count /= row_err;
        for (blob_count /= row_err; blob_count > 0; blob_count--) {
          gradients[row_index] = row->line_m();
          errors[row_index] = row->line_error();
          row_index++;
        }
      } else if (blob_count >= textord_min_blobs_in_row) {
        // get gradient
        gradients[row_index] = row->line_m();
        errors[row_index] = row->line_error();
        row_index++;
      }
    }
  }
  if (row_index == 0) {
    // desperate
    // No row qualified above, so fall back to one entry for every row.
    // NOTE(review): this writes one entry per row into vectors sized by the
    // total blob count; it presumably relies on every row holding at least
    // one blob - confirm.
    for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
      POLY_BLOCK *pb = block_it.data()->block->pdblk.poly_block();
      if (pb != nullptr && !pb->IsText()) {
        continue; // Pretend non-text blocks don't exist.
      }
      TO_ROW_IT row_it(block_it.data()->get_rows());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        row = row_it.data();
        gradients[row_index] = row->line_m();
        errors[row_index] = row->line_error();
        row_index++;
      }
    }
  }
  // Trim both vectors to the entries actually collected, then use
  // nth_element to pick the value at position row_count * textord_skew_ile.
  row_count = row_index;
  row_index = static_cast<int32_t>(row_count * textord_skew_ile);
  gradients.resize(row_count);
  std::nth_element(gradients.begin(), gradients.begin() + row_index, gradients.end());
  page_m = gradients[row_index];
  row_index = static_cast<int32_t>(row_count * textord_skew_ile);
  errors.resize(row_count);
  std::nth_element(errors.begin(), errors.begin() + row_index, errors.end());
  page_err = errors[row_index];
}

◆ compute_pitch_sd()

float tesseract::compute_pitch_sd	(	TO_ROW *	row,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	space_size,
		float	initial_pitch,
		float &	sp_sd,
		int16_t &	mid_cuts,
		ICOORDELT_LIST *	row_cells,
		bool	testing_on,
		int16_t	start,
		int16_t	end
	)

Definition at line 1289 of file topitch.cpp.

  {
  // Splits the row's blobs into words at gaps of at least space_size, runs a
  // pitch-sync check on each word against initial_pitch, and accumulates the
  // results. Returns sqrt(sqsum / total_count), or space_size * 10 when there
  // are no blobs or nothing was counted. sp_sd receives the matching figure
  // for the gaps between words, mid_cuts is reset to 0, and cell boundaries
  // are added to row_cells. When (pitsync_linear_version & 3) > 1 all of the
  // work is delegated to compute_pitch_sd2 instead.
  int16_t occupation; // no of cells in word.
                      // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX_IT start_it;  // start of word
  BLOBNBOX_IT plot_it;   // for plotting
  int16_t blob_count;    // no of blobs
  TBOX blob_box;         // bounding box
  TBOX prev_box;         // of super blob
  int32_t prev_right;    // of word sync
  int scale_factor;      // on scores for big words
  int32_t sp_count;      // spaces
  FPSEGPT_LIST seg_list; // char cells
  FPSEGPT_IT seg_it;     // iterator
  int16_t segpos;        // position of segment
  int16_t cellpos;       // previous cell boundary
                         // iterator
  ICOORDELT_IT cell_it = row_cells;
  ICOORDELT *cell;     // new cell
  double sqsum;        // sum of squares
  double spsum;        // of spaces
  double sp_var;       // space error
  double word_sync;    // result for word
  int32_t total_count; // total blobs
 
  if ((pitsync_linear_version & 3) > 1) {
    // Delegate to the second implementation; sp_sd then carries the
    // occupation count it reports rather than a deviation.
    word_sync = compute_pitch_sd2(row, projection, projection_left, projection_right, initial_pitch,
                                  occupation, mid_cuts, row_cells, testing_on, start, end);
    sp_sd = occupation;
    return word_sync;
  }
  mid_cuts = 0;
  cellpos = 0;
  total_count = 0;
  sqsum = 0;
  sp_count = 0;
  spsum = 0;
  prev_right = -1; // negative means "no previous word yet"
  if (blob_it.empty()) {
    return space_size * 10;
  }
#ifndef GRAPHICS_DISABLED
  if (testing_on && to_win != nullptr) {
    blob_box = blob_it.data()->bounding_box();
    projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
  }
#endif
  start_it = blob_it;
  blob_count = 0;
  blob_box = box_next(&blob_it); // first blob
  blob_it.mark_cycle_pt();
  // One iteration of this loop per word.
  do {
    // Advance start_it past the previous word (blob_count still holds the
    // number of box_next steps taken for that word).
    for (; blob_count > 0; blob_count--) {
      box_next(&start_it);
    }
    // Extend the word until the list has cycled or the gap to the next box
    // is at least space_size.
    do {
      prev_box = blob_box;
      blob_count++;
      blob_box = box_next(&blob_it);
    } while (!blob_it.cycled_list() && blob_box.left() - prev_box.right() < space_size);
    plot_it = start_it;
    // Segment the word into cells (seg_list) and get its sync rating.
    if (pitsync_linear_version & 3) {
      word_sync = check_pitch_sync2(&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
                                    projection, projection_left, projection_right,
                                    row->xheight * textord_projection_scale, occupation, &seg_list,
                                    start, end);
    } else {
      word_sync = check_pitch_sync(&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
                                   projection, &seg_list);
    }
    if (testing_on) {
      tprintf("Word ending at (%d,%d), len=%d, sync rating=%g, ", prev_box.right(), prev_box.top(),
              seg_list.length() - 1, word_sync);
      seg_it.set_to_list(&seg_list);
      for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
        if (seg_it.data()->faked) {
          tprintf("(F)");
        }
        tprintf("%d, ", seg_it.data()->position());
        //                              tprintf("C=%g, s=%g, sq=%g\n",
        //                                      seg_it.data()->cost_function(),
        //                                      seg_it.data()->sum(),
        //                                      seg_it.data()->squares());
      }
      tprintf("\n");
    }
#ifndef GRAPHICS_DISABLED
    if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) {
      plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
    }
#endif
    seg_it.set_to_list(&seg_list);
    if (prev_right >= 0) {
      // Gap between the previous word's last cut and this word's first cut,
      // reduced to its offset from the nearest multiple of initial_pitch and
      // then squared.
      sp_var = seg_it.data()->position() - prev_right;
      sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
      sp_var *= sp_var;
      spsum += sp_var;
      sp_count++;
    }
    // Merge this word's cuts into row_cells.
    for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
      segpos = seg_it.data()->position();
      if (cell_it.empty() || segpos > cellpos + initial_pitch / 2) {
        // big gap
        // Insert evenly spaced cells until the cut is within 1.5 pitches of
        // the last cell boundary.
        while (!cell_it.empty() && segpos > cellpos + initial_pitch * 3 / 2) {
          cell = new ICOORDELT(cellpos + static_cast<int16_t>(initial_pitch), 0);
          cell_it.add_after_then_move(cell);
          cellpos += static_cast<int16_t>(initial_pitch);
        }
        // make new one
        cell = new ICOORDELT(segpos, 0);
        cell_it.add_after_then_move(cell);
        cellpos = segpos;
      } else if (segpos > cellpos - initial_pitch / 2) {
        // Cut lies within half a pitch of the current cell: merge the two.
        cell = cell_it.data();
        // average positions
        cell->set_x((cellpos + segpos) / 2);
        cellpos = cell->x();
      }
    }
    seg_it.move_to_last();
    prev_right = seg_it.data()->position();
    // Optionally weight longer words more heavily; the factor is never below 1.
    if (textord_pitch_scalebigwords) {
      scale_factor = (seg_list.length() - 2) / 2;
      if (scale_factor < 1) {
        scale_factor = 1;
      }
    } else {
      scale_factor = 1;
    }
    sqsum += word_sync * scale_factor;
    total_count += (seg_list.length() - 1) * scale_factor;
    seg_list.clear();
  } while (!blob_it.cycled_list());
  // Guard both divisions against a zero count.
  sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
  return total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
}

◆ compute_pitch_sd2()

float tesseract::compute_pitch_sd2	(	TO_ROW *	row,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	initial_pitch,
		int16_t &	occupation,
		int16_t &	mid_cuts,
		ICOORDELT_LIST *	row_cells,
		bool	testing_on,
		int16_t	start,
		int16_t	end
	)

Definition at line 1446 of file topitch.cpp.

  {
  // Rates how well the whole row fits a fixed pitch of initial_pitch.
  // All blobs of the row are handed to check_pitch_sync2() as one run; every
  // segment it produces is appended to row_cells as a new ICOORDELT.
  // Returns sqrt(word_sync / occupation) when occupation is positive,
  // otherwise initial_pitch * 10 (also returned for a row with no blobs).
  // Outputs: mid_cuts (cheap_cuts() of the last segment, 0 if there is none)
  // and occupation (set to 0 for an empty row; otherwise passed on to
  // check_pitch_sync2(), which presumably fills it in -- confirm there).
  // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX_IT plot_it;
  int16_t blob_count;    // no of blobs
  TBOX blob_box;         // bounding box
  FPSEGPT_LIST seg_list; // char cells
  FPSEGPT_IT seg_it;     // iterator
  int16_t segpos;        // position of segment
                         // iterator
  ICOORDELT_IT cell_it = row_cells;
  ICOORDELT *cell;  // new cell
  double word_sync; // result for word
 
  mid_cuts = 0;
  if (blob_it.empty()) {
    // Nothing to measure: report zero occupation and a large penalty value.
    occupation = 0;
    return initial_pitch * 10;
  }
#ifndef GRAPHICS_DISABLED
  if (testing_on && to_win != nullptr) {
    projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
  }
#endif
  // Count the blobs by stepping with box_next() until the iterator has
  // cycled; blob_box is left holding the last box returned.
  blob_count = 0;
  blob_it.mark_cycle_pt();
  do {
    // first blob
    blob_box = box_next(&blob_it);
    blob_count++;
  } while (!blob_it.cycled_list());
  // NOTE(review): plot_it is assigned here but not read again in this body.
  plot_it = blob_it;
  word_sync = check_pitch_sync2(
      &blob_it, blob_count, static_cast<int16_t>(initial_pitch), 2, projection, projection_left,
      projection_right, row->xheight * textord_projection_scale, occupation, &seg_list, start, end);
  if (testing_on) {
    // Debug dump: one entry per segment position, "(F)" marks faked segments.
    tprintf("Row ending at (%d,%d), len=%d, sync rating=%g, ", blob_box.right(), blob_box.top(),
            seg_list.length() - 1, word_sync);
    seg_it.set_to_list(&seg_list);
    for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
      if (seg_it.data()->faked) {
        tprintf("(F)");
      }
      tprintf("%d, ", seg_it.data()->position());
      //                              tprintf("C=%g, s=%g, sq=%g\n",
      //                                      seg_it.data()->cost_function(),
      //                                      seg_it.data()->sum(),
      //                                      seg_it.data()->squares());
    }
    tprintf("\n");
  }
#ifndef GRAPHICS_DISABLED
  if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) {
    plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
  }
#endif
  // Copy every segment position into the caller's cell list.
  seg_it.set_to_list(&seg_list);
  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    segpos = seg_it.data()->position();
    // make new one
    cell = new ICOORDELT(segpos, 0);
    cell_it.add_after_then_move(cell);
    if (seg_it.at_last()) {
      // Only the final segment's cheap_cuts() value is reported.
      mid_cuts = seg_it.data()->cheap_cuts();
    }
  }
  seg_list.clear();
  return occupation > 0 ? sqrt(word_sync / occupation) : initial_pitch * 10;
}

◆ compute_reject_threshold()

float tesseract::compute_reject_threshold ( WERD_CHOICE * word )

Definition at line 227 of file reject.cpp.

                                                  {
  // Derives a certainty threshold for `word` from the gaps between its
  // per-blob certainties.
  //
  // The certainties are sorted in ascending order. With three or more blobs
  // the largest gap between neighbouring values is located and the threshold
  // is the midpoint of that gap. With one or two blobs (or when no positive
  // gap exists) the result is the lowest certainty minus one.
  //
  // A word without blobs has no certainties at all; 0.0f is returned in that
  // case instead of reading ratings[0] from an empty vector.
  const auto blob_count = word->length();
  if (blob_count == 0) {
    return 0.0f;
  }

  std::vector<float> ratings;
  ratings.reserve(blob_count);
  for (unsigned i = 0; i < blob_count; ++i) {
    ratings.push_back(word->certainty(i));
  }
  std::sort(ratings.begin(), ratings.end());

  float bestgap = 0.0f;             // biggest gap
  float gapstart = ratings[0] - 1;  // bottom of gap; all reject if none better
  if (blob_count >= 3) {
    for (unsigned index = 0; index + 1 < blob_count; index++) {
      const float gap = ratings[index + 1] - ratings[index];
      if (gap > bestgap) {
        // find biggest
        bestgap = gap;
        gapstart = ratings[index];
      }
    }
  }
  return gapstart + bestgap / 2;
}

◆ compute_row_descdrop()

int32_t tesseract::compute_row_descdrop	(	TO_ROW *	row,
		float	gradient,
		int	xheight_blob_count,
		STATS *	asc_heights
	)

Definition at line 1576 of file makerow.cpp.

                                                 {
  // Estimates the descender drop of a row as the (negated) most common
  // distance by which blob bottoms fall below the fitted line, provided that
  // enough blobs support it; otherwise the drop is 0.

  // Restrict the ascender bucket range to the ratios permitted against xheight.
  int asc_lo = asc_heights->min_bucket();
  if ((asc_lo / row->xheight) < textord_ascx_ratio_min) {
    asc_lo = static_cast<int>(floor(row->xheight * textord_ascx_ratio_min + 0.5));
  }
  int asc_hi = asc_heights->max_bucket();
  if ((asc_hi / row->xheight) > textord_ascx_ratio_max) {
    asc_hi = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
  }

  // Tally the blobs whose height makes them possible ascenders.
  int asc_candidates = 0;
  for (int bucket = asc_lo; bucket <= asc_hi; ++bucket) {
    asc_candidates += asc_heights->pile_count(bucket);
  }

  // Histogram of drops that fall inside the permitted descender range.
  auto drop_lo = static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
  auto drop_hi = static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
  BLOBNBOX_IT blob_it = row->blob_list();
  STATS drops(drop_lo, drop_hi);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX *candidate = blob_it.data();
    if (candidate->joined_to_prev()) {
      continue; // only blobs not joined to their predecessor are measured
    }
    // Horizontal centre of the blob, then its bottom's distance below the line.
    float mid_x = (candidate->bounding_box().left() + candidate->bounding_box().right()) / 2.0f;
    float drop = (gradient * mid_x + row->parallel_c() - candidate->bounding_box().bottom());
    if (drop >= drop_lo && drop <= drop_hi) {
      drops.add(static_cast<int>(floor(drop + 0.5)), 1);
    }
  }

  int mode_drop = drops.mode();                 // most frequent drop
  int mode_votes = drops.pile_count(mode_drop); // blobs voting for it
  float required_fraction = (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
  // Discard the mode when descenders plus potential ascenders are too few.
  if (static_cast<float>(mode_votes + asc_candidates) < xheight_blob_count * required_fraction) {
    mode_votes = 0;
  }
  int descdrop = 0;
  if (mode_votes > 0) {
    descdrop = -mode_drop;
  }
  if (textord_debug_xheights) {
    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n", descdrop, asc_candidates,
            mode_votes);
    drops.print();
  }
  return descdrop;
}

◆ compute_row_stats()

void tesseract::compute_row_stats	(	TO_BLOCK *	block,
		bool	testing_on
	)

Definition at line 1163 of file makerow.cpp.

  {
  // Computes per-row spacing for the rows of `block` and, from the spread of
  // those spacings, updates block->key_row, block->baseline_offset and
  // (when the spacings are consistent enough) block->line_size,
  // block->line_spacing and block->max_blob_size.
  // testing_on enables tprintf tracing of the intermediate values.
  int32_t row_index; // of median
  TO_ROW *row;       // current row
  TO_ROW *prev_row;  // previous row
  float iqr;         // inter quartile range
  TO_ROW_IT row_it = block->get_rows();
  // number of rows
  int16_t rowcount = row_it.length();
  // for choose nth
  std::vector<TO_ROW *> rows(rowcount);
  // rowcount is reused as the number of entries actually stored in rows.
  rowcount = 0;
  prev_row = nullptr;
  row_it.move_to_last(); // start at bottom
  // Walk the list backwards from the last row until the iterator is back at
  // the last row. Each row visited after the first gives the previously
  // visited row its spacing (difference of the two intercepts), and that
  // previously visited row is stored in rows. The final row visited therefore
  // gets no spacing here and is not stored.
  do {
    row = row_it.data();
    if (prev_row != nullptr) {
      rows[rowcount++] = prev_row;
      prev_row->spacing = row->intercept() - prev_row->intercept();
      if (prev_row->spacing < 0.1 && prev_row->spacing > -0.1) {
        // Avoid small spacing values which give a small disp_quant_factor_.
        // That can cause large memory allocations with out-of-memory.
        prev_row->spacing = 0;
      }
      if (testing_on) {
        tprintf("Row at %g yields spacing of %g\n", row->intercept(), prev_row->spacing);
      }
    }
    prev_row = row;
    row_it.backward();
  } while (!row_it.at_last());
  // Provisional result: the last row visited becomes the key row.
  // NOTE(review): std::fmod is called with block->line_spacing unchecked;
  // confirm callers guarantee it is non-zero.
  block->key_row = prev_row;
  block->baseline_offset = std::fmod(prev_row->parallel_c(), block->line_spacing);
  if (testing_on) {
    tprintf("Blob based spacing=(%g,%g), offset=%g", block->line_size, block->line_spacing,
            block->baseline_offset);
  }
  if (rowcount > 0) {
    rows.resize(rowcount);
    // iqr = spacing at the 3/4 position minus spacing at the 1/4 position,
    // each found by partially ordering rows with row_spacing_order.
    row_index = rowcount * 3 / 4;
    std::nth_element(rows.begin(), rows.begin() + row_index, rows.end(), row_spacing_order);
    iqr = rows[row_index]->spacing;
    row_index = rowcount / 4;
    std::nth_element(rows.begin(), rows.begin() + row_index, rows.end(), row_spacing_order);
    iqr -= rows[row_index]->spacing;
    // From here on row_index designates the row with the median spacing.
    row_index = rowcount / 2;
    std::nth_element(rows.begin(), rows.begin() + row_index, rows.end(), row_spacing_order);
    block->key_row = rows[row_index];
    if (testing_on) {
      tprintf(" row based=%g(%g)", rows[row_index]->spacing, iqr);
    }
    // Only trust the row-based figures when there are more than two spacings
    // and their spread is small relative to the median spacing.
    if (rowcount > 2 && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
      if (!textord_new_initial_xheight) {
        if (rows[row_index]->spacing < block->line_spacing &&
            rows[row_index]->spacing > block->line_size) {
          // within range
          block->line_size = rows[row_index]->spacing;
        // spacing=size
        } else if (rows[row_index]->spacing > block->line_spacing) {
          block->line_size = block->line_spacing;
        }
        // too big so use max
      } else {
        if (rows[row_index]->spacing < block->line_spacing) {
          block->line_size = rows[row_index]->spacing;
        } else {
          block->line_size = block->line_spacing;
        }
        // too big so use max
      }
      // Never let the line size drop below the configured minimum xheight.
      if (block->line_size < textord_min_xheight) {
        block->line_size = (float)textord_min_xheight;
      }
      block->line_spacing = rows[row_index]->spacing;
      block->max_blob_size = block->line_spacing * textord_excess_blobsize;
    }
    // Recompute the offset from the median row, using the (possibly updated)
    // line spacing.
    block->baseline_offset = std::fmod(rows[row_index]->intercept(), block->line_spacing);
  }
  if (testing_on) {
    tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n", block->line_size,
            block->line_spacing, block->baseline_offset);
  }
}

◆ compute_rows_pitch()

bool tesseract::compute_rows_pitch	(	TO_BLOCK *	block,
		int32_t	block_index,
		bool	testing_on
	)

Definition at line 330 of file topitch.cpp.

  {
  // Runs the per-row pitch analysis on every row of `block`.
  // Each row gets its vertical projection computed, then row_pitch_stats()
  // and find_row_pitch() are tried in that order. If either fails the row is
  // marked PITCH_DUNNO with fixed_pitch 0; if both succeed and the row is not
  // fixed pitch, pr_space / pr_nonsp become its space_size / kern_size.
  // This function always returns false.
  TO_ROW_IT row_it = block->get_rows();
  int32_t row_number = 1; // 1-based row counter passed to find_row_pitch

  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward(), row_number++) {
    TO_ROW *current = row_it.data();
    ASSERT_HOST(current->xheight > 0);
    current->compute_vertical_projection();

    // Widest gap considered, scaled from the row's xheight.
    int32_t max_space = static_cast<int32_t>(ceil(current->xheight * textord_words_maxspace));

    const bool pitch_found =
        row_pitch_stats(current, max_space, testing_on) &&
        find_row_pitch(current, max_space, textord_dotmatrix_gap + 1, block, block_index,
                       row_number, testing_on);

    if (!pitch_found) {
      current->fixed_pitch = 0.0f; // insufficient data
      current->pitch_decision = PITCH_DUNNO;
    } else if (current->fixed_pitch == 0) {
      float kern_threshold = current->pr_nonsp;
      float space_threshold = current->pr_space;
      current->space_size = space_threshold;
      current->kern_size = kern_threshold;
    }
  }
  return false;
}

◆ compute_xheight_from_modes()

int tesseract::compute_xheight_from_modes	(	STATS *	heights,
		STATS *	floating_heights,
		bool	cap_only,
		int	min_height,
		int	max_height,
		float *	xheight,
		float *	ascrise
	)

Definition at line 1480 of file makerow.cpp.

                                                                                               {
  // Chooses an xheight (and ascender rise) from the most populated buckets
  // of `heights`.
  // First looks for a pair of modes (x-height candidate, ascender candidate)
  // whose height ratio and pile counts satisfy the textord_* thresholds; if
  // no pair sets *xheight, falls back to the single overall mode computed
  // with the floating_heights counts temporarily removed.
  // Writes *xheight and *ascrise and returns the pile count backing the
  // choice, or 0 (leaving the outputs untouched) when the mode pile is empty.
  // NOTE(review): the fallback is triggered by *xheight == 0, and *xheight is
  // not initialised in this body -- presumably callers pass it in as 0.
  int blob_index = heights->mode();                 // find mode
  int blob_count = heights->pile_count(blob_index); // get count of mode
  if (textord_debug_xheights) {
    tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", min_height, max_height,
            blob_index, blob_count, heights->get_total());
    heights->print();
    floating_heights->print();
  }
  if (blob_count == 0) {
    return 0;
  }
  int modes[MAX_HEIGHT_MODES]; // biggest piles
  bool in_best_pile = false;
  int prev_size = -INT32_MAX;
  int best_count = 0;
  int mode_count = compute_height_modes(heights, min_height, max_height, modes, MAX_HEIGHT_MODES);
  // With cap_only only the first mode is kept, so the pair search below has
  // nothing to iterate over and the single-mode path is taken.
  if (cap_only && mode_count > 1) {
    mode_count = 1;
  }
  int x;
  if (textord_debug_xheights) {
    tprintf("found %d modes: ", mode_count);
    for (x = 0; x < mode_count; x++) {
      tprintf("%d ", modes[x]);
    }
    tprintf("\n");
  }
 
  // Try each mode (except the last) as the xheight and every later mode as
  // the matching ascender height.
  for (x = 0; x < mode_count - 1; x++) {
    if (modes[x] != prev_size + 1) {
      in_best_pile = false; // had empty height
    }
    // Count for this height with the floating blobs excluded.
    int modes_x_count = heights->pile_count(modes[x]) - floating_heights->pile_count(modes[x]);
    if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
        (in_best_pile || modes_x_count > best_count)) {
      for (int asc = x + 1; asc < mode_count; asc++) {
        float ratio = static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
        if (textord_ascx_ratio_min < ratio && ratio < textord_ascx_ratio_max &&
            (heights->pile_count(modes[asc]) >= blob_count * textord_ascheight_mode_fraction)) {
          if (modes_x_count > best_count) {
            in_best_pile = true;
            best_count = modes_x_count;
          }
          if (textord_debug_xheights) {
            tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", modes[x], modes[asc] - modes[x],
                    modes_x_count, ratio);
          }
          // Record this pair; a later qualifying pair overwrites it.
          prev_size = modes[x];
          *xheight = static_cast<float>(modes[x]);
          *ascrise = static_cast<float>(modes[asc] - modes[x]);
        }
      }
    }
  }
  if (*xheight == 0) { // single mode
    // Remove counts of the "floating" blobs (the one whose height is too
    // small in relation to its top end of the bounding box) from heights
    // before computing the single-mode xheight.
    // Restore the counts in heights after the mode is found, since
    // floating blobs might be useful for determining potential ascenders
    // in compute_row_descdrop().
    // NOTE(review): both loops stop before max_height, so the bucket at
    // max_height itself is never adjusted -- confirm whether STATS treats
    // its upper bound as exclusive.
    if (floating_heights->get_total() > 0) {
      for (x = min_height; x < max_height; ++x) {
        heights->add(x, -(floating_heights->pile_count(x)));
      }
      blob_index = heights->mode(); // find the modified mode
      for (x = min_height; x < max_height; ++x) {
        heights->add(x, floating_heights->pile_count(x));
      }
    }
    *xheight = static_cast<float>(blob_index);
    *ascrise = 0.0f;
    // The returned count is read after the floating counts were restored.
    best_count = heights->pile_count(blob_index);
    if (textord_debug_xheights) {
      tprintf("Single mode xheight set to %g\n", *xheight);
    }
  } else if (textord_debug_xheights) {
    tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
  }
  return best_count;
}

◆ ComputeBlobCenter()

void tesseract::ComputeBlobCenter	(	TBLOB *	Blob,
		TPOINT *	BlobCenter
	)

Public Function Prototypes

◆ ComputeDirection()

void tesseract::ComputeDirection	(	MFEDGEPT *	Start,
		MFEDGEPT *	Finish,
		float	MinSlope,
		float	MaxSlope
	)

This routine computes the slope from Start to Finish and then computes the approximate direction of the line segment from Start to Finish. The direction is quantized into 8 buckets: N, S, E, W, NE, NW, SE, SW. Both the slope and the direction are then stored into the appropriate fields of the Start edge point. The direction is also stored into the PreviousDirection field of the Finish edge point.

Parameters

Start	starting point to compute direction from
Finish	finishing point to compute direction to
MinSlope	slope below which lines are horizontal
MaxSlope	slope above which lines are vertical

Definition at line 335 of file mfoutline.cpp.

                                                                                         {
  // Stores the slope of the segment Start -> Finish and its quantized
  // compass direction in Start, and copies that direction into
  // Finish->PreviousDirection.
  FVECTOR Delta;
  Delta.x = Finish->Point.x - Start->Point.x;
  Delta.y = Finish->Point.y - Start->Point.y;

  if (Delta.x == 0) {
    // Vertical segment: the slope saturates at +/-FLT_MAX.
    const bool heading_down = Delta.y < 0;
    Start->Slope = heading_down ? -FLT_MAX : FLT_MAX;
    Start->Direction = heading_down ? south : north;
  } else {
    Start->Slope = Delta.y / Delta.x;
    const auto &slope = Start->Slope;

    const bool rightward = Delta.x > 0;
    const bool upward = Delta.y > 0;
    // In the (right, up) and (left, not up) cases the slope is compared
    // against the positive limits; in the two mixed cases against the
    // negated limits.
    const bool positive_limits = (rightward == upward);
    const bool steeper_than_min = positive_limits ? (slope > MinSlope) : (slope < -MinSlope);
    const bool flatter_than_max = positive_limits ? (slope < MaxSlope) : (slope > -MaxSlope);

    if (!steeper_than_min) {
      // Shallow enough to count as horizontal.
      Start->Direction = rightward ? east : west;
    } else if (flatter_than_max) {
      // Between the two limits: one of the four diagonals.
      if (upward) {
        Start->Direction = rightward ? northeast : northwest;
      } else {
        Start->Direction = rightward ? southeast : southwest;
      }
    } else {
      // Steep enough to count as vertical.
      Start->Direction = upward ? north : south;
    }
  }
  Finish->PreviousDirection = Start->Direction;
}

◆ ComputeDistance()

TESS_API float tesseract::ComputeDistance	(	int	k,
		PARAM_DESC *	dim,
		float	p1[],
		float	p2[]
	)

Definition at line 400 of file kdtree.cpp.

                                                                      {
  // Distance between p1 and p2: the square root of DistanceSquared().
  const auto squared = DistanceSquared(k, dim, p1, p2);
  return std::sqrt(squared);
}

◆ contains()

template<class T >

bool tesseract::contains	(	const std::vector< T > &	data,
		const T &	value
	)

inline

Definition at line 39 of file helpers.h.

                                                               {
  // True when some element of data compares equal to value.
  const auto last = data.end();
  return std::find(data.begin(), last, value) != last;
}

◆ ConvertBlob()

LIST tesseract::ConvertBlob ( TBLOB * blob )

Convert a blob into a list of MFOUTLINEs (float-based microfeature format).

Definition at line 34 of file mfoutline.cpp.

                              {
  // A null blob converts to the empty list.
  if (blob == nullptr) {
    return NIL_LIST;
  }
  // Convert the blob's outlines, starting from an empty result list.
  LIST converted = NIL_LIST;
  return ConvertOutlines(blob->outlines, converted, outer);
}

◆ ConvertConfig()

void tesseract::ConvertConfig	(	BIT_VECTOR	Config,
		int	ConfigId,
		INT_CLASS_STRUCT *	Class
	)

This operation updates the config vectors of all protos in Class to indicate that the protos with 1's in Config belong to a new configuration identified by ConfigId. It is assumed that the length of the Config bit vector is equal to the number of protos in Class.

Parameters

Config	config to be added to class
ConfigId	id to be used for new config
Class	class to add new config to

Definition at line 430 of file intproto.cpp.

                                                                             {
  // For every proto whose bit is set in Config, flag ConfigId in that proto's
  // Configs and add its length to the total stored for the config.
  int config_length = 0;

  for (int proto_id = 0; proto_id < Class->NumProtos; ++proto_id) {
    if (!test_bit(Config, proto_id)) {
      continue; // proto is not part of this config
    }
    INT_PROTO_STRUCT *proto = ProtoForProtoId(Class, proto_id);
    SET_BIT(proto->Configs, ConfigId);
    config_length += Class->ProtoLengths[proto_id];
  }
  Class->ConfigLengths[ConfigId] = config_length;
} /* ConvertConfig */

◆ ConvertOutline()

MFOUTLINE tesseract::ConvertOutline ( TESSLINE * outline )

Convert a TESSLINE into the float-based MFOUTLINE micro-feature format.

Definition at line 41 of file mfoutline.cpp.

                                            {
  // Builds an MFOUTLINE from the edge-point loop of `outline`, one MFEDGEPT
  // per point that differs in position from its successor. The result is
  // made circular unless it is empty.
  auto converted = NIL_LIST;
  if (outline == nullptr || outline->loop == nullptr) {
    return converted;
  }

  auto loop_start = outline->loop;
  auto point = loop_start;
  do {
    auto successor = point->next;

    /* a point coinciding with its successor is dropped */
    const bool coincides =
        point->pos.x == successor->pos.x && point->pos.y == successor->pos.y;
    if (!coincides) {
      auto fresh = new MFEDGEPT;
      fresh->ClearMark();
      fresh->Hidden = point->IsHidden();
      fresh->Point.x = point->pos.x;
      fresh->Point.y = point->pos.y;
      converted = push(converted, fresh);
    }
    point = successor;
  } while (point != loop_start);

  if (converted != nullptr) {
    MakeOutlineCircular(converted);
  }
  return converted;
}

◆ ConvertOutlines()

LIST tesseract::ConvertOutlines	(	TESSLINE *	outline,
		LIST	mf_outlines,
		OUTLINETYPE	outline_type
	)

Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).

Parameters

outline	first outline to be converted
mf_outlines	list to add converted outlines to
outline_type	are the outlines outer or holes?

Definition at line 79 of file mfoutline.cpp.

                                                                                    {
  // Follow the `next` chain, pushing each non-empty converted outline onto
  // mf_outlines.
  for (; outline != nullptr; outline = outline->next) {
    MFOUTLINE converted = ConvertOutline(outline);
    if (converted != nullptr) {
      mf_outlines = push(mf_outlines, converted);
    }
  }
  return mf_outlines;
}

◆ ConvertSegmentToPicoFeat()

void tesseract::ConvertSegmentToPicoFeat	(	FPOINT *	Start,
		FPOINT *	End,
		FEATURE_SET	FeatureSet
	)

This routine converts an entire segment of an outline into a set of pico features which are added to FeatureSet. The length of the segment is rounded to the nearest whole number of pico-features. The pico-features are spaced evenly over the entire segment. Results are placed in FeatureSet. Globals:

classify_pico_feature_length length of a single pico-feature
Parameters

Start starting point of pico-feature

End ending point of pico-feature

FeatureSet set to add pico-feature to

Definition at line 95 of file picofeat.cpp.

                                                                                  {
  // Splits the segment Start -> End into a whole number of equal pieces (at
  // least one) and adds one pico-feature, centred on each piece, to
  // FeatureSet. All features share the segment's angle.
  float Angle = NormalizedAngleFrom(Start, End, 1.0);
  float Length = DistanceBetween(*Start, *End);

  /* nearest whole number of pico-features, never fewer than one */
  int piece_count = static_cast<int>(floor(Length / classify_pico_feature_length + 0.5));
  if (piece_count < 1) {
    piece_count = 1;
  }

  /* displacement covered by a single pico feature */
  FPOINT Step;
  Step.x = XDelta(*Start, *End) / piece_count;
  Step.y = YDelta(*Start, *End) / piece_count;

  /* the first centre lies half a step in from Start */
  FPOINT Centre;
  Centre.x = Start->x + Step.x / 2.0;
  Centre.y = Start->y + Step.y / 2.0;

  /* emit one feature per piece, advancing the centre by a step each time */
  for (int remaining = piece_count; remaining > 0; --remaining) {
    auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
    Feature->Params[PicoFeatDir] = Angle;
    Feature->Params[PicoFeatX] = Centre.x;
    Feature->Params[PicoFeatY] = Centre.y;
    AddFeature(FeatureSet, Feature);

    Centre.x += Step.x;
    Centre.y += Step.y;
  }
} /* ConvertSegmentToPicoFeat */

◆ ConvertToMicroFeatures()

MICROFEATURES tesseract::ConvertToMicroFeatures	(	MFOUTLINE	Outline,
		MICROFEATURES	MicroFeatures
	)

Convert Outline to MicroFeatures

Parameters

Outline	outline to extract micro-features from
MicroFeatures	list of micro-features to add to

Returns: List of micro-features with new features added to front.

Note: Globals: none

Definition at line 91 of file mfx.cpp.

                                                                                     {
  // Degenerate outlines contribute nothing.
  if (DegenerateOutline(Outline)) {
    return MicroFeatures;
  }

  // Visit consecutive extremities until the walk returns to the first one,
  // extracting a feature for each stretch whose end point is not hidden.
  MFOUTLINE Origin = NextExtremity(Outline);
  MFOUTLINE StretchStart = Origin;
  do {
    MFOUTLINE StretchEnd = NextExtremity(StretchStart);
    if (!PointAt(StretchEnd)->Hidden) {
      auto Extracted = ExtractMicroFeature(StretchStart, StretchEnd);
      MicroFeatures.push_front(Extracted);
    }
    StretchStart = StretchEnd;
  } while (StretchStart != Origin);

  return MicroFeatures;
} /* ConvertToMicroFeatures */

◆ ConvertToOutlineFeatures()

void tesseract::ConvertToOutlineFeatures	(	MFOUTLINE	Outline,
		FEATURE_SET	FeatureSet
	)

This routine converts each section in the specified outline to a feature described by its x,y position, length and angle. Results are returned in FeatureSet.

Parameters

Outline	outline to extract outline-features from
FeatureSet	set of features to add outline-features to

Definition at line 97 of file outfeat.cpp.

                                                                         {
  // Degenerate outlines contribute nothing.
  if (DegenerateOutline(Outline)) {
    return;
  }

  // Walk once around the outline, one edge per iteration.
  MFOUTLINE Cursor = Outline;
  do {
    FPOINT EdgeStart = PointAt(Cursor)->Point;
    Cursor = NextPointAfter(Cursor);

    /* An edge counts as hidden when its ending point is marked hidden.
   This is because the order of the outlines is reversed when they are
   converted from the old format, where a hidden edge was marked on
   its starting point. */
    if (!PointAt(Cursor)->Hidden) {
      FPOINT EdgeEnd = PointAt(Cursor)->Point;
      AddOutlineFeatureToSet(&EdgeStart, &EdgeEnd, FeatureSet);
    }
  } while (Cursor != Outline);
} /* ConvertToOutlineFeatures */

◆ ConvertToPicoFeatures2()

void tesseract::ConvertToPicoFeatures2	(	MFOUTLINE	Outline,
		FEATURE_SET	FeatureSet
	)

This routine steps through the specified outline and cuts it up into pieces of equal length. These pieces become the desired pico-features. Each segment in the outline is converted into an integral number of pico-features. Results are returned in FeatureSet.

Globals:

classify_pico_feature_length length of features to be extracted
Parameters

Outline outline to extract micro-features from

FeatureSet set of features to add pico-features to

Definition at line 144 of file picofeat.cpp.

                                                                       {
  // Degenerate outlines contribute nothing.
  if (DegenerateOutline(Outline)) {
    return;
  }

  // Walk once around the outline; each visible edge (From -> To) is cut
  // into pico-features by ConvertSegmentToPicoFeat().
  MFOUTLINE From = Outline;
  MFOUTLINE To = NextPointAfter(From);
  do {
    /* An edge counts as hidden when its ending point is marked hidden.
   This is because the order of the outlines is reversed when they are
   converted from the old format, where a hidden edge was marked on
   its starting point. */
    if (!(PointAt(To)->Hidden)) {
      ConvertSegmentToPicoFeat(&(PointAt(From)->Point), &(PointAt(To)->Point), FeatureSet);
    }

    From = To;
    To = NextPointAfter(From);
  } while (From != Outline);

} /* ConvertToPicoFeatures2 */

◆ CopyVector()

void tesseract::CopyVector	(	unsigned	n,
		const TFloat *	src,
		TFloat *	dest
	)

inline

Definition at line 210 of file functions.h.

                                                                    {
  // Copies n elements from src to dest as raw bytes.
  const auto byte_count = n * sizeof(*dest);
  memcpy(dest, src, byte_count);
}

◆ correct_row_xheight()

void tesseract::correct_row_xheight	(	TO_ROW *	row,
		float	xheight,
		float	ascrise,
		float	descdrop
	)

Definition at line 1690 of file makerow.cpp.

                                                                                    {
  // Adjusts row->xheight, row->ascrise and row->descdrop using the supplied
  // average values (xheight, ascrise, descdrop), depending on the category
  // get_row_category() reports for the row and on whether the row's current
  // xheight lies within textord_xheight_error_margin of the average xheight
  // or of the average cap height (xheight + ascrise).
  // NOTE(review): several branches divide by xheight or (xheight + ascrise)
  // without checking for zero -- confirm callers pass positive values.
  ROW_CATEGORY row_category = get_row_category(row);
  if (textord_debug_xheights) {
    tprintf(
        "correcting row xheight: row->xheight %.4f"
        ", row->acrise %.4f row->descdrop %.4f\n",
        row->xheight, row->ascrise, row->descdrop);
  }
  // Does the row's xheight match the average xheight / the average cap height?
  bool normal_xheight = within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
  bool cap_xheight =
      within_error_margin(row->xheight, xheight + ascrise, textord_xheight_error_margin);
  // Use the average xheight/ascrise for the following cases:
  // -- the xheight of the row could not be determined at all
  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
  //    and its xheight is close to either cap height or average xheight
  // -- the row does not have ascenders or descenders, but its xheight
  //    is close to the average block xheight (e.g. row with "www.mmm.com")
  if (row_category == ROW_ASCENDERS_FOUND) {
    // Only the descender drop is touched, and only when it is non-negative:
    // it is scaled from the average in proportion to the row's own xheight.
    if (row->descdrop >= 0) {
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  } else if (row_category == ROW_INVALID ||
             (row_category == ROW_DESCENDERS_FOUND && (normal_xheight || cap_xheight)) ||
             (row_category == ROW_UNKNOWN && normal_xheight)) {
    if (textord_debug_xheights) {
      tprintf("using average xheight\n");
    }
    row->xheight = xheight;
    row->ascrise = ascrise;
    row->descdrop = descdrop;
  } else if (row_category == ROW_DESCENDERS_FOUND) {
    // Assume this is a row with mostly lowercase letters and its xheight
    // is computed correctly (unfortunately there is no way to distinguish
    // this from the case when descenders are found, but the most common
    // height is capheight).
    if (textord_debug_xheights) {
      tprintf("lowercase, corrected ascrise\n");
    }
    row->ascrise = row->xheight * (ascrise / xheight);
  } else if (row_category == ROW_UNKNOWN) {
    // Otherwise assume this row is an all-caps or small-caps row
    // and adjust xheight and ascrise of the row.
 
    row->all_caps = true;
    if (cap_xheight) { // regular all caps
      if (textord_debug_xheights) {
        tprintf("all caps\n");
      }
      row->xheight = xheight;
      row->ascrise = ascrise;
      row->descdrop = descdrop;
    } else { // small caps or caps with an odd xheight
      if (textord_debug_xheights) {
        if (row->xheight < xheight + ascrise && row->xheight > xheight) {
          tprintf("small caps\n");
        } else {
          tprintf("all caps with irregular xheight\n");
        }
      }
      // Treat the measured height as a cap height: split it into ascrise and
      // xheight in the average proportions, then scale descdrop to the new
      // xheight. The order matters: xheight is reduced before descdrop uses it.
      row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
      row->xheight -= row->ascrise;
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  }
  if (textord_debug_xheights) {
    tprintf(
        "corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
        " = %.4f\n",
        row->xheight, row->ascrise, row->descdrop);
  }
}

◆ count_block_votes()

void tesseract::count_block_votes	(	TO_BLOCK *	block,
		int32_t &	def_fixed,
		int32_t &	def_prop,
		int32_t &	maybe_fixed,
		int32_t &	maybe_prop,
		int32_t &	corr_fixed,
		int32_t &	corr_prop,
		int32_t &	dunno
	)

Definition at line 606 of file topitch.cpp.

                                        {
  // Adds one to the caller's counter matching each row's pitch_decision.
  // The counters are only incremented here, never reset.
  TO_ROW_IT row_it = block->get_rows();

  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    TO_ROW *voter = row_it.data();
    int32_t *tally = nullptr; // counter selected by this row's decision
    switch (voter->pitch_decision) {
      case PITCH_DUNNO:
        tally = &dunno;
        break;
      case PITCH_DEF_PROP:
        tally = &def_prop;
        break;
      case PITCH_MAYBE_PROP:
        tally = &maybe_prop;
        break;
      case PITCH_DEF_FIXED:
        tally = &def_fixed;
        break;
      case PITCH_MAYBE_FIXED:
        tally = &maybe_fixed;
        break;
      case PITCH_CORR_PROP:
        tally = &corr_prop;
        break;
      case PITCH_CORR_FIXED:
        tally = &corr_fixed;
        break;
    }
    // A decision outside the listed values leaves every counter unchanged.
    if (tally != nullptr) {
      (*tally)++;
    }
  }
}

◆ count_pitch_stats()

bool tesseract::count_pitch_stats	(	TO_ROW *	row,
		STATS *	gap_stats,
		STATS *	pitch_stats,
		float	initial_pitch,
		float	min_space,
		bool	ignore_outsize,
		bool	split_outsize,
		int32_t	dm_gap
	)

Definition at line 1008 of file topitch.cpp.

  {
  // Gathers gap and pitch samples for one row into gap_stats / pitch_stats.
  // Both STATS objects are cleared first. Returns false immediately when the
  // row has no blobs; otherwise returns true only if at least three gap
  // samples were recorded.
  bool prev_valid; // not word broken
  BLOBNBOX *blob;  // current blob
                   // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  int32_t prev_right;  // end of prev blob
  int32_t prev_centre; // centre of previous blob
  int32_t x_centre;    // centre of this blob
  int32_t blob_width;  // width of blob
  int32_t width_units; // no of widths in blob
  float width;         // blob width
  TBOX blob_box;       // bounding box
  TBOX joined_box;     // of super blob
 
  gap_stats->clear();
  pitch_stats->clear();
  if (blob_it.empty()) {
    return false;
  }
  prev_valid = false;
  prev_centre = 0;
  prev_right = 0; // stop compiler warning
  joined_box = blob_it.data()->bounding_box();
  // The iterator is advanced before each blob is examined and the loop ends
  // once it is back at the first element, so the first blob is visited last.
  do {
    blob_it.forward();
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      blob_box = blob->bounding_box();
      // Merge into the running "super blob" when the gap is below dm_gap
      // (except when we have wrapped to the first blob) or when the blob has
      // no cblob.
      if ((blob_box.left() - joined_box.right() < dm_gap && !blob_it.at_first()) ||
          blob->cblob() == nullptr) {
        joined_box += blob_box; // merge blobs
      } else {
        blob_width = joined_box.width();
        // width_units: extra pitch cells spanned beyond the first (>= 0), or
        // -1 to mark the joined blob as an outsize one to be ignored.
        if (split_outsize) {
          width_units =
              static_cast<int32_t>(floor(static_cast<float>(blob_width) / initial_pitch + 0.5));
          if (width_units < 1) {
            width_units = 1;
          }
          width_units--;
        } else if (ignore_outsize) {
          width = static_cast<float>(blob_width) / initial_pitch;
          width_units =
              width < 1 + words_default_fixed_limit && width > 1 - words_default_fixed_limit ? 0
                                                                                             : -1;
        } else {
          width_units = 0; // everything in
        }
        // Centre of the first pitch cell covered by the joined blob.
        x_centre = static_cast<int32_t>(joined_box.left() +
                                        (blob_width - width_units * initial_pitch) / 2);
        if (prev_valid && width_units >= 0) {
          //                                              if (width_units>0)
          //                                              {
          //                                                      tprintf("wu=%d,
          //                                                      width=%d,
          //                                                      xc=%d, adding
          //                                                      %d\n",
          //                                                              width_units,blob_width,x_centre,x_centre-prev_centre);
          //                                              }
          gap_stats->add(joined_box.left() - prev_right, 1);
          pitch_stats->add(x_centre - prev_centre, 1);
        }
        // Centre of the last pitch cell covered, used as the reference for
        // the next pitch sample.
        prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch);
        prev_right = joined_box.right();
        // The next sample is only valid if the following gap is narrower
        // than min_space and this joined blob was not rejected.
        prev_valid = blob_box.left() - joined_box.right() < min_space;
        prev_valid = prev_valid && width_units >= 0;
        joined_box = blob_box;
      }
    }
  } while (!blob_it.at_first());
  return gap_stats->get_total() >= 3;
}

◆ countof()

template<typename T , size_t N>

constexpr size_t tesseract::countof ( T const(&)[N] )

constexpr noexcept

Definition at line 34 of file serialis.h.

                                                  {
  // N is the array extent deduced from the reference-to-array parameter, so
  // the element count is available as a compile-time constant.
  return N;
}

◆ create_fx_win()

void tesseract::create_fx_win ( )

Definition at line 50 of file drawfx.cpp.

                     { // make features win
  // Allocates a new ScrollView and stores it in fx_win. Any previous value
  // of fx_win is overwritten here without being deleted.
  fx_win = new ScrollView(FXDEMOWIN, FXDEMOXPOS, FXDEMOYPOS, FXDEMOXSIZE, FXDEMOYSIZE,
                          WERDWIDTH * 2, BLN_MAX * 2, true);
}

◆ create_fxdebug_win()

void tesseract::create_fxdebug_win ( )

Definition at line 77 of file drawfx.cpp.

77 { // make gradients win

78}

◆ create_to_win()

ScrollView * tesseract::create_to_win ( ICOORD page_tr )

Definition at line 47 of file drawtord.cpp.

                                          {
  // Lazily create the textord debug window: the first call allocates it,
  // sized one pixel larger than page_tr with a canvas of exactly page_tr;
  // every later call hands back the same window regardless of page_tr.
  if (to_win == nullptr) {
    to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, page_tr.x() + 1,
                            page_tr.y() + 1, page_tr.x(), page_tr.y(), true);
  }
  return to_win;
}

◆ create_todebug_win()

void tesseract::create_todebug_win ( )

◆ CreateFeatureSpaceWindow()

TESS_API ScrollView * tesseract::CreateFeatureSpaceWindow	(	const char *	name,
		int	xpos,
		int	ypos
	)

Creates a window of the appropriate size for displaying elements in feature space.

Definition at line 1622 of file intproto.cpp.

                                                                           {
  // Returns a newly allocated ScrollView; nothing here retains the pointer.
  // The numeric arguments are fixed (520, 520, 260, 260) — presumably the
  // window size and canvas size; confirm against the ScrollView constructor.
  return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true);
}

◆ crotate_cblob()

C_BLOB * tesseract::crotate_cblob	(	C_BLOB *	blob,
		FCOORD	rotation
	)

Definition at line 614 of file blobbox.cpp.

  {
  // Build a rotated copy of each outline of the input blob, appended in the
  // same order, and return a newly allocated blob made from those copies.
  // The input blob's outline list is only read.
  C_OUTLINE_LIST rotated;              // receives the rotated outlines
  C_OUTLINE_IT src = blob->out_list(); // walks the input outlines
  C_OUTLINE_IT dst = &rotated;         // appends to the output list

  for (src.mark_cycle_pt(); !src.cycled_list(); src.forward()) {
    dst.add_after_then_move(new C_OUTLINE(src.data(), rotation));
  }
  return new C_BLOB(&rotated);
}

◆ CrownCompatible()

bool tesseract::CrownCompatible	(	const std::vector< RowScratchRegisters > *	rows,
		int	a,
		int	b,
		const ParagraphModel *	model
	)

Definition at line 1349 of file paragraphs.cpp.

                                                  {
  // Only the two crown models are accepted; anything else is reported and
  // treated as "not compatible".
  const bool right_crown = (model == kCrownRight);
  if (!right_crown && model != kCrownLeft) {
    tprintf("CrownCompatible() should only be called with crown models!\n");
    return false;
  }
  auto &first = (*rows)[a];
  auto &second = (*rows)[b];
  // The tolerance is derived from row a's average inter-word space.
  const auto tolerance = Epsilon(first.ri_->average_interword_space);
  // Compare indent + margin on the right side for kCrownRight, otherwise on
  // the left side.
  if (right_crown) {
    return NearlyEqual(first.rindent_ + first.rmargin_, second.rindent_ + second.rmargin_,
                       tolerance);
  }
  return NearlyEqual(first.lindent_ + first.lmargin_, second.lindent_ + second.lmargin_,
                     tolerance);
}

◆ DECLARE_INT_PARAM_FLAG()

TESS_COMMON_TRAINING_API tesseract::DECLARE_INT_PARAM_FLAG ( debug_level )

◆ DECLARE_STRING_PARAM_FLAG() [1/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( D )

◆ DECLARE_STRING_PARAM_FLAG() [2/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( F )

◆ DECLARE_STRING_PARAM_FLAG() [3/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( fontconfig_tmpdir )

◆ DECLARE_STRING_PARAM_FLAG() [4/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( fonts_dir )

◆ DECLARE_STRING_PARAM_FLAG() [5/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( O )

◆ DECLARE_STRING_PARAM_FLAG() [6/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( output_trainer )

◆ DECLARE_STRING_PARAM_FLAG() [7/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( test_ch )

◆ DECLARE_STRING_PARAM_FLAG() [8/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( U )

◆ DECLARE_STRING_PARAM_FLAG() [9/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( X )

◆ DefineFeature() [1/3]

MicroFeatureParams CharNormParams EndParamDesc EndParamDesc tesseract::DefineFeature	(	GeoFeatDesc	,
		3	,
		0	,
		kGeoFeatureType	,
		GeoFeatParams
	)

◆ DefineFeature() [2/3]

MicroFeatureParams CharNormParams EndParamDesc tesseract::DefineFeature	(	IntFeatDesc	,
		2	,
		1	,
		kIntFeatureType	,
		IntFeatParams
	)

◆ DefineFeature() [3/3]

EndParamDesc tesseract::DefineFeature	(	PicoFeatDesc	,
		2	,
		1	,
		"pf"	,
		PicoFeatParams
	)

◆ DefineParam() [1/6]

EndParamDesc tesseract::DefineParam	(	0	,
		0	,
		-0.25	,
		0.75
	)

◆ DefineParam() [2/6]

MicroFeatureParams tesseract::DefineParam	(	0	,
		0	,
		0.0	,
		1.0
	)

◆ DefineParam() [3/6]

MicroFeatureParams CharNormParams EndParamDesc tesseract::DefineParam	(	0	,
		0	,
		0.0	,
		255.0
	)

◆ DefineParam() [4/6]

tesseract::DefineParam	(	0	,
		1	,
		-0.5	,
		0.5
	)

◆ DefineParam() [5/6]

MicroFeatureParams tesseract::DefineParam	(	0	,
		1	,
		0.0	,
		1.0
	)

◆ DefineParam() [6/6]

EndParamDesc tesseract::DefineParam	(	1	,
		0	,
		0.0	,
		1.0
	)

◆ DegradeImage()

Image tesseract::DegradeImage	(	Image	input,
		int	exposure,
		TRand *	randomizer,
		float *	rotation
	)

Definition at line 89 of file degradeimage.cpp.

                                                                                  {
  // Produces a degraded 8-bit version of `input`: optional erosion, a block
  // convolution, an optional rotation, a second optional erosion, then a
  // per-pixel pass adding noise, a brightness ramp and an exposure offset.
  // `input` is destroyed along the way and the returned Image is a new one.
  // If `rotation` is non-null, the angle actually applied is written back
  // through it.
  Image pix = pixConvertTo8(input, false);
  input.destroy();
  input = pix;
  int width = pixGetWidth(input);
  int height = pixGetHeight(input);
 
  if (exposure >= 2) {
    // An erosion simulates the spreading darkening of a dark copy.
    // This is backwards to binary morphology,
    // see http://www.leptonica.com/grayscale-morphology.html
    pix = input;
    input = pixErodeGray(pix, 3, 3);
    pix.destroy();
  }
  // A convolution is essential to any mode as no scanner produces an
  // image as sharp as the electronic image.
  pix = pixBlockconv(input, 1, 1);
  input.destroy();
  // A small random rotation helps to make the edges jaggy in a realistic way.
  if (rotation != nullptr) {
    // A non-zero *rotation is used as given; otherwise a random angle is
    // drawn when a randomizer is available, else the angle stays at zero.
    float radians_clockwise = 0.0f;
    if (*rotation) {
      radians_clockwise = *rotation;
    } else if (randomizer != nullptr) {
      radians_clockwise = randomizer->SignedRand(kRotationRange);
    }
 
    input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
    // Report the applied angle to the caller so that boxes can be rotated
    // to match.
    *rotation = radians_clockwise;
    pix.destroy();
  } else {
    input = pix;
  }
 
  if (exposure >= 3 || exposure == 1) {
    // Erosion after the convolution is not as heavy as before, so it is
    // good for level 1 and in addition as a level 3.
    // This is backwards to binary morphology,
    // see http://www.leptonica.com/grayscale-morphology.html
    pix = input;
    input = pixErodeGray(pix, 3, 3);
    pix.destroy();
  }
  // The convolution really needed to be 2x2 to be realistic enough, but
  // we only have 3x3, so we have to bias the image darker or lose thin
  // strokes.
  int erosion_offset = 0;
  // For light and 0 exposure, there is no dilation, so compensate for the
  // convolution with a big darkening bias which is undone for lighter
  // exposures.
  if (exposure <= 0) {
    erosion_offset = -3 * kExposureFactor;
  }
  // Add in a general offset of the greyscales for the exposure level so
  // a threshold of 128 gives a reasonable binary result.
  erosion_offset -= exposure * kExposureFactor;
  // Add a gradual fade over the page and a small amount of salt and pepper
  // noise to simulate noise in the sensor/paper fibres and varying
  // illumination.
  // NOTE(review): width/height were read before the rotation step; this
  // loop assumes the rotated image keeps those dimensions — confirm.
  l_uint32 *data = pixGetData(input);
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      int pixel = GET_DATA_BYTE(data, x);
      if (randomizer != nullptr) {
        // Uniform noise in [-kSaltnPepper, +kSaltnPepper] when IntRand() is
        // non-negative.
        pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper;
      }
      if (height + width > kMinRampSize) {
        // Darkening ramp that grows with x and y; skipped on small images.
        pixel -= (2 * x + y) * 32 / (height + width);
      }
      pixel += erosion_offset;
      // Clamp to the 8-bit range before storing.
      if (pixel < 0) {
        pixel = 0;
      }
      if (pixel > 255) {
        pixel = 255;
      }
      SET_DATA_BYTE(data, x, pixel);
    }
    // Advance to the next image row (rows are pixGetWpl() words apart).
    data += pixGetWpl(input);
  }
  return input;
}

◆ delete_d()

LIST tesseract::delete_d	(	LIST	list,
		void *	key,
		int_compare	is_equal
	)

Definition at line 88 of file oldlist.cpp.

                                                          {
  // Rebuilds the list in place, keeping (in their original order) only the
  // nodes whose first_node() does not compare equal to key. Matching nodes
  // are removed with pop(). A null comparator falls back to is_same.
  if (is_equal == nullptr) {
    is_equal = is_same;
  }

  LIST head = NIL_LIST; // first surviving node, returned to the caller
  LIST tail = NIL_LIST; // most recently kept node

  while (list != NIL_LIST) {
    if ((*is_equal)(list->first_node(), key)) {
      list = pop(list);
      continue;
    }
    // Detach the node from the input and append it to the surviving chain.
    LIST kept = list;
    list = list->list_rest();
    if (tail == NIL_LIST) {
      head = kept;
    } else {
      set_rest(tail, kept);
    }
    set_rest(kept, NIL_LIST);
    tail = kept;
  }
  return (head);
}

◆ delete_non_dropout_rows()

void tesseract::delete_non_dropout_rows	(	TO_BLOCK *	block,
		float	gradient,
		FCOORD	rotation,
		int32_t	block_edge,
		bool	testing_on
	)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 612 of file makerow.cpp.

  {
  // Computes a per-scan-line occupation profile for the deskewed block,
  // converts it to drop-out distances, and deletes each row for which
  // find_best_dropout_row() returns true. Blobs of deleted rows, and finally
  // of all remaining rows, are moved into block->blobs.
  TBOX block_box; // deskewed block
  int32_t max_y;  // in block
  int32_t min_y;
  int32_t line_index; // of scan line
  int32_t line_count; // no of scan lines
  int32_t distance;   // to drop-out
  int32_t xleft;      // of block
  int32_t ybottom;    // of block
  TO_ROW *row;        // current row
  TO_ROW_IT row_it = block->get_rows();
  BLOBNBOX_IT blob_it = &block->blobs;
 
  if (row_it.empty()) {
    return; // empty block
  }
  block_box = deskew_block_coords(block, gradient);
  xleft = block->block->pdblk.bounding_box().left();
  ybottom = block->block->pdblk.bounding_box().bottom();
  min_y = block_box.bottom() - 1;
  max_y = block_box.top() + 1;
  // Widen [min_y, max_y] so every row intercept lies strictly inside it;
  // this keeps the deltas[line_index - min_y] lookup below in range.
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    line_index = static_cast<int32_t>(std::floor(row_it.data()->intercept()));
    if (line_index <= min_y) {
      min_y = line_index - 1;
    }
    if (line_index >= max_y) {
      max_y = line_index + 1;
    }
  }
  line_count = max_y - min_y + 1;
  if (line_count <= 0) {
    return; // empty block
  }
  // change in occupation
  std::vector<int32_t> deltas(line_count);
  // of pixel coords
  std::vector<int32_t> occupation(line_count);
 
  compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]);
  compute_occupation_threshold(
      static_cast<int32_t>(ceil(block->line_spacing * (tesseract::CCStruct::kDescenderFraction +
                                                       tesseract::CCStruct::kAscenderFraction))),
      static_cast<int32_t>(ceil(block->line_spacing * (tesseract::CCStruct::kXHeightFraction +
                                                       tesseract::CCStruct::kAscenderFraction))),
      max_y - min_y + 1, &occupation[0], &deltas[0]);
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
  }
#endif
  compute_dropout_distances(&occupation[0], &deltas[0], line_count);
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row = row_it.data();
    line_index = static_cast<int32_t>(std::floor(row->intercept()));
    // After compute_dropout_distances(), deltas[] is read as the distance
    // from each scan line to a drop-out.
    distance = deltas[line_index - min_y];
    if (find_best_dropout_row(row, distance, block->line_spacing / 2, line_index, &row_it,
                              testing_on)) {
#ifndef GRAPHICS_DISABLED
      if (testing_on) {
        plot_parallel_row(row, gradient, block_edge, ScrollView::WHITE, rotation);
      }
#endif
      // Hand the row's blobs back to the block before deleting the row.
      blob_it.add_list_after(row_it.data()->blob_list());
      delete row_it.extract(); // too far away
    }
  }
  // Move the blobs of the surviving rows into block->blobs as well.
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    blob_it.add_list_after(row_it.data()->blob_list());
  }
}

◆ DeleteObject()

template<typename T >

void tesseract::DeleteObject ( T * object )

Definition at line 156 of file tablefind.cpp.

                             {
  // Plain `delete` of the pointer; deleting a null pointer is a no-op.
  delete object;
}

◆ DeSerialize() [1/2]

template<typename T >

bool tesseract::DeSerialize	(	bool	swap,
		FILE *	fp,
		std::vector< T > &	data
	)

Definition at line 205 of file helpers.h.

                                                          {
  uint32_t size;
  if (fread(&size, sizeof(size), 1, fp) != 1) {
    return false;
  }
  if (swap) {
    Reverse32(&size);
  }
  // Arbitrarily limit the number of elements to protect against bad data.
  assert(size <= UINT16_MAX);
  if (size > UINT16_MAX) {
    return false;
  }
  // TODO: optimize.
  data.resize(size);
  if (size > 0) {
    if (fread(&data[0], sizeof(T), size, fp) != size) {
      return false;
    }
    if (swap) {
      for (uint32_t i = 0; i < size; ++i) {
        ReverseN(&data[i], sizeof(T));
      }
    }
  }
  return true;
}

◆ DeSerialize() [2/2]

template<typename T >

bool tesseract::DeSerialize	(	FILE *	fp,
		T *	data,
		size_t	n = `1`
	)

Definition at line 49 of file serialis.h.

                                                  {
  // Reads n raw objects of sizeof(T) bytes each straight into `data`;
  // succeeds only if fread() delivered all n items. No byte swapping is done.
  return fread(data, sizeof(T), n, fp) == n;
}

◆ deskew_block_coords()

TBOX tesseract::deskew_block_coords	(	TO_BLOCK *	block,
		float	gradient
	)

Definition at line 765 of file makerow.cpp.

  {
  // Returns the union of the bounding boxes of every blob in every row of
  // the block, after rotating each box by the unit vector
  // (1, -gradient) / sqrt(gradient^2 + 1).
  const float norm = std::sqrt(gradient * gradient + 1);
  const FCOORD deskew(1 / norm, -gradient / norm);

  TBOX bounds;       // accumulated deskewed bounds
  BLOBNBOX_IT blobs; // re-pointed at each row's blob list
  TO_ROW_IT rows = block->get_rows();

  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
    blobs.set_to_list(rows.data()->blob_list());
    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
      // Rotate a copy so the blob's own box is left unchanged.
      TBOX box = blobs.data()->bounding_box();
      box.rotate(deskew);
      bounds += box;
    }
  }
  return bounds;
}

◆ destroy()

TESS_API LIST tesseract::destroy ( LIST list )

Definition at line 121 of file oldlist.cpp.

                        {
  // Deletes every list node, front to back. The successor is read before
  // each node is deleted. Always returns NIL_LIST.
  while (list != NIL_LIST) {
    LIST doomed = list;
    list = list->list_rest();
    delete doomed;
  }
  return (NIL_LIST);
}

◆ destroy_nodes()

void tesseract::destroy_nodes	(	LIST	list,
		void_dest	destructor
	)

Definition at line 137 of file oldlist.cpp.

                                                    {
  // Calls `destructor` on every non-null payload, removing each list node
  // with pop() as it goes. A null destructor is a fatal error.
  ASSERT_HOST(destructor != nullptr);

  for (; list != NIL_LIST; list = pop(list)) {
    if (list->first_node() == nullptr) {
      continue; // nothing to destroy for this node
    }
    (*destructor)(list->first_node());
  }
}

◆ DetectParagraphs() [1/2]

TESS_API void tesseract::DetectParagraphs	(	int	debug_level,
		bool	after_text_recognition,
		const MutableIterator *	block_start,
		std::vector< ParagraphModel * > *	models
	)

Definition at line 2562 of file paragraphs.cpp.

                                                                                               {
  // Clear out any preconceived notions.
  if (block_start->Empty(RIL_TEXTLINE)) {
    return;
  }
  BLOCK *block = block_start->PageResIt()->block()->block;
  block->para_list()->clear();
  bool is_image_block = block->pdblk.poly_block() && !block->pdblk.poly_block()->IsText();
 
  // Convert the Tesseract structures to RowInfos
  // for the paragraph detection algorithm.
  MutableIterator row(*block_start);
  if (row.Empty(RIL_TEXTLINE)) {
    return; // end of input already.
  }
 
  std::vector<RowInfo> row_infos;
  do {
    if (!row.PageResIt()->row()) {
      continue; // empty row.
    }
    row.PageResIt()->row()->row->set_para(nullptr);
    row_infos.emplace_back();
    RowInfo &ri = row_infos.back();
    InitializeRowInfo(after_text_recognition, row, &ri);
  } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) && row.Next(RIL_TEXTLINE));
 
  // If we're called before text recognition, we might not have
  // tight block bounding boxes, so trim by the minimum on each side.
  if (!row_infos.empty()) {
    int min_lmargin = row_infos[0].pix_ldistance;
    int min_rmargin = row_infos[0].pix_rdistance;
    for (unsigned i = 1; i < row_infos.size(); i++) {
      if (row_infos[i].pix_ldistance < min_lmargin) {
        min_lmargin = row_infos[i].pix_ldistance;
      }
      if (row_infos[i].pix_rdistance < min_rmargin) {
        min_rmargin = row_infos[i].pix_rdistance;
      }
    }
    if (min_lmargin > 0 || min_rmargin > 0) {
      for (auto &row_info : row_infos) {
        row_info.pix_ldistance -= min_lmargin;
        row_info.pix_rdistance -= min_rmargin;
      }
    }
  }
 
  // Run the paragraph detection algorithm.
  std::vector<PARA *> row_owners;
  std::vector<PARA *> the_paragraphs;
  if (!is_image_block) {
    DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(), models);
  } else {
    row_owners.resize(row_infos.size());
    CanonicalizeDetectionResults(&row_owners, block->para_list());
  }
 
  // Now stitch in the row_owners into the rows.
  row = *block_start;
  for (auto &row_owner : row_owners) {
    while (!row.PageResIt()->row()) {
      row.Next(RIL_TEXTLINE);
    }
    row.PageResIt()->row()->row->set_para(row_owner);
    row.Next(RIL_TEXTLINE);
  }
}

◆ DetectParagraphs() [2/2]

TESS_API void tesseract::DetectParagraphs	(	int	debug_level,
		std::vector< RowInfo > *	row_infos,
		std::vector< PARA * > *	row_owners,
		PARA_LIST *	paragraphs,
		std::vector< ParagraphModel * > *	models
	)

Definition at line 2318 of file paragraphs.cpp.

                                                             {
  // Multi-pass paragraph detection over `row_infos`. On return, row_owners
  // has one entry per row and `paragraphs` has been passed through
  // CanonicalizeDetectionResults(). `models` is handed to the
  // ParagraphTheory used by every pass.
  ParagraphTheory theory(models);
 
  // Initialize row_owners to be a bunch of nullptr pointers.
  row_owners->clear();
  row_owners->resize(row_infos->size());
 
  // Set up row scratch registers for the main algorithm.
  std::vector<RowScratchRegisters> rows(row_infos->size());
  for (unsigned i = 0; i < row_infos->size(); i++) {
    rows[i].Init((*row_infos)[i]);
  }
 
  // Pass 1:
  //   Detect sequences of lines that all contain leader dots (.....)
  //   These are likely Tables of Contents.  If there are three text lines in
  //   a row with leader dots, it's pretty safe to say the middle one should
  //   be a paragraph of its own.
  SeparateSimpleLeaderLines(&rows, 0, rows.size(), &theory);
 
  DebugDump(debug_level > 1, "End of Pass 1", theory, rows);
 
  std::vector<Interval> leftovers;
  LeftoverSegments(rows, &leftovers, 0, rows.size());
  for (auto &leftover : leftovers) {
    // Pass 2a:
    //   Find any strongly evidenced start-of-paragraph lines.  If they're
    //   followed by two lines that look like body lines, make a paragraph
    //   model for that and see if that model applies throughout the text
    //   (that is, "smear" it).
    StrongEvidenceClassify(debug_level, &rows, leftover.begin, leftover.end, &theory);
 
    // Pass 2b:
    //   If we had any luck in pass 2a, we got part of the page and didn't
    //   know how to classify a few runs of rows. Take the segments that
    //   didn't find a model and reprocess them individually.
    std::vector<Interval> leftovers2;
    LeftoverSegments(rows, &leftovers2, leftover.begin, leftover.end);
    // Pass 2a counts as useful unless the only leftover is the single
    // interval spanning every row, i.e. [0, rows.size()).
    bool pass2a_was_useful =
        leftovers2.size() > 1 ||
        (leftovers2.size() == 1 && (leftovers2[0].begin != 0 || static_cast<size_t>(leftovers2[0].end) != rows.size()));
    if (pass2a_was_useful) {
      for (auto &leftover2 : leftovers2) {
        StrongEvidenceClassify(debug_level, &rows, leftover2.begin, leftover2.end, &theory);
      }
    }
  }
 
  DebugDump(debug_level > 1, "End of Pass 2", theory, rows);
 
  // Pass 3:
  //   These are the dregs for which we didn't have enough strong textual
  //   and geometric clues to form matching models for.  Let's see if
  //   the geometric clues are simple enough that we could just use those.
  LeftoverSegments(rows, &leftovers, 0, rows.size());
  for (auto &leftover : leftovers) {
    GeometricClassify(debug_level, &rows, leftover.begin, leftover.end, &theory);
  }
 
  // Undo any flush models for which there's little evidence.
  DowngradeWeakestToCrowns(debug_level, &theory, &rows);
 
  DebugDump(debug_level > 1, "End of Pass 3", theory, rows);
 
  // Pass 4:
  //   Take everything that's still not marked up well and clear all markings.
  LeftoverSegments(rows, &leftovers, 0, rows.size());
  for (auto &leftover : leftovers) {
    for (int j = leftover.begin; j < leftover.end; j++) {
      rows[j].SetUnknown();
    }
  }
 
  DebugDump(debug_level > 1, "End of Pass 4", theory, rows);
 
  // Convert all of the unique hypothesis runs to PARAs.
  ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners, &theory);
 
  // Unlike the per-pass dumps, this one is emitted at any debug_level above 0.
  DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows);
 
  // Finally, clean up any dangling nullptr row paragraph parents.
  CanonicalizeDetectionResults(row_owners, paragraphs);
}

◆ determine_newline_type()

char tesseract::determine_newline_type	(	WERD *	word,
		BLOCK *	block,
		WERD *	next_word,
		BLOCK *	next_block
	)

test line ends

Parameters

word	word to do
block	current block
next_word	next word
next_block	block of next word

Definition at line 207 of file output.cpp.

  {
  // Chooses the line-break code that follows `word`:
  //  - '\0'          when the word is not flagged W_EOL,
  //  - CTRL_NEWLINE  when there is no next word/block or the block changes,
  //  - CTRL_HARDLINE when the next word has a positive space() (tabbed), or
  //                  when the room left on this line (minus the block's
  //                  space) is wider than the next word,
  //  - CTRL_NEWLINE  otherwise.
  TBOX word_box;  // bounding
  TBOX next_box;  // next word
  TBOX block_box; // block bounding

  if (!word->flag(W_EOL)) {
    return '\0'; // not end of line
  }
  if (next_word == nullptr || next_block == nullptr || block != next_block) {
    return CTRL_NEWLINE;
  }
  if (next_word->space() > 0) {
    return CTRL_HARDLINE; // it is tabbed
  }
  word_box = word->bounding_box();
  next_box = next_word->bounding_box();
  block_box = block->pdblk.bounding_box();
  // Keep the gap and width in 32 bits: the subtraction below is already
  // carried out in int32_t, so storing the results in int16_t would only
  // narrow them and could wrap for extreme coordinates.
  int32_t end_gap = block_box.right() - word_box.right(); // gap to eol
  end_gap -= static_cast<int32_t>(block->space());
  const int32_t width = next_box.right() - next_box.left(); // of next word
  return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;
}

◆ DirOtherWay()

BlobNeighbourDir tesseract::DirOtherWay ( BlobNeighbourDir dir )

inline

Definition at line 102 of file blobbox.h.

                                                          {
  // XOR with 2 toggles bit 1 of the enum's integer value. Presumably each
  // direction and its opposite differ by exactly 2 in BlobNeighbourDir —
  // verify against the enum definition.
  return static_cast<BlobNeighbourDir>(dir ^ 2);
}

◆ display_blob()

void tesseract::display_blob	(	TBLOB *	blob,
		ScrollView::Color	color
	)

Definition at line 54 of file render.cpp.

                                                      {
  /* Size of drawable */
  if (blob_window == nullptr) {
    blob_window = new ScrollView("Blobs", 520, 10, 500, 256, 2000, 256, true);
  } else {
    blob_window->Clear();
  }
 
  render_blob(blob_window, blob, color);
}

◆ display_edgepts()

void tesseract::display_edgepts ( LIST outlines )

Definition at line 47 of file plotedges.cpp.

                                    {
  /* Create the shared "Edges" window on first use, otherwise clear it */
  if (edge_window == nullptr) {
    edge_window = new ScrollView("Edges", 750, 150, 400, 128, 800, 256, true);
  } else {
    edge_window->Clear();
  }
  /* Local alias for the window that receives the drawing */
  auto window = edge_window;
  /* Render every outline in the list in white; nothing is freed here.
     NOTE(review): iterate() is presumably a list-walking macro that advances
     `outlines` — confirm in its definition. */
  iterate(outlines) {
    render_edgepts(window, reinterpret_cast<EDGEPT *>(outlines->first_node()), ScrollView::WHITE);
  }
}

◆ DisplayIntFeature()

void tesseract::DisplayIntFeature	(	const INT_FEATURE_STRUCT *	Feature,
		float	Evidence
	)

This routine renders the specified feature into a global display list.

Globals:

FeatureShapes global display list for features
Parameters

Feature pico-feature to be displayed

Evidence best evidence for this feature (0-1)

Definition at line 543 of file intproto.cpp.

                                                                          {
  // The color is derived from the evidence value. The feature is always
  // drawn into IntMatchWindow, and additionally into FeatureDisplayWindow
  // when that window exists.
  ScrollView::Color color = GetMatchColorFor(Evidence);
  RenderIntFeature(IntMatchWindow, Feature, color);
  if (FeatureDisplayWindow) {
    RenderIntFeature(FeatureDisplayWindow, Feature, color);
  }
} /* DisplayIntFeature */

◆ DisplayIntProto()

void tesseract::DisplayIntProto	(	INT_CLASS_STRUCT *	Class,
		PROTO_ID	ProtoId,
		float	Evidence
	)

This routine renders the specified proto into a global display list.

Globals:

ProtoShapes global display list for protos
Parameters

Class class to take proto from

ProtoId id of proto in Class to be displayed

Evidence total evidence for proto (0-1)

Definition at line 561 of file intproto.cpp.

                                                                                {
  // The color is derived from the evidence value. The proto is always drawn
  // into IntMatchWindow, and additionally into ProtoDisplayWindow when that
  // window exists.
  ScrollView::Color color = GetMatchColorFor(Evidence);
  RenderIntProto(IntMatchWindow, Class, ProtoId, color);
  if (ProtoDisplayWindow) {
    RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color);
  }
} /* DisplayIntProto */

◆ DistanceSquared()

float tesseract::DistanceSquared	(	int	k,
		PARAM_DESC *	dim,
		float	p1[],
		float	p2[]
	)

Returns the Euclidean distance squared between p1 and p2 for all essential dimensions.

Parameters

k	keys are in k-space
dim	dimension descriptions (essential, circular, etc)
p1,p2	two different points in K-D space

Definition at line 378 of file kdtree.cpp.

                                                                      {
  // Sum of squared per-dimension differences over the first k dimensions,
  // skipping those marked NonEssential. For a Circular dimension the
  // shorter of the direct distance and the wrap-around distance
  // (Max - Min - direct) is used.
  float sum_of_squares = 0;

  for (int i = 0; i < k; i++) {
    const PARAM_DESC &desc = dim[i];
    if (desc.NonEssential) {
      continue;
    }

    float delta = p1[i] - p2[i];

    /* if this dimension is circular - check wraparound distance */
    if (desc.Circular) {
      delta = Magnitude(delta);
      float wrapped = desc.Max - desc.Min - delta;
      delta = std::min(delta, wrapped);
    }

    sum_of_squares += delta * delta;
  }
  return sum_of_squares;
}

◆ divide_blobs()

void tesseract::divide_blobs	(	TBLOB *	blob,
		TBLOB *	other_blob,
		bool	italic_blob,
		const TPOINT &	location
	)

Definition at line 970 of file blobs.cpp.

                                                                                            {
  // Splits blob's outline chain in two. Outlines whose bounding-box midpoint
  // has a smaller cross product with the "vertical" direction than `location`
  // stay in blob; all others are moved to other_blob. Relative order is
  // preserved within each chain.
  // NOTE(review): other_blob->outlines is only assigned when at least one
  // outline goes to the right; any outlines it already held are otherwise
  // left untouched, and are dropped from the chain when it is assigned.
  TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright;
  TESSLINE *outline1 = nullptr; // tail of the chain kept in blob
  TESSLINE *outline2 = nullptr; // tail of the chain given to other_blob
 
  TESSLINE *outline = blob->outlines;
  blob->outlines = nullptr;
  int location_prod = location.cross(vertical);
 
  while (outline != nullptr) {
    TPOINT mid_pt((outline->topleft.x + outline->botright.x) / 2,
                  (outline->topleft.y + outline->botright.y) / 2);
    int mid_prod = mid_pt.cross(vertical);
    if (mid_prod < location_prod) {
      // Outline is in left blob.
      if (outline1) {
        outline1->next = outline;
      } else {
        blob->outlines = outline;
      }
      outline1 = outline;
    } else {
      // Outline is in right blob.
      if (outline2) {
        outline2->next = outline;
      } else {
        other_blob->outlines = outline;
      }
      outline2 = outline;
    }
    // The node's own next pointer still refers to the original chain here;
    // it is only overwritten when a later node is appended after it, or by
    // the terminators below.
    outline = outline->next;
  }
 
  // Terminate both chains so neither runs on into the other.
  if (outline1) {
    outline1->next = nullptr;
  }
  if (outline2) {
    outline2->next = nullptr;
  }
}

◆ divisible_blob()

bool tesseract::divisible_blob	(	TBLOB *	blob,
		bool	italic_blob,
		TPOINT *	location
	)

Definition at line 923 of file blobs.cpp.

                                                                     {
  // Looks for the pair of non-hole outlines that is best separated along
  // the direction perpendicular to "vertical". Separation is scored as the
  // midpoint gap minus a quarter of the pair's overlap. *location is set to
  // the average of the two midpoints each time a better pair is found.
  // Returns true when the best score exceeds vertical.y.
  if (blob->outlines == nullptr || blob->outlines->next == nullptr) {
    return false; // Need at least 2 outlines for it to be possible.
  }
  TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright;
  int best_gap = 0;
  for (TESSLINE *first = blob->outlines; first != nullptr; first = first->next) {
    if (first->is_hole) {
      continue; // Holes do not count as separable.
    }
    TPOINT first_mid((first->topleft.x + first->botright.x) / 2,
                     (first->topleft.y + first->botright.y) / 2);
    int first_mid_prod = first_mid.cross(vertical);
    int first_lo, first_hi;
    first->MinMaxCrossProduct(vertical, &first_lo, &first_hi);
    for (TESSLINE *second = first->next; second != nullptr; second = second->next) {
      if (second->is_hole) {
        continue; // Holes do not count as separable.
      }
      TPOINT second_mid((second->topleft.x + second->botright.x) / 2,
                        (second->topleft.y + second->botright.y) / 2);
      int second_mid_prod = second_mid.cross(vertical);
      int second_lo, second_hi;
      second->MinMaxCrossProduct(vertical, &second_lo, &second_hi);
      int overlap = std::min(first_hi, second_hi) - std::max(first_lo, second_lo);
      int separation = abs(second_mid_prod - first_mid_prod) - overlap / 4;
      if (separation <= best_gap) {
        continue; // not better than the best pair so far
      }
      best_gap = separation;
      *location = first_mid;
      *location += second_mid;
      *location /= 2;
    }
  }
  // Use the y component of the vertical vector as an approximation to its
  // length.
  return best_gap > vertical.y;
}

◆ DivRounded()

int tesseract::DivRounded	(	int	a,
		int	b
	)

inline

Definition at line 162 of file helpers.h.

                                    {
  // Reduce a negative divisor to the positive case: flipping the sign of the
  // divisor flips the sign of the rounded quotient.
  if (b < 0) {
    return -DivRounded(a, -b);
  }
  // Bias the numerator by half the divisor, away from zero, so that the
  // truncating integer division rounds instead.
  const int half = b / 2;
  if (a >= 0) {
    return (a + half) / b;
  }
  return (a - half) / b;
}

◆ DoFill()

void tesseract::DoFill	(	FILL_SPEC *	FillSpec,
		CLASS_PRUNER_STRUCT *	Pruner,
		uint32_t	ClassMask,
		uint32_t	ClassCount,
		uint32_t	WordIndex
	)

This routine fills in the section of a class pruner corresponding to a single x value for a single proto of a class.

Parameters

FillSpec	specifies which bits to fill in pruner
Pruner	class pruner to be filled
ClassMask	indicates which bits to change in each word
ClassCount	indicates what to change bits to
WordIndex	indicates which word to change

Definition at line 1021 of file intproto.cpp.

                                                     {
  // Clamp a local copy of X into [0, NUM_CP_BUCKETS - 1]; FillSpec->X itself
  // is left untouched.
  int x = FillSpec->X;
  if (x < 0) {
    x = 0;
  }
  if (x >= NUM_CP_BUCKETS) {
    x = NUM_CP_BUCKETS - 1;
  }

  // The Y limits are clamped in place, so the caller's FillSpec is modified.
  if (FillSpec->YStart < 0) {
    FillSpec->YStart = 0;
  }
  if (FillSpec->YEnd >= NUM_CP_BUCKETS) {
    FillSpec->YEnd = NUM_CP_BUCKETS - 1;
  }

  for (int y = FillSpec->YStart; y <= FillSpec->YEnd; y++) {
    // Angles are walked circularly from AngleStart up to and including
    // AngleEnd, so the range may wrap around.
    int angle = FillSpec->AngleStart;
    while (true) {
      uint32_t word = Pruner->p[x][y][angle][WordIndex];
      // Only ever raise the masked field, never lower it.
      if (ClassCount > (word & ClassMask)) {
        word &= ~ClassMask;
        word |= ClassCount;
        Pruner->p[x][y][angle][WordIndex] = word;
      }
      if (angle == FillSpec->AngleEnd) {
        break;
      }
      CircularIncrement(angle, NUM_CP_BUCKETS);
    }
  }
} /* DoFill */

◆ DominatesInMerge()

bool tesseract::DominatesInMerge	(	BlobTextFlowType	type1,
		BlobTextFlowType	type2
	)

inline

Definition at line 125 of file blobbox.h.

                                                                             {
  // A LEADER never dominates. type1 is tested first, so LEADER against
  // LEADER also yields false.
  if (type1 == BTFT_LEADER) {
    return false;
  }
  // Any non-LEADER beats a LEADER; otherwise the enum ordering decides, with
  // ties going to type1.
  return type2 == BTFT_LEADER || type1 >= type2;
}

◆ dont_allow_1Il()

void tesseract::dont_allow_1Il ( WERD_RES * word )

◆ DotProductAVX()

TFloat tesseract::DotProductAVX	(	const TFloat *	u,
		const TFloat *	v,
		int	n
	)

◆ DotProductAVX512F()

TFloat tesseract::DotProductAVX512F	(	const TFloat *	u,
		const TFloat *	v,
		int	n
	)

◆ DotProductFMA()

TFloat tesseract::DotProductFMA	(	const TFloat *	u,
		const TFloat *	v,
		int	n
	)

◆ DotProductNative()

TFloat tesseract::DotProductNative	(	const TFloat *	u,
		const TFloat *	v,
		int	n
	)

Definition at line 22 of file dotproduct.cpp.

                                                                 {
  // Plain scalar accumulation of u[i] * v[i] over the first n elements; the
  // loop is offered to the compiler for SIMD reduction when OpenMP is enabled.
  TFloat sum = 0;
#if defined(OPENMP_SIMD) || defined(_OPENMP)
#pragma omp simd reduction(+:sum)
#endif
  for (int i = 0; i < n; i++) {
    sum += u[i] * v[i];
  }
  return sum;
}

◆ DotProductNEON()

TFloat tesseract::DotProductNEON	(	const TFloat *	u,
		const TFloat *	v,
		int	n
	)

◆ DotProductSSE()

TFloat tesseract::DotProductSSE	(	const TFloat *	u,
		const TFloat *	v,
		int	n
	)

◆ double_VAR_H() [1/55]

tesseract::double_VAR_H ( classify_max_slope )

◆ double_VAR_H() [2/55]

tesseract::double_VAR_H ( classify_min_slope )

◆ double_VAR_H() [3/55]

tesseract::double_VAR_H ( classify_norm_adj_curl )

◆ double_VAR_H() [4/55]

tesseract::double_VAR_H ( classify_norm_adj_midpoint )

◆ double_VAR_H() [5/55]

tesseract::double_VAR_H ( classify_pico_feature_length )

◆ double_VAR_H() [6/55]

tesseract::double_VAR_H ( gapmap_big_gaps )

◆ double_VAR_H() [7/55]

tesseract::double_VAR_H ( pitsync_joined_edge )

◆ double_VAR_H() [8/55]

tesseract::double_VAR_H ( pitsync_offset_freecut_fraction )

◆ double_VAR_H() [9/55]

tesseract::double_VAR_H ( textord_ascheight_mode_fraction )

◆ double_VAR_H() [10/55]

tesseract::double_VAR_H ( textord_ascx_ratio_max )

◆ double_VAR_H() [11/55]

tesseract::double_VAR_H ( textord_ascx_ratio_min )

◆ double_VAR_H() [12/55]

tesseract::double_VAR_H ( textord_balance_factor )

◆ double_VAR_H() [13/55]

tesseract::double_VAR_H ( textord_chop_width )

◆ double_VAR_H() [14/55]

tesseract::double_VAR_H ( textord_descx_ratio_max )

◆ double_VAR_H() [15/55]

tesseract::double_VAR_H ( textord_descx_ratio_min )

◆ double_VAR_H() [16/55]

tesseract::double_VAR_H ( textord_excess_blobsize )

◆ double_VAR_H() [17/55]

tesseract::double_VAR_H ( textord_fpiqr_ratio )

◆ double_VAR_H() [18/55]

tesseract::double_VAR_H ( textord_linespace_iqrlimit )

◆ double_VAR_H() [19/55]

tesseract::double_VAR_H ( textord_max_pitch_iqr )

◆ double_VAR_H() [20/55]

tesseract::double_VAR_H ( textord_min_blob_height_fraction )

◆ double_VAR_H() [21/55]

tesseract::double_VAR_H ( textord_min_linesize )

◆ double_VAR_H() [22/55]

tesseract::double_VAR_H ( textord_minxh )

◆ double_VAR_H() [23/55]

tesseract::double_VAR_H ( textord_occupancy_threshold )

◆ double_VAR_H() [24/55]

tesseract::double_VAR_H ( textord_pitch_rowsimilarity )

◆ double_VAR_H() [25/55]

tesseract::double_VAR_H ( textord_projection_scale )

◆ double_VAR_H() [26/55]

tesseract::double_VAR_H ( textord_skew_ile )

◆ double_VAR_H() [27/55]

tesseract::double_VAR_H ( textord_skew_lag )

◆ double_VAR_H() [28/55]

tesseract::double_VAR_H ( textord_spacesize_ratioprop )

◆ double_VAR_H() [29/55]

tesseract::double_VAR_H ( textord_spline_shift_fraction )

◆ double_VAR_H() [30/55]

tesseract::double_VAR_H ( textord_tabvector_vertical_box_ratio )

◆ double_VAR_H() [31/55]

tesseract::double_VAR_H ( textord_tabvector_vertical_gap_fraction )

◆ double_VAR_H() [32/55]

tesseract::double_VAR_H ( textord_underline_offset )

◆ double_VAR_H() [33/55]

tesseract::double_VAR_H ( textord_underline_threshold )

◆ double_VAR_H() [34/55]

tesseract::double_VAR_H ( textord_underline_width )

◆ double_VAR_H() [35/55]

tesseract::double_VAR_H ( textord_width_limit )

◆ double_VAR_H() [36/55]

tesseract::double_VAR_H ( textord_words_def_fixed )

◆ double_VAR_H() [37/55]

tesseract::double_VAR_H ( textord_words_def_prop )

◆ double_VAR_H() [38/55]

tesseract::double_VAR_H ( textord_words_default_maxspace )

◆ double_VAR_H() [39/55]

tesseract::double_VAR_H ( textord_words_default_minspace )

◆ double_VAR_H() [40/55]

tesseract::double_VAR_H ( textord_words_default_nonspace )

◆ double_VAR_H() [41/55]

tesseract::double_VAR_H ( textord_words_definite_spread )

◆ double_VAR_H() [42/55]

tesseract::double_VAR_H ( textord_words_initial_lower )

◆ double_VAR_H() [43/55]

tesseract::double_VAR_H ( textord_words_initial_upper )

◆ double_VAR_H() [44/55]

tesseract::double_VAR_H ( textord_words_maxspace )

◆ double_VAR_H() [45/55]

tesseract::double_VAR_H ( textord_words_min_minspace )

◆ double_VAR_H() [46/55]

tesseract::double_VAR_H ( textord_words_minlarge )

◆ double_VAR_H() [47/55]

tesseract::double_VAR_H ( textord_words_pitchsd_threshold )

◆ double_VAR_H() [48/55]

tesseract::double_VAR_H ( textord_wordstats_smooth_factor )

◆ double_VAR_H() [49/55]

tesseract::double_VAR_H ( textord_xheight_error_margin )

◆ double_VAR_H() [50/55]

tesseract::double_VAR_H ( textord_xheight_mode_fraction )

◆ double_VAR_H() [51/55]

tesseract::double_VAR_H ( words_default_fixed_limit )

◆ double_VAR_H() [52/55]

tesseract::double_VAR_H ( words_default_fixed_space )

◆ double_VAR_H() [53/55]

tesseract::double_VAR_H ( words_default_prop_nonspace )

◆ double_VAR_H() [54/55]

tesseract::double_VAR_H ( words_initial_lower )

◆ double_VAR_H() [55/55]

tesseract::double_VAR_H ( words_initial_upper )

◆ draw_blob_edges()

void tesseract::draw_blob_edges ( TBLOB * blob )

Definition at line 67 of file plotedges.cpp.

                                  {
  // Nothing is drawn unless split display has been switched on.
  if (!wordrec_display_splits) {
    return;
  }
  // Collect the edge loop of every outline of the blob, display them all,
  // then release the temporary list.
  LIST loops = NIL_LIST;
  TESSLINE *outline = blob->outlines;
  while (outline != nullptr) {
    loops = push(loops, outline->loop);
    outline = outline->next;
  }
  display_edgepts(loops);
  destroy(loops);
}

◆ draw_meanlines()

void tesseract::draw_meanlines	(	TO_BLOCK *	block,
		float	gradient,
		int32_t	left,
		ScrollView::Color	colour,
		FCOORD	rotation
	)

Definition at line 203 of file drawtord.cpp.

  {
  TO_ROW_IT row_it = block->get_rows(); // rows of the block
  BLOBNBOX_IT blob_it;                  // blobs of the current row
  to_win->Pen(colour);
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    TO_ROW *row = row_it.data();
    // The line ends at the right edge of the last blob in the row.
    blob_it.set_to_list(row->blob_list());
    blob_it.move_to_last();
    float right = blob_it.data()->bounding_box().right();

    // Start point: the left edge, raised by the row's xheight above its
    // parallel line, then rotated for display.
    FCOORD line_start(static_cast<float>(left),
                      gradient * left + row->parallel_c() + row->xheight);
    line_start.rotate(rotation);
    to_win->SetCursor(line_start.x(), line_start.y());

    // End point: the same construction at the right edge.
    FCOORD line_end(right, gradient * right + row->parallel_c() + row->xheight);
    line_end.rotate(rotation);
    to_win->DrawTo(line_end.x(), line_end.y());
  }
}

◆ draw_occupation()

void tesseract::draw_occupation	(	int32_t	xleft,
		int32_t	ybottom,
		int32_t	min_y,
		int32_t	max_y,
		int32_t	occupation[],
		int32_t	thresholds[]
	)

Definition at line 161 of file drawtord.cpp.

  {
  const auto x_origin = static_cast<float>(xleft); // float version of xleft

  // First pass: the occupation histogram, drawn in BLUE where the occupation
  // is below the threshold for that line and in WHITE elsewhere.
  ScrollView::Color pen = ScrollView::WHITE;
  to_win->Pen(pen);
  to_win->SetCursor(x_origin, static_cast<float>(ybottom));
  for (int32_t y = min_y; y <= max_y; y++) {
    const int32_t slot = y - min_y; // both arrays are indexed relative to min_y
    const ScrollView::Color wanted =
        occupation[slot] < thresholds[slot] ? ScrollView::BLUE : ScrollView::WHITE;
    // Only touch the pen when the colour actually changes.
    if (pen != wanted) {
      pen = wanted;
      to_win->Pen(pen);
    }
    to_win->DrawTo(x_origin + occupation[slot] / 10.0, static_cast<float>(y));
  }

  // Second pass: the threshold curve, drawn in STEEL_BLUE.
  pen = ScrollView::STEEL_BLUE;
  to_win->Pen(pen);
  to_win->SetCursor(x_origin, static_cast<float>(ybottom));
  for (int32_t y = min_y; y <= max_y; y++) {
    to_win->DrawTo(x_origin + thresholds[y - min_y] / 10.0, static_cast<float>(y));
  }
}

◆ EMPTY_LIST()

constexpr ERRCODE tesseract::EMPTY_LIST ( "List is empty" )

constexpr

◆ EMPTY_LLSQ()

constexpr ERRCODE tesseract::EMPTY_LLSQ ( "Can't delete from an empty LLSQ" )

constexpr

◆ EqualIgnoringCaseAndTerminalPunct()

bool tesseract::EqualIgnoringCaseAndTerminalPunct	(	const WERD_CHOICE &	word1,
		const WERD_CHOICE &	word2
	)

Definition at line 773 of file ratngs.cpp.

                                                                                           {
  // Words that do not share the same unicharset object never compare equal.
  const UNICHARSET *charset = word1.unicharset();
  if (charset != word2.unicharset()) {
    return false;
  }
  // Find the punctuation-stripped range of each word.
  unsigned begin1, end1;
  word1.punct_stripped(&begin1, &end1);
  unsigned begin2, end2;
  word2.punct_stripped(&begin2, &end2);
  // The stripped ranges must have the same length.
  const unsigned core_len = end1 - begin1;
  if (core_len != end2 - begin2) {
    return false;
  }
  // Compare the ranges position by position after lower-casing.
  for (unsigned k = 0; k < core_len; k++) {
    const auto lower1 = charset->to_lower(word1.unichar_id(begin1 + k));
    const auto lower2 = charset->to_lower(word2.unichar_id(begin2 + k));
    if (lower1 != lower2) {
      return false;
    }
  }
  return true;
}

◆ EvaluateParagraphDetection()

void tesseract::EvaluateParagraphDetection	(	const TextAndModel *	correct,
		int	n,
		const std::vector< PARA * > &	detector_output
	)

Definition at line 105 of file paragraphs_test.cc.

                                                                            {
  // Compares the detector's per-line paragraph assignment with the expected
  // annotations in correct[0..n-1], counts five kinds of discrepancy, reports
  // each count through EXPECT_EQ, and logs a side-by-side dump if any count
  // is non-zero.
  int incorrect_breaks = 0;
  int missed_breaks = 0;
  int poorly_matched_models = 0;
  int bad_crowns = 0;
  int bad_list_items = 0;
  // The detector must have produced exactly one entry per input line.
  ASSERT_EQ(detector_output.size(), n);
  // Line 0 has no predecessor, so break comparison starts at line 1.
  for (int i = 1; i < n; i++) {
    // A line is expected to start a paragraph unless it is marked PCONT.
    bool has_break = correct[i].model_type != PCONT;
    // The detector signals a break by assigning a different PARA pointer.
    bool detected_break = (detector_output[i - 1] != detector_output[i]);
    if (has_break && !detected_break) {
      missed_breaks++;
    }
    if (detected_break && !has_break) {
      incorrect_breaks++;
    }
    if (has_break) {
      if (correct[i].model_type == PNONE) {
        // No model expected: any detected model is a mismatch.
        if (detector_output[i]->model != nullptr) {
          poorly_matched_models++;
        }
      } else {
        // A model is expected: when its justification is known, the detected
        // model must exist and be Comparable to it.
        if (correct[i].model.justification() != kUnknown &&
            (detector_output[i]->model == nullptr ||
             !correct[i].model.Comparable(*detector_output[i]->model))) {
          poorly_matched_models++;
        }
      }
      // The crown and list-item flags must agree with the expectation.
      if (correct[i].is_very_first_or_continuation ^
          detector_output[i]->is_very_first_or_continuation) {
        bad_crowns++;
      }
      if (correct[i].is_list_item ^ detector_output[i]->is_list_item) {
        bad_list_items++;
      }
    }
  }
  EXPECT_EQ(incorrect_breaks, 0);
  EXPECT_EQ(missed_breaks, 0);
  EXPECT_EQ(poorly_matched_models, 0);
  EXPECT_EQ(bad_list_items, 0);
  EXPECT_EQ(bad_crowns, 0);
  // On any discrepancy, build a two-section dump: the expected breaks
  // followed by what the detector produced.
  if (incorrect_breaks || missed_breaks || poorly_matched_models || bad_list_items || bad_crowns) {
    std::vector<std::string> dbg_lines;
    dbg_lines.emplace_back("# ==========================");
    dbg_lines.emplace_back("# Correct paragraph breaks:");
    dbg_lines.emplace_back("# ==========================");
    for (int i = 0; i < n; i++) {
      if (correct[i].model_type != PCONT) {
        // Paragraph-starting lines are annotated with the expected model and
        // flags; continuation lines are echoed as-is.
        std::string s = std::string(correct[i].ascii) + "  #  " +
                        correct[i].model.ToString() +
                        (correct[i].is_very_first_or_continuation ? " crown" : "") +
                        (correct[i].is_list_item ? " li" : "");
        dbg_lines.push_back(s);
      } else {
        dbg_lines.emplace_back(correct[i].ascii);
      }
    }
    dbg_lines.emplace_back("");
    dbg_lines.emplace_back("# ==========================");
    dbg_lines.emplace_back("# Paragraph detector output:");
    dbg_lines.emplace_back("# ==========================");
    for (int i = 0; i < n; i++) {
      std::string annotation;
      // Annotate only the first line of each detected paragraph.
      if (i == 0 || (detector_output[i - 1] != detector_output[i])) {
        if (detector_output[i] && detector_output[i]->model) {
          annotation +=
              "  #  " + detector_output[i]->model->ToString() +
              (detector_output[i]->is_very_first_or_continuation ? " crown" : "") +
              (detector_output[i]->is_list_item ? " li" : "");
        } else {
          annotation = "  #  Unmodeled paragraph.";
        }
      }
      std::string s = correct[i].ascii + annotation;
      dbg_lines.push_back(s);
    }
    // Join all dump lines into one newline-terminated string for the log.
    std::string s;
    for (auto &dbg_line : dbg_lines) {
      s += dbg_line + "\n";
    }
    LOG(INFO) << "Discrepancy!\n" << s;
  }
}

◆ expand_rows()

void tesseract::expand_rows	(	ICOORD	page_tr,
		TO_BLOCK *	block,
		float	gradient,
		FCOORD	rotation,
		int32_t	block_edge,
		bool	testing_on
	)

Definition at line 976 of file makerow.cpp.

  {
  // Widens the [min_y, max_y] limits of every row in the block towards the
  // limits allowed by the block's line size, swallowing neighbouring rows
  // that fall wholly inside the widened range and stopping at neighbours
  // that do not.
  //
  //   page_tr     passed to create_to_win when a debug window is needed
  //   block       block whose rows are adjusted (rows may be merged/deleted)
  //   gradient    row gradient, passed on to the row fitting/plotting helpers
  //   rotation    passed on to the row fitting/plotting helpers
  //   block_edge  passed on to the row fitting/plotting helpers
  //   testing_on  enables debug output together with textord_show_expanded_rows
  bool swallowed_row;    // eaten a neighbour
  float y_max, y_min;    // new row limits
  float y_bottom, y_top; // allowed limits
  TO_ROW *test_row;      // next row
  TO_ROW *row;           // current row
                         // iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows();

#ifndef GRAPHICS_DISABLED
  if (textord_show_expanded_rows && testing_on) {
    if (to_win == nullptr) {
      create_to_win(page_tr);
    }
  }
#endif

  adjust_row_limits(block); // shift min,max.
  if (textord_new_initial_xheight) {
    if (block->get_rows()->empty()) {
      return;
    }
    compute_row_stats(block, textord_show_expanded_rows && testing_on);
  }
  assign_blobs_to_rows(block, &gradient, 4, true, false, false);
  // get real membership
  if (block->get_rows()->empty()) {
    return;
  }
  fit_parallel_rows(block, gradient, rotation, block_edge,
                    textord_show_expanded_rows && testing_on);
  if (!textord_new_initial_xheight) {
    compute_row_stats(block, textord_show_expanded_rows && testing_on);
  }
  // Visit the rows from the last one backwards; the loop ends when the
  // iterator has wrapped round to the last row again.
  row_it.move_to_last();
  do {
    row = row_it.data();
    y_max = row->max_y(); // get current limits
    y_min = row->min_y();
    // Furthest the row may reach below and above its intercept.
    y_bottom = row->intercept() - block->line_size * textord_expansion_factor *
                                      tesseract::CCStruct::kDescenderFraction;
    y_top = row->intercept() +
            block->line_size * textord_expansion_factor *
                (tesseract::CCStruct::kXHeightFraction + tesseract::CCStruct::kAscenderFraction);
    if (y_min > y_bottom) { // expansion allowed
      if (textord_show_expanded_rows && testing_on) {
        tprintf("Expanding bottom of row at %f from %f to %f\n", row->intercept(), y_min, y_bottom);
      }
      // expandable
      swallowed_row = true;
      while (swallowed_row && !row_it.at_last()) {
        swallowed_row = false;
        // get next one
        test_row = row_it.data_relative(1);
        // overlaps space
        if (test_row->max_y() > y_bottom) {
          if (test_row->min_y() > y_bottom) {
            // Neighbour lies wholly inside the expanded range: merge it.
            if (textord_show_expanded_rows && testing_on) {
              tprintf("Eating row below at %f\n", test_row->intercept());
            }
            row_it.forward();
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on) {
              plot_parallel_row(test_row, gradient, block_edge, ScrollView::WHITE, rotation);
            }
#endif
            blob_it.set_to_list(row->blob_list());
            blob_it.add_list_after(test_row->blob_list());
            // swallow complete row
            delete row_it.extract();
            row_it.backward();
            swallowed_row = true;
          } else if (test_row->max_y() < y_min) {
            // Neighbour ends below the current row: expand only up to it.
            // shorter limit
            y_bottom = test_row->max_y();
            if (textord_show_expanded_rows && testing_on) {
              tprintf("Truncating limit to %f due to touching row at %f\n", y_bottom,
                      test_row->intercept());
            }
          } else {
            // Neighbour already overlaps the current row.
            y_bottom = y_min; // can't expand it
            if (textord_show_expanded_rows && testing_on) {
              tprintf("Not expanding limit beyond %f due to touching row at %f\n", y_bottom,
                      test_row->intercept());
            }
          }
        }
      }
      y_min = y_bottom; // expand it
    }
    if (y_max < y_top) { // expansion allowed
      if (textord_show_expanded_rows && testing_on) {
        tprintf("Expanding top of row at %f from %f to %f\n", row->intercept(), y_max, y_top);
      }
      swallowed_row = true;
      while (swallowed_row && !row_it.at_first()) {
        swallowed_row = false;
        // get one above
        test_row = row_it.data_relative(-1);
        if (test_row->min_y() < y_top) {
          if (test_row->max_y() < y_top) {
            // Neighbour lies wholly inside the expanded range: merge it.
            if (textord_show_expanded_rows && testing_on) {
              tprintf("Eating row above at %f\n", test_row->intercept());
            }
            row_it.backward();
            blob_it.set_to_list(row->blob_list());
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on) {
              plot_parallel_row(test_row, gradient, block_edge, ScrollView::WHITE, rotation);
            }
#endif
            blob_it.add_list_after(test_row->blob_list());
            // swallow complete row
            delete row_it.extract();
            row_it.forward();
            swallowed_row = true;
          } else if (test_row->min_y() > y_max) {
            // Neighbour starts above the current row: expand only up to it.
            // This mirrors the max_y() < y_min test of the downward case;
            // testing min_y() < y_max here would pick overlapping neighbours
            // instead and pull y_top below y_max, shrinking the row.
            // shorter limit
            y_top = test_row->min_y();
            if (textord_show_expanded_rows && testing_on) {
              tprintf("Truncating limit to %f due to touching row at %f\n", y_top,
                      test_row->intercept());
            }
          } else {
            // Neighbour already overlaps the current row.
            y_top = y_max; // can't expand it
            if (textord_show_expanded_rows && testing_on) {
              tprintf("Not expanding limit beyond %f due to touching row at %f\n", y_top,
                      test_row->intercept());
            }
          }
        }
      }
      y_max = y_top;
    }
    // new limits
    row->set_limits(y_min, y_max);
    row_it.backward();
  } while (!row_it.at_last());
}

◆ ExpectGraphemeModeResults()

void tesseract::ExpectGraphemeModeResults	(	const std::string &	str,
		UnicodeNormMode	u_mode,
		int	unicode_count,
		int	glyph_count,
		int	grapheme_count,
		const std::string &	target_str
	)

inline

Definition at line 48 of file normstrngs_test.h.

                                                                   {
  std::vector<std::string> glyphs;
  std::string s;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(
      u_mode, OCRNorm::kNone, GraphemeNormMode::kIndividualUnicodes, true, str.c_str(), &glyphs));
  EXPECT_EQ(glyphs.size(), unicode_count) << PrintStringVectorWithUnicodes(glyphs);
  for (auto &glyph : glyphs) {
    s += glyph;
  }
  EXPECT_EQ(target_str, s);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kGlyphSplit,
                                           true, str.c_str(), &glyphs));
  EXPECT_EQ(glyphs.size(), glyph_count) << PrintStringVectorWithUnicodes(glyphs);
  s.clear();
  for (auto &glyph : glyphs) {
    s += glyph;
  }
  EXPECT_EQ(target_str, s);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kCombined,
                                           true, str.c_str(), &glyphs));
  EXPECT_EQ(glyphs.size(), grapheme_count) << PrintStringVectorWithUnicodes(glyphs);
  s.clear();
  for (auto &glyph : glyphs) {
    s += glyph;
  }
  EXPECT_EQ(target_str, s);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kSingleString,
                                           true, str.c_str(), &glyphs));
  EXPECT_EQ(glyphs.size(), 1) << PrintStringVectorWithUnicodes(glyphs);
  EXPECT_EQ(target_str, glyphs[0]);
  std::string result;
  EXPECT_TRUE(
      NormalizeUTF8String(u_mode, OCRNorm::kNone, GraphemeNorm::kNormalize, str.c_str(), &result));
  EXPECT_EQ(target_str, result);
}

◆ extract_edges()

void tesseract::extract_edges	(	Image	pix,
		BLOCK *	block
	)

Definition at line 347 of file edgblob.cpp.

                                 { // block receives the blobs
  // Trace the edges of the block's region of the image into a scratch
  // outline list.
  C_OUTLINE_LIST found_outlines;
  C_OUTLINE_IT outline_it = &found_outlines;
  block_edges(pix, &(block->pdblk), &outline_it);

  // Fetch the block's bounding box corners.
  ICOORD bottom_left;
  ICOORD top_right;
  block->pdblk.bounding_box(bottom_left, top_right);

  // Turn the collected outlines into blobs of the block.
  outlines_to_blobs(block, bottom_left, top_right, &found_outlines);
}

◆ ExtractBlobsFromSegmentation()

void tesseract::ExtractBlobsFromSegmentation	(	BLOCK_LIST *	blocks,
		C_BLOB_LIST *	output_blob_list
	)

Definition at line 440 of file ocrblock.cpp.

                                                                                     {
  // Walk block -> row -> word and splice each word's blob lists onto the end
  // of the output list.
  C_BLOB_IT out_it(output_blob_list);
  BLOCK_IT blocks_it(blocks);
  for (blocks_it.mark_cycle_pt(); !blocks_it.cycled_list(); blocks_it.forward()) {
    ROW_IT rows_it(blocks_it.data()->row_list());
    for (rows_it.mark_cycle_pt(); !rows_it.cycled_list(); rows_it.forward()) {
      WERD_IT words_it(rows_it.data()->word_list());
      for (words_it.mark_cycle_pt(); !words_it.cycled_list(); words_it.forward()) {
        WERD *word = words_it.data();
        // The word's cblob list first ...
        out_it.move_to_last();
        out_it.add_list_after(word->cblob_list());
        // ... then its rej_cblob list, again at the very end.
        out_it.move_to_last();
        out_it.add_list_after(word->rej_cblob_list());
      }
    }
  }
}

◆ ExtractCharNormFeatures()

FEATURE_SET tesseract::ExtractCharNormFeatures ( const INT_FX_RESULT_STRUCT & fx_info )

Return the character normalization feature for a blob.

The features returned are in a scale where the x-height has been normalized to live in the region y = [-0.25 .. 0.25]. Example ranges for English below are based on the Linux font collection on 2009-12-04:

Params[CharNormY]
- The y coordinate of the grapheme's centroid.
- English: [-0.27, 0.71]
Params[CharNormLength]
- The length of the grapheme's outline (tiny segments discarded), divided by 10.0=LENGTH_COMPRESSION.
- English: [0.16, 0.85]
Params[CharNormRx]
- The radius of gyration about the x axis, as measured from CharNormY.
- English: [0.011, 0.34]
Params[CharNormRy]
- The radius of gyration about the y axis, as measured from the x center of the grapheme's bounding box.
- English: [0.011, 0.31]

Definition at line 56 of file normfeat.cpp.

                                                                         {
  // The result is a one-element feature set holding a single CharNorm feature.
  auto result = new FEATURE_SET_STRUCT(1);
  auto char_norm = new FEATURE_STRUCT(&CharNormDesc);

  // Every parameter is scaled by MF_SCALE_FACTOR. Y is taken relative to
  // kBlnBaselineOffset and the length is divided by LENGTH_COMPRESSION.
  auto &params = char_norm->Params;
  params[CharNormY] = MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
  params[CharNormLength] = MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
  params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
  params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;

  AddFeature(result, char_norm);
  return result;
} /* ExtractCharNormFeatures */

◆ ExtractMicroFeature()

MicroFeature tesseract::ExtractMicroFeature	(	MFOUTLINE	Start,
		MFOUTLINE	End
	)

This routine computes the feature parameters which describe the micro-feature that starts and Start and ends at End. A new micro-feature is allocated, filled with the feature parameters, and returned. The routine assumes that Start and End are not the same point. If they are the same point, nullptr is returned, a warning message is printed, and the current outline is dumped to stdout.

Parameters

Start	starting point of micro-feature
End	ending point of micro-feature

Returns: New micro-feature or nullptr if the feature was rejected.

Note: Globals: none

Definition at line 127 of file mfx.cpp.

                                                                 {
  // End points of the segment that the micro-feature describes.
  MFEDGEPT *head = PointAt(Start);
  MFEDGEPT *tail = PointAt(End);

  MicroFeature NewFeature;
  // Position is the midpoint of the segment.
  NewFeature[static_cast<int>(MicroFeatureParameter::MFXPosition)] =
      AverageOf(head->Point.x, tail->Point.x);
  NewFeature[static_cast<int>(MicroFeatureParameter::MFYPosition)] =
      AverageOf(head->Point.y, tail->Point.y);
  // Length and direction are measured from the first point to the second.
  NewFeature[static_cast<int>(MicroFeatureParameter::MFLength)] =
      DistanceBetween(head->Point, tail->Point);
  NewFeature[static_cast<int>(MicroFeatureParameter::MFDirection)] =
      NormalizedAngleFrom(&head->Point, &tail->Point, 1.0);
  // The bulge parameters are deprecated and always zero.
  NewFeature[static_cast<int>(MicroFeatureParameter::MFBulge1)] = 0.0f;
  NewFeature[static_cast<int>(MicroFeatureParameter::MFBulge2)] = 0.0f;

  return NewFeature;
} /* ExtractMicroFeature */

◆ ExtractMicros()

FEATURE_SET tesseract::ExtractMicros	(	TBLOB *	Blob,
		const DENORM &	cn_denorm
	)

Call the old micro-feature extractor and then copy the features into the new format. Then deallocate the old micro-features.

Parameters

Blob	blob to extract micro-features from
cn_denorm	control parameter to feature extractor.

Returns: Micro-features for Blob.

Definition at line 41 of file mf.cpp.

                                                                {
  auto micro_features = BlobMicroFeatures(Blob, cn_denorm);
  // No micro-features means no feature set at all.
  if (micro_features.empty()) {
    return nullptr;
  }
  // Count the extracted features by walking the container once.
  int count = 0;
  for ([[maybe_unused]] auto &unused : micro_features) {
    ++count;
  }
  auto FeatureSet = new FEATURE_SET_STRUCT(count);

  const int kParamCount = static_cast<int>(MicroFeatureParameter::MFCount);
  for (auto &micro : micro_features) {
    auto Feature = new FEATURE_STRUCT(&MicroFeatureDesc);
    // Copy every parameter across ...
    for (int p = 0; p < kParamCount; ++p) {
      Feature->Params[p] = micro[p];
    }
    // ... then force the deprecated bulge parameters to zero.
    Feature->Params[static_cast<int>(MicroFeatureParameter::MFBulge1)] = 0.0f;
    Feature->Params[static_cast<int>(MicroFeatureParameter::MFBulge2)] = 0.0f;

#ifndef _WIN32
    // Every parameter of the new feature must be a real number.
    for (int p = 0; p < Feature->Type->NumParams; p++) {
      ASSERT_HOST(!std::isnan(Feature->Params[p]));
    }
#endif

    AddFeature(FeatureSet, Feature);
  }
  return FeatureSet;
} /* ExtractMicros */

◆ FeatureDirection()

TESS_API FCOORD tesseract::FeatureDirection ( uint8_t theta )

Definition at line 70 of file intfx.cpp.

                                       {
  // Build the direction vector from the cos/sin lookup tables indexed by
  // theta.
  const auto dir_x = cos_table[theta];
  const auto dir_y = sin_table[theta];
  return FCOORD(dir_x, dir_y);
}

◆ fill_heights()

void tesseract::fill_heights	(	TO_ROW *	row,
		float	gradient,
		int	min_height,
		int	max_height,
		STATS *	heights,
		STATS *	floating_heights
	)

Definition at line 1418 of file makerow.cpp.

                                           {
  // Adds the height above the baseline of each blob in the row to *heights,
  // and additionally to *floating_heights for blobs whose own box height is
  // a small fraction of that value. Blobs joined to the previous blob are
  // not measured; runs of repeated characters are only sampled once.
  float xcentre;  // centre of blob
  float top;      // top y coord of blob
  float height;   // height of blob
  BLOBNBOX *blob; // current blob
  int repeated_set;
  BLOBNBOX_IT blob_it = row->blob_list();
  if (blob_it.empty()) {
    return; // no blobs in this row
  }
  bool has_rep_chars = row->rep_chars_marked() && row->num_repeated_sets() > 0;
  // The list is walked circularly: the loop stops once the iterator is back
  // at the first blob.
  do {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      xcentre = (blob->bounding_box().left() + blob->bounding_box().right()) / 2.0f;
      top = blob->bounding_box().top();
      height = blob->bounding_box().height();
      // Convert top to a height above the row's line at xcentre, using either
      // the fitted baseline or the parallel straight line.
      if (textord_fix_xheight_bug) {
        top -= row->baseline.y(xcentre);
      } else {
        top -= gradient * xcentre + row->parallel_c();
      }
      // Only heights inside [min_height, max_height] are recorded, rounded to
      // the nearest integer.
      if (top >= min_height && top <= max_height) {
        heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
        // A box much shorter than its height above the line is also recorded
        // in the floating-heights histogram.
        if (height / top < textord_min_blob_height_fraction) {
          floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
        }
      }
    }
    // Skip repeated chars, since they are likely to skew the height stats.
    if (has_rep_chars && blob->repeated_set() != 0) {
      repeated_set = blob->repeated_set();
      blob_it.forward();
      // Advance past every following blob of the same repeated set, stopping
      // if the iterator wraps round to the first blob.
      while (!blob_it.at_first() && blob_it.data()->repeated_set() == repeated_set) {
        blob_it.forward();
        if (textord_debug_xheights) {
          tprintf("Skipping repeated char when computing xheight\n");
        }
      }
    } else {
      blob_it.forward();
    }
  } while (!blob_it.at_first());
}

◆ FillABC()

TESS_API void tesseract::FillABC ( PROTO_STRUCT * Proto )

Definition at line 103 of file protos.cpp.

                                  {
  // Derive the A, B, C coefficients stored on the proto from its Angle and
  // its (X, Y) point. Angle is multiplied by 2*pi before taking the tangent,
  // so it is presumably expressed in full turns.
  float slope = tan(Proto->Angle * 2.0 * M_PI);
  // Scale factor 1 / sqrt(slope^2 + 1), applied to all three coefficients.
  float scale = 1.0 / sqrt(slope * slope + 1.0);
  // y-intercept of the line with this slope through (X, Y).
  float intercept = Proto->Y - slope * Proto->X;

  Proto->A = slope * scale;
  Proto->B = -scale;
  Proto->C = intercept * scale;
}

◆ FillerDone()

bool tesseract::FillerDone ( TABLE_FILLER * Filler )

Return true if the specified table filler is done, i.e. if it has no more lines to fill.

Parameters

Filler table filler to check if done

Returns: true if no more lines to fill, false otherwise.

Note: Globals: none

Definition at line 1063 of file intproto.cpp.

                                      {
  // The switch the filler would process next.
  const FILL_SWITCH &upcoming = Filler->Switch[Filler->NextSwitch];

  // The filler can only be done when that switch is the terminating one ...
  if (upcoming.Type != LastSwitch) {
    return false;
  }
  // ... and the filler's X has already moved past it.
  return Filler->X > upcoming.X;

} /* FillerDone */

◆ FillPPCircularBits()

void tesseract::FillPPCircularBits	(	uint32_t	ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
		int	Bit,
		float	Center,
		float	Spread,
		bool	debug
	)

This routine sets Bit in each bit vector whose bucket lies within the range Center +- Spread. The fill is done for a circular dimension, i.e. bucket 0 is adjacent to the last bucket. It is assumed that Center and Spread are expressed in a circular coordinate system whose range is 0 to 1.

Parameters

ParamTable	table of bit vectors, one per param bucket
Bit	bit position in vectors to be filled
Center	center of filled area
Spread	spread of filled area
debug	debug flag

Definition at line 1085 of file intproto.cpp.

                                                                {
  // A spread above one half is capped at one half.
  if (Spread > 0.5) {
    Spread = 0.5;
  }

  // First bucket of the range; a negative index wraps to the top end.
  int first = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS));
  if (first < 0) {
    first += NUM_PP_BUCKETS;
  }

  // Last bucket of the range; an index past the end wraps to the bottom.
  int last = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS));
  if (last >= NUM_PP_BUCKETS) {
    last -= NUM_PP_BUCKETS;
  }
  if (debug) {
    tprintf("Circular fill from %d to %d", first, last);
  }

  // Step circularly from first to last, both inclusive, setting Bit in each
  // bucket's vector.
  int bucket = first;
  while (true) {
    SET_BIT(ParamTable[bucket], Bit);
    if (bucket == last) {
      break;
    }
    CircularIncrement(bucket, NUM_PP_BUCKETS);
  }

} /* FillPPCircularBits */

◆ FillPPLinearBits()

void tesseract::FillPPLinearBits	(	uint32_t	ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
		int	Bit,
		float	Center,
		float	Spread,
		bool	debug
	)

This routine sets Bit in each bit vector whose bucket lies within the range Center +- Spread. The fill is done for a linear dimension, i.e. there is no wrap-around for this dimension. It is assumed that Center and Spread are expressed in a linear coordinate system whose range is approximately 0 to 1. Values outside this range will be clipped.

Parameters

ParamTable	table of bit vectors, one per param bucket
Bit	bit number being filled
Center	center of filled area
Spread	spread of filled area
debug	debug flag

Definition at line 1130 of file intproto.cpp.

                                                              {
  // Translate the two ends of Center +- Spread into bucket numbers.
  int low = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS));
  int high = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS));

  // Clip to the table: the low end at bucket 0, the high end at the final
  // bucket. There is no wrap-around for a linear dimension.
  low = (low < 0) ? 0 : low;
  high = (high >= NUM_PP_BUCKETS) ? NUM_PP_BUCKETS - 1 : high;

  if (debug) {
    tprintf("Linear fill from %d to %d", low, high);
  }

  // Fill every bucket in [low, high]; nothing is set when low > high.
  int bucket = low;
  while (bucket <= high) {
    SET_BIT(ParamTable[bucket], Bit);
    ++bucket;
  }

} /* FillPPLinearBits */

◆ FilterEdgeNoise()

void tesseract::FilterEdgeNoise	(	MFOUTLINE	Outline,
		float	NoiseSegmentLength
	)

◆ find_best_dropout_row()

bool tesseract::find_best_dropout_row	(	TO_ROW *	row,
		int32_t	distance,
		float	dist_limit,
		int32_t	line_index,
		TO_ROW_IT *	row_it,
		bool	testing_on
	)

Definition at line 696 of file makerow.cpp.

  {
  // Compares this row with its neighbours in row_it and reports whether it
  // should go. Judging by the debug messages, a true return means "delete"
  // (the row is too far away, or a neighbour is nearer or at least as
  // believable) and a false return means "keep".
  int32_t next_index; // of neighbouring row
  int32_t row_offset; // from current row
  int32_t abs_dist;   // absolute distance
  int8_t row_inc;     // increment to row_index
  TO_ROW *next_row;   // neighbouring row being compared

  if (testing_on) {
    tprintf("Row at %g(%g), dropout dist=%d,", row->intercept(), row->parallel_c(), distance);
  }
  // The sign of distance picks the search direction through row_it:
  // negative distance steps with +1, non-negative distance steps with -1.
  if (distance < 0) {
    row_inc = 1;
    abs_dist = -distance;
  } else {
    row_inc = -1;
    abs_dist = distance;
  }
  if (abs_dist > dist_limit) {
    if (testing_on) {
      tprintf(" too far - deleting\n");
    }
    return true;
  }
  // Only look at neighbours if there is one in the chosen direction.
  if ((distance < 0 && !row_it->at_last()) || (distance >= 0 && !row_it->at_first())) {
    row_offset = row_inc;
    do {
      next_row = row_it->data_relative(row_offset);
      next_index = static_cast<int32_t>(std::floor(next_row->intercept()));
      // A neighbour strictly between line_index and line_index + 2 * distance
      // is treated as nearer than this row.
      if ((distance < 0 && next_index < line_index &&
           next_index > line_index + distance + distance) ||
          (distance >= 0 && next_index > line_index &&
           next_index < line_index + distance + distance)) {
        if (testing_on) {
          tprintf(" nearer neighbour (%d) at %g\n", line_index + distance - next_index,
                  next_row->intercept());
        }
        return true; // other is nearer
      } else if (next_index == line_index || next_index == line_index + distance + distance) {
        // Neighbour sits exactly on one of the two boundary indices: ties are
        // settled by believability, and an equal score goes to the neighbour.
        if (row->believability() <= next_row->believability()) {
          if (testing_on) {
            tprintf(" equal but more believable at %g (%g/%g)\n", next_row->intercept(),
                    row->believability(), next_row->believability());
          }
          return true; // other is more believable
        }
      }
      row_offset += row_inc;
      // Keep scanning only while neighbours stay exactly on a boundary index.
    } while ((next_index == line_index || next_index == line_index + distance + distance) &&
             row_offset < row_it->length());
    if (testing_on) {
      tprintf(" keeping\n");
    }
  }
  return false;
}

◆ find_cblob_hlimits()

void tesseract::find_cblob_hlimits	(	C_BLOB *	blob,
		float	bottomy,
		float	topy,
		float &	xmin,
		float &	xmax
	)

Definition at line 579 of file blobbox.cpp.

                 {
  // Start with an empty range so that the first accepted point sets both ends.
  xmin = static_cast<float>(INT32_MAX);
  xmax = static_cast<float>(-INT32_MAX);

  C_OUTLINE_IT outline_it(blob->out_list());
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list(); outline_it.forward()) {
    C_OUTLINE *current = outline_it.data();
    // Trace the outline point by point, beginning at its start position.
    ICOORD point = current->start_pos();
    for (int16_t step_no = 0; step_no < current->pathlength(); step_no++) {
      // Only points whose y lies within [bottomy, topy] widen the x range.
      const bool in_band = point.y() >= bottomy && point.y() <= topy;
      if (in_band) {
        UpdateRange(point.x(), &xmin, &xmax);
      }
      // Advance to the next point on the outline.
      ICOORD delta = current->step(step_no);
      point += delta;
    }
  }
}

◆ find_cblob_limits()

void tesseract::find_cblob_limits	(	C_BLOB *	blob,
		float	leftx,
		float	rightx,
		FCOORD	rotation,
		float &	ymin,
		float &	ymax
	)

Definition at line 504 of file blobbox.cpp.

                 {
  // Start with an empty range so that the first accepted point sets both ends.
  ymin = static_cast<float>(INT32_MAX);
  ymax = static_cast<float>(-INT32_MAX);

  C_OUTLINE_IT outline_it(blob->out_list());
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list(); outline_it.forward()) {
    C_OUTLINE *current = outline_it.data();
    // Trace the outline in rotated coordinates: the start point is rotated
    // once, and every step is rotated before it is added.
    ICOORD point = current->start_pos();
    point.rotate(rotation);
    for (int16_t step_no = 0; step_no < current->pathlength(); step_no++) {
      // Only points whose x lies within [leftx, rightx] widen the y range.
      const bool in_band = point.x() >= leftx && point.x() <= rightx;
      if (in_band) {
        UpdateRange(point.y(), &ymin, &ymax);
      }
      ICOORD delta = current->step(step_no);
      delta.rotate(rotation);
      point += delta;
    }
  }
}

◆ find_cblob_vlimits()

void tesseract::find_cblob_vlimits	(	C_BLOB *	blob,
		float	leftx,
		float	rightx,
		float &	ymin,
		float &	ymax
	)

Definition at line 543 of file blobbox.cpp.

                 {
  // Start with an empty range so that the first accepted point sets both ends.
  ymin = static_cast<float>(INT32_MAX);
  ymax = static_cast<float>(-INT32_MAX);

  C_OUTLINE_IT outline_it(blob->out_list());
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list(); outline_it.forward()) {
    C_OUTLINE *current = outline_it.data();
    // Trace the outline point by point, beginning at its start position.
    ICOORD point = current->start_pos();
    for (int16_t step_no = 0; step_no < current->pathlength(); step_no++) {
      // Only points whose x lies within [leftx, rightx] widen the y range.
      const bool in_band = point.x() >= leftx && point.x() <= rightx;
      if (in_band) {
        UpdateRange(point.y(), &ymin, &ymax);
      }
      // Advance to the next point on the outline.
      ICOORD delta = current->step(step_no);
      point += delta;
    }
  }
}

◆ find_lesser_parts()

void tesseract::find_lesser_parts	(	TO_ROW *	row,
		TBOX	blobcoords[],
		int	blobcount,
		char	partids[],
		int	partsizes[],
		int	partcount,
		int	bestpart
	)

Definition at line 1262 of file oldbasel.cpp.

  {
  // Examines the blobs that are not in the best partition. Outputs, both
  // written to row:
  //   row->xheight  - set to -1.0f when the longest run of consecutive
  //                   non-best blobs exceeds MAXBADRUN, otherwise to 1.0f;
  //   row->descdrop - the average offset below the baseline of the largest
  //                   partition whose average is <= -MINASCRISE (0 if none).
  int blobindex;             /*index of blob */
  int partition;             /*current partition */
  int xcentre;               /*centre of blob */
  int negcount;              /*size of best down-step partition */
  float partsteps[MAXPARTS]; /*summed, then average, step to part */
  float bestneg;             /*best down step */
  int runlength;             /*length of bad run */
  int biggestrun;            /*biggest bad run */

  biggestrun = 0;
  for (partition = 0; partition < partcount; partition++) {
    partsteps[partition] = 0.0; /*zero accumulators */
  }
  for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
    xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1;
    /*in other parts */
    int part_id = static_cast<int>(static_cast<unsigned char>(partids[blobindex]));
    if (part_id != bestpart) {
      runlength++; /*run of non bests */
      if (runlength > biggestrun) {
        biggestrun = runlength;
      }
      // Only the first partcount accumulators were zeroed above and only those
      // are read back below, so an id outside that range is not accumulated
      // (it would add to an uninitialised, possibly out-of-range, slot).
      if (part_id < partcount) {
        partsteps[part_id] += blobcoords[blobindex].bottom() - row->baseline.y(xcentre);
      }
    } else {
      runlength = 0;
    }
  }
  if (biggestrun > MAXBADRUN) {
    row->xheight = -1.0f; /*failed */
  } else {
    row->xheight = 1.0f; /*success */
  }
  negcount = 0;
  bestneg = 0.0; /*no step yet */
  for (partition = 0; partition < partcount; partition++) {
    if (partition != bestpart) {
      // Turn the sum into an average; an empty partition averages to 0
      // instead of dividing by zero.
      if (partsizes[partition] == 0) {
        partsteps[partition] = 0;
      } else {
        partsteps[partition] /= partsizes[partition];
      }

      // Keep the most populous partition lying at least MINASCRISE below.
      if (partsteps[partition] <= -MINASCRISE && partsizes[partition] > negcount) {
        bestneg = partsteps[partition];
        negcount = partsizes[partition];
      }
    }
  }
  // The former up-step counter and the final "partsteps[bestpart] /= blobcount"
  // were removed: neither result was ever read, and the division wrote through
  // an unchecked index and divided by blobcount even when it was 0.
  row->descdrop = bestneg;
}

◆ find_repeated_chars()

void tesseract::find_repeated_chars	(	TO_BLOCK *	block,
		bool	testing_on
	)

Definition at line 1660 of file topitch.cpp.

                                          { // Debug mode.
  // For every row of the block, turns each run of blobs sharing a non-zero
  // repeated_set() id into a word flagged W_REP_CHAR | W_DONT_CHOP and appends
  // it to row->rep_words. testing_on is not referenced in this body.
  POLY_BLOCK *pb = block->block->pdblk.poly_block();
  if (pb != nullptr && !pb->IsText()) {
    return; // Don't find repeated chars in non-text blocks.
  }

  TO_ROW *row;
  BLOBNBOX_IT box_it;
  BLOBNBOX_IT search_it; // forward search
  WERD *word;            // new word
  TBOX word_box;         // for plotting
  int blobcount, repeated_set;

  TO_ROW_IT row_it = block->get_rows();
  if (row_it.empty()) {
    return; // empty block
  }
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row = row_it.data();
    box_it.set_to_list(row->blob_list());
    if (box_it.empty()) {
      continue; // no blobs in this row
    }
    // Mark the repeated sets once per row, on demand.
    if (!row->rep_chars_marked()) {
      mark_repeated_chars(row);
    }
    if (row->num_repeated_sets() == 0) {
      continue; // nothing to do for this row
    }
    // new words
    WERD_IT word_it(&row->rep_words);
    do {
      // A blob that starts a repeated set (and is not a continuation of the
      // previous blob) begins a new repeated word.
      if (box_it.data()->repeated_set() != 0 && !box_it.data()->joined_to_prev()) {
        blobcount = 1;
        repeated_set = box_it.data()->repeated_set();
        // Count how many following blobs carry the same set id, stopping if
        // the search wraps round to the start of the list.
        search_it = box_it;
        search_it.forward();
        while (!search_it.at_first() && search_it.data()->repeated_set() == repeated_set) {
          blobcount++;
          search_it.forward();
        }
        // After the call to make_real_word() all the blobs from this
        // repeated set will be removed from the blob list. box_it will be
        // set to point to the blob after the end of the extracted sequence.
        word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
        // Report the offending boxes before the assertion below fires.
        if (!box_it.empty() && box_it.data()->joined_to_prev()) {
          tprintf("Bad box joined to prev at");
          box_it.data()->bounding_box().print();
          tprintf("After repeated word:");
          word->bounding_box().print();
        }
        ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
        word->set_flag(W_REP_CHAR, true);
        word->set_flag(W_DONT_CHOP, true);
        word_it.add_after_then_move(word);
      } else {
        box_it.forward();
      }
      // Stop once the iterator is back at the head of the blob list.
    } while (!box_it.at_first());
  }
}

◆ find_row_pitch()

bool tesseract::find_row_pitch	(	TO_ROW *	row,
		int32_t	maxwidth,
		int32_t	dm_gap,
		TO_BLOCK *	block,
		int32_t	block_index,
		int32_t	row_index,
		bool	testing_on
	)

Definition at line 784 of file topitch.cpp.

  {
  // Gathers gap and pitch statistics for the row twice - once passing dm_gap
  // to count_pitch_stats ("dm" values) and once passing 0 - compares the
  // interquartile ranges (IQR) of the two, keeps one set, and records a
  // tentative PITCH_MAYBE_FIXED / PITCH_MAYBE_PROP decision plus the derived
  // spacing fields on the row.
  // Returns false, with row->pitch_decision = PITCH_DUNNO, when both pitch
  // IQRs exceed maxwidth; returns true otherwise.
  bool used_dm_model; // looks like dot matrix
  float min_space;    // estimate threshold
  float non_space;    // gap size
  float gap_iqr;      // interquartile range
  float pitch_iqr;
  float dm_gap_iqr; // interquartile range
  float dm_pitch_iqr;
  float dm_pitch;      // pitch with dm on
  float pitch;         // revised estimate
  float initial_pitch; // guess at pitch
  STATS gap_stats(0, maxwidth - 1);
  // centre-centre
  STATS pitch_stats(0, maxwidth - 1);

  row->fixed_pitch = 0.0f;
  initial_pitch = row->fp_space;
  if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) {
    initial_pitch = row->xheight; // keep pitch decent
  }
  // The non-space gap estimate may not exceed the pitch guess.
  non_space = row->fp_nonsp;
  if (non_space > initial_pitch) {
    non_space = initial_pitch;
  }
  min_space = (initial_pitch + non_space) / 2;

  // First pass: statistics with dm_gap. On failure, fall back to values that
  // make the dm pitch IQR larger than maxwidth.
  if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space, true, false,
                         dm_gap)) {
    dm_gap_iqr = 0.0001f;
    dm_pitch_iqr = maxwidth * 2.0f;
    dm_pitch = initial_pitch;
  } else {
    dm_gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
    dm_pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
    dm_pitch = pitch_stats.ile(0.5);
  }
  // Second pass: the same statistics with a gap argument of 0.
  gap_stats.clear();
  pitch_stats.clear();
  if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space, true, false, 0)) {
    gap_iqr = 0.0001f;
    pitch_iqr = maxwidth * 3.0f;
  } else {
    gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
    pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
    if (testing_on) {
      tprintf(
          "First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
          "pitch=%g\n",
          initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5));
    }
    initial_pitch = pitch_stats.ile(0.5);
    // If the median pitch came out below min_space, recount using the median
    // as both the pitch and the space threshold and, if that succeeds, adopt
    // the revised figures.
    if (min_space > initial_pitch && count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch,
                                                       initial_pitch, true, false, 0)) {
      min_space = initial_pitch;
      gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
      pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
      if (testing_on) {
        tprintf(
            "Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
            "pitch=%g\n",
            initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5));
      }
      initial_pitch = pitch_stats.ile(0.5);
    }
  }
  // Debug summary; the final letter mirrors the decisions made below:
  // 'D' = both pitch IQRs exceed maxwidth, 'S' = non-dm figures chosen,
  // 'M' = dm figures chosen.
  if (textord_debug_pitch_metric) {
    tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", block_index,
            row_index, 'X', pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
            pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth
                ? 'D'
                : (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
  }
  if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
    row->pitch_decision = PITCH_DUNNO;
    if (textord_debug_pitch_metric) {
      tprintf("\n");
    }
    return false; // insufficient data
  }
  // Cross-multiplied comparison of pitch_iqr / gap_iqr against
  // dm_pitch_iqr / dm_gap_iqr: the smaller ratio wins, ties go to non-dm.
  if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
    if (testing_on) {
      tprintf(
          "Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
          "dm_gap_iqr=%g\n",
          pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
    }
    gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
    pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
    pitch = pitch_stats.ile(0.5);
    used_dm_model = false;
  } else {
    if (testing_on) {
      tprintf(
          "Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
          "dm_gap_iqr=%g\n",
          pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
    }
    gap_iqr = dm_gap_iqr;
    pitch_iqr = dm_pitch_iqr;
    pitch = dm_pitch;
    used_dm_model = true;
  }
  // Debug: 'F' / 'P' repeats the fixed-versus-proportional test applied below.
  if (textord_debug_pitch_metric) {
    tprintf("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", pitch_iqr, gap_iqr, pitch);
    tprintf("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
            pitch_iqr < gap_iqr * textord_fpiqr_ratio &&
                    pitch_iqr < block->xheight * textord_max_pitch_iqr &&
                    pitch < block->xheight * textord_words_default_maxspace
                ? 'F'
                : 'P');
  }
  // Maybe-fixed needs all three: a pitch IQR small relative to the gap IQR,
  // small relative to the block x-height, and a pitch below the x-height
  // scaled by textord_words_default_maxspace.
  if (pitch_iqr < gap_iqr * textord_fpiqr_ratio &&
      pitch_iqr < block->xheight * textord_max_pitch_iqr &&
      pitch < block->xheight * textord_words_default_maxspace) {
    row->pitch_decision = PITCH_MAYBE_FIXED;
  } else {
    row->pitch_decision = PITCH_MAYBE_PROP;
  }
  row->fixed_pitch = pitch;
  // gap_stats was cleared after the dm pass, so this median comes from the
  // later count(s) even when the dm figures were chosen above.
  row->kern_size = gap_stats.ile(0.5);
  // The cast applies to the sum only, so the halving is integer division.
  row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
  if (row->min_space > row->fixed_pitch) {
    row->min_space = static_cast<int32_t>(row->fixed_pitch);
  }
  row->max_nonspace = row->min_space;
  row->space_size = row->fixed_pitch;
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->used_dm_model = used_dm_model;
  return true;
}

◆ find_top_modes()

void tesseract::find_top_modes	(	STATS *	stats,
		int	statnum,
		int	modelist[],
		int	modenum
	)

Definition at line 1508 of file oldbasel.cpp.

  {
  // Fills modelist[0 .. modenum-1] with successive modes of stats, searching
  // piles 0 .. statnum-1. Each pass picks the tallest pile ranked below the
  // previous pick; a pick is replaced by 0 when its count is too small
  // relative to the running total of picked counts.
  const int min_factor = textord_ocropus_mode ? kMinModeFactorOcropus : kMinModeFactor;
  int prev_index = 0;         // pile chosen on the previous pass
  int prev_count = INT32_MAX; // its count; no limit on the first pass
  int running_total = 0;      // sum of the counts chosen so far

  for (int slot = 0; slot < modenum; ++slot) {
    int best = 0;
    for (int candidate = 0; candidate < statnum; ++candidate) {
      const int count = stats->pile_count(candidate);
      if (count <= stats->pile_count(best)) {
        continue; // not taller than the current best
      }
      // Eligible when strictly below the previous pick, or tied with it but
      // located at a higher index.
      const bool below_prev = count < prev_count;
      const bool tied_later = (count == prev_count) && (candidate > prev_index);
      if (below_prev || tied_later) {
        best = candidate;
      }
    }
    prev_index = best;
    prev_count = stats->pile_count(prev_index);
    running_total += prev_count;
    modelist[slot] = (prev_count <= running_total / min_factor) ? 0 : best;
  }
}

◆ find_underlined_blobs()

void tesseract::find_underlined_blobs	(	BLOBNBOX *	u_line,
		QSPLINE *	baseline,
		float	xheight,
		float	baseline_offset,
		ICOORDELT_LIST *	chop_cells
	)

Definition at line 158 of file underlin.cpp.

  {
  // Projects the outlines of u_line into three histograms over the blob's x
  // extent, then appends to chop_cells one (start, end) pair for every run of
  // x positions with a non-zero count in the middle projection.
  TBOX bounds = u_line->bounding_box();
  ICOORDELT_IT cell_it(chop_cells);
  STATS upper_proj(bounds.left(), bounds.right());
  STATS middle_proj(bounds.left(), bounds.right());
  STATS lower_proj(bounds.left(), bounds.right());

  ASSERT_HOST(u_line->cblob() != nullptr);

  C_OUTLINE_IT outline_it;
  outline_it.set_to_list(u_line->cblob()->out_list());
  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    vertical_cunderline_projection(outline_it.data(), baseline, xheight, baseline_offset,
                                   &lower_proj, &middle_proj, &upper_proj);
    outline_it.forward();
  }

  auto x = bounds.left();
  while (x < bounds.right()) {
    if (middle_proj.pile_count(x) > 0) {
      // Extend the run to the first empty column (or the right edge).
      auto run_end = x + 1;
      while (run_end < bounds.right() && middle_proj.pile_count(run_end) > 0) {
        run_end++;
      }
      ICOORD span = ICOORD(x, run_end);
      cell_it.add_after_then_move(new ICOORDELT(span));
      // Resume from the end of the run; the increment below then skips the
      // column that ended it.
      x = run_end;
    }
    x++;
  }
}

◆ FindClass()

TESS_COMMON_TRAINING_API MERGE_CLASS tesseract::FindClass	(	LIST	List,
		const std::string &	Label
	)

Definition at line 597 of file commontraining.cpp.

                                                         {
  // Scan the list for an entry whose Label equals the requested one.
  // `iterate` is a list-walking macro defined elsewhere; presumably it steps
  // List to the next node each time round - confirm against its definition.
  iterate(List) {
    MERGE_CLASS candidate = reinterpret_cast<MERGE_CLASS>(List->first_node());
    if (candidate->Label == Label) {
      return candidate;
    }
  }

  // No entry carries this label.
  return nullptr;

} /* FindClass */

◆ FindDirectionChanges()

void tesseract::FindDirectionChanges	(	MFOUTLINE	Outline,
		float	MinSlope,
		float	MaxSlope
	)

This routine searches through the specified outline, computes a slope for each vector in the outline, and marks each vector as having one of the following directions: N, S, E, W, NE, NW, SE, SW. This information is stored in the outline itself; the routine has no return value.

Parameters

Outline	micro-feature outline to analyze
MinSlope	controls "snapping" of segments to horizontal
MaxSlope	controls "snapping" of segments to vertical

Definition at line 104 of file mfoutline.cpp.

                                                                             {
  // Nothing to do for a degenerate outline.
  if (DegenerateOutline(Outline)) {
    return;
  }

  // Begin one point after the given one, so that the first direction computed
  // is for the edge running from the given point to its successor.
  MFEDGEPT *previous = PointAt(Outline);
  MFOUTLINE start = NextPointAfter(Outline);
  MFOUTLINE cursor = start;

  // Visit consecutive point pairs until the walk returns to where it began.
  do {
    MFEDGEPT *current = PointAt(cursor);
    ComputeDirection(previous, current, MinSlope, MaxSlope);

    previous = current;
    cursor = NextPointAfter(cursor);
  } while (cursor != start);

} /* FindDirectionChanges */

◆ FindList()

LABELEDLIST tesseract::FindList	(	LIST	List,
		const std::string &	Label
	)

This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.

Parameters

List	list to search
Label	label to search for

Returns: Labeled list with the specified label or nullptr.

Note: Globals: none

Definition at line 302 of file commontraining.cpp.

                                                        {
  // Scan the list for a labeled list whose Label equals the requested one.
  // `iterate` is a list-walking macro defined elsewhere; presumably it steps
  // List to the next node each time round - confirm against its definition.
  iterate(List) {
    LABELEDLIST candidate = reinterpret_cast<LABELEDLIST>(List->first_node());
    if (candidate->Label == Label) {
      return candidate;
    }
  }

  // No labeled list carries this label.
  return nullptr;

} /* FindList */

◆ FindMatchingChoice()

BLOB_CHOICE * tesseract::FindMatchingChoice	(	UNICHAR_ID	char_id,
		BLOB_CHOICE_LIST *	bc_list
	)

Definition at line 177 of file ratngs.cpp.

                                                                               {
  // Return the first choice in bc_list whose unichar id equals char_id,
  // or nullptr when the list holds no such choice.
  BLOB_CHOICE_IT it(bc_list);
  it.mark_cycle_pt();
  while (!it.cycled_list()) {
    BLOB_CHOICE *candidate = it.data();
    if (candidate->unichar_id() == char_id) {
      return candidate;
    }
    it.forward();
  }
  return nullptr;
}

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after
	)

Definition at line 1704 of file paragraphs.cpp.

                                                                                                {
  // With no words on either row the answer is always "yes".
  const bool either_row_empty = before.ri_->num_words == 0 || after.ri_->num_words == 0;
  if (either_row_empty) {
    return true;
  }

  // The justification is not known here, so use the larger of the two
  // indents of the earlier row, less one average inter-word space.
  int available_space =
      (before.rindent_ > before.lindent_) ? before.rindent_ : before.lindent_;
  available_space -= before.ri_->average_interword_space;

  // The word that would have had to fit is the leftmost word of the later row
  // when the earlier row is flagged ltr, and its rightmost word otherwise.
  const auto &first_word = before.ri_->ltr ? after.ri_->lword_box : after.ri_->rword_box;
  return first_word.width() < available_space;
}

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after,
		tesseract::ParagraphJustification	justification
	)

Definition at line 1678 of file paragraphs.cpp.

                                                                          {
  if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
    return true;
  }
 
  if (justification == JUSTIFICATION_UNKNOWN) {
    tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
  }
  int available_space;
  if (justification == JUSTIFICATION_CENTER) {
    available_space = before.lindent_ + before.rindent_;
  } else {
    available_space = before.OffsideIndent(justification);
  }
  available_space -= before.ri_->average_interword_space;
 
  if (before.ri_->ltr) {
    return after.ri_->lword_box.width() < available_space;
  }
  return after.ri_->rword_box.width() < available_space;
}

◆ fit_lms_line()

void tesseract::fit_lms_line ( TO_ROW * row )

Definition at line 296 of file makerow.cpp.

                               {
  // Feed the bottom-centre point of every blob's bounding box to the fitter.
  tesseract::DetLineFit fitter;
  BLOBNBOX_IT it(row->blob_list());
  it.mark_cycle_pt();
  while (!it.cycled_list()) {
    const TBOX &bbox = it.data()->bounding_box();
    fitter.Add(ICOORD((bbox.left() + bbox.right()) / 2, bbox.bottom()));
    it.forward();
  }

  // Fit a line and store its gradient, offset and fit error on the row.
  float slope, offset;
  const double fit_error = fitter.Fit(&slope, &offset);
  row->set_line(slope, offset, fit_error);
}

◆ fit_parallel_lms()

void tesseract::fit_parallel_lms	(	float	gradient,
		TO_ROW *	row
	)

Definition at line 1970 of file makerow.cpp.

                                                   {
  // Collect the bottom-centre point of each blob that is not joined to its
  // predecessor, counting how many were used.
  tesseract::DetLineFit fitter;
  BLOBNBOX_IT it(row->blob_list());
  int used_blobs = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX *blob = it.data();
    if (blob->joined_to_prev()) {
      continue;
    }
    const TBOX &bbox = blob->bounding_box();
    fitter.Add(ICOORD((bbox.left() + bbox.right()) / 2, bbox.bottom()));
    ++used_blobs;
  }

  // Fit with the gradient held at the supplied value and record that as the
  // row's parallel line.
  float offset;
  double fit_error = fitter.ConstrainedFit(gradient, &offset);
  row->set_parallel_line(gradient, offset, fit_error);

  // With straight baselines enabled and more blobs than
  // textord_lms_line_trials, refit with the gradient free as well.
  const bool refit_freely = textord_straight_baselines && used_blobs > textord_lms_line_trials;
  if (refit_freely) {
    fit_error = fitter.Fit(&gradient, &offset);
  }

  // The ordinary line is set too, from whichever fit ran last.
  row->set_line(gradient, offset, fit_error);
}

◆ fit_parallel_rows()

void tesseract::fit_parallel_rows	(	TO_BLOCK *	block,
		float	gradient,
		FCOORD	rotation,
		int32_t	block_edge,
		bool	testing_on
	)

Definition at line 1928 of file makerow.cpp.

  {
  // Fits a line of the given gradient to every non-empty row of the block,
  // deleting rows that hold no blobs, then re-sorts the rows.
  TO_ROW_IT row_it = block->get_rows();

  row_it.move_to_first();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    TO_ROW *current = row_it.data();
    if (!current->blob_list()->empty()) {
      fit_parallel_lms(gradient, current);
      continue;
    }
    delete row_it.extract(); // nothing in it
  }
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    // Plot each remaining row, stepping the colour from RED up to MAGENTA and
    // then starting again at RED.
    ScrollView::Color colour = ScrollView::RED;
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      plot_parallel_row(row_it.data(), gradient, block_edge, colour, rotation);
      colour = static_cast<ScrollView::Color>(colour + 1);
      if (colour > ScrollView::MAGENTA) {
        colour = ScrollView::RED;
      }
    }
  }
#endif
  // The new fits may have changed the row order.
  row_it.sort(row_y_order);
}

◆ fix_row_pitch()

void tesseract::fix_row_pitch	(	TO_ROW *	bad_row,
		TO_BLOCK *	bad_block,
		TO_BLOCK_LIST *	blocks,
		int32_t	row_target,
		int32_t	block_target
	)

Definition at line 144 of file topitch.cpp.

                                         { // number of block
  // Settles the pitch decision of bad_row by letting the other rows on the
  // page vote. Rows of similar height vote either as part of the target block
  // (block_votes) or of other blocks (like_votes); rows of dissimilar height
  // are tallied in other_votes. The row ends up PITCH_CORR_FIXED or
  // PITCH_CORR_PROP unless it arrived as PITCH_DEF_FIXED / PITCH_DEF_PROP,
  // and its spacing fields are then filled in to match.
  // bad_block is not referenced in this body.
  int16_t mid_cuts;
  int block_votes;               // votes in block
  int like_votes;                // votes over page
  int other_votes;               // votes of unlike blocks
  int block_index;               // number of block
  int row_index;                 // number of row
  int maxwidth;                  // max pitch
  TO_BLOCK_IT block_it = blocks; // block iterator
  TO_BLOCK *block;               // current block
  TO_ROW *row;                   // current row
  float sp_sd;                   // space deviation
  STATS block_stats;             // pitches in block
  STATS like_stats;              // pitches in page

  block_votes = like_votes = other_votes = 0;
  maxwidth = static_cast<int32_t>(ceil(bad_row->xheight * textord_words_maxspace));
  // Voting is skipped for rows whose decision is already definite.
  if (bad_row->pitch_decision != PITCH_DEF_FIXED && bad_row->pitch_decision != PITCH_DEF_PROP) {
    block_stats.set_range(0, maxwidth - 1);
    like_stats.set_range(0, maxwidth - 1);
    // Blocks are numbered from 1, counting only those not skipped below.
    block_index = 1;
    for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
      block = block_it.data();
      POLY_BLOCK *pb = block->block->pdblk.poly_block();
      if (pb != nullptr && !pb->IsText()) {
        continue; // Non text doesn't exist!
      }
      row_index = 1;
      TO_ROW_IT row_it(block->get_rows());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        row = row_it.data();
        // "Similar" row test: within +/- textord_pitch_rowsimilarity of
        // bad_row, comparing xheight + ascrise when bad_row is all caps and
        // xheight alone otherwise.
        if ((bad_row->all_caps &&
             row->xheight + row->ascrise <
                 (bad_row->xheight + bad_row->ascrise) * (1 + textord_pitch_rowsimilarity) &&
             row->xheight + row->ascrise >
                 (bad_row->xheight + bad_row->ascrise) * (1 - textord_pitch_rowsimilarity)) ||
            (!bad_row->all_caps &&
             row->xheight < bad_row->xheight * (1 + textord_pitch_rowsimilarity) &&
             row->xheight > bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
          // Definite decisions carry textord_words_veto_power votes, tentative
          // or corrected ones a single vote; fixed votes are positive,
          // proportional votes negative. Fixed rows also log their pitch.
          if (block_index == block_target) {
            if (row->pitch_decision == PITCH_DEF_FIXED) {
              block_votes += textord_words_veto_power;
              block_stats.add(static_cast<int32_t>(row->fixed_pitch), textord_words_veto_power);
            } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
                       row->pitch_decision == PITCH_CORR_FIXED) {
              block_votes++;
              block_stats.add(static_cast<int32_t>(row->fixed_pitch), 1);
            } else if (row->pitch_decision == PITCH_DEF_PROP) {
              block_votes -= textord_words_veto_power;
            } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
                       row->pitch_decision == PITCH_CORR_PROP) {
              block_votes--;
            }
          } else {
            if (row->pitch_decision == PITCH_DEF_FIXED) {
              like_votes += textord_words_veto_power;
              like_stats.add(static_cast<int32_t>(row->fixed_pitch), textord_words_veto_power);
            } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
                       row->pitch_decision == PITCH_CORR_FIXED) {
              like_votes++;
              like_stats.add(static_cast<int32_t>(row->fixed_pitch), 1);
            } else if (row->pitch_decision == PITCH_DEF_PROP) {
              like_votes -= textord_words_veto_power;
            } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
                       row->pitch_decision == PITCH_CORR_PROP) {
              like_votes--;
            }
          }
        } else {
          // Dissimilar rows only feed the warning further down.
          if (row->pitch_decision == PITCH_DEF_FIXED) {
            other_votes += textord_words_veto_power;
          } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
                     row->pitch_decision == PITCH_CORR_FIXED) {
            other_votes++;
          } else if (row->pitch_decision == PITCH_DEF_PROP) {
            other_votes -= textord_words_veto_power;
          } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
                     row->pitch_decision == PITCH_CORR_PROP) {
            other_votes--;
          }
        }
        row_index++;
      }
      block_index++;
    }
    // Verdict: a block majority above the veto power wins with the block's
    // median pitch; failing that, any positive page-wide vote wins with the
    // page median; otherwise the row is proportional.
    if (block_votes > textord_words_veto_power) {
      bad_row->fixed_pitch = block_stats.ile(0.5);
      bad_row->pitch_decision = PITCH_CORR_FIXED;
    } else if (block_votes <= textord_words_veto_power && like_votes > 0) {
      bad_row->fixed_pitch = like_stats.ile(0.5);
      bad_row->pitch_decision = PITCH_CORR_FIXED;
    } else {
      bad_row->pitch_decision = PITCH_CORR_PROP;
      if (block_votes == 0 && like_votes == 0 && other_votes > 0 &&
          (textord_debug_pitch_test || textord_debug_pitch_metric)) {
        tprintf(
            "Warning:row %d of block %d set prop with no like rows against "
            "trend\n",
            row_target, block_target);
      }
    }
  }
  if (textord_debug_pitch_metric) {
    tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", block_votes, like_votes, other_votes);
    tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
  }
  if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
    // A pitch below textord_min_xheight is replaced: by the block median, the
    // page median, or as a last resort the row's own xheight.
    if (bad_row->fixed_pitch < textord_min_xheight) {
      if (block_votes > 0) {
        bad_row->fixed_pitch = block_stats.ile(0.5);
      } else if (block_votes == 0 && like_votes > 0) {
        bad_row->fixed_pitch = like_stats.ile(0.5);
      } else {
        tprintf("Warning:guessing pitch as xheight on row %d, block %d\n", row_target,
                block_target);
        bad_row->fixed_pitch = bad_row->xheight;
      }
    }
    // Whatever was chosen, never go below textord_min_xheight.
    if (bad_row->fixed_pitch < textord_min_xheight) {
      bad_row->fixed_pitch = (float)textord_min_xheight;
    }
    // Derive the spacing fields as fixed fractions of the pitch.
    bad_row->kern_size = bad_row->fixed_pitch / 4;
    bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
    bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
    bad_row->space_threshold = (bad_row->min_space + bad_row->max_nonspace) / 2;
    bad_row->space_size = bad_row->fixed_pitch;
    // Build the character cells if the row has blobs but no cells yet.
    // sp_sd and mid_cuts are passed uninitialised and not read afterwards;
    // presumably tune_row_pitch only writes them - confirm against its
    // definition.
    if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
      tune_row_pitch(bad_row, &bad_row->projection, bad_row->projection_left,
                     bad_row->projection_right,
                     (bad_row->fixed_pitch + bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
                     sp_sd, mid_cuts, &bad_row->char_cells, false);
    }
  } else if (bad_row->pitch_decision == PITCH_CORR_PROP ||
             bad_row->pitch_decision == PITCH_DEF_PROP) {
    // Proportional rows carry no fixed pitch and no character cells.
    bad_row->fixed_pitch = 0.0f;
    bad_row->char_cells.clear();
  }
}

◆ fixed_pitch_row()

bool tesseract::fixed_pitch_row	(	TO_ROW *	row,
		BLOCK *	block,
		int32_t	block_index
	)

Definition at line 931 of file topitch.cpp.

  {
  // Makes an initial fixed-pitch/proportional classification for one row and
  // stores it in row->pitch_decision. The return value is always true.
  const char *res_string; // pitch result
  int16_t mid_cuts;       // no of cheap cuts
  float non_space;        // gap size
  float pitch_sd;         // error on pitch
  float sp_sd = 0.0f;     // space sd
 
  // Clamp the non-space gap so that it never exceeds the row's pitch.
  non_space = row->fp_nonsp;
  if (non_space > row->fixed_pitch) {
    non_space = row->fixed_pitch;
  }
  POLY_BLOCK *pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
  if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
    // Set the decision to definitely proportional.
    // tune_row_pitch is not called on this path, so mid_cuts stays unset and
    // sp_sd keeps its initial 0.
    pitch_sd = textord_words_def_prop * row->fixed_pitch;
    row->pitch_decision = PITCH_DEF_PROP;
  } else {
    // NOTE(review): sp_sd and mid_cuts are presumably filled in by
    // tune_row_pitch via reference parameters - confirm against its signature.
    pitch_sd = tune_row_pitch(row, &row->projection, row->projection_left, row->projection_right,
                              (row->fixed_pitch + non_space * 3) / 4, row->fixed_pitch, sp_sd,
                              mid_cuts, &row->char_cells, block_index == textord_debug_block);
    // Fixed-pitch candidates: pitch error below the threshold and, when the
    // low two bits of pitsync_linear_version are both set, extra evidence
    // from the dm model flag or the space sd.
    if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch &&
        ((pitsync_linear_version & 3) < 3 ||
         ((pitsync_linear_version & 3) >= 3 &&
          (row->used_dm_model || sp_sd > 20 || (pitch_sd == 0 && sp_sd > 10))))) {
      if (pitch_sd < textord_words_def_fixed * row->fixed_pitch && !row->all_caps &&
          ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) {
        row->pitch_decision = PITCH_DEF_FIXED;
      } else {
        row->pitch_decision = PITCH_MAYBE_FIXED;
      }
    } else if ((pitsync_linear_version & 3) < 3 || sp_sd > 20 || mid_cuts > 0 ||
               pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
      // Proportional candidates: graded by how large the pitch error is.
      if (pitch_sd < textord_words_def_prop * row->fixed_pitch) {
        row->pitch_decision = PITCH_MAYBE_PROP;
      } else {
        row->pitch_decision = PITCH_DEF_PROP;
      }
    } else {
      // Neither test was conclusive.
      row->pitch_decision = PITCH_DUNNO;
    }
  }
 
  if (textord_debug_pitch_metric) {
    // Map the decision to a two-letter code for the debug trace.
    res_string = "??";
    switch (row->pitch_decision) {
      case PITCH_DEF_PROP:
        res_string = "DP";
        break;
      case PITCH_MAYBE_PROP:
        res_string = "MP";
        break;
      case PITCH_DEF_FIXED:
        res_string = "DF";
        break;
      case PITCH_MAYBE_FIXED:
        res_string = "MF";
        break;
      default:
        res_string = "??";
    }
    tprintf(":sd/p=%g:occ=%g:init_res=%s\n", pitch_sd / row->fixed_pitch, sp_sd, res_string);
  }
  return true;
}

◆ fixed_pitch_words()

ROW * tesseract::fixed_pitch_words	(	TO_ROW *	row,
		FCOORD	rotation
	)

Definition at line 65 of file fpchop.cpp.

  {
  // Builds a new ROW for a fixed-pitch TO_ROW by chopping its blobs at the
  // boundaries stored in row->char_cells and grouping the pieces into words.
  // Returns nullptr if the row has no blobs or has fewer than two char cells.
  // The rotation parameter is not referenced anywhere in this body.
  bool bol;                // start of line
  uint8_t blanks;          // in front of word
  uint8_t new_blanks;      // blanks in empty cell
  int16_t chop_coord;      // chop boundary
  int16_t prev_chop_coord; // start of cell
  int16_t rep_left;        // left edge of rep word
  ROW *real_row;           // output row
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;
  WERD_LIST words;
  WERD_IT word_it = &words; // new words
                            // repeated blobs
  WERD_IT rep_it = &row->rep_words;
  WERD *word;         // new word
  // xstarts is written below but never read in this function.
  int32_t xstarts[2]; // row ends
  int32_t prev_x;     // end of prev blob
                      // iterator
  BLOBNBOX_IT box_it = row->blob_list();
  // boundaries
  ICOORDELT_IT cell_it = &row->char_cells;
 
#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != nullptr) {
    plot_row_cells(to_win, ScrollView::RED, row, 0, &row->char_cells);
  }
#endif
 
  prev_x = -INT16_MAX;
  bol = true;
  blanks = 0;
  // rep_left is the left edge of the next repeated-char word, or INT16_MAX
  // when there is none.
  if (rep_it.empty()) {
    rep_left = INT16_MAX;
  } else {
    rep_left = rep_it.data()->bounding_box().left();
  }
  if (box_it.empty()) {
    return nullptr; // empty row
  }
  xstarts[0] = box_it.data()->bounding_box().left();
  if (rep_left < xstarts[0]) {
    xstarts[0] = rep_left;
  }
  // At least two cell boundaries are needed to delimit one cell.
  if (cell_it.empty() || row->char_cells.singleton()) {
    tprintf("Row without enough char cells!\n");
    tprintf("Leftmost blob is at (%d,%d)\n", box_it.data()->bounding_box().left(),
            box_it.data()->bounding_box().bottom());
    return nullptr;
  }
  ASSERT_HOST(!cell_it.empty() && !row->char_cells.singleton());
  prev_chop_coord = cell_it.data()->x();
  word = nullptr;
  // Emit repeated words that start to the left of the first cell boundary.
  // NOTE(review): termination relies on add_repeated_word updating rep_left
  // (presumably a reference parameter) - confirm against its declaration.
  while (rep_left < cell_it.data()->x()) {
    word =
        add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, &word_it);
  }
  cell_it.mark_cycle_pt();
  if (prev_chop_coord >= cell_it.data()->x()) {
    cell_it.forward();
  }
  // Walk the remaining cell boundaries, chopping blobs at each one.
  for (; !cell_it.cycled_list(); cell_it.forward()) {
    chop_coord = cell_it.data()->x();
    // Consume every blob whose left edge is at or before this boundary.
    while (!box_it.empty() && box_it.data()->bounding_box().left() <= chop_coord) {
      if (box_it.data()->bounding_box().right() > prev_x) {
        prev_x = box_it.data()->bounding_box().right();
      }
      split_to_blob(box_it.extract(), chop_coord, textord_fp_chop_error + 0.5f, &left_coutlines,
                    &right_coutlines);
      box_it.forward();
      // Discard any following BLOBNBOXes that carry no cblob.
      while (!box_it.empty() && box_it.data()->cblob() == nullptr) {
        delete box_it.extract();
        box_it.forward();
      }
    }
    // Outlines left over on the right of the previous chop are re-split
    // against the current boundary when nothing new landed on the left.
    if (!right_coutlines.empty() && left_coutlines.empty()) {
      split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5f, &left_coutlines,
                    &right_coutlines);
    }
    if (!left_coutlines.empty()) {
      // This cell has content: it becomes one blob of the current word.
      cblob_it.add_after_then_move(new C_BLOB(&left_coutlines));
    } else {
      // Empty cell: work out how many blanks it represents, rounding the
      // gap width to the nearest multiple of the fixed pitch.
      if (rep_left < chop_coord) {
        if (rep_left > prev_chop_coord) {
          new_blanks =
              static_cast<uint8_t>(floor((rep_left - prev_chop_coord) / row->fixed_pitch + 0.5));
        } else {
          new_blanks = 0;
        }
      } else {
        if (chop_coord > prev_chop_coord) {
          new_blanks =
              static_cast<uint8_t>(floor((chop_coord - prev_chop_coord) / row->fixed_pitch + 0.5));
        } else {
          new_blanks = 0;
        }
      }
      if (!cblob_it.empty()) {
        // An empty cell ends the word collected so far.
        if (blanks < 1 && word != nullptr && !word->flag(W_REP_CHAR)) {
          blanks = 1;
        }
        word = new WERD(&cblobs, blanks, nullptr);
        cblob_it.set_to_list(&cblobs);
        word->set_flag(W_DONT_CHOP, true);
        word_it.add_after_then_move(word);
        if (bol) {
          word->set_flag(W_BOL, true);
          bol = false;
        }
        blanks = new_blanks;
      } else {
        // No pending word: accumulate blanks across consecutive empty cells.
        blanks += new_blanks;
      }
      while (rep_left < chop_coord) {
        word = add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch,
                                 &word_it);
      }
    }
    if (prev_chop_coord < chop_coord) {
      prev_chop_coord = chop_coord;
    }
  }
  // Flush the final word, if any blobs are still pending.
  if (!cblob_it.empty()) {
    word = new WERD(&cblobs, blanks, nullptr);
    word->set_flag(W_DONT_CHOP, true);
    word_it.add_after_then_move(word);
    if (bol) {
      word->set_flag(W_BOL, true);
    }
  }
  ASSERT_HOST(word != nullptr);
  // Append any repeated words that are still outstanding.
  while (!rep_it.empty()) {
    add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, &word_it);
  }
  // at end of line
  word_it.data()->set_flag(W_EOL, true);
  if (prev_chop_coord > prev_x) {
    prev_x = prev_chop_coord;
  }
  xstarts[1] = prev_x + 1;
  real_row =
      new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
  word_it.set_to_list(real_row->word_list());
  // put words in row
  word_it.add_list_after(&words);
  real_row->recalc_bounding_box();
  return real_row;
}

◆ fixspace_dbg()

void tesseract::fixspace_dbg ( WERD_RES * word )

Definition at line 806 of file fixspace.cpp.

                                  {
  // Debug dump of one word: bounding box, best-choice text, blob counts,
  // reject map, and the accepted/done flags.
  const bool show_map_detail = false; // compile-time switch for the per-char dump
  TBOX word_box = word->word->bounding_box();

  word_box.print();
  tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", word->word->cblob_list()->length(),
          word->rebuild_word->NumBlobs(), word->box_word->length());
  word->reject_map.print(debug_fp);
  tprintf("\n");

  if (show_map_detail) {
    // One full reject-map entry per character of the best choice.
    tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
    for (int16_t pos = 0; word->best_choice->unichar_string()[pos] != '\0'; pos++) {
      tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[pos]);
      word->reject_map[pos].full_print(debug_fp);
    }
  }

  const char *accepted_text = word->tess_accepted ? "TRUE" : "FALSE";
  const char *done_text = word->done ? "TRUE" : "FALSE";
  tprintf("Tess Accepted: %s\n", accepted_text);
  tprintf("Done flag: %s\n\n", done_text);
}

◆ flip_0O()

void tesseract::flip_0O ( WERD_RES * word )

◆ flip_hyphens()

void tesseract::flip_hyphens ( WERD_RES * word )

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo f )

Definition at line 129 of file fontinfo.cpp.

                                        {
  // Releases the heap data owned by a FontInfo: every spacing entry, the
  // spacing vector itself, and the name buffer. f is received by value, so
  // the pointers reset to nullptr below belong to this local copy only.
  auto *spacings = f.spacing_vec;
  if (spacings != nullptr) {
    for (auto *entry : *spacings) {
      delete entry;
    }
    delete spacings;
    f.spacing_vec = nullptr;
  }

  delete[] f.name;
  f.name = nullptr;
}

◆ FreeClass()

TESS_API void tesseract::FreeClass ( CLASS_TYPE Class )

Definition at line 119 of file protos.cpp.

                                 {
  // Frees the fields of Class and then Class itself; a null Class is ignored.
  if (!Class) {
    return;
  }
  FreeClassFields(Class);
  delete Class;
}

◆ FreeClassFields()

TESS_API void tesseract::FreeClassFields ( CLASS_TYPE Class )

Definition at line 131 of file protos.cpp.

                                       {
  // Frees the bit vector of each of the class's NumConfigs configurations.
  // Class itself is not deleted here; a null Class is ignored.
  if (!Class) {
    return;
  }
  for (int config = 0; config < Class->NumConfigs; config++) {
    FreeBitVector(Class->Configurations[config]);
  }
}

◆ FreeClusterer()

TESS_API void tesseract::FreeClusterer ( CLUSTERER * Clusterer )

This routine frees all of the memory allocated to the specified data structure. It will not, however, free the memory used by the prototype list. The pointers to the clusters for each prototype in the list will be set to nullptr to indicate that the cluster data structures no longer exist. Any sample lists that have been obtained via calls to GetSamples are no longer valid.

Parameters

Clusterer pointer to data structure to be freed

Definition at line 1575 of file cluster.cpp.

                                         {
  // Nothing to release for a null clusterer.
  if (Clusterer == nullptr) {
    return;
  }

  delete[] Clusterer->ParamDesc;
  delete Clusterer->KDTree;
  delete Clusterer->Root;

  // Release every cached buckets structure, row by row.
  for (auto &cache_row : Clusterer->bucket_cache) {
    for (auto &buckets : cache_row) {
      delete buckets;
    }
  }

  delete Clusterer;
} // FreeClusterer

◆ FreeLabeledClassList()

TESS_COMMON_TRAINING_API void tesseract::FreeLabeledClassList ( LIST ClassList )

This routine deallocates all of the space allocated to the specified list of labeled classes: the class data of each entry, the entry itself, and finally the list nodes.

Parameters

ClassList list of all fonts in document

Definition at line 616 of file commontraining.cpp.

                                          {
  // Keep the head of the list: the loop walks ClassList itself, and the
  // nodes are destroyed from the saved head afterwards.
  LIST list_head = ClassList;

  iterate(ClassList) {
    // Each node holds a MERGE_CLASS; free its class data, then the entry.
    auto merge_class = reinterpret_cast<MERGE_CLASS>(ClassList->first_node());
    FreeClass(merge_class->Class);
    delete merge_class;
  }

  destroy(list_head);
} /* FreeLabeledClassList */

◆ FreeLabeledList()

TESS_COMMON_TRAINING_API void tesseract::FreeLabeledList ( LABELEDLIST LabeledList )

This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.

Parameters

LabeledList labeled list to be freed

Note: Globals: none

Definition at line 417 of file commontraining.cpp.

                                              {
  // Frees the list nodes held by LabeledList and then LabeledList itself.
  // The items stored in the list are not freed here.
  // A null LabeledList is ignored, matching FreeClass and FreeClusterer;
  // without the guard the member access below would dereference null.
  if (LabeledList == nullptr) {
    return;
  }
  destroy(LabeledList->List);
  delete LabeledList;
} /* FreeLabeledList */

◆ FreeMFOutline()

void tesseract::FreeMFOutline ( void * arg )

This routine deallocates all of the memory consumed by a micro-feature outline.

Parameters

arg	micro-feature outline to be freed

Definition at line 132 of file mfoutline.cpp.

                              { // MFOUTLINE Outline)
  // Frees every MFEDGEPT held by a micro-feature outline together with the
  // list nodes that hold them. arg is a type-erased MFOUTLINE.
  auto Outline = static_cast<MFOUTLINE>(arg);
 
  /* break the circular outline so we can use std. techniques to deallocate */
  // Start is the node after Outline; once Outline's link is cut, the chain
  // beginning at Start ends at Outline with a null rest pointer.
  MFOUTLINE Start = Outline->list_rest();
  set_rest(Outline, NIL_LIST);
  while (Start != nullptr) {
    // Free the edge point first, then advance.
    // NOTE(review): pop() presumably frees the node and returns the rest of
    // the list - confirm against its definition.
    delete reinterpret_cast<MFEDGEPT *>(Start->first_node());
    Start = pop(Start);
  }
 
} /* FreeMFOutline */

◆ FreeNormProtoList()

TESS_COMMON_TRAINING_API void tesseract::FreeNormProtoList ( LIST CharList )

Definition at line 706 of file commontraining.cpp.

{
  // Keep the head of the list: the loop walks CharList itself, and the
  // nodes are destroyed from the saved head afterwards.
  LIST list_head = CharList;

  iterate(CharList) {
    // Each node holds a LABELEDLIST; release it through FreeLabeledList.
    FreeLabeledList(reinterpret_cast<LABELEDLIST>(CharList->first_node()));
  }

  destroy(list_head);
} // FreeNormProtoList

◆ FreeOutlines()

void tesseract::FreeOutlines ( LIST Outlines )

Release all memory consumed by the specified list of outlines.

Parameters

Outlines list of mf-outlines to be freed

Definition at line 151 of file mfoutline.cpp.

                                 {
  // Hand the whole list to destroy_nodes, with FreeMFOutline as the
  // per-element deallocator.
  destroy_nodes(Outlines, FreeMFOutline);
} /* FreeOutlines */

◆ FreeProtoList()

TESS_API void tesseract::FreeProtoList ( LIST * ProtoList )

This routine frees all of the memory allocated to the specified list of prototypes. The clusters which are pointed to by the prototypes are not freed.

Parameters

ProtoList pointer to list of prototypes to be freed

Definition at line 1597 of file cluster.cpp.

                                    {
  // Hand the pointed-to list to destroy_nodes, with FreePrototype as the
  // per-element deallocator. *ProtoList itself is not reassigned here.
  destroy_nodes(*ProtoList, FreePrototype);
} // FreeProtoList

◆ FreePrototype()

void tesseract::FreePrototype ( void * arg )

This routine deallocates the memory consumed by the specified prototype and modifies the corresponding cluster so that it is no longer marked as a prototype. The cluster is NOT deallocated by this routine.

Parameters

arg	prototype data structure to be deallocated

Definition at line 1608 of file cluster.cpp.

                              { // PROTOTYPE     *Prototype)
  auto *proto = static_cast<PROTOTYPE *>(arg);

  // Clear the prototype mark on the associated cluster, if there is one.
  // The cluster itself is left allocated.
  auto *cluster = proto->Cluster;
  if (cluster != nullptr) {
    cluster->Prototype = false;
  }

  // Only non-spherical prototypes own per-dimension statistic arrays.
  const bool has_arrays = proto->Style != spherical;
  if (has_arrays) {
    delete[] proto->Variance.Elliptical;
    delete[] proto->Magnitude.Elliptical;
    delete[] proto->Weight.Elliptical;
  }

  delete proto;
} // FreePrototype

◆ FreeTrainingSamples()

TESS_COMMON_TRAINING_API void tesseract::FreeTrainingSamples ( LIST CharList )

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters

CharList list of all fonts in document

Definition at line 391 of file commontraining.cpp.

                                        {
  // Keep the head of the outer list: the loop walks CharList itself, and
  // the nodes are destroyed from the saved head afterwards.
  LIST list_head = CharList;

  iterate(CharList) {
    auto char_sample = reinterpret_cast<LABELEDLIST>(CharList->first_node());

    // Walk a local cursor over the sample's feature sets so that
    // char_sample->List still points at the head for FreeLabeledList.
    LIST feature_cursor = char_sample->List;
    iterate(feature_cursor) {
      delete reinterpret_cast<FEATURE_SET>(feature_cursor->first_node());
    }

    FreeLabeledList(char_sample);
  }

  destroy(list_head);
} /* FreeTrainingSamples */

◆ FullPageBlock()

void tesseract::FullPageBlock	(	int	width,
		int	height,
		BLOCK_LIST *	blocks
	)

Definition at line 68 of file blread.cpp.

                                                              {
  // Append one unnamed block with corners (0, 0) and (width, height) to the
  // end of the block list.
  BLOCK_IT list_end(blocks);
  list_end.add_to_end(new BLOCK("", true, 0, 0, 0, 0, width, height));
}

◆ FullwidthToHalfwidth()

TESS_UNICHARSET_TRAINING_API char32 tesseract::FullwidthToHalfwidth ( const char32 ch )

Definition at line 282 of file normstrngs.cpp.

                                             {
  // Only valid codepoints in U+FF00..U+FFEF, plus U+3000, are converted;
  // every other value is returned unchanged.
  const bool in_fullwidth_block = ch >= 0xFF00 && ch <= 0xFFEF && IsValidCodepoint(ch);
  if (!in_fullwidth_block && ch != 0x3000) {
    return ch;
  }

  // Fullwidth left/right "white parentheses" are mapped by hand.
  switch (ch) {
    case 0xFF5F:
      return 0x2985;
    case 0xFF60:
      return 0x2986;
    default:
      break;
  }

  // Everything else goes through ICU's Fullwidth-Halfwidth transliterator.
  IcuErrorCode status;
  icu::UnicodeString converted(static_cast<UChar32>(ch));
  const icu::Transliterator *to_halfwidth =
      icu::Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, status);
  status.assertSuccess();
  status.reset();

  to_halfwidth->transliterate(converted);
  delete to_halfwidth;

  ASSERT_HOST(converted.length() != 0);
  return converted[0];
}

◆ FuncInplace()

template<class Func >

void tesseract::FuncInplace	(	int	n,
		TFloat *	inout
	)

inline

Definition at line 164 of file functions.h.

                                              {
  // Apply a default-constructed Func to each of the first n elements of
  // inout, overwriting every element with its result.
  Func transform;
  for (int idx = 0; idx < n; ++idx) {
    TFloat &value = inout[idx];
    value = transform(value);
  }
}

◆ FuncMultiply()

template<class Func >

void tesseract::FuncMultiply	(	const TFloat *	u,
		const TFloat *	v,
		int	n,
		TFloat *	out
	)

inline

Definition at line 173 of file functions.h.

                                                                               {
  // For each of the first n elements: out[i] = Func(u[i]) * v[i], using a
  // default-constructed Func.
  Func transform;
  for (int idx = 0; idx < n; ++idx) {
    const TFloat mapped = transform(u[idx]);
    out[idx] = mapped * v[idx];
  }
}

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion	(	int	width,
		int	height,
		TRand *	randomizer,
		Image *	pix,
		std::vector< TBOX > *	boxes
	)

Definition at line 222 of file degradeimage.cpp.

                                                           {
  // Applies one random projective distortion to the image (if supplied) and
  // to the boxes (if supplied). When an image is given, its real dimensions
  // replace the width/height arguments.
  if (pix != nullptr && *pix != nullptr) {
    width = pixGetWidth(*pix);
    height = pixGetHeight(*pix);
  }
  // ProjectiveCoeffs hands back two coefficient arrays, which this function
  // releases with lept_free on every exit path.
  float *im_coeffs = nullptr;
  float *box_coeffs = nullptr;
  l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
  if (pix != nullptr && *pix != nullptr) {
    // Transform the image.
    Image transformed = pixProjective(*pix, im_coeffs, incolor);
    if (transformed == nullptr) {
      tprintf("Projective transformation failed!!\n");
      // Free the coefficient arrays before bailing out; returning without
      // doing so leaked both of them.
      lept_free(im_coeffs);
      lept_free(box_coeffs);
      return;
    }
    pix->destroy();
    *pix = transformed;
  }
  if (boxes != nullptr) {
    // Transform the boxes.
    for (auto &b : *boxes) {
      int x1, y1, x2, y2;
      const TBOX &box = b;
      // Box y coordinates are flipped to (height - y) before the transform
      // and flipped back afterwards. Both diagonals are transformed and the
      // union of the two resulting boxes is kept.
      projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1);
      projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2);
      TBOX new_box1(x1, height - y2, x2, height - y1);
      projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1);
      projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2);
      TBOX new_box2(x1, height - y1, x2, height - y2);
      b = new_box1.bounding_union(new_box2);
    }
  }
  lept_free(im_coeffs);
  lept_free(box_coeffs);
}

◆ get_blob_coords()

int tesseract::get_blob_coords	(	TO_ROW *	row,
		int32_t	lineheight,
		TBOX *	blobcoords,
		bool &	holed_line,
		int &	outcount
	)

Definition at line 416 of file oldbasel.cpp.

  {
  // Fills blobcoords with the boxes of the row's blobs, reports the number
  // kept in outcount, sets holed_line, and returns an x-height estimate.
  // Returns 0 immediately (leaving the outputs untouched) if the row has no
  // blobs. blobcoords is written without any bounds check, so the caller
  // must supply an array that is large enough.
  // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  int blobindex;    /*no along text line */
  int losscount;    // lost blobs
  int maxlosscount; // greatest lost blobs
  /*height stat collection */
  STATS heightstat(0, MAXHEIGHT - 1);
 
  if (blob_it.empty()) {
    return 0; // none
  }
  maxlosscount = 0;
  losscount = 0;
  blob_it.mark_cycle_pt();
  blobindex = 0;
  do {
    // NOTE(review): box_next_pre_chopped presumably advances blob_it as it
    // returns the next box - confirm against its definition.
    blobcoords[blobindex] = box_next_pre_chopped(&blob_it);
    // Only boxes taller than a quarter of the line height feed the stats.
    if (blobcoords[blobindex].height() > lineheight * 0.25) {
      heightstat.add(blobcoords[blobindex].height(), 1);
    }
    // Keep the box if it is the first one, is tall enough, or the iterator
    // has wrapped round.
    if (blobindex == 0 || blobcoords[blobindex].height() > lineheight * 0.25 ||
        blob_it.cycled_list()) {
      blobindex++; /*no of merged blobs */
      losscount = 0;
    } else {
      // A small box is still kept when its width and height are within a
      // factor of oldbl_dot_error_size of each other.
      if (blobcoords[blobindex].height() < blobcoords[blobindex].width() * oldbl_dot_error_size &&
          blobcoords[blobindex].width() < blobcoords[blobindex].height() * oldbl_dot_error_size) {
        // counts as dot
        blobindex++;
        losscount = 0;
      } else {
        // Rejected: blobindex is not advanced, so the next box overwrites
        // this slot. Track the longest run of consecutive rejections.
        losscount++; // lost it
        if (losscount > maxlosscount) {
          // remember max
          maxlosscount = losscount;
        }
      }
    }
  } while (!blob_it.cycled_list());
 
  holed_line = maxlosscount > oldbl_holed_losscount;
  outcount = blobindex; /*total blobs */
 
  // With more than one height sample use the 0.25 quantile of the
  // statistics; otherwise fall back to the height of the first box.
  if (heightstat.get_total() > 1) {
    /*guess x-height */
    return static_cast<int>(heightstat.ile(0.25));
  } else {
    return blobcoords[0].height();
  }
}

◆ get_min_max_xheight()

void tesseract::get_min_max_xheight	(	int	block_linesize,
		int *	min_height,
		int *	max_height
	)

inline

Definition at line 86 of file makerow.h.

                                                                                      {
  // Lower bound: block_linesize scaled by textord_minxh and rounded down,
  // but never below textord_min_xheight.
  int lower_bound = static_cast<int32_t>(floor(block_linesize * textord_minxh));
  if (lower_bound < textord_min_xheight) {
    lower_bound = textord_min_xheight;
  }
  *min_height = lower_bound;

  // Upper bound: three times block_linesize, rounded up.
  *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
}

◆ get_row_category()

ROW_CATEGORY tesseract::get_row_category ( const TO_ROW * row )

inline

Definition at line 94 of file makerow.h.

                                                        {
  // A row without a positive xheight cannot be categorised.
  if (row->xheight <= 0) {
    return ROW_INVALID;
  }
  // A positive ascender rise takes priority over a descender drop.
  if (row->ascrise > 0) {
    return ROW_ASCENDERS_FOUND;
  }
  if (row->descdrop != 0) {
    return ROW_DESCENDERS_FOUND;
  }
  return ROW_UNKNOWN;
}

◆ get_ydiffs()

int tesseract::get_ydiffs	(	TBOX	blobcoords[],
		int	blobcount,
		QSPLINE *	spline,
		float	ydiffs[]
	)

Definition at line 860 of file oldbasel.cpp.

  {
  // Stores in ydiffs[] the offset of each blob's bottom from the spline
  // (adjusted by the accumulated spline steps) and returns the index of the
  // middle blob of the three consecutive blobs whose absolute offsets sum to
  // the smallest value. Returns 0 when there are fewer than three blobs.
  // blobcoords[0] is read unconditionally, even when blobcount is 0.
  int blobindex; /*current blob */
  int xcentre;   /*xcoord */
  int lastx;     /*last xcentre */
  float diffsum; /*sum of diffs */
  float diff;    /*current difference */
  float drift;   /*sum of spline steps */
  float bestsum; /*smallest diffsum */
  int bestindex; /*index of bestsum */
 
  diffsum = 0.0f;
  bestindex = 0;
  bestsum = static_cast<float>(INT32_MAX);
  drift = 0.0f;
  lastx = blobcoords[0].left();
  /*do each blob in row */
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    /*centre of blob */
    xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1;
    // step functions in spline
    drift += spline->step(lastx, xcentre);
    lastx = xcentre;
    // diff = blob bottom - spline height at the centre + accumulated drift.
    diff = blobcoords[blobindex].bottom();
    diff -= spline->y(xcentre);
    diff += drift;
    ydiffs[blobindex] = diff; /*store difference */
    // diffsum is a running sum of |diff| over the latest three blobs: drop
    // the entry that falls out of the window before adding the new one.
    if (blobindex > 2) {
      /*remove old one */
      diffsum -= ABS(ydiffs[blobindex - 3]);
    }
    diffsum += ABS(diff); /*add new one */
    // Only compare once the window holds three blobs.
    if (blobindex >= 2 && diffsum < bestsum) {
      bestsum = diffsum;         /*find min sum */
      bestindex = blobindex - 1; /*middle of set */
    }
  }
  return bestindex;
}

◆ GetCleanedTextResult()

std::string tesseract::GetCleanedTextResult	(	tesseract::TessBaseAPI *	tess,
		Image	pix
	)

Definition at line 45 of file baseapi_test.cc.

                                                                      {
  // Runs OCR on pix and returns the recognised UTF-8 text after trim().
  tess->SetImage(pix);
  char *result = tess->GetUTF8Text();
  // Constructing a std::string from a null char* is undefined behaviour, so
  // a null result (presumably what GetUTF8Text returns on failure - confirm)
  // is mapped to an empty string instead.
  std::string ocr_result = result != nullptr ? result : "";
  // delete[] on a null pointer is a no-op, so no guard is needed here.
  delete[] result;
  trim(ocr_result);
  return ocr_result;
}

◆ GetCPPadsForLevel()

void tesseract::GetCPPadsForLevel	(	int	Level,
		float *	EndPad,
		float *	SidePad,
		float *	AnglePad
	)

This routine copies the appropriate global pad variables into EndPad, SidePad, and AnglePad. This is a kludge used to get around the fact that global control variables cannot be arrays. If the specified level is illegal, the tightest possible pads are returned.

Parameters

Level	"tightness" level to return pads for
EndPad	place to put end pad for Level
SidePad	place to put side pad for Level
AnglePad	place to put angle pad for Level

Definition at line 1235 of file intproto.cpp.

                                                                                  {
  // Level 0 selects the loose pads and level 1 the medium pads. Level 2 and
  // every other value select the tight pads.
  switch (Level) {
    case 0:
      *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength();
      *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength();
      *AnglePad = classify_cp_angle_pad_loose / 360.0;
      break;

    case 1:
      *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength();
      *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength();
      *AnglePad = classify_cp_angle_pad_medium / 360.0;
      break;

    case 2:
    default:
      *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength();
      *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength();
      *AnglePad = classify_cp_angle_pad_tight / 360.0;
      break;
  }

  // The angle pad is capped at 0.5.
  if (*AnglePad > 0.5) {
    *AnglePad = 0.5;
  }

} /* GetCPPadsForLevel */

◆ GetMatchColorFor()

ScrollView::Color tesseract::GetMatchColorFor ( float Evidence )

Parameters

Evidence evidence value to return color for

Returns: Color which corresponds to specified Evidence value.

Note: Globals: none

Definition at line 1272 of file intproto.cpp.

                                                 {
  // Evidence must lie in [0, 1].
  assert(Evidence >= 0.0);
  assert(Evidence <= 1.0);

  // Bands are tested from the highest threshold downwards.
  if (Evidence >= 0.90) {
    return ScrollView::WHITE;
  }
  if (Evidence >= 0.75) {
    return ScrollView::GREEN;
  }
  if (Evidence >= 0.50) {
    return ScrollView::RED;
  }
  return ScrollView::BLUE;
} /* GetMatchColorFor */

◆ GetNextFill()

void tesseract::GetNextFill	(	TABLE_FILLER *	Filler,
		FILL_SPEC *	Fill
	)

This routine returns (in Fill) the specification of the next line to be filled from Filler. FillerDone() should always be called before GetNextFill() to ensure that we do not run past the end of the fill table.

Parameters

Filler	filler to get next fill spec from
Fill	place to put spec for next fill

Definition at line 1295 of file intproto.cpp.

                                                        {
  // Writes the fill spec for the filler's current X into Fill, applies every
  // switch that has been reached, and then advances the filler by one X.
  FILL_SWITCH *Next;
 
  /* compute the fill assuming no switches will be encountered */
  // YStart/YEnd are stored in the filler scaled by 256; the >> 8 drops the
  // low 8 bits.
  Fill->AngleStart = Filler->AngleStart;
  Fill->AngleEnd = Filler->AngleEnd;
  Fill->X = Filler->X;
  Fill->YStart = Filler->YStart >> 8;
  Fill->YEnd = Filler->YEnd >> 8;
 
  /* update the fill info and the filler for ALL switches at this X value */
  Next = &(Filler->Switch[Filler->NextSwitch]);
  while (Filler->X >= Next->X) {
    Fill->X = Filler->X = Next->X;
    if (Next->Type == StartSwitch) {
      // A start switch replaces the lower edge and its per-step delta.
      Fill->YStart = Next->Y;
      Filler->StartDelta = Next->Delta;
      Filler->YStart = Next->YInit;
    } else if (Next->Type == EndSwitch) {
      // An end switch replaces the upper edge and its per-step delta.
      Fill->YEnd = Next->Y;
      Filler->EndDelta = Next->Delta;
      Filler->YEnd = Next->YInit;
    } else { /* Type must be LastSwitch */
      // The last switch is never consumed: NextSwitch is left pointing at it.
      break;
    }
    Filler->NextSwitch++;
    Next = &(Filler->Switch[Filler->NextSwitch]);
  }
 
  /* prepare the filler for the next call to this routine */
  Filler->X++;
  Filler->YStart += Filler->StartDelta;
  Filler->YEnd += Filler->EndDelta;
 
} /* GetNextFill */

◆ GetXheightString()

std::string tesseract::GetXheightString	(	const std::string &	script_dir,
		const UNICHARSET &	unicharset
	)

Definition at line 166 of file unicharset_training_utils.cpp.

                                                                                      {
  // Concatenates the contents of "<script_dir>/<script>.xheights" for every
  // script in the unicharset's script table. Files that cannot be read are
  // skipped.
  std::string combined;
  for (int script_id = 0; script_id < unicharset.get_script_table_size(); ++script_id) {
    const std::string script_name = unicharset.get_script_from_script_id(script_id);
    std::string path = script_dir + "/" + script_name + ".xheights";
    std::string contents;
    if (!File::ReadFileToString(path, &contents)) {
      continue;
    }
    combined += contents;
  }
  return combined;
}

◆ GlobalParams()

TESS_API ParamsVectors * tesseract::GlobalParams ( )

Definition at line 36 of file params.cpp.

                                       {
  // A function-local static: constructed on the first call, and every call
  // returns the address of this same object.
  static tesseract::ParamsVectors global_params = tesseract::ParamsVectors();
  return &global_params;
}

◆ HistogramRect()

void tesseract::HistogramRect	(	Image	src_pix,
		int	channel,
		int	left,
		int	top,
		int	width,
		int	height,
		int *	histogram
	)

Definition at line 146 of file otsuthr.cpp.

                                   {
  // Counts the byte values of one channel over the rectangle with top-left
  // corner (left, top) and the given width and height. The histogram is
  // zeroed first (kHistogramSize entries).
  const int bytes_per_pixel = pixGetDepth(src_pix) / 8;
  channel = ClipToRange(channel, 0, bytes_per_pixel - 1);
  memset(histogram, 0, sizeof(*histogram) * kHistogramSize);

  const int words_per_line = pixGetWpl(src_pix);
  l_uint32 *image_words = pixGetData(src_pix);
  const int row_end = top + height;
  for (int row = top; row < row_end; ++row) {
    const l_uint32 *line = image_words + row * words_per_line;
    for (int col = 0; col < width; ++col) {
      // Byte offset of the requested channel within this pixel.
      const int byte_index = (col + left) * bytes_per_pixel + channel;
      const int value = GET_DATA_BYTE(line, byte_index);
      ++histogram[value];
    }
  }
}

◆ HOcrEscape()

std::string tesseract::HOcrEscape ( const char * text )

Escape a char string: replace each of the characters &, <, >, " and ' with its HTML code.

Definition at line 2378 of file baseapi.cpp.

                                       {
  // Copies text into a new string, replacing the five characters
  // < > & " ' with HTML codes and passing all other characters through.
  std::string escaped;
  for (const char *cursor = text; *cursor != '\0'; ++cursor) {
    const char ch = *cursor;
    if (ch == '<') {
      escaped += "&lt;";
    } else if (ch == '>') {
      escaped += "&gt;";
    } else if (ch == '&') {
      escaped += "&amp;";
    } else if (ch == '"') {
      escaped += "&quot;";
    } else if (ch == '\'') {
      escaped += "&#39;";
    } else {
      escaped += ch;
    }
  }
  return escaped;
}

◆ InitFeatureDefs()

TESS_API void tesseract::InitFeatureDefs ( FEATURE_DEFS_STRUCT * featuredefs )

Definition at line 87 of file featdefs.cpp.

                                                       {
  // Record the number of feature types and copy each entry of the DescDefs
  // table into the struct.
  featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES;
  for (int type = 0; type < NUM_FEATURE_TYPES; ++type) {
    featuredefs->FeatureDesc[type] = DescDefs[type];
  }
}

◆ InitFeatureDisplayWindowIfReqd()

void tesseract::InitFeatureDisplayWindowIfReqd ( )

Initializes the feature display window if it is not already initialized.

Definition at line 1614 of file intproto.cpp.

                                      {
  // The window is created once; later calls leave it alone.
  if (FeatureDisplayWindow != nullptr) {
    return;
  }
  FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", 50, 700);
}

◆ initialise_search()

void tesseract::initialise_search	(	WERD_RES_LIST &	src_list,
		WERD_RES_LIST &	new_list
	)

Definition at line 201 of file fixspace.cpp.

                                                                         {
  // Appends to new_list a deep copy of every word in src_list that is not
  // flagged as a combination. Both combination flags are cleared on each copy.
  WERD_RES_IT src_it(&src_list);
  WERD_RES_IT new_it(&new_list);

  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    WERD_RES *original = src_it.data();
    if (original->combination) {
      continue; // combination words are not copied
    }
    WERD_RES *duplicate = WERD_RES::deep_copy(original);
    duplicate->combination = false;
    duplicate->part_of_combo = false;
    new_it.add_after_then_move(duplicate);
  }
}

◆ InitIntegerFX()

TESS_API void tesseract::InitIntegerFX ( )

Public Code

Public Function Prototypes

Definition at line 54 of file intfx.cpp.

                     {
  // Serialises the one-time fill of cos_table and sin_table so that they
  // are computed only once.
  static std::mutex atan_table_mutex;
  static bool atan_table_init = false;
  std::lock_guard<std::mutex> guard(atan_table_mutex);
  if (atan_table_init) {
    return; // tables already filled by an earlier call
  }

  // Entry i holds the cosine/sine of i * 2 * pi / INT_CHAR_NORM_RANGE + pi.
  for (int idx = 0; idx < INT_CHAR_NORM_RANGE; ++idx) {
    cos_table[idx] = cos(idx * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI);
    sin_table[idx] = sin(idx * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI);
  }
  atan_table_init = true;
}

◆ InitIntMatchWindowIfReqd()

void tesseract::InitIntMatchWindowIfReqd ( )

Initializes the int matcher window if it is not already initialized.

Definition at line 1587 of file intproto.cpp.

                                {
  // The window and its popup menu are created once; later calls leave them
  // alone.
  if (IntMatchWindow != nullptr) {
    return;
  }

  IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200);

  // Popup menu with one entry per debug mode.
  auto *menu_root = new SVMenuNode();
  menu_root->AddChild("Debug Adapted classes", IDA_ADAPTIVE, "x", "Class to debug");
  menu_root->AddChild("Debug Static classes", IDA_STATIC, "x", "Class to debug");
  menu_root->AddChild("Debug Both", IDA_BOTH, "x", "Class to debug");
  menu_root->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, "0", "Index to debug");
  menu_root->BuildMenu(IntMatchWindow, false);
}

◆ InitMatcherRatings()

void tesseract::InitMatcherRatings ( float * Rating )

◆ InitProtoDisplayWindowIfReqd()

void tesseract::InitProtoDisplayWindowIfReqd ( )

Initializes the proto display window if it is not already initialized.

Definition at line 1604 of file intproto.cpp.

                                    {
  // Create the window on first use only; later calls leave it untouched.
  if (ProtoDisplayWindow != nullptr) {
    return;
  }
  ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", 550, 200);
}

◆ InitPrototypes()

void tesseract::InitPrototypes ( )

◆ InitTableFiller()

void tesseract::InitTableFiller	(	float	EndPad,
		float	SidePad,
		float	AnglePad,
		PROTO_STRUCT *	Proto,
		TABLE_FILLER *	Filler
	)

This routine computes a data structure (Filler) which can be used to fill in a rectangle surrounding the specified Proto. Results are returned in Filler.

Parameters

EndPad,SidePad,AnglePad	padding to add to proto
Proto	proto to create a filler for
Filler	place to put table filler

Definition at line 1340 of file intproto.cpp.

{
  /* Fills *Filler from Proto (X, Y, Angle, Length) and the three pads.
   * Three cases are selected by Proto->Angle:
   *   - within HV_TOLERANCE of 0.0 or 0.5   : horizontal proto
   *   - within HV_TOLERANCE of 0.25 or 0.75 : vertical proto
   *   - anything else                       : diagonal proto (rising or falling)
   * The horizontal/vertical cases use a single LastSwitch entry; the diagonal
   * cases use two switch entries followed by a LastSwitch in Switch[2]. */
  float Angle;
  float X, Y, HalfLength;
  float Cos, Sin;
  float XAdjust, YAdjust;
  FPOINT Start, Switch1, Switch2, End;
  /* indices of the two diagonal switch entries; swapped below when Switch1
   * lies to the right of Switch2 */
  int S1 = 0;
  int S2 = 1;
 
  Angle = Proto->Angle;
  X = Proto->X;
  Y = Proto->Y;
  HalfLength = Proto->Length / 2.0;
 
  /* the angle range and switch cursor are set the same way for every case */
  Filler->AngleStart = CircBucketFor(Angle - AnglePad, AS, NB);
  Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB);
  Filler->NextSwitch = 0;
 
  if (fabs(Angle - 0.0) < HV_TOLERANCE || fabs(Angle - 0.5) < HV_TOLERANCE) {
    /* horizontal proto - handle as special case */
    /* EndPad extends the X range, SidePad extends the Y range */
    Filler->X = Bucket8For(X - HalfLength - EndPad, XS, NB);
    Filler->YStart = Bucket16For(Y - SidePad, YS, NB * 256);
    Filler->YEnd = Bucket16For(Y + SidePad, YS, NB * 256);
    Filler->StartDelta = 0;
    Filler->EndDelta = 0;
    Filler->Switch[0].Type = LastSwitch;
    Filler->Switch[0].X = Bucket8For(X + HalfLength + EndPad, XS, NB);
  } else if (fabs(Angle - 0.25) < HV_TOLERANCE || fabs(Angle - 0.75) < HV_TOLERANCE) {
    /* vertical proto - handle as special case */
    /* roles of the pads are exchanged: SidePad extends X, EndPad extends Y */
    Filler->X = Bucket8For(X - SidePad, XS, NB);
    Filler->YStart = Bucket16For(Y - HalfLength - EndPad, YS, NB * 256);
    Filler->YEnd = Bucket16For(Y + HalfLength + EndPad, YS, NB * 256);
    Filler->StartDelta = 0;
    Filler->EndDelta = 0;
    Filler->Switch[0].Type = LastSwitch;
    Filler->Switch[0].X = Bucket8For(X + SidePad, XS, NB);
  } else {
    /* diagonal proto */
 
    if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) {
      /* rising diagonal proto */
      /* presumably converts a fraction-of-a-turn angle to radians (consistent
       * with the 0.25 / 0.5 / 0.75 tests above) - TODO confirm */
      Angle *= 2.0 * M_PI;
      Cos = fabs(std::cos(Angle));
      Sin = fabs(std::sin(Angle));
 
      /* compute the positions of the corners of the acceptance region */
      /* End and Switch2 are the reflections of Start and Switch1 through (X, Y) */
      Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
      Start.y = Y - (HalfLength + EndPad) * Sin + SidePad * Cos;
      End.x = 2.0 * X - Start.x;
      End.y = 2.0 * Y - Start.y;
      Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin;
      Switch1.y = Y - (HalfLength + EndPad) * Sin - SidePad * Cos;
      Switch2.x = 2.0 * X - Switch1.x;
      Switch2.y = 2.0 * Y - Switch1.y;
 
      /* keep the switch entries ordered by increasing x */
      if (Switch1.x > Switch2.x) {
        S1 = 1;
        S2 = 0;
      }
 
      /* translate into bucket positions and deltas */
      /* NOTE(review): these two deltas use a bare static_cast<int16_t>, while
       * the falling branch below rounds with IntCastRounded and clips with
       * ClipToRange - confirm whether the difference is intentional */
      Filler->X = Bucket8For(Start.x, XS, NB);
      Filler->StartDelta = -static_cast<int16_t>((Cos / Sin) * 256);
      Filler->EndDelta = static_cast<int16_t>((Sin / Cos) * 256);
 
      /* project from Start.x to the end of its x bucket to get the initial
       * YStart / YEnd */
      XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x;
      YAdjust = XAdjust * Cos / Sin;
      Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256);
      YAdjust = XAdjust * Sin / Cos;
      Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256);
 
      /* at Switch1 the start edge takes over the EndDelta slope */
      Filler->Switch[S1].Type = StartSwitch;
      Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB);
      Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB);
      XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB);
      YAdjust = XAdjust * Sin / Cos;
      Filler->Switch[S1].YInit = Bucket16For(Switch1.y - YAdjust, YS, NB * 256);
      Filler->Switch[S1].Delta = Filler->EndDelta;
 
      /* at Switch2 the end edge takes over the StartDelta slope */
      Filler->Switch[S2].Type = EndSwitch;
      Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB);
      Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB);
      XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB);
      YAdjust = XAdjust * Cos / Sin;
      Filler->Switch[S2].YInit = Bucket16For(Switch2.y + YAdjust, YS, NB * 256);
      Filler->Switch[S2].Delta = Filler->StartDelta;
 
      /* third entry marks the x bucket of End */
      Filler->Switch[2].Type = LastSwitch;
      Filler->Switch[2].X = Bucket8For(End.x, XS, NB);
    } else {
      /* falling diagonal proto */
      /* same angle conversion as the rising branch */
      Angle *= 2.0 * M_PI;
      Cos = fabs(std::cos(Angle));
      Sin = fabs(std::sin(Angle));
 
      /* compute the positions of the corners of the acceptance region */
      /* same construction as the rising case with the Sin terms of the y
       * coordinates and the SidePad * Cos terms sign-flipped */
      Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
      Start.y = Y + (HalfLength + EndPad) * Sin - SidePad * Cos;
      End.x = 2.0 * X - Start.x;
      End.y = 2.0 * Y - Start.y;
      Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin;
      Switch1.y = Y + (HalfLength + EndPad) * Sin + SidePad * Cos;
      Switch2.x = 2.0 * X - Switch1.x;
      Switch2.y = 2.0 * Y - Switch1.y;
 
      /* keep the switch entries ordered by increasing x */
      if (Switch1.x > Switch2.x) {
        S1 = 1;
        S2 = 0;
      }
 
      /* translate into bucket positions and deltas */
      /* deltas are rounded and clipped to the int16_t range before the cast */
      Filler->X = Bucket8For(Start.x, XS, NB);
      Filler->StartDelta = static_cast<int16_t>(
          ClipToRange<int>(-IntCastRounded((Sin / Cos) * 256), INT16_MIN, INT16_MAX));
      Filler->EndDelta = static_cast<int16_t>(
          ClipToRange<int>(IntCastRounded((Cos / Sin) * 256), INT16_MIN, INT16_MAX));
 
      XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x;
      YAdjust = XAdjust * Sin / Cos;
      Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256);
      YAdjust = XAdjust * Cos / Sin;
      Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256);
 
      /* here Switch1 is the EndSwitch and Switch2 the StartSwitch, the
       * reverse of the rising case */
      Filler->Switch[S1].Type = EndSwitch;
      Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB);
      Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB);
      XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB);
      YAdjust = XAdjust * Sin / Cos;
      Filler->Switch[S1].YInit = Bucket16For(Switch1.y + YAdjust, YS, NB * 256);
      Filler->Switch[S1].Delta = Filler->StartDelta;
 
      Filler->Switch[S2].Type = StartSwitch;
      Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB);
      Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB);
      XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB);
      YAdjust = XAdjust * Cos / Sin;
      Filler->Switch[S2].YInit = Bucket16For(Switch2.y - YAdjust, YS, NB * 256);
      Filler->Switch[S2].Delta = Filler->EndDelta;
 
      /* third entry marks the x bucket of End */
      Filler->Switch[2].Type = LastSwitch;
      Filler->Switch[2].X = Bucket8For(End.x, XS, NB);
    }
  }
} /* InitTableFiller */

◆ insert_spline_point()

void tesseract::insert_spline_point	(	int	xstarts[],
		int	segment,
		int	coord1,
		int	coord2,
		int &	segments
	)

Definition at line 1239 of file oldbasel.cpp.

  {
  // Move xstarts[segment + 1 .. segments] up by one slot, working from the
  // top down so nothing is overwritten before it has been copied.
  for (int slot = segments; slot > segment; --slot) {
    xstarts[slot + 1] = xstarts[slot];
  }
  ++segments; // one more segment now exists

  // Write the two coordinates into the slot pair at the insertion point.
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}

◆ InsertNodes()

void tesseract::InsertNodes	(	KDTREE *	tree,
		KDNODE *	nodes
	)

Given a subtree nodes, insert all of its elements into tree.

Definition at line 477 of file kdtree.cpp.

                                              {
  // An empty subtree contributes nothing; otherwise store this node first and
  // then recurse into its left and right children.
  if (nodes != nullptr) {
    KDStore(tree, nodes->Key, nodes->Data);
    InsertNodes(tree, nodes->Left);
    InsertNodes(tree, nodes->Right);
  }
}

◆ INSTANTIATE_TEST_SUITE_P() [1/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Arabic	,
		MatchGroundTruth	,
		::testing::Values("script/Arabic")
	)

◆ INSTANTIATE_TEST_SUITE_P() [2/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Deva	,
		MatchGroundTruth	,
		::testing::Values("script/Devanagari")
	)

◆ INSTANTIATE_TEST_SUITE_P() [3/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Latin	,
		MatchGroundTruth	,
		::testing::Values("script/Latin")
	)

◆ INSTANTIATE_TEST_SUITE_P() [4/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Tessdata	,
		LoadLanguage	,
		::testing::Values(TESSDATA_DIR)
	)

◆ INSTANTIATE_TEST_SUITE_P() [5/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Tessdata	,
		LoadScript	,
		::testing::Values(TESSDATA_DIR)
	)

◆ INSTANTIATE_TEST_SUITE_P() [6/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Tessdata_best	,
		LoadLanguage	,
		::testing::Values(TESSDATA_DIR "_best")
	)

◆ INSTANTIATE_TEST_SUITE_P() [7/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Tessdata_best	,
		LoadScript	,
		::testing::Values(TESSDATA_DIR "_best")
	)

◆ INSTANTIATE_TEST_SUITE_P() [8/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Tessdata_fast	,
		LoadLanguage	,
		::testing::Values(TESSDATA_DIR "_fast")
	)

◆ INSTANTIATE_TEST_SUITE_P() [9/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	DISABLED_Tessdata_fast	,
		LoadScript	,
		::testing::Values(TESSDATA_DIR "_fast")
	)

◆ INSTANTIATE_TEST_SUITE_P() [10/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	Eng	,
		MatchGroundTruth	,
		::testing::Values("eng")
	)

◆ INSTANTIATE_TEST_SUITE_P() [11/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	RangeTest	,
		QRSequenceGeneratorTest	,
		::testing::Values(2, 7, 8, 9, 16, 1e2, 1e4, 1e6)
	)

◆ INSTANTIATE_TEST_SUITE_P() [12/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataBestEngEuroHebrew	,
		OSDTest	,
		::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_best"))
	)

◆ INSTANTIATE_TEST_SUITE_P() [13/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataEngEuroHebrew	,
		OSDTest	,
		::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR))
	)

◆ INSTANTIATE_TEST_SUITE_P() [14/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataFastDeva	,
		OSDTest	,
		::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/devatest.png"), ::testing::Values(TESSDATA_DIR "_fast"))
	)

◆ INSTANTIATE_TEST_SUITE_P() [15/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataFastDevaRotated270	,
		OSDTest	,
		::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/devatest-rotated-270.png"), ::testing::Values(TESSDATA_DIR "_fast"))
	)

◆ INSTANTIATE_TEST_SUITE_P() [16/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataFastEngEuroHebrew	,
		OSDTest	,
		::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_fast"))
	)

◆ INSTANTIATE_TEST_SUITE_P() [17/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataFastRotated180	,
		OSDTest	,
		::testing::Combine(::testing::Values(180), ::testing::Values(TESTING_DIR "/phototest-rotated-180.png"), ::testing::Values(TESSDATA_DIR "_fast"))
	)

◆ INSTANTIATE_TEST_SUITE_P() [18/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataFastRotated270	,
		OSDTest	,
		::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/phototest-rotated-L.png"), ::testing::Values(TESSDATA_DIR "_fast"))
	)

◆ INSTANTIATE_TEST_SUITE_P() [19/19]

tesseract::INSTANTIATE_TEST_SUITE_P	(	TessdataFastRotated90	,
		OSDTest	,
		::testing::Combine(::testing::Values(90), ::testing::Values(TESTING_DIR "/phototest-rotated-R.png"), ::testing::Values(TESSDATA_DIR "_fast"))
	)

◆ INT_PARAM_FLAG()

tesseract::INT_PARAM_FLAG	(	debug_level	,
		0	,
		"Level of Trainer debugging"
	)

◆ INT_VAR_H() [1/26]

tesseract::INT_VAR_H ( classify_integer_matcher_multiplier )

◆ INT_VAR_H() [2/26]

tesseract::INT_VAR_H ( devanagari_split_debuglevel )

◆ INT_VAR_H() [3/26]

tesseract::INT_VAR_H ( editor_image_blob_bb_color )

◆ INT_VAR_H() [4/26]

tesseract::INT_VAR_H ( editor_image_word_bb_color )

◆ INT_VAR_H() [5/26]

tesseract::INT_VAR_H ( editor_image_xpos )

◆ INT_VAR_H() [6/26]

tesseract::INT_VAR_H ( editor_image_ypos )

◆ INT_VAR_H() [7/26]

tesseract::INT_VAR_H ( editor_word_height )

◆ INT_VAR_H() [8/26]

tesseract::INT_VAR_H ( editor_word_width )

◆ INT_VAR_H() [9/26]

tesseract::INT_VAR_H ( editor_word_xpos )

◆ INT_VAR_H() [10/26]

tesseract::INT_VAR_H ( editor_word_ypos )

◆ INT_VAR_H() [11/26]

TESS_API tesseract::INT_VAR_H ( log_level )

◆ INT_VAR_H() [12/26]

tesseract::INT_VAR_H ( pitsync_linear_version )

◆ INT_VAR_H() [13/26]

tesseract::INT_VAR_H ( textord_debug_block )

◆ INT_VAR_H() [14/26]

tesseract::INT_VAR_H ( textord_debug_bugs )

◆ INT_VAR_H() [15/26]

tesseract::INT_VAR_H ( textord_debug_tabfind )

◆ INT_VAR_H() [16/26]

tesseract::INT_VAR_H ( textord_dotmatrix_gap )

◆ INT_VAR_H() [17/26]

tesseract::INT_VAR_H ( textord_fp_chop_error )

◆ INT_VAR_H() [18/26]

tesseract::INT_VAR_H ( textord_lms_line_trials )

◆ INT_VAR_H() [19/26]

tesseract::INT_VAR_H ( textord_min_blobs_in_row )

◆ INT_VAR_H() [20/26]

tesseract::INT_VAR_H ( textord_min_xheight )

◆ INT_VAR_H() [21/26]

tesseract::INT_VAR_H ( textord_pitch_range )

◆ INT_VAR_H() [22/26]

tesseract::INT_VAR_H ( textord_spline_medianwin )

◆ INT_VAR_H() [23/26]

tesseract::INT_VAR_H ( textord_spline_minblobs )

◆ INT_VAR_H() [24/26]

tesseract::INT_VAR_H ( textord_test_x )

◆ INT_VAR_H() [25/26]

tesseract::INT_VAR_H ( textord_test_y )

◆ INT_VAR_H() [26/26]

tesseract::INT_VAR_H ( textord_words_veto_power )

◆ IntCastRounded() [1/2]

int tesseract::IntCastRounded ( double x )

inline

Definition at line 170 of file helpers.h.

                                    {
  // The input must be finite and strictly inside the int range.
  assert(std::isfinite(x));
  assert(x < INT_MAX);
  assert(x > INT_MIN);
  // Round half away from zero: add 0.5 to the magnitude, truncate, and put
  // the sign back.
  if (x >= 0.0) {
    return static_cast<int>(x + 0.5);
  }
  return -static_cast<int>(-x + 0.5);
}

◆ IntCastRounded() [2/2]

int tesseract::IntCastRounded ( float x )

inline

Definition at line 178 of file helpers.h.

                                   {
  assert(std::isfinite(x));
  // Same range guard as the double overload: converting a float whose value
  // does not fit in an int is undefined behaviour, so catch it in debug builds.
  assert(x < static_cast<float>(INT_MAX));
  assert(x > static_cast<float>(INT_MIN));
  // Round half away from zero: add 0.5 to the magnitude, truncate, and put
  // the sign back.
  return x >= 0.0F ? static_cast<int>(x + 0.5F) : -static_cast<int>(-x + 0.5F);
}

◆ IntersectRange()

template<typename T >

void tesseract::IntersectRange	(	const T &	lower1,
		const T &	upper1,
		T *	lower2,
		T *	upper2
	)

inline

Definition at line 141 of file helpers.h.

                                                                                   {
  // Shrink the range [*lower2, *upper2] in place so that it does not extend
  // beyond [lower1, upper1] on either side.
  T &low = *lower2;
  T &high = *upper2;
  if (lower1 > low) {
    low = lower1;
  }
  if (upper1 < high) {
    high = upper1;
  }
}

◆ InterwordSpace()

int tesseract::InterwordSpace	(	const std::vector< RowScratchRegisters > &	rows,
		int	row_start,
		int	row_end
	)

Definition at line 1654 of file paragraphs.cpp.

                                                                                           {
  if (row_end < row_start + 1) {
    return 1;
  }
  int word_height =
      (rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
  int word_width =
      (rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
  STATS spacing_widths(0, 4 + word_width);
  for (int i = row_start; i < row_end; i++) {
    if (rows[i].ri_->num_words > 1) {
      spacing_widths.add(rows[i].ri_->average_interword_space, 1);
    }
  }
  int minimum_reasonable_space = word_height / 3;
  if (minimum_reasonable_space < 2) {
    minimum_reasonable_space = 2;
  }
  int median = spacing_widths.median();
  return (median > minimum_reasonable_space) ? median : minimum_reasonable_space;
}

◆ IsInterchangeValid()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsInterchangeValid ( const char32 ch )

Definition at line 261 of file normstrngs.cpp.

                                         {
  if (!IsValidCodepoint(ch)) {
    return false;
  }
  // Noncharacters: the block U+FDD0..U+FDEF ...
  if (ch >= 0xFDD0 && ch <= 0xFDEF) {
    return false;
  }
  // ... and the last two code points (xFFFE, xFFFF) of each of the planes
  // 0 through 0x10. ch is already known to be at most 0x10FFFF here, so
  // testing the low 16 bits covers exactly those 34 values.
  if ((ch & 0xFFFE) == 0xFFFE) {
    return false;
  }
  // Newline, form feed, tab and carriage return are the only control
  // characters that are allowed through.
  if (ch == '\n' || ch == '\f' || ch == '\t' || ch == '\r') {
    return true;
  }
  return !u_isISOControl(static_cast<UChar32>(ch));
}

◆ IsInterchangeValid7BitAscii()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsInterchangeValid7BitAscii ( const char32 ch )

Definition at line 276 of file normstrngs.cpp.

                                                  {
  // 7-bit ASCII ends at 127, so the bound is exclusive of 128. (The previous
  // "<= 128" bound let 0x80 reach the control-character test, which is what
  // rejected it.)
  // Of the control characters only newline, form feed, tab and carriage
  // return are accepted.
  return IsValidCodepoint(ch) && ch < 128 &&
         (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' || ch == '\f' || ch == '\t' ||
          ch == '\r');
}

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType type )

inline

Definition at line 90 of file equationdetect.cpp.

                                                                {
  return type == EquationDetect::LEFT_INDENT || type == EquationDetect::BOTH_INDENT;
}

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent	(	char32	ch1,
		char32	ch2
	)

Definition at line 219 of file normstrngs.cpp.

                                             {
  // Two code points are equivalent when OCRNormalize maps them to the same value.
  const auto normalized1 = OCRNormalize(ch1);
  const auto normalized2 = OCRNormalize(ch2);
  return normalized1 == normalized2;
}

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType type )

inline

Definition at line 94 of file equationdetect.cpp.

                                                                 {
  return type == EquationDetect::RIGHT_INDENT || type == EquationDetect::BOTH_INDENT;
}

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType type )

inline

Definition at line 86 of file equationdetect.cpp.

                                                     {
  // Any type accepted by PTIsTextType qualifies, and so does PT_EQUATION.
  if (PTIsTextType(type)) {
    return true;
  }
  return type == PT_EQUATION;
}

◆ IsUTF8Whitespace()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsUTF8Whitespace ( const char * text )

Definition at line 233 of file normstrngs.cpp.

                                        {
  // The string is all whitespace when the span reported by
  // SpanUTF8Whitespace equals the full byte length of the string.
  const auto whitespace_span = SpanUTF8Whitespace(text);
  const auto byte_length = strlen(text);
  return whitespace_span == byte_length;
}

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32 ch )

Definition at line 223 of file normstrngs.cpp.

                                       {
  // Valid code points lie in [0, 0xD800) or [0xE000, 0x10FFFF]. The unsigned
  // cast makes a negative ch fail the first range test.
  if (static_cast<uint32_t>(ch) < 0xD800) {
    return true;
  }
  return ch >= 0xE000 && ch <= 0x10FFFF;
}

◆ IsWhitespace()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsWhitespace ( const char32 ch )

Definition at line 228 of file normstrngs.cpp.

                                   {
  // An invalid code point is a caller error and trips the host assertion.
  ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n", ch);
  const auto code = static_cast<UChar32>(ch);
  return u_isUWhiteSpace(code);
}

◆ KDDelete()

void tesseract::KDDelete	(	KDTREE *	Tree,
		float	Key[],
		void *	Data
	)

This routine deletes a node from Tree. The node to be deleted is specified by the Key for the node and the Data contents of the node. These two pointers must be identical to the pointers that were used for the node when it was originally stored in the tree. A node will be deleted from the tree only if its key and data pointers are identical to Key and Data respectively. The tree is re-formed by removing the affected subtree and inserting all elements but the root.

Parameters

Tree	K-D tree to delete node from
Key	key of node to be deleted
Data	data contents of node to be deleted

Definition at line 252 of file kdtree.cpp.

                                                     {
  /* start the search at the root of the tree */
  KDNODE *parent = &(Tree->Root);
  KDNODE *node = parent->Left;
  int depth = NextLevel(Tree, -1);

  /* descend, remembering the parent, until the node matching Key and Data
     is reached or the branch runs out */
  while (node != nullptr && !NodeFound(node, Key, Data)) {
    parent = node;
    node = (Key[depth] < node->BranchPoint) ? node->Left : node->Right;
    depth = NextLevel(Tree, depth);
  }

  if (node == nullptr) {
    return; /* no matching node - tree is left unchanged */
  }

  /* unhook the node from its parent and reset that side's branch bound */
  if (node == parent->Left) {
    parent->Left = nullptr;
    parent->LeftBranch = Tree->KeyDesc[depth].Min;
  } else {
    parent->Right = nullptr;
    parent->RightBranch = Tree->KeyDesc[depth].Max;
  }

  /* put everything below the removed node back into the tree, then free it */
  InsertNodes(Tree, node->Left);
  InsertNodes(Tree, node->Right);
  delete node;
} /* KDDelete */

◆ KDNearestNeighborSearch()

void tesseract::KDNearestNeighborSearch	(	KDTREE *	Tree,
		float	Query[],
		int	QuerySize,
		float	MaxDistance,
		int *	NumberOfResults,
		void **	NBuffer,
		float	DBuffer[]
	)

This routine searches the K-D tree specified by Tree and finds the QuerySize nearest neighbors of Query. All neighbors must be within MaxDistance of Query. The data contents of the nearest neighbors are placed in NBuffer and their distances from Query are placed in DBuffer.

Parameters

Tree	ptr to K-D tree to be searched
Query	ptr to query key (point in D-space)
QuerySize	number of nearest neighbors to be found
MaxDistance	all neighbors must be within this distance
NBuffer	ptr to QuerySize buffer to hold nearest neighbors
DBuffer	ptr to QuerySize buffer to hold distances from nearest neighbor to query point
NumberOfResults	[out] Number of nearest neighbors actually found

Definition at line 305 of file kdtree.cpp.

                                                                                    {
  // Constructs a KDTreeSearch from Tree, Query and QuerySize, then calls its
  // Search with the three output arguments (NumberOfResults, DBuffer, NBuffer).
  // NOTE(review): MaxDistance is not referenced anywhere in this body, so the
  // distance limit is not applied here - confirm whether that is intended.
  KDTreeSearch search(Tree, Query, QuerySize);
  search.Search(NumberOfResults, DBuffer, NBuffer);
}

◆ KDStore()

void tesseract::KDStore	(	KDTREE *	Tree,
		float *	Key,
		CLUSTER *	Data
	)

This routine stores Data in the K-D tree specified by Tree using Key as an access key.

Parameters

Tree	K-D tree in which data is to be stored
Key	ptr to key by which data can be retrieved
Data	ptr to data to be stored in the tree

Definition at line 215 of file kdtree.cpp.

                                                      {
  // slot always points at the child link that would receive the new node.
  auto slot = &(Tree->Root.Left);
  auto depth = NextLevel(Tree, -1);

  for (auto node = *slot; node != nullptr; node = *slot) {
    if (Key[depth] < node->BranchPoint) {
      // Going left: raise the left bound if this key lies beyond it.
      slot = &(node->Left);
      if (Key[depth] > node->LeftBranch) {
        node->LeftBranch = Key[depth];
      }
    } else {
      // Going right: lower the right bound if this key lies beyond it.
      slot = &(node->Right);
      if (Key[depth] < node->RightBranch) {
        node->RightBranch = Key[depth];
      }
    }
    depth = NextLevel(Tree, depth);
  }

  // The walk ended on an empty link; hang the new node there.
  *slot = new KDNODE(Tree, Key, Data, depth);
} /* KDStore */

◆ KDWalk()

void tesseract::KDWalk	(	KDTREE *	Tree,
		kdwalk_proc	action,
		ClusteringContext *	context
	)

Walk a given Tree with action.

Definition at line 313 of file kdtree.cpp.

                                                                          {
  // An empty tree has nothing to visit.
  KDNODE *top = Tree->Root.Left;
  if (top == nullptr) {
    return;
  }
  Walk(Tree, action, context, top, NextLevel(Tree, -1));
}

◆ LangLoader()

void tesseract::LangLoader	(	const char *	lang,
		const char *	tessdatadir
	)

Definition at line 39 of file loadlang_test.cc.

                                                           {
  auto api = std::make_unique<tesseract::TessBaseAPI>();
  // The assertion passes when Init() yields a falsy result. On failure the
  // message names the language that was requested; the value is streamed in
  // because C++ does not interpolate "$lang" inside a string literal.
  ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract for " << lang
                                             << ".";
  api->End();
}

◆ last()

LIST tesseract::last ( LIST var_list )

Definition at line 153 of file oldlist.cpp.

                         {
  // An empty list has no last cell; return it unchanged rather than
  // dereferencing it in the loop condition below.
  if (var_list == NIL_LIST) {
    return var_list;
  }
  // Advance until the cell whose rest is the empty list.
  while (var_list->list_rest() != NIL_LIST) {
    var_list = var_list->list_rest();
  }
  return var_list;
}

◆ LeftWordAttributes()

TESS_API void tesseract::LeftWordAttributes	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	werd,
		const std::string &	utf8,
		bool *	is_list,
		bool *	starts_idea,
		bool *	ends_idea
	)

Definition at line 431 of file paragraphs.cpp.

                                                                           {
  // All three outputs default to false and are only ever switched on below.
  *is_list = false;
  *starts_idea = false;
  *ends_idea = false;

  const bool no_text = utf8.empty() || (werd != nullptr && werd->empty());
  if (no_text) { // Empty
    *ends_idea = true;
    return;
  }

  if (unicharset == nullptr || werd == nullptr) {
    // No usable werd/unicharset pair: assume utf8 is mostly ASCII.
    if (AsciiLikelyListItem(utf8)) {
      *is_list = true;
      *starts_idea = true;
    }
    int start_letter = utf8[0];
    if (IsOpeningPunct(start_letter)) {
      *starts_idea = true;
    }
    if (IsTerminalPunct(start_letter)) {
      *ends_idea = true;
    }
    if (start_letter >= 'A' && start_letter <= 'Z') {
      *starts_idea = true;
    }
    return;
  }

  // We have a proper werd and unicharset so use it.
  if (UniLikelyListItem(unicharset, werd)) {
    *is_list = true;
    *starts_idea = true;
    *ends_idea = true;
  }
  // The remaining tests look at the first unichar of the word only.
  const auto first_id = werd->unichar_id(0);
  if (unicharset->get_isupper(first_id)) {
    *starts_idea = true;
  }
  if (unicharset->get_ispunctuation(first_id)) {
    *starts_idea = true;
    *ends_idea = true;
  }
}

◆ lessthan()

int tesseract::lessthan	(	const void *	first,
		const void *	second
	)

Definition at line 374 of file polyblk.cpp.

                                                    {
  // Each argument is the address of an ICOORDELT pointer; the elements are
  // ordered by their x() value alone.
  const ICOORDELT *lhs = *reinterpret_cast<const ICOORDELT *const *>(first);
  const ICOORDELT *rhs = *reinterpret_cast<const ICOORDELT *const *>(second);

  const auto lhs_x = lhs->x();
  const auto rhs_x = rhs->x();
  if (lhs_x < rhs_x) {
    return -1;
  }
  return (lhs_x > rhs_x) ? 1 : 0;
}

◆ linear_spline_baseline()

double * tesseract::linear_spline_baseline	(	TO_ROW *	row,
		TO_BLOCK *	block,
		int32_t &	segments,
		int32_t	xstarts[]
	)

Definition at line 2180 of file makerow.cpp.

  {
  // Fits straight-line pieces to the bottom-centre points of the row's blobs.
  // Writes the number of pieces to `segments`, writes x positions into
  // xstarts[0] and xstarts[segment], and returns an array of segments * 3
  // doubles allocated with new[] (not freed here). Each triple is
  // (0, b, c) with b and c taken from DetLineFit::Fit.
  // NOTE(review): `block` is not referenced anywhere in this body.
  int blobcount;         // no of blobs
  int blobindex;         // current blob
  int index1, index2;    // blob numbers
  int blobs_per_segment; // blobs in each
  TBOX box;              // blob box
  TBOX new_box;          // new_it box
                         // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX_IT new_it = blob_it; // front end
  float b, c;                   // fitted curve
  tesseract::DetLineFit lms;
  int32_t segment; // current segment
 
  // First pass: count the boxes by stepping until the iterator is back at the
  // first element; the left edge of the first box becomes xstarts[0].
  box = box_next_pre_chopped(&blob_it);
  xstarts[0] = box.left();
  blobcount = 1;
  while (!blob_it.at_first()) {
    blobcount++;
    box = box_next_pre_chopped(&blob_it);
  }
  // At least one segment, otherwise one per textord_spline_medianwin blobs.
  segments = blobcount / textord_spline_medianwin;
  if (segments < 1) {
    segments = 1;
  }
  blobs_per_segment = blobcount / segments;
  // quadratic coeffs
  auto *coeffs = new double[segments * 3];
  if (textord_oldbl_debug) {
    tprintf(
        "Linear splining baseline of %d blobs at (%d,%d), into %d segments of "
        "%d blobs\n",
        blobcount, box.left(), box.bottom(), segments, blobs_per_segment);
  }
  segment = 1;
  // Advance new_it by half a segment so the two iterators are offset from
  // each other; index2 ends up equal to blobs_per_segment / 2.
  for (index2 = 0; index2 < blobs_per_segment / 2; index2++) {
    box_next_pre_chopped(&new_it);
  }
  index1 = 0;
  blobindex = index2;
  // Each pass of the loop fits one segment from blob_it and, unless that was
  // the last one, a second segment from new_it.
  do {
    blobindex += blobs_per_segment;
    lms.Clear();
    // The final segment keeps consuming until every blob has been used.
    while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
      box = box_next_pre_chopped(&blob_it);
      int middle = (box.left() + box.right()) / 2;
      lms.Add(ICOORD(middle, box.bottom()));
      index1++;
      if (index1 == blobindex - blobs_per_segment / 2 || index1 == blobcount - 1) {
        xstarts[segment] = box.left();
      }
    }
    lms.Fit(&b, &c);
    // Leading coefficient is always 0: the piece is a straight line.
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
    if (segment > segments) {
      break;
    }
 
    // Same steps again, this time driven by new_it / index2.
    blobindex += blobs_per_segment;
    lms.Clear();
    while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
      new_box = box_next_pre_chopped(&new_it);
      int middle = (new_box.left() + new_box.right()) / 2;
      lms.Add(ICOORD(middle, new_box.bottom()));
      index2++;
      if (index2 == blobindex - blobs_per_segment / 2 || index2 == blobcount - 1) {
        xstarts[segment] = new_box.left();
      }
    }
    lms.Fit(&b, &c);
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
  } while (segment <= segments);
  return coeffs;
}

◆ LoadDataFromFile() [1/2]

bool tesseract::LoadDataFromFile	(	const char *	filename,
		GenericVector< char > *	data
	)

inline

Definition at line 233 of file genericvector.h.

                                                                              {
  FILE *stream = fopen(filename, "rb");
  if (stream == nullptr) {
    return false;
  }

  // Measure the file by seeking to its end, then rewind for the read.
  fseek(stream, 0, SEEK_END);
  const auto length = std::ftell(stream);
  fseek(stream, 0, SEEK_SET);

  bool loaded = false;
  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
  if (length > 0 && length < LONG_MAX) {
    // reserve an extra byte in case caller wants to append a '\0' character
    data->reserve(length + 1);
    data->resize_no_init(length);
    // Success means the whole file was read in one call.
    const auto bytes_read = static_cast<long>(fread(&(*data)[0], 1, length, stream));
    loaded = bytes_read == length;
  }
  fclose(stream);
  return loaded;
}

◆ LoadDataFromFile() [2/2]

TESS_API bool tesseract::LoadDataFromFile	(	const char *	filename,
		std::vector< char > *	data
	)

Definition at line 32 of file serialis.cpp.

                                                                   {
  bool result = false;
  FILE *fp = fopen(filename, "rb");
  if (fp != nullptr) {
    fseek(fp, 0, SEEK_END);
    auto size = std::ftell(fp);
    fseek(fp, 0, SEEK_SET);
    // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
    if (size > 0 && size < LONG_MAX) {
      // reserve an extra byte in case caller wants to append a '\0' character
      data->reserve(size + 1);
      data->resize(size); // TODO: optimize no init
      result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
    }
    fclose(fp);
  }
  return result;
}

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings	(	const char *	filename,
		std::vector< std::string > *	lines
	)

inline

Definition at line 32 of file fileio.h.

                                                                                      {
  // Loads the file into memory and stores its contents, split on '\n', in
  // *lines. Returns false (leaving *lines untouched) when the load fails.
  std::vector<char> contents;
  const bool loaded = LoadDataFromFile(filename, &contents);
  if (loaded) {
    // TODO: optimize.
    std::string text(contents.data(), contents.size());
    *lines = split(text, '\n');
  }
  return loaded;
}

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const std::string & file_prefix )

Definition at line 148 of file commontraining.cpp.

                                                         {
  // Tries to read a ShapeTable from file_prefix + kShapeTableFileSuffix.
  // Returns a heap-allocated table on success, or nullptr when the file
  // cannot be opened or fails to deserialize. A message is printed in
  // every case.
  std::string table_path = file_prefix;
  table_path += kShapeTableFileSuffix;
  TFile table_file;
  if (!table_file.Open(table_path.c_str(), nullptr)) {
    tprintf("Warning: No shape table file present: %s\n", table_path.c_str());
    return nullptr;
  }
  auto *loaded_table = new ShapeTable;
  if (loaded_table->DeSerialize(&table_file)) {
    int shape_count = loaded_table->NumShapes();
    tprintf("Read shape table %s of %d shapes\n", table_path.c_str(), shape_count);
    return loaded_table;
  }
  // Deserialization failed: discard the partially-read table.
  delete loaded_table;
  tprintf("Error: Failed to read shape table %s\n", table_path.c_str());
  return nullptr;
}

◆ LoadTrainingData()

TESS_COMMON_TRAINING_API std::unique_ptr< MasterTrainer > tesseract::LoadTrainingData	(	const char *const *	filelist,
		bool	replication,
		ShapeTable **	shape_table,
		std::string &	file_prefix
	)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

Loads font info from -F option.
Loads xheights from -X option.
Loads samples from .tr files in remaining command-line args.
Deletes outliers and computes canonical samples.
If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it. Computes canonical and cloud features. If shape_table is not nullptr, but failed to load, make a fake flat one, as shape clustering was not run.

Definition at line 200 of file commontraining.cpp.

                                                                                                  {
  // Builds a MasterTrainer from the command-line flags and the
  // nullptr-terminated list of .tr files in filelist.
  //   filelist    - array of page names, terminated by a nullptr entry.
  //   replication - forwarded to the MasterTrainer constructor.
  //   shape_table - if not nullptr, receives the loaded shape table, or a
  //                 freshly made flat one when none could be loaded.
  //   file_prefix - output: set to FLAGS_D followed by "/", or left empty
  //                 when FLAGS_D is empty.
  // Returns an empty unique_ptr if the font info, the xheights or the output
  // unicharset cannot be processed.
  InitFeatureDefs(&feature_defs);
  InitIntegerFX();
  file_prefix = "";
  if (!FLAGS_D.empty()) {
    file_prefix += FLAGS_D.c_str();
    file_prefix += "/";
  }
  // If we are shape clustering (nullptr shape_table) or we successfully load
  // a shape_table written by a previous shape clustering, then
  // shape_analysis will be true, meaning that the MasterTrainer will replace
  // some members of the unicharset with their fragments.
  bool shape_analysis = false;
  if (shape_table != nullptr) {
    *shape_table = LoadShapeTable(file_prefix);
    if (*shape_table != nullptr) {
      shape_analysis = true;
    }
  } else {
    shape_analysis = true;
  }
  auto trainer = std::make_unique<MasterTrainer>(NM_CHAR_ANISOTROPIC, shape_analysis, replication,
                                                 FLAGS_debug_level);
  IntFeatureSpace fs;
  fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets);
  trainer->LoadUnicharset(FLAGS_U.c_str());
  // Get basic font information from font_properties.
  if (!FLAGS_F.empty()) {
    if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
      return {};
    }
  }
  if (!FLAGS_X.empty()) {
    if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
      return {};
    }
  }
  trainer->SetFeatureSpace(fs);
  // Load training data from .tr files in filelist (terminated by nullptr).
  for (const char *page_name = *filelist++; page_name != nullptr; page_name = *filelist++) {
    tprintf("Reading %s ...\n", page_name);
    trainer->ReadTrainingSamples(page_name, feature_defs, false);

    // Both derived file names replace the trailing "tr" of the page name.
    // Names shorter than two characters are clamped to an empty stem so the
    // length arithmetic can never go negative.
    std::string stem = page_name;
    stem.resize(stem.length() >= 2 ? stem.length() - 2 : 0);

    // If there is a file with [lang].[fontname].exp[num].fontinfo present,
    // read font spacing information in to fontinfo_table.
    const std::string fontinfo_file_name = stem + "fontinfo";
    trainer->AddSpacingInfo(fontinfo_file_name.c_str());

    // Load the images into memory if required by the classifier.
    if (FLAGS_load_images) {
      // Extension must be tif!
      const std::string image_name = stem + "tif";
      trainer->LoadPageImages(image_name.c_str());
    }
  }
  trainer->PostLoadCleanup();
  // Write the master trainer if required.
  if (!FLAGS_output_trainer.empty()) {
    FILE *fp = fopen(FLAGS_output_trainer.c_str(), "wb");
    if (fp == nullptr) {
      tprintf("Can't create saved trainer data!\n");
    } else {
      trainer->Serialize(fp);
      fclose(fp);
    }
  }
  trainer->PreTrainingSetup();
  if (!FLAGS_O.empty() && !trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
    fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
    return {};
  }

  if (shape_table != nullptr) {
    // If we previously failed to load a shapetable, then shape clustering
    // wasn't run so make a flat one now.
    if (*shape_table == nullptr) {
      *shape_table = new ShapeTable;
      trainer->SetupFlatShapeTable(*shape_table);
      tprintf("Flat shape table summary: %s\n", (*shape_table)->SummaryStr().c_str());
    }
    (*shape_table)->set_unicharset(trainer->unicharset());
  }
  return trainer;
}

◆ Logistic()

TFloat tesseract::Logistic ( TFloat x )

inline

Definition at line 59 of file functions.h.

                                 {
  // Table-driven logistic function with linear interpolation between
  // adjacent LogisticTable entries. Negative inputs are handled through the
  // identity Logistic(x) = 1 - Logistic(-x).
  if (x < 0) {
    return 1 - Logistic(-x);
  }
  x *= kScaleFactor;
  // Saturate BEFORE converting to an integer: casting a floating-point value
  // that does not fit in unsigned (very large, infinite or NaN) is undefined
  // behaviour, so the range test must come first.
  if (!(x < kTableSize - 1)) {
    // Only a NaN differs from itself; hand it back instead of clamping to 1.
    return x != x ? x : 1;
  }
  auto index = static_cast<unsigned>(x);
  TFloat l0 = LogisticTable[index];
  TFloat l1 = LogisticTable[index + 1];
  // Linear interpolation.
  return l0 + (l1 - l0) * (x - index);
}

◆ loop_bounding_box()

int16_t tesseract::loop_bounding_box	(	CRACKEDGE *&	start,
		ICOORD &	botleft,
		ICOORD &	topright
	)

Definition at line 117 of file edgloop.cpp.

                      {
  // Walks the closed loop of crack edges once, growing botleft/topright to
  // the loop's bounding box and counting the edges. On return `start` is
  // moved to the edge with the greatest y seen, preferring the smallest x
  // among edges on that row. Returns the number of edges visited.
  CRACKEDGE *cursor = start;   // edge currently being examined
  CRACKEDGE *top_left = start; // best candidate for the new start
  botleft = topright = ICOORD(cursor->pos.x(), cursor->pos.y());
  int16_t top_row_min_x = cursor->pos.x(); // smallest x seen on the top row
  int16_t edge_count = 0;                  // count length
  do {
    cursor = cursor->next;
    // Horizontal extent.
    if (cursor->pos.x() < botleft.x()) {
      botleft.set_x(cursor->pos.x());
    } else if (cursor->pos.x() > topright.x()) {
      topright.set_x(cursor->pos.x());
    }
    // Vertical extent, tracking the top-left edge as we go.
    if (cursor->pos.y() < botleft.y()) {
      botleft.set_y(cursor->pos.y());
    } else if (cursor->pos.y() > topright.y()) {
      // A new top row: this edge is the only candidate so far.
      top_left = cursor;
      top_row_min_x = cursor->pos.x();
      topright.set_y(cursor->pos.y());
    } else if (cursor->pos.y() == topright.y() && cursor->pos.x() < top_row_min_x) {
      // Same top row, but further left.
      top_row_min_x = cursor->pos.x();
      top_left = cursor;
    }
    ++edge_count; // count elements
  } while (cursor != start);
  start = top_left; // shift it to topleft
  return edge_count;
}

◆ LOSTBLOCKLINE()

constexpr ERRCODE tesseract::LOSTBLOCKLINE ( "Can't find rectangle for line" )

constexpr

◆ make_baseline_spline()

void tesseract::make_baseline_spline	(	TO_ROW *	row,
		TO_BLOCK *	block
	)

Definition at line 2053 of file makerow.cpp.

                                           {
  // Sets row->baseline to either a piecewise-linear spline fitted by
  // linear_spline_baseline(), or a single straight segment built from
  // row->line_m() and row->line_c().
  int32_t segments; // no of segments, filled in by segment_baseline
  // Spline boundaries: one more entry than there are blobs in the row.
  auto *xstarts = new int32_t[row->blob_list()->length() + 1];
  double *coeffs; // quadratic coeffs, three per segment

  const bool segmented = segment_baseline(row, block, segments, xstarts);
  if (segmented && !textord_straight_baselines && !textord_parallel_baselines) {
    coeffs = linear_spline_baseline(row, block, segments, xstarts);
  } else {
    // Collapse to one segment spanning the first to the last boundary.
    xstarts[1] = xstarts[segments];
    segments = 1;
    coeffs = new double[3];
    coeffs[0] = 0;
    coeffs[1] = row->line_m();
    coeffs[2] = row->line_c();
  }
  row->baseline = QSPLINE(segments, xstarts, coeffs);
  delete[] xstarts;
  delete[] coeffs;
}

◆ make_edgept()

EDGEPT * tesseract::make_edgept	(	TDimension	x,
		TDimension	y,
		EDGEPT *	next,
		EDGEPT *	prev
	)

Definition at line 138 of file split.cpp.

                                                                            {
  // Allocates a new EDGEPT at (x, y), links it between prev and next, and
  // refreshes the vec fields of both the new point and prev.
  // When prev carries a source outline and prev/next are directly adjacent,
  // prev's run of outline steps is divided between prev and the new point;
  // otherwise the new point has no source outline information.
  // Returns the new point.
  EDGEPT *this_edgept;
  /* Create point */
  this_edgept = new EDGEPT;
  this_edgept->pos.x = x;
  this_edgept->pos.y = y;
  // Now deal with the src_outline steps.
  C_OUTLINE *prev_ol = prev->src_outline;
  if (prev_ol != nullptr && prev->next == next) {
    // Compute the fraction of the segment that is being cut.
    FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
    FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
    // NOTE(review): if prev and next share the same position the divisor
    // below is zero - confirm that callers never pass such a pair.
    double cut_fraction = target_vec.length() / segment_vec.length();
    // Get the start and end at the step level.
    ICOORD step_start = prev_ol->position_at_index(prev->start_step);
    int end_step = prev->start_step + prev->step_count;
    int step_length = prev_ol->pathlength();
    // end_step may run past the end of the outline, so wrap it.
    ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
    ICOORD step_vec = step_end - step_start;
    double target_length = step_vec.length() * cut_fraction;
    // Find the point on the segment that gives the length nearest to target.
    int best_step = prev->start_step;
    ICOORD total_step(0, 0);
    // Starting value corresponds to taking no steps at all (length 0).
    double best_dist = target_length;
    for (int s = prev->start_step; s < end_step; ++s) {
      total_step += prev_ol->step(s % step_length);
      double dist = fabs(target_length - total_step.length());
      if (dist < best_dist) {
        best_dist = dist;
        // The cut falls after step s has been taken.
        best_step = s + 1;
      }
    }
    // The new point is an intermediate point.
    this_edgept->src_outline = prev_ol;
    // The new point takes the steps from best_step up to end_step ...
    this_edgept->step_count = end_step - best_step;
    this_edgept->start_step = best_step % step_length;
    // ... and prev keeps the steps before best_step.
    prev->step_count = best_step - prev->start_step;
  } else {
    // The new point is poly only.
    this_edgept->src_outline = nullptr;
    this_edgept->step_count = 0;
    this_edgept->start_step = 0;
  }
  /* Hook it up */
  this_edgept->next = next;
  this_edgept->prev = prev;
  prev->next = this_edgept;
  next->prev = this_edgept;
  /* Set up vec entries */
  this_edgept->vec.x = this_edgept->next->pos.x - x;
  this_edgept->vec.y = this_edgept->next->pos.y - y;
  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
  return this_edgept;
}

◆ make_first_baseline()

void tesseract::make_first_baseline	(	TBOX	blobcoords[],
		int	blobcount,
		int	xcoords[],
		int	ycoords[],
		QSPLINE *	spline,
		QSPLINE *	baseline,
		float	jumplimit
	)

Definition at line 482 of file oldbasel.cpp.

  {
  // Produces a first estimate of the baseline for a row of blobs.
  // If the supplied spline is missing, has fewer than 3 segments, or does
  // not overlap the row enough (judged with MAXOVERLAP), a new spline is
  // fitted to the blob bottoms: first as one linear segment, then, if the
  // residuals are wavy enough, as several segments split at significant
  // turning points. Otherwise the supplied spline is copied and shifted
  // vertically using the first blob.
  // xcoords/ycoords are filled with blob centres/bottoms only on the
  // fitting path. When textord_oldbl_paradef is set on that path the
  // function returns without writing *baseline.
  // NOTE(review): blobcoords[blobcount - 1] is read unconditionally, so
  // blobcount is presumably always >= 1 - confirm against callers.
  int leftedge;              /*left edge of line */
  int rightedge;             /*right edge of line */
  int blobindex;             /*current blob */
  int segment;               /*current segment */
  float prevy, thisy, nexty; /*3 y coords */
  float y1, y2, y3;          /*3 smooth blobs */
  float maxmax, minmin;      /*absolute limits */
  int x2 = 0;                /*right edge of old y3 */
  int ycount;                /*no of ycoords in use */
  float yturns[SPLINESIZE];  /*y coords of turn pts */
  int xturns[SPLINESIZE];    /*xcoords of turn pts */
  int xstarts[SPLINESIZE + 1];
  int segments; // no of segments
  ICOORD shift; // shift of spline
 
  prevy = 0;
  /*left edge of row */
  leftedge = blobcoords[0].left();
  /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right();
  if (spline == nullptr       /*no given spline */
      || spline->segments < 3 /*or trivial */
                              /*or too non-overlap */
      || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) ||
      spline->xcoords[spline->segments - 1] < rightedge - MAXOVERLAP * (rightedge - leftedge)) {
    if (textord_oldbl_paradef) {
      return; // use default
    }
    xstarts[0] = blobcoords[0].left() - 1;
    for (blobindex = 0; blobindex < blobcount; blobindex++) {
      xcoords[blobindex] = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
      ycoords[blobindex] = blobcoords[blobindex].bottom();
    }
    xstarts[1] = blobcoords[blobcount - 1].right() + 1;
    segments = 1; /*no of segments */
 
    /*linear */
    *baseline = QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
 
    if (blobcount >= 3) {
      // Scan the residuals against the single-segment fit for local
      // maxima/minima among "smooth" points (those within jumplimit of both
      // neighbours).
      y1 = y2 = y3 = 0.0f;
      ycount = 0;
      segment = 0; /*no of segments */
      maxmax = minmin = 0.0f;
      thisy = ycoords[0] - baseline->y(xcoords[0]);
      nexty = ycoords[1] - baseline->y(xcoords[1]);
      for (blobindex = 2; blobindex < blobcount; blobindex++) {
        prevy = thisy; /*shift ycoords */
        thisy = nexty;
        nexty = ycoords[blobindex] - baseline->y(xcoords[blobindex]);
        /*middle of smooth y */
        if (ABS(thisy - prevy) < jumplimit && ABS(thisy - nexty) < jumplimit) {
          y1 = y2; /*shift window */
          y2 = y3;
          y3 = thisy; /*middle point */
          ycount++;
          /*local max */
          if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
                              /*local min */
                              || (y1 > y2 && y2 <= y3))) {
            // Cap the number of recorded turning points so the arrays
            // sized by SPLINESIZE cannot overflow.
            if (segment < SPLINESIZE - 2) {
              /*turning pt */
              xturns[segment] = x2;
              yturns[segment] = y2;
              segment++; /*no of spline segs */
            }
          }
          if (ycount == 1) {
            maxmax = minmin = y3; /*initialise limits */
          } else {
            if (y3 > maxmax) {
              maxmax = y3; /*biggest max */
            }
            if (y3 < minmin) {
              minmin = y3; /*smallest min */
            }
          }
          /*possible turning pt */
          x2 = blobcoords[blobindex - 1].right();
        }
      }
 
      jumplimit *= 1.2f;
      /*must be wavy */
      if (maxmax - minmin > jumplimit) {
        ycount = segment; /*no of segments */
        // From here blobindex walks the recorded turning points and segment
        // counts the spline boundaries being produced.
        for (blobindex = 0, segment = 1; blobindex < ycount; blobindex++) {
          if (yturns[blobindex] > minmin + jumplimit || yturns[blobindex] < maxmax - jumplimit) {
            /*significant peak */
            if (segment == 1 || yturns[blobindex] > prevy + jumplimit ||
                yturns[blobindex] < prevy - jumplimit) {
              /*different to previous */
              xstarts[segment] = xturns[blobindex];
              segment++;
              prevy = yturns[blobindex];
            }
            /*bigger max */
            else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
                     /*smaller min */
                     || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
              xstarts[segment - 1] = xturns[blobindex];
              /*improved previous */
              prevy = yturns[blobindex];
            }
          }
        }
        xstarts[segment] = blobcoords[blobcount - 1].right() + 1;
        segments = segment; /*no of segments */
                            /*linear */
        *baseline = QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
      }
    }
  } else {
    *baseline = *spline; /*copy it */
    // Vertical shift: first blob's bottom minus the spline's y at that
    // blob's right edge.
    shift =
        ICOORD(0, static_cast<int16_t>(blobcoords[0].bottom() - spline->y(blobcoords[0].right())));
    baseline->move(shift);
  }
}

◆ make_first_xheight()

void tesseract::make_first_xheight	(	TO_ROW *	row,
		TBOX	blobcoords[],
		int	lineheight,
		int	init_lineheight,
		int	blobcount,
		QSPLINE *	baseline,
		float	jumplimit
	)

Definition at line 1421 of file oldbasel.cpp.

  {
  // Estimates row->xheight from the heights of the row's blobs above the
  // given baseline.
  // A weighted histogram of blob heights is built, the top modes are found
  // with find_top_modes() and pick_x_height() chooses among them. If
  // row->xheight was not positive on entry, the result is negated before
  // returning.
  STATS heightstat(0, HEIGHTBUCKETS - 1);
  int lefts[HEIGHTBUCKETS];  // smallest x-centre seen per height (0 = none)
  int rights[HEIGHTBUCKETS]; // largest x-centre seen per height
  int modelist[MODENUM];
  int blobindex;
  int mode_count; // blobs to count in thr
  int sign_bit;
  int mode_threshold;
  const int kBaselineTouch = 2;  // This really should change with resolution.
  const int kGoodStrength = 8;   // Strength of baseline-touching heights.
  const float kMinHeight = 0.25; // Min fraction of lineheight to use.
 
  // Remember whether the incoming xheight was positive.
  sign_bit = row->xheight > 0 ? 1 : -1;
 
  memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
  memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
  mode_count = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    int xcenter = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
    float base = baseline->y(xcenter);
    float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom());
    // In ocropus mode, blobs whose bottom is within kBaselineTouch of the
    // baseline get the larger weight.
    int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
    // Height of the blob top above the baseline, rounded to nearest.
    int height = static_cast<int>(blobcoords[blobindex].top() - base + 0.5);
    // Ignore blobs that are small relative to the initial line height.
    if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) {
      if (height > lineheight * oldbl_xhfract && height > textord_min_xheight) {
        heightstat.add(height, strength);
        // lefts/rights only have HEIGHTBUCKETS entries, so guard the index.
        if (height < HEIGHTBUCKETS) {
          if (xcenter > rights[height]) {
            rights[height] = xcenter;
          }
          if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) {
            lefts[height] = xcenter;
          }
        }
      }
      // Counted even when the height itself was rejected above.
      mode_count += strength;
    }
  }
 
  // Threshold is 10% of the blob count, or 10% of the weighted count when
  // either of the two parameters below is set.
  mode_threshold = static_cast<int>(blobcount * 0.1);
  if (oldbl_dot_error_size > 1 || oldbl_xhfix) {
    mode_threshold = static_cast<int>(mode_count * 0.1);
  }
 
  if (textord_oldbl_debug) {
    tprintf("blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, mode_threshold);
  }
  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
  if (textord_oldbl_debug) {
    for (blobindex = 0; blobindex < MODENUM; blobindex++) {
      tprintf("mode[%d]=%d ", blobindex, modelist[blobindex]);
    }
    tprintf("\n");
  }
  pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
 
  if (textord_oldbl_debug) {
    tprintf("Output xheight=%g\n", row->xheight);
  }
  if (row->xheight < 0 && textord_oldbl_debug) {
    tprintf("warning: Row Line height < 0; %4.2f\n", row->xheight);
  }
 
  // Negate the result if the incoming value was not positive.
  if (sign_bit < 0) {
    row->xheight = -row->xheight;
  }
}

◆ make_height_array()

int * tesseract::make_height_array	(	TBOX	blobcoords[],
		int	blobcount,
		QSPLINE *	baseline
	)

◆ make_holed_baseline()

void tesseract::make_holed_baseline	(	TBOX	blobcoords[],
		int	blobcount,
		QSPLINE *	spline,
		QSPLINE *	baseline,
		float	gradient
	)

Definition at line 619 of file oldbasel.cpp.

  {
  // Builds a baseline for the row with the given fixed gradient.
  // A straight line of slope `gradient` is fitted through the blob bottoms
  // (ConstrainedFit supplies the intercept) and stored in *baseline. If a
  // usable spline was supplied, *baseline is then replaced by a copy of that
  // spline, shifted vertically so that it meets the fitted line at the
  // centre of the row.
  // NOTE(review): blobcoords[blobcount - 1] is read unconditionally, so
  // blobcount is presumably always >= 1 - confirm against callers.
  int leftedge;  /*left edge of line */
  int rightedge; /*right edge of line */
  int blobindex; /*current blob */
  float x;       // centre of row
  ICOORD shift;  // shift of spline
 
  tesseract::DetLineFit lms; // straight baseline
  int32_t xstarts[2];        // straight line
  double coeffs[3];
  float c; // line parameter
 
  /*left edge of row */
  leftedge = blobcoords[0].left();
  /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right();
  // Fit through (horizontal centre, bottom) of every blob.
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    lms.Add(ICOORD((blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2,
                   blobcoords[blobindex].bottom()));
  }
  lms.ConstrainedFit(gradient, &c);
  xstarts[0] = leftedge;
  xstarts[1] = rightedge;
  // Single segment y = 0*x^2 + gradient*x + c.
  coeffs[0] = 0;
  coeffs[1] = gradient;
  coeffs[2] = c;
  *baseline = QSPLINE(1, xstarts, coeffs);
  if (spline != nullptr        /*a spline was given */
      && spline->segments >= 3 /*and it is not trivial */
                               /*and it overlaps the row enough */
      && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) &&
      spline->xcoords[spline->segments - 1] >= rightedge - MAXOVERLAP * (rightedge - leftedge)) {
    *baseline = *spline; /*copy it */
    x = (leftedge + rightedge) / 2.0;
    // Difference between the fitted line and the spline at the row centre.
    shift = ICOORD(0, static_cast<int16_t>(gradient * x + c - spline->y(x)));
    baseline->move(shift);
  }
}

◆ make_illegal_segment()

void tesseract::make_illegal_segment	(	FPSEGPT_LIST *	prev_list,
		TBOX	blob_box,
		BLOBNBOX_IT	blob_it,
		int16_t	region_index,
		int16_t	pitch,
		int16_t	pitch_error,
		FPSEGPT_LIST *	seg_list
	)

Definition at line 353 of file pitsync1.cpp.

  {
  // Appends "faked" segmentation points to seg_list for a region where no
  // legal segmentation was found.
  // The x range is derived from the least-cost points in prev_list, moved on
  // by pitch and widened by pitch_error. Every x in that range gets a
  // candidate FPSEGPT; only candidates that link to a previous point are
  // kept, the rest are deleted.
  int16_t x;         // current coord
  int16_t min_x = 0; // in this region
  int16_t max_x = 0;
  int16_t offset;                 // dist to edge
  FPSEGPT *segpt;                 // segment point
  FPSEGPT *prevpt;                // previous point
  float best_cost;                // best path
  FPSEGPT_IT segpt_it = seg_list; // iterator
                                  // previous points
  FPSEGPT_IT prevpt_it = prev_list;
 
  best_cost = FLT_MAX;
  // Find the lowest cost in prev_list. min_x is the position of the first
  // point with that cost and max_x the position of the last one with
  // exactly the same cost.
  for (prevpt_it.mark_cycle_pt(); !prevpt_it.cycled_list(); prevpt_it.forward()) {
    prevpt = prevpt_it.data();
    if (prevpt->cost_function() < best_cost) {
      // find least
      best_cost = prevpt->cost_function();
      min_x = prevpt->position();
      max_x = min_x; // limits on coords
    } else if (prevpt->cost_function() == best_cost) {
      max_x = prevpt->position();
    }
  }
  min_x += pitch - pitch_error;
  max_x += pitch + pitch_error;
  for (x = min_x; x <= max_x; x++) {
    // Advance to the blob box whose right edge is at or beyond x.
    // NOTE(review): relies on box_next() eventually returning such a box -
    // confirm that the loop terminates at the end of the blob list.
    while (x > blob_box.right()) {
      blob_box = box_next(&blob_it);
    }
    // offset is the smaller of the distances from x to the box's two edges.
    offset = x - blob_box.left();
    if (blob_box.right() - x < offset) {
      offset = blob_box.right() - x;
    }
    segpt = new FPSEGPT(x, false, offset, region_index, pitch, pitch_error, prev_list);
    if (segpt->previous() != nullptr) {
      ASSERT_HOST(offset >= 0);
      fprintf(stderr, "made fake at %d\n", x);
      // make one up
      segpt_it.add_after_then_move(segpt);
      segpt->faked = true;
      segpt->fake_count++;
    } else {
      // No usable predecessor: discard the candidate.
      delete segpt;
    }
  }
}

◆ make_initial_textrows()

void tesseract::make_initial_textrows	(	ICOORD	page_tr,
		TO_BLOCK *	block,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 254 of file makerow.cpp.

  {
  // Assigns the block's blobs to rows and fits an LMS line to every row.
  // With graphics enabled, and when both textord_show_initial_rows and
  // testing_on are set, the rows are also plotted in colours cycling from
  // RED to MAGENTA.
  TO_ROW_IT row_it = block->get_rows();

#ifndef GRAPHICS_DISABLED
  // Make sure there is a window to draw into.
  if (textord_show_initial_rows && testing_on && to_win == nullptr) {
    create_to_win(page_tr);
  }
#endif
  // guess skew
  assign_blobs_to_rows(block, nullptr, 0, true, true, textord_show_initial_rows && testing_on);
  row_it.move_to_first();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    fit_lms_line(row_it.data());
  }
#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_rows && testing_on) {
    ScrollView::Color colour = ScrollView::RED; // of row
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      plot_to_row(row_it.data(), colour, rotation);
      // Step to the next colour, wrapping round after MAGENTA.
      colour = static_cast<ScrollView::Color>(colour + 1);
      if (colour > ScrollView::MAGENTA) {
        colour = ScrollView::RED;
      }
    }
  }
#endif
}

◆ make_pseudo_word()

PAGE_RES_IT * tesseract::make_pseudo_word	(	PAGE_RES *	page_res,
		const TBOX &	selection_box
	)

Definition at line 38 of file werdit.cpp.

                                                                             {
  // Finds the first word whose bounding box overlaps selection_box and
  // which contributes at least one blob to the gathered list, deep-copies
  // the overlapping blobs into a new WERD and inserts that as a clone word
  // into page_res. Returns a heap-allocated iterator positioned at the
  // inserted word, or nullptr when no blob overlaps the selection.
  PAGE_RES_IT pr_it(page_res);
  C_BLOB_LIST gathered;              // list of gathered blobs
  C_BLOB_IT gathered_it = &gathered; // iterator

  for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
    WERD *word = word_res->word;
    if (!word->bounding_box().overlap(selection_box)) {
      continue;
    }
    // Copy every blob of this word that touches the selection.
    C_BLOB_IT blob_it(word->cblob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      C_BLOB *blob = blob_it.data();
      if (blob->bounding_box().overlap(selection_box)) {
        gathered_it.add_after_then_move(C_BLOB::deep_copy(blob));
      }
    }
    if (gathered.empty()) {
      continue;
    }
    WERD *pseudo_word = new WERD(&gathered, 1, nullptr);
    WERD_RES *inserted = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
    // Walk a fresh iterator forward until it reaches the inserted word.
    auto *it = new PAGE_RES_IT(page_res);
    while (it->word() != inserted && it->word() != nullptr) {
      it->forward();
    }
    ASSERT_HOST(it->word() == inserted);
    return it;
  }
  return nullptr;
}

◆ make_real_word()

WERD * tesseract::make_real_word	(	BLOBNBOX_IT *	box_it,
		int32_t	blobcount,
		bool	bol,
		uint8_t	blanks
	)

Definition at line 559 of file wordseg.cpp.

  {
  // Extracts blobcount BLOBNBOXes from box_it and builds a WERD from their
  // C_BLOBs. A blob flagged joined_to_prev has its outlines appended to the
  // most recently added C_BLOB instead of becoming a blob of its own.
  // `blanks` is raised to at least 1. W_BOL is set when bol is true, W_EOL
  // when the iterator is at the first element after the extraction.
  C_OUTLINE_IT outline_it;
  C_BLOB_LIST word_blobs;
  C_BLOB_IT word_blob_it = &word_blobs;

  for (int remaining = blobcount; remaining > 0; --remaining) {
    auto *source = box_it->extract();
    const bool merge_with_previous = source->joined_to_prev();
    auto *blob = source->remove_cblob();
    if (blob != nullptr) {
      if (merge_with_previous) {
        // Append this blob's outlines to the previous blob, then drop the
        // emptied blob.
        outline_it.set_to_list(word_blob_it.data()->out_list());
        outline_it.move_to_last();
        outline_it.add_list_after(blob->out_list());
        delete blob;
      } else {
        word_blob_it.add_after_then_move(blob);
      }
    }
    delete source;
    box_it->forward(); // next one
  }

  if (blanks < 1) {
    blanks = 1;
  }

  auto *word = new WERD(&word_blobs, blanks, nullptr);
  if (bol) {
    word->set_flag(W_BOL, true);
  }
  if (box_it->at_first()) {
    word->set_flag(W_EOL, true); // at end of line
  }
  return word;
}

◆ make_real_words()

void tesseract::make_real_words	(	tesseract::Textord *	textord,
		TO_BLOCK *	block,
		FCOORD	rotation
	)

Definition at line 473 of file wordseg.cpp.

  {
  // Converts every TO_ROW of the block into a ROW of words and appends it to
  // block->block's row list, then updates the block's pitch statistics.
  //   textord  - supplies make_blob_words / make_prop_words.
  //   block    - block whose rows are converted.
  //   rotation - forwarded to the word-making functions.
  TO_ROW *row; // current row
  TO_ROW_IT row_it = block->get_rows();
  ROW_IT real_row_it = block->block->row_list();

  if (row_it.empty()) {
    return; // empty block
  }
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row = row_it.data();
    // Reset for every row: a row with neither blobs nor rep words produces
    // no output, and must not re-insert the ROW made for the previous row.
    ROW *real_row = nullptr; // output row
    if (row->blob_list()->empty() && !row->rep_words.empty()) {
      real_row = make_rep_words(row, block);
    } else if (!row->blob_list()->empty()) {
      // In a fixed pitch document, some lines may be detected as fixed pitch
      // while others don't, and will go through different path.
      // For non-space delimited language like CJK, fixed pitch chop always
      // leave the entire line as one word.  We can force consistent chopping
      // with force_make_prop_words flag.
      POLY_BLOCK *pb = block->block->pdblk.poly_block();
      if (textord_chopper_test) {
        real_row = textord->make_blob_words(row, rotation);
      } else if (textord_force_make_prop_words || (pb != nullptr && !pb->IsText()) ||
                 row->pitch_decision == PITCH_DEF_PROP || row->pitch_decision == PITCH_CORR_PROP) {
        real_row = textord->make_prop_words(row, rotation);
      } else if (row->pitch_decision == PITCH_DEF_FIXED ||
                 row->pitch_decision == PITCH_CORR_FIXED) {
        real_row = fixed_pitch_words(row, rotation);
      } else {
        // Every row must have reached one of the four pitch decisions.
        ASSERT_HOST(false);
      }
    }
    if (real_row != nullptr) {
      // put row in block
      real_row_it.add_after_then_move(real_row);
    }
  }
  block->block->set_stats(block->fixed_pitch == 0, static_cast<int16_t>(block->kern_size),
                          static_cast<int16_t>(block->space_size),
                          static_cast<int16_t>(block->fixed_pitch));
  block->block->check_pitch();
}

◆ make_rep_words()

ROW * tesseract::make_rep_words	(	TO_ROW *	row,
		TO_BLOCK *	block
	)

Definition at line 526 of file wordseg.cpp.

  {
  // Builds a ROW holding the row's repeated-character words.
  // Returns nullptr when the row has no rep words; otherwise row->xheight is
  // set from the block, a new ROW is created and all of row->rep_words are
  // moved into it.
  WERD_IT rep_it = &row->rep_words;
  if (rep_it.empty()) {
    return nullptr;
  }
  // Union of the bounding boxes of all rep words. The result is not
  // consumed anywhere else in this function.
  TBOX combined_box = rep_it.data()->bounding_box();
  for (rep_it.mark_cycle_pt(); !rep_it.cycled_list(); rep_it.forward()) {
    combined_box += rep_it.data()->bounding_box();
  }
  row->xheight = block->xheight;
  ROW *output_row =
      new ROW(row, static_cast<int16_t>(block->kern_size), static_cast<int16_t>(block->space_size));
  // Re-point the iterator at the new row's word list and put the words in it.
  rep_it.set_to_list(output_row->word_list());
  rep_it.add_list_after(&row->rep_words);
  output_row->recalc_bounding_box();
  return output_row;
}

◆ make_rows()

float tesseract::make_rows	(	ICOORD	page_tr,
		TO_BLOCK_LIST *	port_blocks
	)

Definition at line 229 of file makerow.cpp.

                                                            {
  // Makes initial text rows for every block, computes the page skew over
  // all blocks, then runs cleanup_rows_making on each block with that skew.
  // Returns the page skew gradient.
  TO_BLOCK_IT block_it; // iterator

  // Pass 1: initial rows per block.
  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), !textord_test_landscape);
  }

  // compute globally
  float skew_gradient; // global skew
  float skew_error;    // global noise
  compute_page_skew(port_blocks, skew_gradient, skew_error);

  // Pass 2: clean up the rows of every block using the global skew.
  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    TO_BLOCK *current = block_it.data();
    cleanup_rows_making(page_tr, current, skew_gradient, FCOORD(1.0f, 0.0f),
                        current->block->pdblk.bounding_box().left(), !textord_test_landscape);
  }
  return skew_gradient; // global skew
}

◆ make_single_row()

float tesseract::make_single_row	(	ICOORD	page_tr,
		bool	allow_sub_blobs,
		TO_BLOCK *	block,
		TO_BLOCK_LIST *	blocks
	)

Definition at line 190 of file makerow.cpp.

                                             {
  // Puts all of the block's blobs (normal, small, noise and large) into
  // rows via MakeRowFromBlobs, fits an LMS line to each row and returns the
  // gradient computed by compute_page_skew over `blocks`.
  // With a single blob and allow_sub_blobs set, a row is first made from
  // that blob's sub-blobs; with no blobs at all, a fake blob covering the
  // block's bounding box is inserted.
  BLOBNBOX_IT all_blobs_it = &block->blobs;
  TO_ROW_IT rows_it = block->get_rows();

  // Include all the small blobs and large blobs.
  all_blobs_it.add_list_after(&block->small_blobs);
  all_blobs_it.add_list_after(&block->noise_blobs);
  all_blobs_it.add_list_after(&block->large_blobs);

  if (block->blobs.singleton() && allow_sub_blobs) {
    all_blobs_it.move_to_first();
    float sub_blob_size = MakeRowFromSubBlobs(block, all_blobs_it.data()->cblob(), &rows_it);
    // Only ever enlarge the line size.
    if (sub_blob_size > block->line_size) {
      block->line_size = sub_blob_size;
    }
  } else if (block->blobs.empty()) {
    // Make a fake blob; the blobnbox owns the blob.
    all_blobs_it.add_after_then_move(
        new BLOBNBOX(C_BLOB::FakeBlob(block->block->pdblk.bounding_box())));
  }
  MakeRowFromBlobs(block->line_size, &all_blobs_it, &rows_it);

  // Fit an LMS line to the rows.
  for (rows_it.mark_cycle_pt(); !rows_it.cycled_list(); rows_it.forward()) {
    fit_lms_line(rows_it.data());
  }

  // Compute the skew based on the fitted line.
  float skew_gradient;
  float skew_error;
  compute_page_skew(blocks, skew_gradient, skew_error);
  return skew_gradient;
}

◆ make_single_word()

void tesseract::make_single_word	(	bool	one_blob,
		TO_ROW_LIST *	rows,
		ROW_LIST *	real_rows
	)

Definition at line 53 of file wordseg.cpp.

                                                                             {
  // Turns every TO_ROW in `rows` into a ROW holding exactly one WERD made
  // from all of the row's blobs, and appends it to real_rows.
  // When one_blob is set, every blob after the first is merged into the
  // first, and the word is flagged W_DONT_CHOP.
  TO_ROW_IT source_row_it(rows);
  ROW_IT output_row_it(real_rows);
  for (source_row_it.mark_cycle_pt(); !source_row_it.cycled_list(); source_row_it.forward()) {
    TO_ROW *source_row = source_row_it.data();
    // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
    // to create the word.
    C_BLOB_LIST word_blobs;
    C_BLOB_IT word_blob_it(&word_blobs);
    BLOBNBOX_IT box_it(source_row->blob_list());
    for (; !box_it.empty(); box_it.forward()) {
      BLOBNBOX *source = box_it.extract();
      const bool merge_with_previous =
          source->joined_to_prev() || (one_blob && !word_blob_it.empty());
      auto *blob = source->remove_cblob();
      if (blob != nullptr) {
        if (merge_with_previous) {
          // Append this blob's outlines to the previous blob, then drop the
          // emptied blob.
          C_OUTLINE_IT outline_it(word_blob_it.data()->out_list());
          outline_it.move_to_last();
          outline_it.add_list_after(blob->out_list());
          delete blob;
        } else {
          word_blob_it.add_after_then_move(blob);
        }
      }
      delete source;
    }
    // Convert the TO_ROW to a ROW.
    ROW *output_row = new ROW(source_row, static_cast<int16_t>(source_row->kern_size),
                              static_cast<int16_t>(source_row->space_size));
    WERD *word = new WERD(&word_blobs, 0, nullptr);
    word->set_flag(W_BOL, true);
    word->set_flag(W_EOL, true);
    word->set_flag(W_DONT_CHOP, one_blob);
    WERD_IT word_it(output_row->word_list());
    word_it.add_after_then_move(word);
    output_row_it.add_after_then_move(output_row);
  }
}

◆ make_words()

void tesseract::make_words	(	tesseract::Textord *	textord,
		ICOORD	page_tr,
		float	gradient,
		BLOCK_LIST *	blocks,
		TO_BLOCK_LIST *	port_blocks
	)

make_words

Arrange the blobs into words.

Definition at line 99 of file wordseg.cpp.

                                            { // output list
  // Step 1: estimate the pitch model for every block, using either the CJK
  // variant or the general one depending on the Textord setting.
  if (textord->use_cjk_fp_model()) {
    compute_fixed_pitch_cjk(page_tr, port_blocks);
  } else {
    const bool not_landscape = !bool(textord_test_landscape);
    compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f), not_landscape);
  }

  // Step 2: compute the spacing for the blocks.
  textord->to_spacing(page_tr, port_blocks);

  // Step 3: build the real words, one block at a time.
  TO_BLOCK_IT blk_it;
  blk_it.set_to_list(port_blocks);
  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
    TO_BLOCK *current = blk_it.data();
    make_real_words(textord, current, FCOORD(1.0f, 0.0f));
  }
}

◆ MakeAsciiRowInfos()

void tesseract::MakeAsciiRowInfos	(	const TextAndModel *	row_infos,
		int	n,
		std::vector< RowInfo > *	output
	)

Definition at line 94 of file paragraphs_test.cc.

                                                                                         {
  // Replaces the contents of `output` with one RowInfo per entry of
  // `row_infos`, converting each entry's `ascii` text via AsciiToRowInfo().
  // A single RowInfo object is reused for every conversion and copied into
  // the vector.
  output->clear();
  RowInfo scratch;
  int row = 0;
  while (row < n) {
    const TextAndModel &entry = row_infos[row];
    AsciiToRowInfo(entry.ascii, row, &scratch);
    output->push_back(scratch);
    ++row;
  }
}

◆ MakeBoxFileStr()

TESS_API void tesseract::MakeBoxFileStr	(	const char *	unichar_str,
		const TBOX &	box,
		int	page_num,
		std::string &	box_str
	)

Definition at line 280 of file boxread.cpp.

                                                                                                {
  // Writes "<unichar_str> <left> <bottom> <right> <top> <page_num>" into
  // box_str, replacing whatever it held before.
  auto append_field = [&box_str](auto value) {
    box_str += " " + std::to_string(value);
  };
  box_str = unichar_str;
  append_field(box.left());
  append_field(box.bottom());
  append_field(box.right());
  append_field(box.top());
  append_field(page_num);
}

◆ MakeClusterer()

TESS_API CLUSTERER * tesseract::MakeClusterer	(	int16_t	SampleSize,
		const PARAM_DESC	ParamDesc[]
	)

This routine creates a new clusterer data structure, initializes it, and returns a pointer to it.

Parameters

SampleSize	number of dimensions in feature space
ParamDesc	description of each dimension

Returns: pointer to the new clusterer data structure

Definition at line 1440 of file cluster.cpp.

                                                                           {
  // Allocate the clusterer and reset its scalar bookkeeping.
  auto clusterer = new CLUSTERER;
  clusterer->SampleSize = SampleSize;
  clusterer->NumberOfSamples = 0;
  clusterer->NumChar = 0;

  // Nothing has been clustered yet, so there is no tree root or proto list.
  clusterer->Root = nullptr;
  clusterer->ProtoList = NIL_LIST;

  // Keep a private copy of the per-dimension descriptors, deriving Range,
  // HalfRange and MidRange from the supplied Min/Max.
  clusterer->ParamDesc = new PARAM_DESC[SampleSize];
  for (int dim = 0; dim < SampleSize; ++dim) {
    const PARAM_DESC &src = ParamDesc[dim];
    PARAM_DESC &dst = clusterer->ParamDesc[dim];
    dst.Circular = src.Circular;
    dst.NonEssential = src.NonEssential;
    dst.Min = src.Min;
    dst.Max = src.Max;
    dst.Range = src.Max - src.Min;
    dst.HalfRange = dst.Range / 2;
    dst.MidRange = (src.Max + src.Min) / 2;
  }

  // The samples themselves live in a k-d tree.
  clusterer->KDTree = MakeKDTree(SampleSize, ParamDesc);

  // Start with an empty histogram-bucket cache so buckets are only computed
  // on demand.
  for (auto &cache_row : clusterer->bucket_cache) {
    for (auto &slot : cache_row) {
      slot = nullptr;
    }
  }

  return clusterer;
} // MakeClusterer

◆ MakeKDTree()

KDTREE * tesseract::MakeKDTree	(	int16_t	KeySize,
		const PARAM_DESC	KeyDesc[]
	)

Returns: a new KDTREE based on the specified parameters.

Parameters

KeySize	# of dimensions in the K-D tree
KeyDesc	array of params to describe key dimensions

Definition at line 186 of file kdtree.cpp.

                                                                {
  // Allocates a KDTREE with KeySize dimensions and fills in its per-dimension
  // descriptors from KeyDesc. Circular dimensions keep the caller's Min/Max
  // and get derived Range/HalfRange/MidRange; all other dimensions are given
  // the MINSEARCH/MAXSEARCH bounds instead.
  auto *tree = new KDTREE(KeySize);
  for (int dim = 0; dim < KeySize; ++dim) {
    const PARAM_DESC &src = KeyDesc[dim];
    auto &dst = tree->KeyDesc[dim];
    dst.NonEssential = src.NonEssential;
    dst.Circular = src.Circular;
    if (!src.Circular) {
      dst.Min = MINSEARCH;
      dst.Max = MAXSEARCH;
      continue;
    }
    dst.Min = src.Min;
    dst.Max = src.Max;
    dst.Range = src.Max - src.Min;
    dst.HalfRange = dst.Range / 2;
    dst.MidRange = (src.Max + src.Min) / 2;
  }
  // The root starts with no children.
  tree->Root.Left = nullptr;
  tree->Root.Right = nullptr;
  return tree;
}

◆ MakeSample()

TESS_API SAMPLE * tesseract::MakeSample	(	CLUSTERER *	Clusterer,
		const float *	Feature,
		uint32_t	CharID
	)

This routine creates a new sample data structure to hold the specified feature. This sample is added to the clusterer data structure (so that it knows which samples are to be clustered later), and a pointer to the sample is returned to the caller.

Parameters

Clusterer	clusterer data structure to add sample to
Feature	feature to be added to clusterer
CharID	unique ident. of char that sample came from

Returns: Pointer to the new sample data structure

Definition at line 1491 of file cluster.cpp.

                                                                                {
  // Samples may only be added before clustering has produced a tree root.
  ASSERT_HOST(Clusterer->Root == nullptr);

  // Create the sample as an unclustered, single-sample leaf.
  auto sample = new SAMPLE(Clusterer->SampleSize);
  sample->Clustered = false;
  sample->Prototype = false;
  sample->SampleCount = 1;
  sample->Left = nullptr;
  sample->Right = nullptr;
  sample->CharID = CharID;

  // Its mean is simply the feature vector itself.
  for (int dim = 0; dim < Clusterer->SampleSize; ++dim) {
    sample->Mean[dim] = Feature[dim];
  }

  // Register the sample with the clusterer: bump the count, store it in the
  // k-d tree keyed by its mean, and grow NumChar so it exceeds every CharID
  // seen so far.
  Clusterer->NumberOfSamples++;
  KDStore(Clusterer->KDTree, &sample->Mean[0], sample);
  if (CharID >= Clusterer->NumChar) {
    Clusterer->NumChar = CharID + 1;
  }

  return sample;
} // MakeSample

◆ MakeTempProtoPerm()

int tesseract::MakeTempProtoPerm	(	void *	item1,
		void *	item2
	)

This routine converts TempProto to be permanent if its proto id is used by the configuration specified in ProtoKey.

Parameters

item1	(TEMP_PROTO) temporary proto to compare to key
item2	(PROTO_KEY) defines which protos to make permanent

Globals: none

Returns: true if TempProto is converted, false otherwise

Definition at line 1896 of file adaptmatch.cpp.

                                                {
  // item1 is the temporary proto under consideration, item2 the key naming
  // the templates, class and config whose protos are being made permanent.
  auto candidate = static_cast<TEMP_PROTO_STRUCT *>(item1);
  auto key = static_cast<PROTO_KEY *>(item2);

  auto adapted_class = key->Templates->Class[key->ClassId];
  auto config = TempConfigFor(adapted_class, key->ConfigId);

  // Leave the proto alone unless its id is within range for the config and
  // its bit is set in the config's proto mask.
  const bool id_in_range = !(candidate->ProtoId > config->MaxProtoId);
  if (!id_in_range || !test_bit(config->Protos, candidate->ProtoId)) {
    return false;
  }

  // Promote the proto, add it to the class pruner, then free the temporary.
  MakeProtoPermanent(adapted_class, candidate->ProtoId);
  AddProtoToClassPruner(&(candidate->Proto), key->ClassId, key->Templates->Templates);
  delete candidate;

  return true;
} /* MakeTempProtoPerm */

◆ MarginalMatch()

bool tesseract::MarginalMatch	(	float	confidence,
		float	matcher_great_threshold
	)

inline

Definition at line 142 of file adaptmatch.cpp.

                                                                           {
  // True when (1 - confidence) is strictly above the "great match" threshold.
  return matcher_great_threshold < (1.0f - confidence);
}

◆ mark_outline()

void tesseract::mark_outline ( EDGEPT * edgept )

Definition at line 83 of file plotedges.cpp.

                                  { /* Start of point list */
  // Draws a small red four-segment marker in edge_window, starting at the
  // position of `edgept`, then refreshes the window.
  // Each entry is the (dx, dy) step from the previous pen position.
  static const float kMarkerSteps[4][2] = {
      {-4.0f, -12.0f},
      {-2.0f, 4.0f},
      {-4.0f, 2.0f},
      {10.0f, 6.0f},
  };

  auto window = edge_window;
  float pen_x = edgept->pos.x;
  float pen_y = edgept->pos.y;

  window->Pen(ScrollView::RED);
  window->SetCursor(pen_x, pen_y);

  for (const auto &step : kMarkerSteps) {
    pen_x += step[0];
    pen_y += step[1];
    window->DrawTo(pen_x, pen_y);
  }

  window->Update();
}

◆ mark_repeated_chars()

void tesseract::mark_repeated_chars ( TO_ROW * row )

Definition at line 2565 of file makerow.cpp.

                                      {
  // Walks the row's blob list once (it is traversed cyclically until the
  // iterator is back at the first element). Runs of blobs whose flow() is
  // BTFT_LEADER and that are at least kMinLeaderCount long are tagged with a
  // 1-based set number via set_repeated_set(); other blobs are tagged with 0.
  // The number of sets found is stored on the row at the end.
  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
  int num_repeated_sets = 0;
  if (!box_it.empty()) {
    do {
      BLOBNBOX *bblob = box_it.data();
      int repeat_length = 1;
      // Only a leader blob that is not joined to its predecessor and still
      // owns a cblob can start a run.
      if (bblob->flow() == BTFT_LEADER && !bblob->joined_to_prev() && bblob->cblob() != nullptr) {
        // Look ahead with a copy of the iterator so box_it stays on the
        // start of the candidate run.
        BLOBNBOX_IT test_it(box_it);
        for (test_it.forward(); !test_it.at_first();) {
          bblob = test_it.data();
          if (bblob->flow() != BTFT_LEADER) {
            break;
          }
          test_it.forward();
          bblob = test_it.data();
          // A joined blob or one without a cblob invalidates the whole run.
          if (bblob->joined_to_prev() || bblob->cblob() == nullptr) {
            repeat_length = 0;
            break;
          }
          ++repeat_length;
        }
      }
      if (repeat_length >= kMinLeaderCount) {
        // Long enough: tag every blob of the run and advance past it.
        num_repeated_sets++;
        for (; repeat_length > 0; box_it.forward(), --repeat_length) {
          bblob = box_it.data();
          bblob->set_repeated_set(num_repeated_sets);
        }
      } else {
        // NOTE(review): bblob may have been reassigned by the look-ahead loop
        // above, so this can clear the tag on the last blob examined there
        // rather than on box_it.data() - confirm this is intended.
        bblob->set_repeated_set(0);
        box_it.forward();
      }
    } while (!box_it.at_first()); // until all done
  }
  row->set_num_repeated_sets(num_repeated_sets);
}

◆ MarkDirectionChanges()

void tesseract::MarkDirectionChanges ( MFOUTLINE Outline )

This routine searches through the specified outline and finds the points at which the outline changes direction. These points are then marked as "extremities". This routine is used as an alternative to FindExtremities(). It forces the endpoints of the microfeatures to be at the direction changes rather than at the midpoint between direction changes.

Parameters

Outline micro-feature outline to analyze

Definition at line 166 of file mfoutline.cpp.

                                             {
  // Degenerate outlines are left untouched.
  if (DegenerateOutline(Outline)) {
    return;
  }

  // Hop from one direction change to the next, marking each point reached,
  // until the walk arrives back at the first direction change found.
  const MFOUTLINE first_change = NextDirectionChange(Outline);
  MFOUTLINE cursor = first_change;
  do {
    cursor = NextDirectionChange(cursor);
    PointAt(cursor)->MarkPoint();
  } while (cursor != first_change);
} /* MarkDirectionChanges */

◆ Mean()

float tesseract::Mean	(	PROTOTYPE *	Proto,
		uint16_t	Dimension
	)

This routine returns the mean of the specified prototype in the indicated dimension.

Parameters

Proto	prototype to return mean of
Dimension	dimension whose mean is to be returned

Returns: Mean of Prototype in Dimension

Definition at line 1662 of file cluster.cpp.

                                                 {
  // Plain lookup of the prototype's mean along the requested dimension.
  const float mean_in_dimension = Proto->Mean[Dimension];
  return mean_in_dimension;
} // Mean

◆ median_block_xheight()

float tesseract::median_block_xheight	(	TO_BLOCK *	block,
		float	gradient
	)

◆ MedianOfCircularValues()

template<typename T >

T tesseract::MedianOfCircularValues	(	T	modulus,
		std::vector< T > &	v
	)

Definition at line 117 of file linlsq.h.

                                                     {
  // Returns the element at index size()/2 of `v` after partially ordering it
  // with std::nth_element, i.e. its (upper) median. `v` is reordered in place.
  // Precondition: `v` must not be empty; v[median_index] is read
  // unconditionally.
  //
  // The LLSQ accumulator is fed (value, value + modulus/2) pairs; when the
  // variance of the shifted values is smaller than that of the raw values the
  // selection is performed on the shifted values and the shift is undone
  // afterwards.
  LLSQ stats;
  T halfrange = static_cast<T>(modulus / 2);
  auto num_elements = v.size();
  for (auto i : v) {
    stats.add(i, i + halfrange);
  }
  bool offset_needed = stats.y_variance() < stats.x_variance();
  if (offset_needed) {
    // Iterate by reference: a by-value loop variable would only modify a
    // temporary copy and leave `v` unshifted.
    for (auto &i : v) {
      i += halfrange;
    }
  }
  auto median_index = num_elements / 2;
  std::nth_element(v.begin(), v.begin() + median_index, v.end());
  if (offset_needed) {
    // Undo the shift on the stored values (again by reference).
    for (auto &i : v) {
      i -= halfrange;
    }
  }
  return v[median_index];
}

◆ merge_oldbl_parts()

void tesseract::merge_oldbl_parts	(	TBOX	blobcoords[],
		int	blobcount,
		char	partids[],
		int	partsizes[],
		int	biggestpart,
		float	jumplimit
	)

Definition at line 749 of file oldbasel.cpp.

  {
  // Scans the blobs left to right looking for runs assigned to a partition
  // other than biggestpart. For each such run longer than MAXBADRUN, a line is
  // fitted through the bottoms of the run's blobs; if the nearest blob of
  // biggestpart on either side lies within jumplimit of that line, the run is
  // relabelled as biggestpart (partids is rewritten and the old partition's
  // size is reduced).
  // NOTE(review): a run that extends to the last blob is never examined,
  // because the check only happens when a partition change is seen.
  bool found_one; // found a bestpart blob
  bool close_one; // found was close enough
  int blobindex;  /*no along text line */
  int prevpart;   // previous iteration
  int runlength;  // no in this part
  float diff;     /*difference from line */
  int startx;     /*index of start blob */
  int test_blob;  // another index
  FCOORD coord;   // blob coordinate
  float m, c;     // fitted line
  QLSQ stats;     // line stuff
 
  prevpart = biggestpart;
  runlength = 0;
  startx = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    if (partids[blobindex] != prevpart) {
      //                      tprintf("Partition change at (%d,%d) from %d to %d
      //                      after run of %d\n",
      //                              blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
      //                              prevpart,partids[blobindex],runlength);
      if (prevpart != biggestpart && runlength > MAXBADRUN) {
        // Fit a line through (x-centre, bottom) of every blob in the run
        // [startx, blobindex).
        stats.clear();
        for (test_blob = startx; test_blob < blobindex; test_blob++) {
          coord = FCOORD((blobcoords[test_blob].left() + blobcoords[test_blob].right()) / 2.0,
                         blobcoords[test_blob].bottom());
          stats.add(coord.x(), coord.y());
        }
        stats.fit(1);
        m = stats.get_b();
        c = stats.get_c();
        if (textord_oldbl_debug) {
          tprintf("Fitted line y=%g x + %g\n", m, c);
        }
        found_one = false;
        close_one = false;
        // Search outwards from the run, one step at a time in both
        // directions, until a blob belonging to biggestpart is found or both
        // ends of the array have been passed.
        for (test_blob = 1;
             !found_one && (startx - test_blob >= 0 || blobindex + test_blob <= blobcount);
             test_blob++) {
          // Candidate to the left of the run.
          if (startx - test_blob >= 0 && partids[startx - test_blob] == biggestpart) {
            found_one = true;
            coord = FCOORD(
                (blobcoords[startx - test_blob].left() + blobcoords[startx - test_blob].right()) /
                    2.0,
                blobcoords[startx - test_blob].bottom());
            diff = m * coord.x() + c - coord.y();
            if (textord_oldbl_debug) {
              tprintf("Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.x(),
                      coord.y());
            }
            // |diff| < jumplimit
            if (diff < jumplimit && -diff < jumplimit) {
              close_one = true;
            }
          }
          // Candidate to the right of the run (index blobindex + test_blob - 1,
          // so the first one checked is blobindex itself).
          if (blobindex + test_blob <= blobcount &&
              partids[blobindex + test_blob - 1] == biggestpart) {
            found_one = true;
            coord = FCOORD((blobcoords[blobindex + test_blob - 1].left() +
                            blobcoords[blobindex + test_blob - 1].right()) /
                               2.0,
                           blobcoords[blobindex + test_blob - 1].bottom());
            diff = m * coord.x() + c - coord.y();
            if (textord_oldbl_debug) {
              tprintf("Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.x(),
                      coord.y());
            }
            if (diff < jumplimit && -diff < jumplimit) {
              close_one = true;
            }
          }
        }
        if (close_one) {
          if (textord_oldbl_debug) {
            tprintf(
                "Merged %d blobs back into part %d from %d starting at "
                "(%d,%d)\n",
                runlength, biggestpart, prevpart, blobcoords[startx].left(),
                blobcoords[startx].bottom());
          }
          // switch sides
          // NOTE(review): only the old partition's size is decremented here;
          // partsizes[biggestpart] is not increased - confirm callers do not
          // rely on it.
          partsizes[prevpart] -= runlength;
          for (test_blob = startx; test_blob < blobindex; test_blob++) {
            partids[test_blob] = biggestpart;
          }
        }
      }
      // Start a new run at the current blob.
      prevpart = partids[blobindex];
      runlength = 1;
      startx = blobindex;
    } else {
      runlength++;
    }
  }
}

◆ MergeClusters()

TESS_API int32_t tesseract::MergeClusters	(	int16_t	N,
		PARAM_DESC	ParamDesc[],
		int32_t	n1,
		int32_t	n2,
		float	m[],
		float	m1[],
		float	m2[]
	)

This routine merges two clusters into one larger cluster. To do this it computes the number of samples in the new cluster and the mean of the new cluster. The ParamDesc information is used to ensure that circular dimensions are handled correctly.

Parameters

N	# of dimensions (size of arrays)
ParamDesc	array of dimension descriptions
n1,n2	number of samples in each old cluster
m	array to hold mean of new cluster
m1,m2	arrays containing means of old clusters

Returns: The number of samples in the new cluster.

Definition at line 1870 of file cluster.cpp.

                                              {
  // The merged cluster holds the samples of both inputs; its mean is the
  // sample-weighted average of the two means, computed one dimension at a
  // time. Inputs for a dimension are read before the output is written, so
  // `m` may be the same array as `m1` or `m2`.
  const int32_t total = n1 + n2;
  for (int32_t dim = 0; dim < N; ++dim) {
    const PARAM_DESC &desc = ParamDesc[dim];
    const float mean1 = m1[dim];
    const float mean2 = m2[dim];
    float &merged = m[dim];

    if (!desc.Circular) {
      merged = (n1 * mean1 + n2 * mean2) / total;
      continue;
    }

    // Circular dimension: when the means are more than half a range apart,
    // move the larger one down by one full "rotation" before averaging, then
    // bring the result back up if it fell below the minimum.
    if ((mean2 - mean1) > desc.HalfRange) {
      merged = (n1 * mean1 + n2 * (mean2 - desc.Range)) / total;
    } else if ((mean1 - mean2) > desc.HalfRange) {
      merged = (n1 * (mean1 - desc.Range) + n2 * mean2) / total;
    } else {
      merged = (n1 * mean1 + n2 * mean2) / total;
      continue;
    }
    if (merged < desc.Min) {
      merged += desc.Range;
    }
  }
  return total;
} // MergeClusters

◆ MergeInsignificantProtos()

TESS_COMMON_TRAINING_API void tesseract::MergeInsignificantProtos	(	LIST	ProtoList,
		const char *	label,
		CLUSTERER *	Clusterer,
		CLUSTERCONFIG *	clusterconfig
	)

Definition at line 466 of file commontraining.cpp.

                                                            {
  // Pass 1: every prototype that is neither Significant nor already Merged
  // looks for its nearest not-yet-merged neighbour in ProtoList. A neighbour
  // only counts if its ComputeDistance() result is below 0.125. If the
  // neighbour is itself insignificant, this prototype's samples are folded
  // into it with MergeClusters(); if the neighbour is significant, this
  // prototype is simply flagged as Merged.
  // Pass 2: remaining unmerged, insignificant prototypes that now hold at
  // least MinSamples * NumChar samples are promoted to Significant.
  // Debug output is enabled when `label` equals FLAGS_test_ch.
  PROTOTYPE *Prototype;
  bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
 
  LIST pProtoList = ProtoList;
  iterate(pProtoList) {
    Prototype = reinterpret_cast<PROTOTYPE *>(pProtoList->first_node());
    if (Prototype->Significant || Prototype->Merged) {
      continue;
    }
    // Upper bound on the distance for a neighbour to be considered a match.
    float best_dist = 0.125;
    PROTOTYPE *best_match = nullptr;
    // Find the nearest alive prototype.
    LIST list_it = ProtoList;
    iterate(list_it) {
      auto *test_p = reinterpret_cast<PROTOTYPE *>(list_it->first_node());
      if (test_p != Prototype && !test_p->Merged) {
        float dist = ComputeDistance(Clusterer->SampleSize, Clusterer->ParamDesc, &Prototype->Mean[0],
                                     &test_p->Mean[0]);
        if (dist < best_dist) {
          best_match = test_p;
          best_dist = dist;
        }
      }
    }
    if (best_match != nullptr && !best_match->Significant) {
      if (debug) {
        auto bestMatchNumSamples = best_match->NumSamples;
        auto prototypeNumSamples = Prototype->NumSamples;
        tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n", bestMatchNumSamples,
                prototypeNumSamples, best_match->Mean[0], best_match->Mean[1], Prototype->Mean[0],
                Prototype->Mean[1]);
      }
      // best_match's mean array is passed as both the output and the first
      // input, so it is updated in place with the merged mean.
      best_match->NumSamples =
          MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, best_match->NumSamples,
                        Prototype->NumSamples, &best_match->Mean[0], &best_match->Mean[0], &Prototype->Mean[0]);
      Prototype->NumSamples = 0;
      Prototype->Merged = true;
    } else if (best_match != nullptr) {
      // The nearest neighbour is significant: mark this prototype as merged
      // without touching the neighbour's statistics.
      if (debug) {
        tprintf("Red proto at %g,%g matched a green one at %g,%g\n", Prototype->Mean[0],
                Prototype->Mean[1], best_match->Mean[0], best_match->Mean[1]);
      }
      Prototype->Merged = true;
    }
  }
  // Mark significant those that now have enough samples.
  int min_samples = static_cast<int32_t>(clusterconfig->MinSamples * Clusterer->NumChar);
  pProtoList = ProtoList;
  iterate(pProtoList) {
    Prototype = reinterpret_cast<PROTOTYPE *>(pProtoList->first_node());
    // Process insignificant protos that do not match a green one
    if (!Prototype->Significant && Prototype->NumSamples >= min_samples && !Prototype->Merged) {
      if (debug) {
        tprintf("Red proto at %g,%g becoming green\n", Prototype->Mean[0], Prototype->Mean[1]);
      }
      Prototype->Significant = true;
    }
  }
} /* MergeInsignificantProtos */

◆ Modulo()

int tesseract::Modulo	(	int	a,
		int	b
	)

inline

Definition at line 153 of file helpers.h.

                                {
  // a % b can be negative when a is negative; adding b and reducing once more
  // shifts the result into the range of a non-negative remainder for b > 0.
  const int remainder = a % b;
  return (remainder + b) % b;
}

◆ most_overlapping_row() [1/2]

OVERLAP_STATE tesseract::most_overlapping_row	(	TO_ROW_IT *	row_it,
		TO_ROW *&	best_row,
		float	top,
		float	bottom,
		float	rowsize,
		bool	testing_blob
	)

Definition at line 2451 of file makerow.cpp.

  {
  // Starting from the row row_it currently points at, finds the row that
  // vertically overlaps the range [bottom, top] the most, looking forward
  // through the list while rows still intersect that range. Rows whose
  // combined extent fits within rowsize are merged on the way. On return
  // row_it points at the chosen row, best_row is set to it, and the result is
  // ASSIGN, REJECT or NEW_ROW as decided below.
  OVERLAP_STATE result;          // result of tests
  float overlap;                 // of blob & row
  float bestover;                // nearest row
  float merge_top, merge_bottom; // size of merged row
  ICOORD testpt;                 // testing only
  TO_ROW *row;                   // current row
  TO_ROW *test_row;              // for multiple overlaps
  BLOBNBOX_IT blob_it;           // for merging rows
 
  result = ASSIGN;
  row = row_it->data();
  // Overlap with the starting row: the blob height minus whatever sticks out
  // above the row's max_y or below its min_y.
  bestover = top - bottom;
  if (top > row->max_y()) {
    bestover -= top - row->max_y();
  }
  if (bottom < row->min_y()) {
    // compute overlap
    bestover -= row->min_y() - bottom;
  }
  if (testing_blob && textord_debug_blob) {
    tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n", bottom, top, row->min_y(),
            row->max_y(), rowsize, bestover);
  }
  test_row = row;
  do {
    if (!row_it->at_last()) {
      row_it->forward();
      test_row = row_it->data();
      // Only rows that intersect [bottom, top] are candidates.
      if (test_row->min_y() <= top && test_row->max_y() >= bottom) {
        merge_top = test_row->max_y() > row->max_y() ? test_row->max_y() : row->max_y();
        merge_bottom = test_row->min_y() < row->min_y() ? test_row->min_y() : row->min_y();
        if (merge_top - merge_bottom <= rowsize) {
          if (testing_blob && textord_debug_blob) {
            tprintf("Merging rows at (%g,%g), (%g,%g)\n", row->min_y(), row->max_y(),
                    test_row->min_y(), test_row->max_y());
          }
          // Fold `row` into `test_row`: widen the limits, move the blobs
          // across and re-sort them, then remove and delete the element just
          // before test_row in the list.
          test_row->set_limits(merge_bottom, merge_top);
          blob_it.set_to_list(test_row->blob_list());
          blob_it.add_list_after(row->blob_list());
          blob_it.sort(blob_x_order);
          row_it->backward();
          delete row_it->extract();
          row_it->forward();
          // `row` is stale after the merge; the negative overlap guarantees
          // it gets replaced by test_row below (overlap is non-negative here
          // because test_row intersects the range).
          bestover = -1.0f; // force replacement
        }
        overlap = top - bottom;
        if (top > test_row->max_y()) {
          overlap -= top - test_row->max_y();
        }
        if (bottom < test_row->min_y()) {
          overlap -= test_row->min_y() - bottom;
        }
        // Two rows that each overlap by at least rowsize - 1 lead to REJECT.
        if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
          result = REJECT;
        }
        if (overlap > bestover) {
          bestover = overlap; // find biggest overlap
          row = test_row;
        }
        if (testing_blob && textord_debug_blob) {
          tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n", bottom, top,
                  test_row->min_y(), test_row->max_y(), rowsize, overlap, bestover);
        }
      }
    }
  } while (!row_it->at_last() && test_row->min_y() <= top && test_row->max_y() >= bottom);
  while (row_it->data() != row) {
    row_it->backward(); // make it point to row
  }
                        // doesn't overlap much
  // top - bottom - bestover is the part of the blob outside the best row.
  if (top - bottom - bestover > rowsize * textord_overlap_x &&
      (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x) && result == ASSIGN) {
    result = NEW_ROW; // doesn't overlap enough
  }
  best_row = row;
  return result;
}

◆ most_overlapping_row() [2/2]

TO_ROW * tesseract::most_overlapping_row	(	TO_ROW_LIST *	rows,
		BLOBNBOX *	blob
	)

Definition at line 103 of file underlin.cpp.

  {
  // Picks, from `rows`, the row whose vertical band best overlaps the blob's
  // bounding box, with each row's band evaluated at the blob's horizontal
  // centre x. Returns nullptr when the list is empty.
  int16_t x = (blob->bounding_box().left() + blob->bounding_box().right()) / 2;
  TO_ROW_IT row_it = rows; // row iterator
  TO_ROW *row;             // current row
  TO_ROW *best_row;        // output row
  float overlap;           // of blob & row
  float bestover;          // best overlap
 
  best_row = nullptr;
  bestover = static_cast<float>(-INT32_MAX);
  if (row_it.empty()) {
    return nullptr;
  }
  row = row_it.data();
  row_it.mark_cycle_pt();
  // Phase 1: step over rows whose lower limit (baseline + descdrop) is above
  // the top of the blob, remembering the last one as a fallback.
  // NOTE(review): the value stored is top - baseline + descdrop, whereas the
  // loop condition compares against baseline + descdrop - confirm the sign of
  // the descdrop term is intended.
  while (row->baseline.y(x) + row->descdrop > blob->bounding_box().top() && !row_it.cycled_list()) {
    best_row = row;
    bestover = blob->bounding_box().top() - row->baseline.y(x) + row->descdrop;
    row_it.forward();
    row = row_it.data();
  }
  // Phase 2: for rows whose upper limit (baseline + xheight + ascrise) reaches
  // at least the bottom of the blob, compute the overlap as
  // min(row upper limit, blob top) - max(blob bottom, row lower limit)
  // and keep the row with the largest value.
  while (row->baseline.y(x) + row->xheight + row->ascrise >= blob->bounding_box().bottom() &&
         !row_it.cycled_list()) {
    overlap = row->baseline.y(x) + row->xheight + row->ascrise;
    if (blob->bounding_box().top() < overlap) {
      overlap = blob->bounding_box().top();
    }
    if (blob->bounding_box().bottom() > row->baseline.y(x) + row->descdrop) {
      overlap -= blob->bounding_box().bottom();
    } else {
      overlap -= row->baseline.y(x) + row->descdrop;
    }
    if (overlap > bestover) {
      bestover = overlap;
      best_row = row;
    }
    row_it.forward();
    row = row_it.data();
  }
  // Phase 3: if nothing overlapped (best is negative), prefer the row the
  // iterator stopped on when its upper limit is closer to the blob's bottom
  // than the best found so far.
  if (bestover < 0 &&
      row->baseline.y(x) + row->xheight + row->ascrise - blob->bounding_box().bottom() > bestover) {
    best_row = row;
  }
  return best_row;
}

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate	(	int	n,
		const TFloat *	u,
		const TFloat *	v,
		TFloat *	out
	)

inline

Definition at line 229 of file functions.h.

                                                                                     {
  // Element-wise fused update over the first n entries: out += u * v.
  int idx = 0;
  while (idx < n) {
    const TFloat product = u[idx] * v[idx];
    out[idx] += product;
    ++idx;
  }
}

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace	(	int	n,
		const TFloat *	src,
		TFloat *	inout
	)

inline

Definition at line 222 of file functions.h.

                                                                            {
  // Scales each of the first n entries of inout by the matching entry of src.
  int idx = 0;
  while (idx < n) {
    inout[idx] *= src[idx];
    ++idx;
  }
}

◆ NearlyEqual()

template<class T >

bool tesseract::NearlyEqual	(	T	x,
		T	y,
		T	tolerance
	)

Definition at line 51 of file host.h.

                                        {
  // True when x and y differ by no more than tolerance in either direction.
  const T delta = x - y;
  if (!(delta <= tolerance)) {
    return false;
  }
  return -delta <= tolerance;
}

◆ NewClass()

TESS_API CLASS_TYPE tesseract::NewClass	(	int	NumProtos,
		int	NumConfigs
	)

Definition at line 145 of file protos.cpp.

                                                   {
  // Allocates a class with storage sized for NumProtos prototypes and
  // NumConfigs configurations, none of which are in use yet.
  CLASS_TYPE fresh_class = new CLASS_STRUCT;

  fresh_class->Prototypes.resize(NumProtos);
  fresh_class->MaxNumProtos = NumProtos;
  fresh_class->NumProtos = 0;

  fresh_class->Configurations.resize(NumConfigs);
  fresh_class->MaxNumConfigs = NumConfigs;
  fresh_class->NumConfigs = 0;

  return fresh_class;
}

◆ NewProgressTester()

void tesseract::NewProgressTester	(	const char *	imgname,
		const char *	tessdatadir,
		const char *	lang
	)

Definition at line 116 of file progress_test.cc.

                                                                                       {
  // Runs recognition on `imgname` with a mocked progress sink and checks that
  // only the new-style progress callback is used, that progress values are
  // reported repeatedly within bounds, and that recognition completes.
  using ::testing::_;
  using ::testing::AllOf;
  using ::testing::AtLeast;
  using ::testing::DoAll;
  using ::testing::Gt;
  using ::testing::Le;
  using ::testing::Return;
  using ::testing::SaveArg;
 
  // Set up the engine and the input image; either failure aborts the test.
  auto api = std::make_unique<tesseract::TessBaseAPI>();
  ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
  Image image = pixRead(imgname);
  ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
  api->SetImage(image);
 
  NewMockProgressSink progressSink;
 
  // Last progress value seen; updated by SaveArg on every progress() call.
  int currentProgress = -1;
  // The classic callback must never fire.
  EXPECT_CALL(progressSink, classicProgress(_)).Times(0);
  // progress() must be called at least 5 times with a value of at most 100.
  // Gt<int &> is given currentProgress by reference, so presumably each value
  // is compared against the most recently saved one - confirm against the
  // gMock matcher semantics.
  EXPECT_CALL(progressSink, progress(AllOf(Gt<int &>(currentProgress), Le(100))))
      .Times(AtLeast(5))
      .WillRepeatedly(DoAll(SaveArg<0>(&currentProgress), Return(false)));
  // cancel() must be polled at least 5 times and always answers false.
  EXPECT_CALL(progressSink, cancel(_)).Times(AtLeast(5)).WillRepeatedly(Return(false));
 
  EXPECT_EQ(api->Recognize(&progressSink.monitor), false);
  EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
 
  // Release engine and image resources.
  api->End();
  image.destroy();
}

◆ NextDirectionChange()

MFOUTLINE tesseract::NextDirectionChange ( MFOUTLINE EdgePoint )

This routine returns the next point in the micro-feature outline that has a direction different than EdgePoint. The routine assumes that the outline being searched is not a degenerate outline (i.e. it must have 2 or more edge points).

Parameters

EdgePoint start search from this point

Returns: Point of next direction change in micro-feature outline.

Note: Globals: none

Definition at line 402 of file mfoutline.cpp.

                                                   {
  // Direction of the starting point; the walk continues while this direction
  // persists.
  const DIRECTION start_direction = PointAt(EdgePoint)->Direction;

  // Advance at least one point. Stop at the first point whose direction
  // differs, that is hidden, or whose successor is missing or hidden.
  for (;;) {
    EdgePoint = NextPointAfter(EdgePoint);
    MFOUTLINE lookahead = NextPointAfter(EdgePoint);
    if (!(PointAt(EdgePoint)->Direction == start_direction)) {
      break;
    }
    if (PointAt(EdgePoint)->Hidden) {
      break;
    }
    if (lookahead == nullptr) {
      break;
    }
    if (PointAt(lookahead)->Hidden) {
      break;
    }
  }

  return EdgePoint;
}

◆ NextExtremity()

MFOUTLINE tesseract::NextExtremity ( MFOUTLINE EdgePoint )

This routine returns the next point in the micro-feature outline that is an extremity. The search starts after EdgePoint. The routine assumes that the outline being searched is not a degenerate outline (i.e. it must have 2 or more edge points).

Parameters

EdgePoint start search from this point

Returns: Next extremity in the outline after EdgePoint.

Note: Globals: none

Definition at line 196 of file mfoutline.cpp.

                                             {
  // Always step off the starting point first, then keep stepping until a
  // point carrying the extremity mark is reached.
  do {
    EdgePoint = NextPointAfter(EdgePoint);
  } while (!PointAt(EdgePoint)->ExtremityMark);

  return EdgePoint;
} /* NextExtremity */

◆ NextSample()

CLUSTER * tesseract::NextSample ( LIST * SearchState )

This routine is used to find all of the samples which belong to a cluster. It starts by removing the top cluster on the cluster list (SearchState). If this cluster is a leaf it is returned. Otherwise, the right subcluster is pushed on the list and we continue the search in the left subcluster. This continues until a leaf is found. If all samples have been found, nullptr is returned. InitSampleSearch() must be called before NextSample() to initialize the search.

Parameters

SearchState ptr to list containing clusters to be searched

Returns: Pointer to the next leaf cluster (sample) or nullptr.

Definition at line 1638 of file cluster.cpp.

                                       {
  // An empty search list means every sample has already been returned.
  if (*SearchState == NIL_LIST) {
    return nullptr;
  }

  // Take the cluster at the head of the list.
  auto *node = reinterpret_cast<CLUSTER *>((*SearchState)->first_node());
  *SearchState = pop(*SearchState);

  // Descend to the left-most leaf, queueing each right subtree for a later
  // call.
  while (node->Left != nullptr) {
    *SearchState = push(*SearchState, node->Right);
    node = node->Left;
  }
  return node;
} // NextSample

◆ NO_LIST()

constexpr ERRCODE tesseract::NO_LIST ( "Iterator not set to a list" )

constexpr

◆ non_0_digit()

bool tesseract::non_0_digit	(	const char *	str,
		int	length
	)

◆ Normalize()

void tesseract::Normalize ( float * Values )

Definition at line 691 of file commontraining.cpp.

                              {
  // Rewrites the three entries of Values in place. On entry Values[0] and
  // Values[1] are used as a point and Values[2] as an angle expressed as a
  // fraction of a full turn; on exit they hold (slope, -1, intercept), each
  // scaled by 1 / sqrt(slope^2 + 1).
  const float slope = tan(Values[2] * 2 * M_PI);
  const float intercept = Values[1] - slope * Values[0];
  const float scale = 1 / sqrt(slope * slope + 1.0);

  Values[0] = slope * scale;
  Values[1] = -scale;
  Values[2] = intercept * scale;
} // Normalize

◆ NormalizeCleanAndSegmentUTF8()

TESS_UNICHARSET_TRAINING_API bool tesseract::NormalizeCleanAndSegmentUTF8	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		GraphemeNormMode	g_mode,
		bool	report_errors,
		const char *	str8,
		std::vector< std::string > *	graphemes
	)

Definition at line 179 of file normstrngs.cpp.

                                                                   {
  // Normalize the UTF-8 input to UTF-32 and strip joiners before validation.
  std::vector<char32> normalized;
  NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normalized);
  StripJoiners(&normalized);

  // First validation/segmentation pass.
  std::vector<std::vector<char32>> segments;
  bool ok = Validator::ValidateCleanAndSegment(g_mode, report_errors, normalized, &segments);

  const bool may_need_second_pass = g_mode != GraphemeNormMode::kSingleString && ok;
  if (may_need_second_pass) {
    // Cleaning may have altered the text, in which case the segmentation can
    // be off; re-join the pieces and, if they differ from the input, segment
    // the cleaned text again.
    std::vector<char32> rejoined;
    for (const auto &piece : segments) {
      rejoined.insert(rejoined.end(), piece.begin(), piece.end());
    }
    if (rejoined != normalized) {
      segments.clear();
      ok = Validator::ValidateCleanAndSegment(g_mode, report_errors, rejoined, &segments);
    }
  }

  // Convert every segment back to UTF-8 for the caller.
  graphemes->clear();
  graphemes->reserve(segments.size());
  for (const auto &piece : segments) {
    graphemes->push_back(UNICHAR::UTF32ToUTF8(piece));
  }
  return ok;
}

◆ NormalizeOutline()

void tesseract::NormalizeOutline	(	MFOUTLINE	Outline,
		float	XOrigin
	)

This routine normalizes the coordinates of the specified outline so that the outline is deskewed down to the baseline, translated so that x=0 is at XOrigin, and scaled so that the height of a character cell from descender to ascender is 1. Of this height, 0.25 is for the descender, 0.25 for the ascender, and 0.5 for the x-height. The y coordinate of the baseline is 0.

Parameters

Outline	outline to be normalized
XOrigin	x-origin of text

Definition at line 218 of file mfoutline.cpp.

                                                        {
  // Nothing to do for an empty outline.
  if (Outline == NIL_LIST) {
    return;
  }

  // Walk the circular list exactly once, starting and ending at Outline.
  MFOUTLINE Node = Outline;
  do {
    MFEDGEPT *Edge = PointAt(Node);
    // Shift x so that XOrigin maps to 0, then scale.
    Edge->Point.x = MF_SCALE_FACTOR * (Edge->Point.x - XOrigin);
    // Shift y so that kBlnBaselineOffset maps to 0, then scale.
    Edge->Point.y = MF_SCALE_FACTOR * (Edge->Point.y - kBlnBaselineOffset);
    Node = NextPointAfter(Node);
  } while (Node != Outline);
} /* NormalizeOutline */

◆ NormalizeOutlineX()

void tesseract::NormalizeOutlineX ( FEATURE_SET FeatureSet )

This routine computes the weighted average x position over all of the outline-features in FeatureSet and then renormalizes the outline-features to force this average to be the x origin (i.e. x=0). FeatureSet is changed.

Parameters

FeatureSet outline-features to be normalized

Definition at line 134 of file outfeat.cpp.

                                               {
  // Shifts the x parameter of every feature so that the length-weighted mean
  // x over the whole set becomes 0. FeatureSet is modified in place.
  int i;
  FEATURE Feature;
  float Length;
  float TotalX = 0.0;
  float TotalWeight = 0.0;
  float Origin;

  // An empty set has no mean; leave it untouched.
  if (FeatureSet->NumFeatures <= 0) {
    return;
  }

  // Accumulate sum(x * length) and sum(length).
  for (i = 0; i < FeatureSet->NumFeatures; i++) {
    Feature = FeatureSet->Features[i];
    Length = Feature->Params[OutlineFeatLength];
    TotalX += Feature->Params[OutlineFeatX] * Length;
    TotalWeight += Length;
  }
  // If the lengths sum to zero the weighted mean is undefined: dividing would
  // give NaN/inf and corrupt every feature, so leave the set unchanged.
  if (TotalWeight == 0.0f) {
    return;
  }
  Origin = TotalX / TotalWeight;

  // Re-centre every feature on the weighted mean.
  for (i = 0; i < FeatureSet->NumFeatures; i++) {
    Feature = FeatureSet->Features[i];
    Feature->Params[OutlineFeatX] -= Origin;
  }
} /* NormalizeOutlineX */

◆ NormalizePicoX()

void tesseract::NormalizePicoX ( FEATURE_SET FeatureSet )

This routine computes the average x position over all of the pico-features in FeatureSet and then renormalizes the pico-features to force this average to be the x origin (i.e. x=0). FeatureSet is changed.

Parameters

FeatureSet pico-features to be normalized

Definition at line 181 of file picofeat.cpp.

                                            {
  // Shifts the x parameter of every feature so that the plain (unweighted)
  // mean x over the whole set becomes 0. FeatureSet is modified in place.
  int i;
  FEATURE Feature;
  float Origin = 0.0;

  // An empty set has no mean; return early rather than computing 0 / 0.
  // This mirrors the guard used by NormalizeOutlineX.
  if (FeatureSet->NumFeatures <= 0) {
    return;
  }

  // Sum the x positions, then divide by the count to get the mean.
  for (i = 0; i < FeatureSet->NumFeatures; i++) {
    Feature = FeatureSet->Features[i];
    Origin += Feature->Params[PicoFeatX];
  }
  Origin /= FeatureSet->NumFeatures;

  // Re-centre every feature on the mean.
  for (i = 0; i < FeatureSet->NumFeatures; i++) {
    Feature = FeatureSet->Features[i];
    Feature->Params[PicoFeatX] -= Origin;
  }
} /* NormalizePicoX */

◆ NormalizeUTF8String()

TESS_UNICHARSET_TRAINING_API bool tesseract::NormalizeUTF8String	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		GraphemeNorm	grapheme_normalize,
		const char *	str8,
		std::string *	normalized
	)

Definition at line 152 of file normstrngs.cpp.

                                                {
  // Normalizes str8 and, when normalized is non-null, stores the UTF-8 result
  // there. Returns true unless grapheme validation was requested and failed.
  std::vector<char32> utf32;
  NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &utf32);

  // Without grapheme normalization the converted text is returned as is.
  if (grapheme_normalize != GraphemeNorm::kNormalize) {
    if (normalized != nullptr) {
      *normalized = UNICHAR::UTF32ToUTF8(utf32);
    }
    return true;
  }

  // Grapheme normalization: validate the whole text as one single string.
  StripJoiners(&utf32);
  std::vector<std::vector<char32>> segments;
  const bool valid = Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, false,
                                                        utf32, &segments);
  // No usable output counts as failure; *normalized is left untouched.
  if (segments.empty() || segments[0].empty()) {
    return false;
  }
  if (normalized != nullptr) {
    *normalized = UNICHAR::UTF32ToUTF8(segments[0]);
  }
  return valid;
}

◆ NULL_CURRENT()

constexpr ERRCODE tesseract::NULL_CURRENT ( "List current position is nullptr" )

constexpr

◆ NULL_DATA()

constexpr ERRCODE tesseract::NULL_DATA ( "List would have returned a nullptr data pointer" )

constexpr

◆ NULL_NEXT()

constexpr ERRCODE tesseract::NULL_NEXT ( "Next element on the list is nullptr" )

constexpr

◆ NULL_PREV()

constexpr ERRCODE tesseract::NULL_PREV ( "Previous element on the list is nullptr" )

constexpr

◆ NumberOfProtos()

TESS_COMMON_TRAINING_API int tesseract::NumberOfProtos	(	LIST	ProtoList,
		bool	CountSigProtos,
		bool	CountInsigProtos
	)

Definition at line 732 of file commontraining.cpp.

                                                                               {
  // Counts the prototypes in ProtoList, including significant ones only when
  // CountSigProtos is set and insignificant ones only when CountInsigProtos is.
  int Count = 0;
  iterate(ProtoList) {
    auto *Proto = reinterpret_cast<PROTOTYPE *>(ProtoList->first_node());
    // Pick the flag that applies to this prototype's significance.
    const bool Wanted = Proto->Significant ? CountSigProtos : CountInsigProtos;
    if (Wanted) {
      ++Count;
    }
  }
  return Count;
}

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32 ch )

Definition at line 208 of file normstrngs.cpp.

                               {
  // Maps hyphen-like, single-quote-like and double-quote-like code points to
  // their plain ASCII form; every other code point is returned unchanged.
  if (is_hyphen_punc(ch)) {
    return '-';
  }
  if (is_single_quote(ch)) {
    return '\'';
  }
  if (is_double_quote(ch)) {
    return '"';
  }
  return ch;
}

◆ OCRTester()

void tesseract::OCRTester	(	const char *	imgname,
		const char *	groundtruth,
		const char *	tessdatadir,
		const char *	lang
	)

Definition at line 60 of file apiexample_test.cc.

                                 {
  // Runs OCR on imgname with the given tessdata directory and language, and
  // expects the recognized text to equal the contents of the groundtruth file.
  // log.info() << tessdatadir << " for language: " << lang << std::endl;
  char *outText;
  std::locale loc("C"); // You can also use "" for the default system locale
  std::ifstream file(groundtruth);
  file.imbue(loc); // Use it for file input
  // Read the whole ground-truth file into a string.
  std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
  auto api = std::make_unique<tesseract::TessBaseAPI>();
  // The test asserts that Init() returns a false (zero) value.
  ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
  Image image = pixRead(imgname);
  ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
  api->SetImage(image);
  outText = api->GetUTF8Text();
  EXPECT_EQ(gtText, outText) << "Phototest.tif OCR does not match ground truth for "
                             << ::testing::PrintToString(lang);
  api->End();
  api->ClearPersistentCache();
  // The text buffer and the image are released explicitly by this test.
  delete[] outText;
  image.destroy();
}

◆ old_first_xheight()

void tesseract::old_first_xheight	(	TO_ROW *	row,
		TBOX	blobcoords[],
		int	initialheight,
		int	blobcount,
		QSPLINE *	baseline,
		float	jumplimit
	)

Definition at line 1340 of file oldbasel.cpp.

  {
  // Estimates the x-height and ascender rise of a row from the tops of its
  // blobs relative to the baseline spline, and stores them in row->xheight
  // and row->ascrise.
  int blobindex; /*current blob */
                 /*height statistics */
  STATS heightstat(0, MAXHEIGHT - 1);
  int height;      /*height of blob */
  int xcentre;     /*centre of blob */
  int lineheight;  /*approx xheight */
  float ascenders; /*ascender sum */
  int asccount;    /*no of ascenders */
  float xsum;      /*xheight sum */
  int xcount;      /*xheight count */
  float diff;      /*height difference */

  // Step 1: pick an approximate line height.
  if (blobcount > 1) {
    // Histogram the rounded blob heights that pass both minimum-size tests.
    for (blobindex = 0; blobindex < blobcount; blobindex++) {
      xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
      /*height of blob */
      height = static_cast<int>(blobcoords[blobindex].top() - baseline->y(xcentre) + 0.5);
      if (height > initialheight * oldbl_xhfract && height > textord_min_xheight) {
        heightstat.add(height, 1);
      }
    }
    if (heightstat.get_total() > 3) {
      // Use the lower quartile, falling back to the median if that is <= 0.
      lineheight = static_cast<int>(heightstat.ile(0.25));
      if (lineheight <= 0) {
        lineheight = static_cast<int>(heightstat.ile(0.5));
      }
    } else {
      // Too few samples: keep the caller's initial estimate.
      lineheight = initialheight;
    }
  } else {
    // Zero or one blob: use the rounded height of blobcoords[0].
    // NOTE(review): blobcoords[0] is read even when blobcount is 0 — confirm
    // callers always pass at least one blob.
    lineheight =
        static_cast<int>(blobcoords[0].top() -
                         baseline->y((blobcoords[0].left() + blobcoords[0].right()) / 2) + 0.5);
  }

  // Step 2: classify each blob as ascender or x-height relative to lineheight,
  // using jumplimit as the tolerance band, and accumulate sums for each class.
  xsum = 0.0f;
  xcount = 0;
  for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
    xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
    diff = blobcoords[blobindex].top() - baseline->y(xcentre);
    /*is it ascender */
    if (diff > lineheight + jumplimit) {
      ascenders += diff;
      asccount++; /*count ascenders */
    } else if (diff > lineheight - jumplimit) {
      xsum += diff; /*mean xheight */
      xcount++;
    }
  }
  if (xcount > 0) {
    xsum /= xcount; /*average xheight */
  } else {
    xsum = static_cast<float>(lineheight); /*guess it */
  }
  // The estimate is multiplied into the existing row->xheight, not assigned.
  // NOTE(review): presumably the incoming value carries a sign/flag — confirm.
  row->xheight *= xsum;
  if (asccount > 0) {
    // Ascender rise is the mean ascender height above the mean x-height.
    row->ascrise = ascenders / asccount - xsum;
  } else {
    row->ascrise = 0.0f; /*had none */
  }
  // A zero x-height is replaced by -1.
  if (row->xheight == 0) {
    row->xheight = -1.0f;
  }
}

◆ OpenBoxFile()

TESS_API FILE * tesseract::OpenBoxFile ( const char * fname )

Definition at line 59 of file boxread.cpp.

                                     {
  // Opens the box file that BoxFileName() derives from fname, for binary
  // reading. A failure to open is reported through CANTOPENFILE and tprintf.
  const std::string box_path = BoxFileName(fname);
  FILE *box_file = fopen(box_path.c_str(), "rb");
  if (box_file == nullptr) {
    CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", box_path.c_str());
    tprintf("Can't open box file %s", box_path.c_str());
  }
  return box_file;
}

◆ operator!() [1/2]

FCOORD tesseract::operator! ( const FCOORD & src )

inline

Definition at line 524 of file points.h.

  {
  // Returns (-y, x): src turned through a quarter turn.
  FCOORD rotated;

  rotated.ycoord = src.xcoord;
  rotated.xcoord = -src.ycoord;
  return rotated;
}

◆ operator!() [2/2]

ICOORD tesseract::operator! ( const ICOORD & src )

inline

Definition at line 324 of file points.h.

  {
  // Returns (-y, x): src turned through a quarter turn.
  ICOORD rotated;

  rotated.ycoord = src.xcoord;
  rotated.xcoord = -src.ycoord;
  return rotated;
}

◆ operator%() [1/2]

float tesseract::operator%	(	const FCOORD &	op1,
		const FCOORD &	op2
	)

inline

Definition at line 616 of file points.h.

                       {
  // Scalar (dot) product: x1 * x2 + y1 * y2.
  return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord;
}

◆ operator%() [2/2]

int32_t tesseract::operator%	(	const ICOORD &	op1,
		const ICOORD &	op2
	)

inline

Definition at line 416 of file points.h.

                       {
  // Scalar (dot) product: x1 * x2 + y1 * y2.
  return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord;
}

◆ operator&=()

TBOX & tesseract::operator&=	(	TBOX &	op1,
		const TBOX &	op2
	)

Definition at line 242 of file rect.cpp.

                                             {
  // Shrinks op1 to its intersection with op2 and returns op1.
  if (!op1.overlap(op2)) {
    // No common area: set op1 to the inverted sentinel box, with the
    // bottom-left at INT16_MAX and the top-right at -INT16_MAX.
    op1.bot_left.set_x(INT16_MAX);
    op1.bot_left.set_y(INT16_MAX);
    op1.top_right.set_x(-INT16_MAX);
    op1.top_right.set_y(-INT16_MAX);
    return op1;
  }

  // Raise the bottom-left corner to the larger of the two.
  if (op2.bot_left.x() > op1.bot_left.x()) {
    op1.bot_left.set_x(op2.bot_left.x());
  }
  if (op2.bot_left.y() > op1.bot_left.y()) {
    op1.bot_left.set_y(op2.bot_left.y());
  }

  // Lower the top-right corner to the smaller of the two.
  if (op2.top_right.x() < op1.top_right.x()) {
    op1.top_right.set_x(op2.top_right.x());
  }
  if (op2.top_right.y() < op1.top_right.y()) {
    op1.top_right.set_y(op2.top_right.y());
  }
  return op1;
}

◆ operator*() [1/6]

float tesseract::operator*	(	const FCOORD &	op1,
		const FCOORD &	op2
	)

inline

Definition at line 628 of file points.h.

                       {
  // 2-D cross product: x1 * y2 - y1 * x2.
  return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord;
}

◆ operator*() [2/6]

FCOORD tesseract::operator*	(	const FCOORD &	op1,
		float	scale
	)

inline

Definition at line 640 of file points.h.

                 {
  // Returns a copy of op1 with both coordinates multiplied by scale.
  FCOORD scaled;

  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}

◆ operator*() [3/6]

int32_t tesseract::operator*	(	const ICOORD &	op1,
		const ICOORD &	op2
	)

inline

Definition at line 428 of file points.h.

                       {
  // 2-D cross product: x1 * y2 - y1 * x2.
  return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord;
}

◆ operator*() [4/6]

ICOORD tesseract::operator*	(	const ICOORD &	op1,
		TDimension	scale
	)

inline

Definition at line 440 of file points.h.

                      {
  // Returns a copy of op1 with both coordinates multiplied by scale.
  ICOORD scaled;

  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}

◆ operator*() [5/6]

FCOORD tesseract::operator*	(	float	scale,
		const FCOORD &	op1
	)

inline

Definition at line 650 of file points.h.

  {
  // Scalar-first form: returns op1 with both coordinates multiplied by scale.
  FCOORD scaled;

  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}

◆ operator*() [6/6]

ICOORD tesseract::operator*	(	TDimension	scale,
		const ICOORD &	op1
	)

inline

Definition at line 450 of file points.h.

  {
  // Scalar-first form: returns op1 with both coordinates multiplied by scale.
  ICOORD scaled;

  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}

◆ operator*=() [1/2]

FCOORD & tesseract::operator*=	(	FCOORD &	op1,
		float	scale
	)

inline

Definition at line 667 of file points.h.

                 {
  // Multiplies both coordinates of op1 by scale in place and returns op1.
  op1.xcoord *= scale;
  op1.ycoord *= scale;
  return op1;
}

◆ operator*=() [2/2]

ICOORD & tesseract::operator*=	(	ICOORD &	op1,
		TDimension	scale
	)

inline

Definition at line 467 of file points.h.

                      {
  // Multiplies both coordinates of op1 by scale in place and returns op1.
  op1.xcoord *= scale;
  op1.ycoord *= scale;
  return op1;
}

◆ operator+() [1/2]

FCOORD tesseract::operator+	(	const FCOORD &	op1,
		const FCOORD &	op2
	)

inline

Definition at line 556 of file points.h.

                       {
  // Component-wise sum of the two vectors.
  FCOORD total;

  total.ycoord = op1.ycoord + op2.ycoord;
  total.xcoord = op1.xcoord + op2.xcoord;
  return total;
}

◆ operator+() [2/2]

ICOORD tesseract::operator+	(	const ICOORD &	op1,
		const ICOORD &	op2
	)

inline

Definition at line 356 of file points.h.

                       {
  // Component-wise sum of the two vectors.
  ICOORD total;

  total.ycoord = op1.ycoord + op2.ycoord;
  total.xcoord = op1.xcoord + op2.xcoord;
  return total;
}

◆ operator+=() [1/3]

FCOORD & tesseract::operator+=	(	FCOORD &	op1,
		const FCOORD &	op2
	)

inline

Definition at line 572 of file points.h.

                       {
  // Adds op2 to op1 component-wise in place and returns op1.
  op1.xcoord += op2.xcoord;
  op1.ycoord += op2.ycoord;
  return op1;
}

◆ operator+=() [2/3]

ICOORD & tesseract::operator+=	(	ICOORD &	op1,
		const ICOORD &	op2
	)

inline

Definition at line 372 of file points.h.

                       {
  // Adds op2 to op1 component-wise in place and returns op1.
  op1.xcoord += op2.xcoord;
  op1.ycoord += op2.ycoord;
  return op1;
}

◆ operator+=() [3/3]

TBOX & tesseract::operator+=	(	TBOX &	op1,
		const TBOX &	op2
	)

Definition at line 214 of file rect.cpp.

                     {
  // Grows op1 to the bounding box of op1 and op2 and returns op1.

  // Pull the bottom-left corner down/left to the smaller of the two.
  if (op2.bot_left.x() < op1.bot_left.x()) {
    op1.bot_left.set_x(op2.bot_left.x());
  }
  if (op2.bot_left.y() < op1.bot_left.y()) {
    op1.bot_left.set_y(op2.bot_left.y());
  }

  // Push the top-right corner up/right to the larger of the two.
  if (op2.top_right.x() > op1.top_right.x()) {
    op1.top_right.set_x(op2.top_right.x());
  }
  if (op2.top_right.y() > op1.top_right.y()) {
    op1.top_right.set_y(op2.top_right.y());
  }

  return op1;
}

◆ operator-() [1/4]

FCOORD tesseract::operator-	(	const FCOORD &	op1,
		const FCOORD &	op2
	)

inline

Definition at line 586 of file points.h.

                       {
  // Component-wise difference op1 - op2.
  FCOORD delta;

  delta.ycoord = op1.ycoord - op2.ycoord;
  delta.xcoord = op1.xcoord - op2.xcoord;
  return delta;
}

◆ operator-() [2/4]

FCOORD tesseract::operator- ( const FCOORD & src )

inline

Definition at line 540 of file points.h.

  {
  // Unary minus: returns (-x, -y).
  FCOORD negated;

  negated.ycoord = -src.ycoord;
  negated.xcoord = -src.xcoord;
  return negated;
}

◆ operator-() [3/4]

ICOORD tesseract::operator-	(	const ICOORD &	op1,
		const ICOORD &	op2
	)

inline

Definition at line 386 of file points.h.

                       {
  // Component-wise difference op1 - op2.
  ICOORD delta;

  delta.ycoord = op1.ycoord - op2.ycoord;
  delta.xcoord = op1.xcoord - op2.xcoord;
  return delta;
}

◆ operator-() [4/4]

ICOORD tesseract::operator- ( const ICOORD & src )

inline

Definition at line 340 of file points.h.

  {
  // Unary minus: returns (-x, -y).
  ICOORD negated;

  negated.ycoord = -src.ycoord;
  negated.xcoord = -src.xcoord;
  return negated;
}

◆ operator-=() [1/2]

FCOORD & tesseract::operator-=	(	FCOORD &	op1,
		const FCOORD &	op2
	)

inline

Definition at line 602 of file points.h.

                       {
  // Subtracts op2 from op1 component-wise in place and returns op1.
  op1.xcoord -= op2.xcoord;
  op1.ycoord -= op2.ycoord;
  return op1;
}

◆ operator-=() [2/2]

ICOORD & tesseract::operator-=	(	ICOORD &	op1,
		const ICOORD &	op2
	)

inline

Definition at line 402 of file points.h.

                       {
  // Subtracts op2 from op1 component-wise in place and returns op1.
  op1.xcoord -= op2.xcoord;
  op1.ycoord -= op2.ycoord;
  return op1;
}

◆ operator/() [1/2]

FCOORD tesseract::operator/	(	const FCOORD &	op1,
		float	scale
	)

inline

Definition at line 681 of file points.h.

                 {
  // Returns op1 with both coordinates divided by scale; scale must be non-zero.
  ASSERT_HOST(scale != 0.0f);
  FCOORD quotient;
  quotient.ycoord = op1.ycoord / scale;
  quotient.xcoord = op1.xcoord / scale;
  return quotient;
}

◆ operator/() [2/2]

ICOORD tesseract::operator/	(	const ICOORD &	op1,
		TDimension	scale
	)

inline

Definition at line 481 of file points.h.

                      {
  // Returns op1 with both coordinates divided by scale (integer division).
  ICOORD result; // output
  // Integer division by zero is undefined behaviour, so reject it up front,
  // exactly as the FCOORD overload does.
  ASSERT_HOST(scale != 0);
  result.xcoord = op1.xcoord / scale;
  result.ycoord = op1.ycoord / scale;
  return result;
}

◆ operator/=() [1/2]

FCOORD & tesseract::operator/=	(	FCOORD &	op1,
		float	scale
	)

inline

Definition at line 697 of file points.h.

                 {
  // Divides both coordinates of op1 by scale in place and returns op1.
  // A zero scale trips the host assertion before any division happens.
  ASSERT_HOST(scale != 0.0f);
  op1.xcoord /= scale;
  op1.ycoord /= scale;
  return op1;
}

◆ operator/=() [2/2]

ICOORD & tesseract::operator/=	(	ICOORD &	op1,
		TDimension	scale
	)

inline

Definition at line 497 of file points.h.

                      {
  // Divides both coordinates of op1 by scale in place (integer division) and
  // returns op1.
  // Integer division by zero is undefined behaviour, so reject it up front,
  // exactly as the FCOORD overload does.
  ASSERT_HOST(scale != 0);
  op1.xcoord /= scale;
  op1.ycoord /= scale;
  return op1;
}

◆ orientation_and_script_detection()

int tesseract::orientation_and_script_detection	(	const char *	filename,
		OSResults *	osr,
		tesseract::Tesseract *	tess
	)

Definition at line 188 of file osdetect.cpp.

                                                               {
  // Builds the block list for the page held in tess->pix_binary(), removes
  // non-text regions, and runs os_detect() on the remaining blobs. Returns
  // whatever os_detect() returns.
  std::string name = filename; // truncated name

  // Strip everything from the last '.' onwards. The string must really be
  // shortened: writing '\0' into a std::string leaves its length unchanged, so
  // the old extension would stay in the string and anything appended later
  // would land after the embedded NUL.
  // NOTE(review): presumably read_unlv_file appends its own extension to name —
  // confirm against its definition.
  const std::string::size_type lastdot = name.rfind('.');
  if (lastdot != std::string::npos) {
    name.resize(lastdot);
  }

  ASSERT_HOST(tess->pix_binary() != nullptr);
  int width = pixGetWidth(tess->pix_binary());
  int height = pixGetHeight(tess->pix_binary());

  // Use the zone file if one can be read, otherwise one block for the page.
  BLOCK_LIST blocks;
  if (!read_unlv_file(name, width, height, &blocks)) {
    FullPageBlock(width, height, &blocks);
  }

  // Try to remove non-text regions from consideration.
  TO_BLOCK_LIST land_blocks, port_blocks;
  remove_nontext_regions(tess, &blocks, &port_blocks);

  if (port_blocks.empty()) {
    // page segmentation did not succeed, so we need to find_components first.
    tess->mutable_textord()->find_components(tess->pix_binary(), &blocks, &port_blocks);
  } else {
    TBOX page_box(0, 0, width, height);
    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
    tess->mutable_textord()->filter_blobs(page_box.topright(), &port_blocks, true);
  }

  return os_detect(&port_blocks, osr, tess);
}

◆ OrientationIdToValue()

int tesseract::OrientationIdToValue ( const int & id )

Definition at line 566 of file osdetect.cpp.

                                        {
  // Maps an orientation id (0..3) to a value in degrees; any other id gives -1.
  // Table order follows the ids: 0 -> 0, 1 -> 270, 2 -> 180, 3 -> 90.
  static const int kDegreesById[] = {0, 270, 180, 90};
  if (id < 0 || id > 3) {
    return -1;
  }
  return kDegreesById[id];
}

◆ os_detect()

int tesseract::os_detect	(	TO_BLOCK_LIST *	port_blocks,
		OSResults *	osr,
		tesseract::Tesseract *	tess
	)

Definition at line 225 of file osdetect.cpp.

                                                                                    {
  // Collects the blobs from the text blocks in port_blocks that pass the size
  // and aspect-ratio filters below, then hands them to os_detect_blobs() and
  // returns its result.
  // blobs_total is incremented for every blob seen but is not read anywhere
  // in this function.
  int blobs_total = 0;
  TO_BLOCK_IT block_it;
  block_it.set_to_list(port_blocks);

  BLOBNBOX_CLIST filtered_list;
  BLOBNBOX_C_IT filtered_it(&filtered_list);

  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    TO_BLOCK *to_block = block_it.data();
    // Skip blocks that have a poly_block which is not text.
    if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText()) {
      continue;
    }
    BLOBNBOX_IT bbox_it;
    bbox_it.set_to_list(&to_block->blobs);
    for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
      BLOBNBOX *bbox = bbox_it.data();
      C_BLOB *blob = bbox->cblob();
      TBOX box = blob->bounding_box();
      ++blobs_total;

      // Catch illegal value of box width and avoid division by zero.
      if (box.width() == 0) {
        continue;
      }
      // TODO: Can height and width be negative? If not, remove fabs.
      float y_x = std::fabs((box.height() * 1.0f) / box.width());
      // A zero height makes y_x zero and x_y infinite; the ratio test below
      // then rejects the blob.
      float x_y = 1.0f / y_x;
      // Select a >= 1.0 ratio
      float ratio = x_y > y_x ? x_y : y_x;
      // Blob is ambiguous
      if (ratio > kSizeRatioToReject) {
        continue;
      }
      if (box.height() < kMinAcceptableBlobHeight) {
        continue;
      }
      filtered_it.add_to_end(bbox);
    }
  }
  // No script restriction is passed on (allowed_scripts is nullptr).
  return os_detect_blobs(nullptr, &filtered_list, osr, tess);
}

◆ os_detect_blob()

bool tesseract::os_detect_blob	(	BLOBNBOX *	bbox,
		OrientationDetector *	o,
		ScriptDetector *	s,
		OSResults *	osr,
		tesseract::Tesseract *	tess
	)

Definition at line 323 of file osdetect.cpp.

                                              {
  // Classifies one blob in each of four rotations (successive quarter turns)
  // and feeds the four rating lists to the orientation and script detectors.
  // Returns true only when both detectors say detection can stop.
  tess->tess_cn_matching.set_value(true); // turn it on
  tess->tess_bn_matching.set_value(false);
  C_BLOB *blob = bbox->cblob();
  TBLOB *tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
  TBOX box = tblob->bounding_box();
  FCOORD current_rotation(1.0f, 0.0f);
  FCOORD rotation90(0.0f, 1.0f);
  BLOB_CHOICE_LIST ratings[4];
  // Test the 4 orientations
  for (int i = 0; i < 4; ++i) {
    // Normalize the blob. Set the origin to the place we want to be the
    // bottom-middle after rotation.
    // Scaling is to make the rotated height the x-height.
    float scaling = static_cast<float>(kBlnXHeight) / box.height();
    float x_origin = (box.left() + box.right()) / 2.0f;
    float y_origin = (box.bottom() + box.top()) / 2.0f;
    if (i == 0 || i == 2) {
      // Rotation is 0 or 180.
      y_origin = i == 0 ? box.bottom() : box.top();
    } else {
      // Rotation is 90 or 270.
      scaling = static_cast<float>(kBlnXHeight) / box.width();
      x_origin = i == 1 ? box.left() : box.right();
    }
    // Each rotation works on its own copy so tblob itself is never modified.
    std::unique_ptr<TBLOB> rotated_blob(new TBLOB(*tblob));
    rotated_blob->Normalize(nullptr, &current_rotation, nullptr, x_origin, y_origin, scaling,
                            scaling, 0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
    tess->AdaptiveClassifier(rotated_blob.get(), ratings + i);
    // Advance to the next quarter turn for the following iteration.
    current_rotation.rotate(rotation90);
  }
  delete tblob;

  bool stop = o->detect_blob(ratings);
  s->detect_blob(ratings);
  int orientation = o->get_orientation();
  // Stop only if the script detector agrees for the current orientation.
  stop = s->must_stop(orientation) && stop;
  return stop;
}

◆ os_detect_blobs()

int tesseract::os_detect_blobs	(	const std::vector< int > *	allowed_scripts,
		BLOBNBOX_CLIST *	blob_list,
		OSResults *	osr,
		tesseract::Tesseract *	tess
	)

Definition at line 274 of file osdetect.cpp.

                                                              {
  // Runs os_detect_blob() on up to 5 * min_characters_to_try blobs from
  // blob_list, visiting them in the order given by a QRSequenceGenerator.
  // Returns the number of blobs evaluated, or 0 if there are too few blobs.
  OSResults osr_;
  int minCharactersToTry = tess->min_characters_to_try;
  int maxCharactersToTry = 5 * minCharactersToTry;
  // Fall back to a local results object when the caller passes none.
  if (osr == nullptr) {
    osr = &osr_;
  }

  osr->unicharset = &tess->unicharset;
  OrientationDetector o(allowed_scripts, osr);
  ScriptDetector s(allowed_scripts, osr, tess);

  BLOBNBOX_C_IT filtered_it(blob_list);
  int real_max = std::min(filtered_it.length(), maxCharactersToTry);
  // tprintf("Total blobs found = %d\n", blobs_total);
  // tprintf("Number of blobs post-filtering = %d\n", filtered_it.length());
  // tprintf("Number of blobs to try = %d\n", real_max);

  // If there are too few characters, skip this page entirely.
  if (real_max < minCharactersToTry / 2) {
    tprintf("Too few characters. Skipping this page\n");
    return 0;
  }

  // Copy the list into an array so blobs can be picked by index.
  auto **blobs = new BLOBNBOX *[filtered_it.length()];
  int number_of_blobs = 0;
  for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list(); filtered_it.forward()) {
    blobs[number_of_blobs++] = filtered_it.data();
  }
  QRSequenceGenerator sequence(number_of_blobs);
  int num_blobs_evaluated = 0;
  for (int i = 0; i < real_max; ++i) {
    // A stop request is honoured only once i exceeds minCharactersToTry; the
    // blob that triggers the break is not counted in num_blobs_evaluated.
    if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && i > minCharactersToTry) {
      break;
    }
    ++num_blobs_evaluated;
  }
  delete[] blobs;

  // Make sure the best_result is up-to-date
  int orientation = o.get_orientation();
  osr->update_best_script(orientation);
  return num_blobs_evaluated;
}

◆ OtsuStats()

int tesseract::OtsuStats	(	const int *	histogram,
		int *	H_out,
		int *	omega0_out
	)

Definition at line 166 of file otsuthr.cpp.

                                                                 {
  // Finds the threshold t that maximizes the between-class variance term
  // omega_0 * omega_1 * (mu_1 - mu_0)^2 over the histogram.
  // Returns the best t, or -1 if no candidate was found. If non-null, *H_out
  // receives the total histogram count and *omega0_out the count at or below
  // the best t (0 when none was found).
  int H = 0;
  double mu_T = 0.0;
  // H is the total count, mu_T the sum of value * count over all bins.
  for (int i = 0; i < kHistogramSize; ++i) {
    H += histogram[i];
    mu_T += static_cast<double>(i) * histogram[i];
  }

  // Now maximize sig_sq_B over t.
  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
  int best_t = -1;
  int omega_0, omega_1;
  int best_omega_0 = 0;
  double best_sig_sq_B = 0.0;
  double mu_0, mu_1, mu_t;
  omega_0 = 0;
  mu_t = 0.0;
  for (int t = 0; t < kHistogramSize - 1; ++t) {
    // Running count and running weighted sum for bins 0..t.
    omega_0 += histogram[t];
    mu_t += t * static_cast<double>(histogram[t]);
    // Nothing below the threshold yet: no split to evaluate.
    if (omega_0 == 0) {
      continue;
    }
    omega_1 = H - omega_0;
    // Nothing left above the threshold: later t cannot split either.
    if (omega_1 == 0) {
      break;
    }
    mu_0 = mu_t / omega_0;
    mu_1 = (mu_T - mu_t) / omega_1;
    double sig_sq_B = mu_1 - mu_0;
    sig_sq_B *= sig_sq_B * omega_0 * omega_1;
    // The first valid candidate is always accepted; later ones must be
    // strictly better.
    if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
      best_sig_sq_B = sig_sq_B;
      best_t = t;
      best_omega_0 = omega_0;
    }
  }
  if (H_out != nullptr) {
    *H_out = H;
  }
  if (omega0_out != nullptr) {
    *omega0_out = best_omega_0;
  }
  return best_t;
}

◆ OtsuThreshold()

int tesseract::OtsuThreshold	(	Image	src_pix,
		int	left,
		int	top,
		int	width,
		int	height,
		std::vector< int > &	thresholds,
		std::vector< int > &	hi_values
	)

Definition at line 38 of file otsuthr.cpp.

                                             {
  // Computes one Otsu threshold per channel of src_pix over the given
  // rectangle. thresholds and hi_values are resized to the channel count; a
  // channel keeps -1 in both when it is empty, and keeps hi_value -1 when its
  // split is not convincing. Returns the number of channels.
  int num_channels = pixGetDepth(src_pix) / 8;
  // Of all channels with no good hi_value, keep the best so we can always
  // produce at least one answer.
  int best_hi_value = 1;
  int best_hi_index = 0;
  bool any_good_hivalue = false;
  double best_hi_dist = 0.0;
  thresholds.resize(num_channels);
  hi_values.resize(num_channels);

  // only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
  // all of channel 0 then all of channel 1...
  std::vector<int> histogramAllChannels(kHistogramSize * num_channels);

  // Calculate Histogram on GPU
  OpenclDevice od;
  if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
      left == 0) {
    od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
                        height, kHistogramSize, &histogramAllChannels[0]);

    // Calculate Threshold from Histogram on cpu
    // The per-channel logic below is the same as in the non-OpenCL branch.
    for (int ch = 0; ch < num_channels; ++ch) {
      thresholds[ch] = -1;
      hi_values[ch] = -1;
      int *histogram = &histogramAllChannels[kHistogramSize * ch];
      int H;
      int best_omega_0;
      int best_t = OtsuStats(histogram, &H, &best_omega_0);
      if (best_omega_0 == 0 || best_omega_0 == H) {
        // This channel is empty.
        continue;
      }
      // To be a convincing foreground we must have a small fraction of H
      // or to be a convincing background we must have a large fraction of H.
      // In between we assume this channel contains no thresholding information.
      int hi_value = best_omega_0 < H * 0.5;
      thresholds[ch] = best_t;
      if (best_omega_0 > H * 0.75) {
        any_good_hivalue = true;
        hi_values[ch] = 0;
      } else if (best_omega_0 < H * 0.25) {
        any_good_hivalue = true;
        hi_values[ch] = 1;
      } else {
        // In case all channels are like this, keep the best of the bad lot.
        double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
        if (hi_dist > best_hi_dist) {
          best_hi_dist = hi_dist;
          best_hi_value = hi_value;
          best_hi_index = ch;
        }
      }
    }
  } else {
#endif
    for (int ch = 0; ch < num_channels; ++ch) {
      thresholds[ch] = -1;
      hi_values[ch] = -1;
      // Compute the histogram of the image rectangle.
      int histogram[kHistogramSize];
      HistogramRect(src_pix, ch, left, top, width, height, histogram);
      int H;
      int best_omega_0;
      int best_t = OtsuStats(histogram, &H, &best_omega_0);
      if (best_omega_0 == 0 || best_omega_0 == H) {
        // This channel is empty.
        continue;
      }
      // To be a convincing foreground we must have a small fraction of H
      // or to be a convincing background we must have a large fraction of H.
      // In between we assume this channel contains no thresholding information.
      int hi_value = best_omega_0 < H * 0.5;
      thresholds[ch] = best_t;
      if (best_omega_0 > H * 0.75) {
        any_good_hivalue = true;
        hi_values[ch] = 0;
      } else if (best_omega_0 < H * 0.25) {
        any_good_hivalue = true;
        hi_values[ch] = 1;
      } else {
        // In case all channels are like this, keep the best of the bad lot.
        double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
        if (hi_dist > best_hi_dist) {
          best_hi_dist = hi_dist;
          best_hi_value = hi_value;
          best_hi_index = ch;
        }
      }
    }
#ifdef USE_OPENCL
  }
#endif // USE_OPENCL

  if (!any_good_hivalue) {
    // Use the best of the ones that were not good enough.
    // NOTE(review): with zero channels this writes hi_values[0] on an empty
    // vector — confirm callers never pass an image with depth below 8.
    hi_values[best_hi_index] = best_hi_value;
  }
  return num_channels;
}

◆ outlines_to_blobs()

void tesseract::outlines_to_blobs	(	BLOCK *	block,
		ICOORD	bleft,
		ICOORD	tright,
		C_OUTLINE_LIST *	outlines
	)

Definition at line 460 of file edgblob.cpp.

                                                           {
  // Sorts the outlines into a bucket grid covering bleft..tright, then drains
  // the grid into the block.
  OL_BUCKETS outline_buckets(bleft, tright);

  fill_buckets(outlines, &outline_buckets);
  empty_buckets(block, &outline_buckets);
}

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char * name )

Definition at line 26 of file params_training_featdef.cpp.

                                                  {
  // Returns the index of the first entry of kParamsTrainingFeatureTypeName
  // that compares equal to `name`, or -1 when `name` is null or no entry
  // matches.
  if (name == nullptr) {
    return -1;
  }
  const int entry_count =
      sizeof(kParamsTrainingFeatureTypeName) / sizeof(kParamsTrainingFeatureTypeName[0]);
  int index = 0;
  while (index < entry_count) {
    const char *candidate = kParamsTrainingFeatureTypeName[index];
    // Null table entries are skipped rather than compared.
    if (candidate != nullptr && strcmp(name, candidate) == 0) {
      return index;
    }
    ++index;
  }
  return -1;
}

◆ ParseArguments()

TESS_COMMON_TRAINING_API void tesseract::ParseArguments	(	int *	argc,
		char ***	argv
	)

This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.

Globals:

Config: current clustering parameters
Parameters

argc number of command line arguments to parse

argv command line arguments

Definition at line 125 of file commontraining.cpp.

                                             {
  // Assemble the usage text. The program name is only available when at
  // least one argument was passed.
  std::string usage;
  if (*argc) {
    const char *program = (*argv)[0];
    usage += program;
    usage += " -v | --version | ";
    usage += program;
  }
  usage += " [.tr files ...]";
  tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);

  // Copy the clustering flags into the global Config, limiting each one to
  // the range [0, 1].
  auto clamp_unit = [](double value) { return std::max(0.0, std::min(1.0, value)); };
  Config.MinSamples = clamp_unit(double(FLAGS_clusterconfig_min_samples_fraction));
  Config.MaxIllegal = clamp_unit(double(FLAGS_clusterconfig_max_illegal));
  Config.Independence = clamp_unit(double(FLAGS_clusterconfig_independence));
  Config.Confidence = clamp_unit(double(FLAGS_clusterconfig_confidence));

  // A config file, when named on the command line, supplies further
  // parameters.
  if (!FLAGS_configfile.empty()) {
    tesseract::ParamUtils::ReadParamsFile(
        FLAGS_configfile.c_str(), tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, ccutil.params());
  }
}

◆ ParseBoxFileStr()

TESS_API bool tesseract::ParseBoxFileStr	(	const char *	boxfile_str,
		int *	page_number,
		std::string &	utf8_str,
		TBOX *	bounding_box
	)

Definition at line 205 of file boxread.cpp.

                                         {
  // Parses one box-file line: a UTF-8 label followed by x_min, y_min, x_max,
  // y_max and a page number, separated by ASCII space or tab.
  //
  // Outputs:
  //   *page_number  - reset to 0 before the numeric fields are read.
  //   utf8_str      - the label; cleared on entry, set only on success.
  //   *bounding_box - reset to an empty TBOX on entry, set only on success.
  // Returns false for an empty string, for x_max < x_min or y_max < y_min,
  // and for a label that is not valid UTF-8; true otherwise.
  *bounding_box = TBOX(); // Initialize it to empty.
  utf8_str = "";
  char uch[kBoxReadBufSize];
  const char *buffptr = boxfile_str;
  // Read the unichar without messing up on Tibetan.
  // According to issue 253 the utf-8 surrogates 85 and A0 are treated
  // as whitespace by sscanf, so it is more reliable to just find
  // ascii space and tab.
  int uch_len = 0;
  // Skip unicode file designation (UTF-8 byte order mark), if present.
  // The && chain stops at the first mismatch, so a short string is never
  // read past its terminator.
  const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
  if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
    buffptr += 3;
  }
  // Allow a single blank as the UTF-8 string. Check for empty string and
  // then blindly eat the first character.
  if (*buffptr == '\0') {
    return false;
  }
  do {
    uch[uch_len++] = *buffptr++;
  } while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
           uch_len < kBoxReadBufSize - 1);
  uch[uch_len] = '\0';
  // Step over the separator that ended the label.
  if (*buffptr != '\0') {
    ++buffptr;
  }
  // The coordinates start as an inverted box. The range check below is
  // presumably meant to reject lines whose coordinates were not parsed.
  // NOTE(review): a failed stream extraction may overwrite its target, so a
  // partially specified line is not guaranteed to be rejected; consider
  // testing the stream state explicitly.
  int x_min = INT_MAX;
  int y_min = INT_MAX;
  int x_max = INT_MIN;
  int y_max = INT_MIN;
  *page_number = 0;
  std::stringstream stream(buffptr);
  // Parse numbers independently of the global locale.
  stream.imbue(std::locale::classic());
  stream >> x_min;
  stream >> y_min;
  stream >> x_max;
  stream >> y_max;
  stream >> *page_number;
  if (x_max < x_min || y_max < y_min) {
    tprintf("Bad box coordinates in boxfile string! %s\n", boxfile_str);
    return false;
  }
  // Test for long space-delimited string label.
  if (strcmp(uch, kMultiBlobLabelCode) == 0 && (buffptr = strchr(buffptr, '#')) != nullptr) {
    strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
    uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
    chomp_string(uch);
    uch_len = strlen(uch);
  }
  // Validate UTF8 by making unichars with it.
  int used = 0;
  while (used < uch_len) {
    tesseract::UNICHAR ch(uch + used, uch_len - used);
    int new_used = ch.utf8_len();
    if (new_used == 0) {
      // Go through unsigned char so a byte >= 0x80 prints as two hex digits
      // instead of being sign-extended where plain char is signed.
      tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", uch + used,
              static_cast<unsigned>(static_cast<unsigned char>(uch[used])), used + 1);
      return false;
    }
    used += new_used;
  }
  utf8_str = uch;
  // x_min <= x_max and y_min <= y_max hold here because of the range check
  // above, so no reordering of the coordinates is needed.
  bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
  return true; // Successfully read a box.
}

◆ ParseCommandLineFlags()

TESS_COMMON_TRAINING_API void tesseract::ParseCommandLineFlags	(	const char *	usage,
		int *	argc,
		char ***	argv,
		const bool	remove_flags
	)

Definition at line 168 of file commandlineflags.cpp.

                                                                                                {
  // Parses leading "-flag" / "--flag" arguments and stores their values via
  // the Set*FlagValue helpers. Parsing stops at the first argument that does
  // not start with a hyphen.
  //
  //   usage        - text printed in the usage/help messages.
  //   argc, argv   - in/out; rewritten at the end when remove_flags is true.
  //   remove_flags - when true, the consumed flag arguments are dropped from
  //                  argv/argc and the program name is kept in front of the
  //                  remaining arguments.
  //
  // The process exits with status 0 after printing usage, help or the
  // version, and with status 1 on any malformed or unknown flag.
  if (*argc == 1) {
    // Only the program name was given: show usage and all flags.
    printf("USAGE: %s\n", usage);
    PrintCommandLineFlags();
    exit(0);
  }
 
  if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) {
    printf("%s\n", TessBaseAPI::Version());
    exit(0);
  }
 
  int i;
  for (i = 1; i < *argc; ++i) {
    const char *current_arg = (*argv)[i];
    // If argument does not start with a hyphen then break.
    if (current_arg[0] != '-') {
      break;
    }
    // Position current_arg after starting hyphens. We treat a sequence of
    // one or two consecutive hyphens identically.
    ++current_arg;
    if (current_arg[0] == '-') {
      ++current_arg;
    }
    // If this is asking for usage, print the help message and abort.
    if (!strcmp(current_arg, "help")) {
      printf("Usage:\n  %s [OPTION ...]\n\n", usage);
      PrintCommandLineFlags();
      exit(0);
    }
    // Find the starting position of the value if it was specified in this
    // string.
    const char *equals_position = strchr(current_arg, '=');
    const char *rhs = nullptr;
    if (equals_position != nullptr) {
      rhs = equals_position + 1;
    }
    // Extract the flag name.
    std::string lhs;
    if (equals_position == nullptr) {
      lhs = current_arg;
    } else {
      lhs.assign(current_arg, equals_position - current_arg);
    }
    // An empty name arises from "-", "--" or an argument such as "--=x".
    if (!lhs.length()) {
      tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
      exit(1);
    }
 
    // Find the flag name in the list of global flags.
    // int32_t flag
    int32_t int_val;
    if (IntFlagExists(lhs.c_str(), &int_val)) {
      if (rhs != nullptr) {
        if (!strlen(rhs)) {
          // Bad input of the format --int_flag=
          tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
          exit(1);
        }
        if (!SafeAtoi(rhs, &int_val)) {
          tprintf("ERROR: Could not parse int from %s in flag %s\n", rhs, (*argv)[i]);
          exit(1);
        }
      } else {
        // We need to parse the next argument
        if (i + 1 >= *argc) {
          tprintf("ERROR: Could not find value argument for flag %s\n", lhs.c_str());
          exit(1);
        } else {
          // The value is the following argument; consume it as well.
          ++i;
          if (!SafeAtoi((*argv)[i], &int_val)) {
            tprintf("ERROR: Could not parse int32_t from %s\n", (*argv)[i]);
            exit(1);
          }
        }
      }
      SetIntFlagValue(lhs.c_str(), int_val);
      continue;
    }
 
    // double flag
    double double_val;
    if (DoubleFlagExists(lhs.c_str(), &double_val)) {
      if (rhs != nullptr) {
        if (!strlen(rhs)) {
          // Bad input of the format --double_flag=
          tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
          exit(1);
        }
        if (!SafeAtod(rhs, &double_val)) {
          tprintf("ERROR: Could not parse double from %s in flag %s\n", rhs, (*argv)[i]);
          exit(1);
        }
      } else {
        // We need to parse the next argument
        if (i + 1 >= *argc) {
          tprintf("ERROR: Could not find value argument for flag %s\n", lhs.c_str());
          exit(1);
        } else {
          // The value is the following argument; consume it as well.
          ++i;
          if (!SafeAtod((*argv)[i], &double_val)) {
            tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]);
            exit(1);
          }
        }
      }
      SetDoubleFlagValue(lhs.c_str(), double_val);
      continue;
    }
 
    // Bool flag. Allow input forms --flag (equivalent to --flag=true),
    // --flag=false, --flag=true, --flag=0 and --flag=1
    // Unlike the other flag types, a bool flag never consumes the following
    // argument as its value.
    bool bool_val;
    if (BoolFlagExists(lhs.c_str(), &bool_val)) {
      if (rhs == nullptr) {
        // --flag form
        bool_val = true;
      } else {
        if (!strlen(rhs)) {
          // Bad input of the format --bool_flag=
          tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
          exit(1);
        }
        if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) {
          bool_val = false;
        } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) {
          bool_val = true;
        } else {
          tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]);
          exit(1);
        }
      }
      SetBoolFlagValue(lhs.c_str(), bool_val);
      continue;
    }
 
    // string flag
    // An empty value ("--flag=") is accepted here, unlike for the numeric
    // and bool flags above.
    const char *string_val;
    if (StringFlagExists(lhs.c_str(), &string_val)) {
      if (rhs != nullptr) {
        string_val = rhs;
      } else {
        // Pick the next argument
        if (i + 1 >= *argc) {
          tprintf("ERROR: Could not find string value for flag %s\n", lhs.c_str());
          exit(1);
        } else {
          string_val = (*argv)[++i];
        }
      }
      SetStringFlagValue(lhs.c_str(), string_val);
      continue;
    }
 
    // Flag was not found. Exit with an error message.
    tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]);
    exit(1);
  } // for each argv
  if (remove_flags) {
    // i indexes the first unconsumed argument. Copy the program name into the
    // slot just before it, then advance argv/argc so that slot becomes the
    // new argv[0].
    (*argv)[i - 1] = (*argv)[0];
    (*argv) += (i - 1);
    (*argc) -= (i - 1);
  }
}

◆ partition_coords()

int tesseract::partition_coords	(	TBOX	blobcoords[],
		int	blobcount,
		char	partids[],
		int	bestpart,
		int	xcoords[],
		int	ycoords[]
	)

Definition at line 977 of file oldbasel.cpp.

  {
  // Collects one (x, y) sample for every blob whose partition id equals
  // bestpart: x is the midpoint of the box's left and right edges, y is its
  // bottom edge. Returns the number of samples stored in xcoords/ycoords.
  int found = 0;
  for (int index = 0; index < blobcount; ++index) {
    if (partids[index] != bestpart) {
      continue; // blob belongs to another partition
    }
    TBOX &box = blobcoords[index];
    xcoords[found] = (box.left() + box.right()) >> 1;
    ycoords[found] = box.bottom();
    ++found;
  }
  return found;
}

◆ partition_line()

int tesseract::partition_line	(	TBOX	blobcoords[],
		int	blobcount,
		int *	numparts,
		char	partids[],
		int	partsizes[],
		QSPLINE *	spline,
		float	jumplimit,
		float	ydiffs[]
	)

Definition at line 673 of file oldbasel.cpp.

  {
  // Assigns every blob of the row to a partition using choose_partition on
  // its ydiffs value. It walks forward from the start blob returned by
  // get_ydiffs, then backward from that same blob to the start of the row.
  // Outputs: *numparts, partids[] (one id per blob), partsizes[] (member
  // count per partition). Returns the index of the partition with the most
  // members; a later partition wins a tie.
  float partdiffs[MAXPARTS]; // step between parts, maintained by choose_partition

  for (int part = 0; part < MAXPARTS; part++) {
    partsizes[part] = 0;
  }

  const int startx = get_ydiffs(blobcoords, blobcount, spline, ydiffs);
  *numparts = 1;
  int bestpart = -1; // -1 marks the first point of a pass
  float drift = 0.0f;
  float last_delta = 0.0f;

  // Shared per-blob step of both passes: optional debug print, choose the
  // partition, then record it and count the blob.
  auto classify_blob = [&](int index) {
    if (textord_oldbl_debug) {
      tprintf("%d(%d,%d), ", index, blobcoords[index].left(),
              blobcoords[index].bottom());
    }
    bestpart = choose_partition(ydiffs[index], partdiffs, bestpart, jumplimit, &drift,
                                &last_delta, numparts);
    partids[index] = bestpart;
    partsizes[bestpart]++;
  };

  // Forward pass: start blob to end of row.
  for (int index = startx; index < blobcount; index++) {
    classify_blob(index);
  }

  // Backward pass: restart the chooser state. The start blob is classified
  // a second time, so its earlier count in partition 0 is removed first.
  bestpart = -1;
  drift = 0.0f;
  last_delta = 0.0f;
  partsizes[0]--;
  for (int index = startx; index >= 0; index--) {
    classify_blob(index);
  }

  // Find the most populated partition.
  int biggestpart = 0;
  for (int part = 1; part < *numparts; part++) {
    if (partsizes[part] >= partsizes[biggestpart]) {
      biggestpart = part;
    }
  }
  if (textord_oldbl_merge_parts) {
    merge_oldbl_parts(blobcoords, blobcount, partids, partsizes, biggestpart, jumplimit);
  }
  return biggestpart;
}

◆ pick_x_height()

void tesseract::pick_x_height	(	TO_ROW *	row,
		int	modelist[],
		int	lefts[],
		int	rights[],
		STATS *	heightstat,
		int	mode_threshold
	)

Definition at line 1547 of file oldbasel.cpp.

                                       {
  // Chooses row->xheight and row->ascrise from the height modes in modelist.
  //
  // First looks for a pair of modes (x, y) whose ratio y/x lies strictly
  // between 1.2 and 1.8; x is then taken as the x-height and y as the
  // ascender height, each being nudged upward while a suitable neighbouring
  // mode exists. If no such pair is found, modelist[0] is used as a single
  // mode, ascrise is set to 0 and an x-height of 0 is reported as -1.
  int x;
  int y;
  int z;
  float ratio;
  int found_one_bigger = false;
  int best_x_height = 0;
  int best_asc = 0;
  int num_in_best;
 
  for (x = 0; x < MODENUM; x++) {
    for (y = 0; y < MODENUM; y++) {
      /* Check for two modes */
      // Both modes must be non-zero and mode x must have more than
      // mode_threshold entries. In ocropus mode the left/right extents
      // recorded for the two heights must also overlap.
      if (modelist[x] && modelist[y] && heightstat->pile_count(modelist[x]) > mode_threshold &&
          (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
                                        std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
        ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]);
        if (1.2 < ratio && ratio < 1.8) {
          /* Two modes found */
          best_x_height = modelist[x];
          num_in_best = heightstat->pile_count(modelist[x]);
 
          /* Try to get one higher */
          // Repeatedly step the x-height up by one while a mode exists at
          // exactly best_x_height + 1 that keeps the ratio in range and has
          // more than half the count of the original mode.
          // NOTE(review): the overlap test below uses modelist[x] and
          // modelist[y], not modelist[z], so it repeats the outer condition.
          // Confirm whether z was intended.
          do {
            found_one_bigger = false;
            for (z = 0; z < MODENUM; z++) {
              if (modelist[z] == best_x_height + 1 &&
                  (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
                                                std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]);
                if ((1.2 < ratio && ratio < 1.8) &&
                    /* Should be half of best */
                    heightstat->pile_count(modelist[z]) > num_in_best * 0.5) {
                  best_x_height++;
                  found_one_bigger = true;
                  break;
                }
              }
            }
          } while (found_one_bigger);
 
          /* try to get a higher ascender */
 
          best_asc = modelist[y];
          num_in_best = heightstat->pile_count(modelist[y]);
 
          /* Try to get one higher */
          // Same idea for the ascender: accept any larger mode whose ratio to
          // the chosen x-height is in range and whose count exceeds half of
          // mode y's count. The x/y overlap test is repeated here as well.
          do {
            found_one_bigger = false;
            for (z = 0; z < MODENUM; z++) {
              if (modelist[z] > best_asc &&
                  (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
                                                std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height);
                if ((1.2 < ratio && ratio < 1.8) &&
                    /* Should be half of best */
                    heightstat->pile_count(modelist[z]) > num_in_best * 0.5) {
                  best_asc = modelist[z];
                  found_one_bigger = true;
                  break;
                }
              }
            }
          } while (found_one_bigger);
 
          // ascrise is stored as the rise above the x-height, not as an
          // absolute height.
          row->xheight = static_cast<float>(best_x_height);
          row->ascrise = static_cast<float>(best_asc) - best_x_height;
          return;
        }
      }
    }
  }
 
  best_x_height = modelist[0]; /* Single Mode found */
  num_in_best = heightstat->pile_count(best_x_height);
  do {
    /* Try to get one higher */
    found_one_bigger = false;
    for (z = 1; z < MODENUM; z++) {
      /* Should be half of best */
      if ((modelist[z] == best_x_height + 1) &&
          (heightstat->pile_count(modelist[z]) > num_in_best * 0.5)) {
        best_x_height++;
        found_one_bigger = true;
        break;
      }
    }
  } while (found_one_bigger);
 
  row->ascrise = 0.0f;
  row->xheight = static_cast<float>(best_x_height);
  // An x-height of 0 is replaced by -1.
  if (row->xheight == 0) {
    row->xheight = -1.0f;
  }
}

◆ plot_blob_list()

void tesseract::plot_blob_list	(	ScrollView *	win,
		BLOBNBOX_LIST *	list,
		ScrollView::Color	body_colour,
		ScrollView::Color	child_colour
	)

Definition at line 1071 of file blobbox.cpp.

                                                  { // colour of child
  // Calls plot() on every blob in the list, passing through the window and
  // both colours.
  BLOBNBOX_IT blob_it = list;
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    blob_it.data()->plot(win, body_colour, child_colour);
    blob_it.forward();
  }
}

◆ plot_box_list()

void tesseract::plot_box_list	(	ScrollView *	win,
		BLOBNBOX_LIST *	list,
		ScrollView::Color	body_colour
	)

Definition at line 69 of file drawtord.cpp.

  {
  // Plots the bounding box of every blob in the list, with the pen set to
  // body_colour and the brush set to NONE.
  win->Pen(body_colour);
  win->Brush(ScrollView::NONE);

  BLOBNBOX_IT blob_it = list;
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    blob_it.data()->bounding_box().plot(win);
    blob_it.forward();
  }
}

◆ plot_fp_cells()

void tesseract::plot_fp_cells	(	ScrollView *	win,
		ScrollView::Color	colour,
		BLOBNBOX_IT *	blob_it,
		int16_t	pitch,
		int16_t	blob_count,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	projection_scale
	)

Definition at line 309 of file drawtord.cpp.

                                                      {
  // Computes the fixed-pitch segmentation of a word (blob_count blobs
  // starting at blob_it) and draws one vertical line per segmentation point,
  // spanning the bottom and top of the word's bounding box.
  int16_t occupation;    // only handed to check_pitch_sync2; not read here
  FPSEGPT_LIST seg_list; // list of cuts
  FPSEGPT_IT seg_it;

  if (!pitsync_linear_version) {
    check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list);
  } else {
    check_pitch_sync2(blob_it, blob_count, pitch, 2, projection, projection_left, projection_right,
                      projection_scale, occupation, &seg_list, 0, 0);
  }

  // Accumulate the bounding box of the word; this advances blob_it.
  TBOX word_box = blob_it->data()->bounding_box();
  while (blob_count > 0) {
    word_box += box_next(blob_it);
    blob_count--;
  }

  seg_it.set_to_list(&seg_list);
  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    FPSEGPT *cut = seg_it.data();
    // A faked cut switches the pen to white. The change is kept for all
    // following cuts because colour itself is overwritten.
    if (cut->faked) {
      colour = ScrollView::WHITE;
    }
    win->Pen(colour);
    win->Line(cut->position(), word_box.bottom(), cut->position(), word_box.top());
  }
}

◆ plot_fp_cells2()

void tesseract::plot_fp_cells2	(	ScrollView *	win,
		ScrollView::Color	colour,
		TO_ROW *	row,
		FPSEGPT_LIST *	seg_list
	)

Definition at line 353 of file drawtord.cpp.

  {
  // Draws one vertical line per segmentation point in seg_list, spanning the
  // bottom and top of the combined bounding box of the row's blobs.
  FPSEGPT_IT seg_it = seg_list;
  BLOBNBOX_IT blob_it = row->blob_list(); // blobs in row

  // Accumulate the bounding box of the whole row; box_next advances blob_it.
  TBOX word_box = blob_it.data()->bounding_box();
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    word_box += box_next(&blob_it);
  }

  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    FPSEGPT *cut = seg_it.data();
    // A faked cut switches the pen to white. The change is kept for all
    // following cuts because colour itself is overwritten.
    if (cut->faked) {
      colour = ScrollView::WHITE;
    }
    win->Pen(colour);
    win->Line(cut->position(), word_box.bottom(), cut->position(), word_box.top());
  }
}

◆ plot_fp_word()

void tesseract::plot_fp_word	(	TO_BLOCK *	block,
		float	pitch,
		float	nonspace
	)

Definition at line 1730 of file topitch.cpp.

  {
  // For every row of the block, sets min_space, max_nonspace and
  // space_threshold to the midpoint of pitch and nonspace, then plots the
  // resulting word decisions in to_win.
  const auto space_limit = static_cast<int32_t>((pitch + nonspace) / 2);
  const auto cell_pitch = static_cast<int16_t>(pitch);

  TO_ROW_IT row_it = block->get_rows();
  row_it.mark_cycle_pt();
  while (!row_it.cycled_list()) {
    TO_ROW *row = row_it.data();
    row->min_space = space_limit;
    row->max_nonspace = row->min_space;
    row->space_threshold = row->min_space;
    plot_word_decisions(to_win, cell_pitch, row);
    row_it.forward();
  }
}

◆ plot_parallel_row()

void tesseract::plot_parallel_row	(	TO_ROW *	row,
		float	gradient,
		int32_t	left,
		ScrollView::Color	colour,
		FCOORD	rotation
	)

Definition at line 122 of file drawtord.cpp.

  {
  // Plots the blobs of a row in to_win together with two line segments:
  //   - a segment at x = left between the row's max_y and min_y offsets,
  //   - the fitted parallel line from x = left to the right edge of the last
  //     blob, using the supplied gradient and the row's parallel_c().
  // Every point is rotated by `rotation` before drawing.
  //
  // A row without blobs is reported and skipped, as plot_to_row does,
  // instead of reading blob data from an empty list.
  BLOBNBOX_IT it = row->blob_list(); // blobs
  if (it.empty()) {
    tprintf("No blobs in row at %g\n", row->parallel_c());
    return;
  }
  FCOORD plot_pt;                        // point to plot
  auto fleft = static_cast<float>(left); // floating version
  float right;                           // end of row

  it.move_to_last();
  right = it.data()->bounding_box().right();
  plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
  to_win->Pen(colour);
  // Segment at the left edge, between the max_y and min_y offsets.
  plot_pt = FCOORD(fleft, gradient * left + row->max_y());
  plot_pt.rotate(rotation);
  to_win->SetCursor(plot_pt.x(), plot_pt.y());
  plot_pt = FCOORD(fleft, gradient * left + row->min_y());
  plot_pt.rotate(rotation);
  to_win->DrawTo(plot_pt.x(), plot_pt.y());
  // The parallel line itself, from the left edge to the last blob.
  plot_pt = FCOORD(fleft, gradient * left + row->parallel_c());
  plot_pt.rotate(rotation);
  to_win->SetCursor(plot_pt.x(), plot_pt.y());
  plot_pt = FCOORD(right, gradient * right + row->parallel_c());
  plot_pt.rotate(rotation);
  to_win->DrawTo(plot_pt.x(), plot_pt.y());
}

◆ plot_row_cells()

void tesseract::plot_row_cells	(	ScrollView *	win,
		ScrollView::Color	colour,
		TO_ROW *	row,
		float	xshift,
		ICOORDELT_LIST *	cells
	)

Definition at line 387 of file drawtord.cpp.

  {
  // Draws one vertical line per cell boundary in `cells`, shifted by xshift
  // and spanning the bottom and top of the combined bounding box of the
  // row's blobs.
  ICOORDELT_IT cell_it = cells;
  BLOBNBOX_IT blob_it = row->blob_list(); // blobs in row

  // Accumulate the bounding box of the whole row; box_next advances blob_it.
  TBOX word_box = blob_it.data()->bounding_box();
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    word_box += box_next(&blob_it);
  }

  win->Pen(colour);
  cell_it.mark_cycle_pt();
  while (!cell_it.cycled_list()) {
    ICOORDELT *cell = cell_it.data();
    win->Line(cell->x() + xshift, word_box.bottom(), cell->x() + xshift, word_box.top());
    cell_it.forward();
  }
}

◆ plot_to_row()

void tesseract::plot_to_row	(	TO_ROW *	row,
		ScrollView::Color	colour,
		FCOORD	rotation
	)

Definition at line 89 of file drawtord.cpp.

  {
  // Plots the blobs of a row in to_win and draws the row's fitted line
  // (line_m * x + line_c) from the left edge of the first blob to the right
  // edge of the last one. Both end points are rotated by `rotation`.
  BLOBNBOX_IT it = row->blob_list(); // blobs

  if (it.empty()) {
    tprintf("No blobs in row at %g\n", row->parallel_c());
    return;
  }
  const float left = it.data()->bounding_box().left();
  it.move_to_last();
  const float right = it.data()->bounding_box().right();

  plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
  to_win->Pen(colour);

  FCOORD line_start = FCOORD(left, row->line_m() * left + row->line_c());
  line_start.rotate(rotation);
  to_win->SetCursor(line_start.x(), line_start.y());

  FCOORD line_end = FCOORD(right, row->line_m() * right + row->line_c());
  line_end.rotate(rotation);
  to_win->DrawTo(line_end.x(), line_end.y());
}

◆ plot_word_decisions()

void tesseract::plot_word_decisions	(	ScrollView *	win,
		int16_t	pitch,
		TO_ROW *	row
	)

Definition at line 238 of file drawtord.cpp.

  {
  // Plots the blobs of a row, changing colour at every gap wider than
  // row->max_nonspace. Gaps narrower than row->min_space are filled with a
  // rectangle. When pitch > 0 and textord_show_fixed_cuts is set, the
  // fixed-pitch cells of each completed word are drawn too.
  ScrollView::Color colour = ScrollView::MAGENTA;     // current colour
  ScrollView::Color rect_colour = ScrollView::BLACK;  // fuzzy colour
  int32_t prev_x = -INT16_MAX;                        // end of prev blob
  int16_t blob_count = 0;                             // blobs in word
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX_IT start_it = blob_it; // word start

  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX *blob = blob_it.data();
    TBOX blob_box = blob->bounding_box();
    // Horizontal distance from the end of the previous blob.
    const int32_t gap = blob_box.left() - prev_x;

    if (!blob->joined_to_prev() && gap > row->max_nonspace) {
      const bool wide_enough = gap >= row->min_space || gap > row->space_threshold;
      if (wide_enough && blob_count > 0) {
        // A word has ended: draw its cells and start the next word here.
        if (pitch > 0 && textord_show_fixed_cuts) {
          plot_fp_cells(win, colour, &start_it, pitch, blob_count, &row->projection,
                        row->projection_left, row->projection_right,
                        row->xheight * textord_projection_scale);
        }
        blob_count = 0;
        start_it = blob_it;
      }
      // Advance to the next colour, wrapping from MAGENTA back to RED.
      colour = (colour == ScrollView::MAGENTA) ? ScrollView::RED
                                               : static_cast<ScrollView::Color>(colour + 1);
      if (gap < row->min_space) {
        // Gap narrower than min_space: fill it with a rectangle whose colour
        // shows which side of space_threshold it falls on.
        rect_colour =
            (gap > row->space_threshold) ? ScrollView::GOLDENROD : ScrollView::CORAL;
        win->Brush(rect_colour);
        win->Rectangle(prev_x, blob_box.bottom(), blob_box.left(), blob_box.top());
      }
    }
    if (!blob->joined_to_prev()) {
      prev_x = blob_box.right();
    }
    if (blob->cblob() != nullptr) {
      blob->cblob()->plot(win, colour, colour);
    }
    if (!blob->joined_to_prev() && blob->cblob() != nullptr) {
      blob_count++;
    }
  }
  // Cells of the last word in the row.
  if (pitch > 0 && textord_show_fixed_cuts && blob_count > 0) {
    plot_fp_cells(win, colour, &start_it, pitch, blob_count, &row->projection, row->projection_left,
                  row->projection_right, row->xheight * textord_projection_scale);
  }
}

◆ pop()

LIST tesseract::pop ( LIST list )

Definition at line 166 of file oldlist.cpp.

                    {
  // Deletes the first cell of the list and returns the remainder.
  // Popping an empty (null) list returns it unchanged instead of
  // dereferencing a null pointer.
  if (list == nullptr) {
    return list;
  }
  LIST rest = list->list_rest();
  delete list;
  return rest;
}

◆ pre_associate_blobs()

void tesseract::pre_associate_blobs	(	ICOORD	page_tr,
		TO_BLOCK *	block,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 1846 of file makerow.cpp.

  {
  // For every row of the block: merges each blob with the following blobs
  // while their boxes have a major x overlap, then attempts to chop the
  // merged blob. With graphics enabled, and when testing_on and
  // textord_show_final_blobs are both set, the resulting boxes are drawn in
  // to_win using a colour that cycles per row.
#ifndef GRAPHICS_DISABLED
  ScrollView::Color colour = ScrollView::RED; // of boxes
#endif
  BLOBNBOX_IT blob_it;  // iterator
  BLOBNBOX_IT start_it; // iterator
  TO_ROW_IT row_it = block->get_rows();

  // Same rotation with the y component negated.
  FCOORD blob_rotation = FCOORD(rotation.x(), -rotation.y());

  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    // get blobs
    blob_it.set_to_list(row_it.data()->blob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      BLOBNBOX *blob = blob_it.data();
      TBOX blob_box = blob->bounding_box();
      start_it = blob_it; // save start point

      // Absorb following blobs for as long as the next one overlaps.
      while (!blob_it.at_last()) {
        BLOBNBOX *nextblob = blob_it.data_relative(1);
        if (!blob_box.major_x_overlap(nextblob->bounding_box())) {
          break;
        }
        blob->merge(nextblob);           // merge new blob
        blob_box = blob->bounding_box(); // get bigger box
        blob_it.forward();
      }
      // attempt chop
      blob->chop(&start_it, &blob_it, blob_rotation,
                 block->line_size * tesseract::CCStruct::kXHeightFraction * textord_chop_width);
    }
#ifndef GRAPHICS_DISABLED
    if (testing_on && textord_show_final_blobs) {
      if (to_win == nullptr) {
        create_to_win(page_tr);
      }
      to_win->Pen(colour);
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX *shown = blob_it.data();
        TBOX shown_box = shown->bounding_box();
        shown_box.rotate(rotation);
        if (!shown->joined_to_prev()) {
          to_win->Rectangle(shown_box.left(), shown_box.bottom(), shown_box.right(),
                            shown_box.top());
        }
      }
      // Next row gets the next colour, wrapping after MAGENTA.
      colour = static_cast<ScrollView::Color>(colour + 1);
      if (colour > ScrollView::MAGENTA) {
        colour = ScrollView::RED;
      }
    }
#endif
  }
}

◆ PrepareDistortedPix()

Image tesseract::PrepareDistortedPix	(	const Image	pix,
		bool	perspective,
		bool	invert,
		bool	white_noise,
		bool	smooth_noise,
		bool	blur,
		int	box_reduction,
		TRand *	randomizer,
		std::vector< TBOX > *	boxes
	)

Definition at line 179 of file degradeimage.cpp.

                                                 {
  // Returns a degraded copy of pix for synthetic training data. Each enabled
  // random step draws from `randomizer`; the order of those draws is part of
  // the behaviour and must not change.
  Image distorted = pix.copy();

  // Gaussian noise, optionally smoothed by a block convolution.
  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
    // TODO(rays) Cook noise in a more thread-safe manner than rand().
    // Attempt to make the sequences reproducible.
    srand(randomizer->IntRand());
    Image noisy = pixAddGaussianNoise(distorted, 8.0);
    distorted.destroy();
    if (!smooth_noise) {
      distorted = noisy;
    } else {
      distorted = pixBlockconv(noisy, 1, 1);
      noisy.destroy();
    }
  }

  // Blur.
  if (blur && randomizer->SignedRand(1.0) > 0.0) {
    Image blurred = pixBlockconv(distorted, 1, 1);
    distorted.destroy();
    distorted = blurred;
  }

  if (perspective) {
    GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
  }

  // Scale the boxes down by box_reduction, keeping every box at least one
  // unit wide.
  if (boxes != nullptr) {
    for (TBOX &box : *boxes) {
      box.scale(1.0f / box_reduction);
      if (box.width() <= 0) {
        box.set_right(box.left() + 1);
      }
    }
  }

  // Inversion is applied when the draw is negative.
  if (invert && randomizer->SignedRand(1.0) < 0) {
    pixInvert(distorted, distorted);
  }
  return distorted;
}

◆ print_block_counts()

void tesseract::print_block_counts	(	TO_BLOCK *	block,
		int32_t	block_index
	)

Definition at line 575 of file topitch.cpp.

  {
  // Prints the fixed-pitch and proportional vote counts of a block on one
  // line. A group of counts is tagged " (Wrongly)" when it is non-zero while
  // the opposite textord_blocksall_* setting is enabled.
  int32_t def_fixed = 0;
  int32_t def_prop = 0;
  int32_t maybe_fixed = 0;
  int32_t maybe_prop = 0;
  int32_t dunno = 0;
  int32_t corr_fixed = 0;
  int32_t corr_prop = 0;

  count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
                    dunno);

  const bool any_fixed = def_fixed || maybe_fixed || corr_fixed;
  const bool any_prop = def_prop || maybe_prop || corr_prop;

  tprintf("Block %d has (%d,%d,%d)", block_index, def_fixed, maybe_fixed, corr_fixed);
  if (textord_blocksall_prop && any_fixed) {
    tprintf(" (Wrongly)");
  }
  tprintf(" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
  if (textord_blocksall_fixed && any_prop) {
    tprintf(" (Wrongly)");
  }
  tprintf(" prop, %d dunno\n", dunno);
}

◆ print_pitch_sd()

void tesseract::print_pitch_sd	(	TO_ROW *	row,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	space_size,
		float	initial_pitch
	)

Definition at line 1535 of file topitch.cpp.

  {
  // Prints pitch-sync diagnostics for one row in two passes:
  //  1. word by word (words are split at gaps of at least space_size), with
  //     the per-word results accumulated into a single figure;
  //  2. over the whole row in one call to check_pitch_sync2().
  // Each pass prints an 'F'/'P' verdict obtained by comparing its result with
  // textord_words_pitchsd_threshold * initial_pitch. Returns without printing
  // anything when the row has no blobs.
  const char *res2;   // pitch result
  int16_t occupation; // used cells
  float sp_sd;        // space sd
                      // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX_IT start_it;     // start of word
  BLOBNBOX_IT row_start;    // start of row
  int16_t blob_count;       // no of blobs
  int16_t total_blob_count; // total blobs in line
  TBOX blob_box;            // bounding box
  TBOX prev_box;            // of super blob
  int32_t prev_right;       // of word sync
  int scale_factor;         // on scores for big words
  int32_t sp_count;         // spaces
  FPSEGPT_LIST seg_list;    // char cells
  FPSEGPT_IT seg_it;        // iterator
  double sqsum;             // sum of squares
  double spsum;             // of spaces
  double sp_var;            // space error
  double word_sync;         // result for word
  double total_count;       // total cuts
 
  if (blob_it.empty()) {
    return;
  }
  row_start = blob_it;
  total_blob_count = 0;
 
  total_count = 0;
  sqsum = 0;
  sp_count = 0;
  spsum = 0;
  prev_right = -1; // negative means "no previous word yet"
  blob_it = row_start;
  start_it = blob_it;
  blob_count = 0;
  blob_box = box_next(&blob_it); // first blob
  blob_it.mark_cycle_pt();
  // Pass 1: one iteration per word.
  do {
    // Step start_it past the blobs of the previous word; this also counts
    // blob_count back down to zero for the new word.
    for (; blob_count > 0; blob_count--) {
      box_next(&start_it);
    }
    // Collect blobs until the gap to the next blob reaches space_size or the
    // iterator has gone all the way round the list.
    do {
      prev_box = blob_box;
      blob_count++;
      blob_box = box_next(&blob_it);
    } while (!blob_it.cycled_list() && blob_box.left() - prev_box.right() < space_size);
    word_sync = check_pitch_sync2(
        &start_it, blob_count, static_cast<int16_t>(initial_pitch), 2, projection, projection_left,
        projection_right, row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0);
    total_blob_count += blob_count;
    // NOTE(review): seg_it.data() below presumes check_pitch_sync2() left at
    // least one segment in seg_list - confirm.
    seg_it.set_to_list(&seg_list);
    if (prev_right >= 0) {
      // Squared distance of the inter-word gap (previous word's last cut to
      // this word's first cut) from the nearest whole multiple of
      // initial_pitch.
      sp_var = seg_it.data()->position() - prev_right;
      sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
      sp_var *= sp_var;
      spsum += sp_var;
      sp_count++;
    }
    seg_it.move_to_last();
    prev_right = seg_it.data()->position();
    // Optionally weight words by their number of cuts, never by less than 1.
    if (textord_pitch_scalebigwords) {
      scale_factor = (seg_list.length() - 2) / 2;
      if (scale_factor < 1) {
        scale_factor = 1;
      }
    } else {
      scale_factor = 1;
    }
    sqsum += word_sync * scale_factor;
    total_count += (seg_list.length() - 1) * scale_factor;
    seg_list.clear();
  } while (!blob_it.cycled_list());
  // Guard both divisions against an empty denominator.
  sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
  word_sync = total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
  tprintf("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", word_sync, word_sync / initial_pitch, sp_sd,
          word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P');
 
  // Pass 2: sync the whole row in a single call.
  // NOTE(review): start_it is assigned here but not read again in this
  // function.
  start_it = row_start;
  blob_it = row_start;
  word_sync =
      check_pitch_sync2(&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
                        projection, projection_left, projection_right,
                        row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0);
  if (occupation > 1) {
    word_sync /= occupation;
  }
  word_sync = sqrt(word_sync);
 
#ifndef GRAPHICS_DISABLED
  if (textord_show_row_cuts && to_win != nullptr) {
    plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
  }
#endif
  seg_list.clear();
  // Classify the whole-row result into the two-letter code printed as res2:
  // "DF"/"MF" below the pitch-sd threshold, "MP"/"DP" at or above it.
  if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
    if (word_sync < textord_words_def_fixed * initial_pitch && !row->all_caps) {
      res2 = "DF";
    } else {
      res2 = "MF";
    }
  } else {
    res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
  }
  tprintf(
      "row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, "
      "all_caps=%d\n",
      word_sync, word_sync / initial_pitch,
      word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', occupation, res2,
      initial_pitch, row->fixed_pitch, row->all_caps);
}

◆ print_ratings_list()

void tesseract::print_ratings_list	(	const char *	msg,
		BLOB_CHOICE_LIST *	ratings,
		const UNICHARSET &	current_unicharset
	)

print_ratings_list

Send all the ratings out to the logfile.

Parameters

msg	intro message
ratings	list of ratings
current_unicharset	unicharset that can be used for id-to-unichar conversion

Definition at line 804 of file ratngs.cpp.

                                                              {
  // Nothing to list: report that on a single line and stop.
  if (ratings->empty()) {
    tprintf("%s:<none>\n", msg);
    return;
  }
  // The intro line is only printed for a non-empty message.
  if (msg[0] != '\0') {
    tprintf("%s\n", msg);
  }
  BLOB_CHOICE_IT choice_it;
  choice_it.set_to_list(ratings);
  choice_it.mark_cycle_pt();
  while (!choice_it.cycled_list()) {
    choice_it.data()->print(&current_unicharset);
    // Newline between entries; the newline after the last one is printed below.
    if (!choice_it.at_last()) {
      tprintf("\n");
    }
    choice_it.forward();
  }
  tprintf("\n");
  fflush(stdout);
}

◆ PrintSegmentationStats()

void tesseract::PrintSegmentationStats ( BLOCK_LIST * block_list )

Definition at line 407 of file ocrblock.cpp.

                                                    {
  // Running totals for each level of the block/row/word/blob hierarchy.
  int block_total = 0;
  int row_total = 0;
  int word_total = 0;
  int blob_total = 0;

  BLOCK_IT blocks(block_list);
  for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
    ++block_total;
    ROW_IT rows(blocks.data()->row_list());
    for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
      ++row_total;
      // Walk every word of the row and add up its c-blobs.
      WERD_IT words(rows.data()->word_list());
      for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
        ++word_total;
        blob_total += words.data()->cblob_list()->length();
      }
    }
  }
  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", block_total,
          row_total, word_total, blob_total);
}

◆ PrintString32WithUnicodes()

std::string tesseract::PrintString32WithUnicodes ( const std::string & str )

inline

Definition at line 32 of file normstrngs_test.h.

                                                                 {
  std::vector<char32> str32 = UNICHAR::UTF8ToUTF32(str.c_str());
  std::string s = "\"";
  s += "\" " + CodepointList(str32);
  return s;
}

◆ PrintStringVectorWithUnicodes()

std::string tesseract::PrintStringVectorWithUnicodes ( const std::vector< std::string > & glyphs )

inline

Definition at line 39 of file normstrngs_test.h.

                                                                                   {
  std::string result;
  for (const auto &s : glyphs) {
    result += "Glyph:";
    result += PrintString32WithUnicodes(s) + "\n";
  }
  return result;
}

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs	(	int	width,
		int	height,
		TRand *	randomizer,
		float **	im_coeffs,
		float **	box_coeffs
	)

Definition at line 263 of file degradeimage.cpp.

                                         {
  // Builds a random projective distortion for a width x height rectangle.
  // On return *im_coeffs holds the coefficients mapping the distorted corners
  // back to the original corners and *box_coeffs the coefficients for the
  // opposite direction (both produced by getProjectiveXformCoeffs).
  // Returns L_BRING_IN_WHITE or L_BRING_IN_BLACK, chosen from the FN_INCOLOR
  // factor.

  // Setup "from" points: the corners of the undistorted rectangle.
  Pta *src_pts = ptaCreate(4);
  ptaAddPt(src_pts, 0.0f, 0.0f);
  ptaAddPt(src_pts, width, 0.0f);
  ptaAddPt(src_pts, width, height);
  ptaAddPt(src_pts, 0.0f, height);
  // Extract factors from pseudo-random sequence.
  // Zero-initialised so that no element is ever read while indeterminate.
  float factors[FN_NUM_FACTORS] = {};
  float shear = 0.0f; // Shear is signed.
  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
    if (i == FN_SHEAR) {
      // Shear is signed, and squared (keeping the sign) to make wild values
      // rarer.
      shear = randomizer->SignedRand(0.5 / 3.0);
      shear = shear >= 0.0 ? shear * shear : -shear * shear;
      // Keep the sheared points within the original rectangle.
      if (shear < -factors[FN_X0]) {
        shear = -factors[FN_X0];
      }
      if (shear > factors[FN_X1]) {
        shear = factors[FN_X1];
      }
      factors[i] = shear;
    } else if (i == FN_INCOLOR) {
      // This factor was previously never assigned, yet it is compared with
      // 0.5 in the return statement below. Draw it from the same sequence,
      // unscaled and unsquared, so the comparison is well defined.
      // NOTE(review): this consumes one extra value from randomizer.
      factors[i] = fabs(randomizer->SignedRand(1.0));
    } else {
      // Corner offsets: scaled, then squared to make wild values rarer.
      factors[i] = fabs(randomizer->SignedRand(1.0));
      if (i <= FN_Y3) {
        factors[i] *= 5.0 / 8.0;
      } else {
        factors[i] *= 0.5;
      }
      factors[i] *= factors[i];
    }
  }
  // Setup "to" points: each corner pulled inwards by its factor, with the
  // bottom edge additionally shifted by the shear.
  Pta *dest_pts = ptaCreate(4);
  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, (1 - factors[FN_Y2]) * height);
  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, (1 - factors[FN_Y3]) * height);
  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
  ptaDestroy(&src_pts);
  ptaDestroy(&dest_pts);
  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
}

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 198 of file publictypes.h.

                                                     {
  // True for modes in the closed range PSM_AUTO_OSD..PSM_SINGLE_COLUMN.
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_SINGLE_COLUMN;
}

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 192 of file publictypes.h.

                                                   {
  // True for modes in the closed range PSM_AUTO_OSD..PSM_AUTO.
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_AUTO;
}

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 201 of file publictypes.h.

                                                    {
  // True for modes in the closed range PSM_AUTO_OSD..PSM_SINGLE_BLOCK.
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_SINGLE_BLOCK;
}

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int pageseg_mode )

inline

Definition at line 189 of file publictypes.h.

                                                      {
  // Sparse text with OSD is the one mode above PSM_AUTO that also qualifies.
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO;
}

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int pageseg_mode )

inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. Depend critically on the order of elements of PageSegMode. NOTE that arg is an int for compatibility with INT_PARAM.

Definition at line 186 of file publictypes.h.

                                              {
  // Sparse text with OSD is the one mode above PSM_AUTO_OSD that also
  // qualifies.
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int pageseg_mode )

inline

Definition at line 195 of file publictypes.h.

                                         {
  // True for exactly the two sparse-text modes.
  switch (pageseg_mode) {
    case PSM_SPARSE_TEXT:
    case PSM_SPARSE_TEXT_OSD:
      return true;
    default:
      return false;
  }
}

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 204 of file publictypes.h.

                                                    {
  // Either sparse-text mode qualifies regardless of where it sits in the enum.
  if (pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  // Otherwise the mode must lie in the closed range
  // PSM_AUTO_OSD..PSM_SINGLE_LINE.
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_SINGLE_LINE;
}

◆ PTIsImageType()

bool tesseract::PTIsImageType ( PolyBlockType type )

inline

Returns true if PolyBlockType is of image type

Definition at line 75 of file publictypes.h.

                                              {
  // The three image block types.
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}

◆ PTIsLineType()

bool tesseract::PTIsLineType ( PolyBlockType type )

inline

Returns true if PolyBlockType is of horizontal or vertical line type

Definition at line 71 of file publictypes.h.

                                             {
  // Both line orientations count as line types.
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}

◆ PTIsPulloutType()

bool tesseract::PTIsPulloutType ( PolyBlockType type )

inline

Definition at line 87 of file publictypes.h.

                                                {
  // Pull-out blocks may be either image or text.
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}

◆ PTIsTextType()

bool tesseract::PTIsTextType ( PolyBlockType type )

inline

Returns true if PolyBlockType is of text type

Definition at line 80 of file publictypes.h.

                                             {
  // Every block type that is treated as text, including tables and inline
  // equations.
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}

◆ push()

TESS_API LIST tesseract::push	(	LIST	list,
		void *	element
	)

Definition at line 178 of file oldlist.cpp.

                                    {
  // Allocate a new cell holding element and link the existing list after it;
  // the new cell is the head of the returned list.
  LIST cell = new list_rec;
  cell->node = static_cast<LIST>(element);
  set_rest(cell, list);
  return cell;
}

◆ push_back_new()

template<class T >

void tesseract::push_back_new	(	std::vector< T > &	vector,
		const T &	data
	)

Definition at line 418 of file paragraphs.cpp.

                                                        {
  if (std::find(vector.begin(), vector.end(), data) == vector.end()) {
    vector.push_back(data);
  }
}

◆ push_last()

TESS_API LIST tesseract::push_last	(	LIST	list,
		void *	item
	)

Definition at line 192 of file oldlist.cpp.

                                      {
  // Empty list: the new single-cell list is the whole result.
  if (list == NIL_LIST) {
    return push(NIL_LIST, item);
  }
  // Otherwise hang a new single-cell list off the current tail and hand back
  // the unchanged head.
  LIST tail = last(list);
  tail->next = push(NIL_LIST, item);
  return list;
}

◆ QueryInSearch()

int tesseract::QueryInSearch ( KDTREE * tree )

◆ read_info()

bool tesseract::read_info	(	TFile *	f,
		FontInfo *	fi
	)

Definition at line 143 of file fontinfo.cpp.

                                       {
  // Reads a length-prefixed font name followed by the font properties into
  // *fi. Returns false as soon as any read fails.
  uint32_t size;
  if (!f->DeSerialize(&size)) {
    return false;
  }
  // size comes straight from the file. Reject the one value for which
  // size + 1 would wrap to zero in 32-bit arithmetic and leave no room for
  // the name or its terminator.
  if (size == UINT32_MAX) {
    return false;
  }
  // Value-initialise the buffer so that fi->name is a valid NUL-terminated
  // string even if the read below fails part-way through.
  char *font_name = new char[static_cast<size_t>(size) + 1]();
  fi->name = font_name;
  if (!f->DeSerialize(font_name, size)) {
    return false;
  }
  return f->DeSerialize(&fi->properties);
}

◆ read_spacing_info()

bool tesseract::read_spacing_info	(	TFile *	f,
		FontInfo *	fi
	)

Definition at line 163 of file fontinfo.cpp.

                                               {
  // Reads the spacing vector of a font: a count followed by one record per
  // entry. Returns false on the first failed read.
  int32_t vec_size, kern_count;
  if (!f->DeSerialize(&vec_size)) {
    return false;
  }
  ASSERT_HOST(vec_size >= 0);
  if (vec_size == 0) {
    return true;
  }
  fi->init_spacing(vec_size);
  for (int index = 0; index < vec_size; ++index) {
    auto *spacing = new FontSpacingInfo();
    const bool header_ok = f->DeSerialize(&spacing->x_gap_before) &&
                           f->DeSerialize(&spacing->x_gap_after) &&
                           f->DeSerialize(&kern_count);
    if (!header_ok) {
      delete spacing;
      return false;
    }
    if (kern_count < 0) { // indication of a nullptr entry in fi->spacing_vec
      delete spacing;
      continue;
    }
    // The kerning arrays are only present when kern_count is positive.
    if (kern_count > 0) {
      const bool kerns_ok = f->DeSerialize(spacing->kerned_unichar_ids) &&
                            f->DeSerialize(spacing->kerned_x_gaps);
      if (!kerns_ok) {
        delete spacing;
        return false;
      }
    }
    fi->add_spacing(index, spacing);
  }
  return true;
}

◆ read_unlv_file()

bool tesseract::read_unlv_file	(	std::string &	name,
		int32_t	xsize,
		int32_t	ysize,
		BLOCK_LIST *	blocks
	)

Definition at line 36 of file blread.cpp.

  {
  BLOCK_IT block_it = blocks; // appends the blocks that are read
  int x;                      // current top-down coords
  int y;
  int width;                  // of current block
  int height;

  // The extension is appended to the caller's string in place.
  name += UNLV_EXT;
  FILE *pdfp = fopen(name.c_str(), "rb");
  if (pdfp == nullptr) {
    return false; // didn't read one
  }
  // One rectangle per record; the trailing token of each record is skipped.
  while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
    // File coordinates are top-down, so flip y against the page height.
    BLOCK *rect_block = new BLOCK(name.c_str(), true, 0, 0, static_cast<int16_t>(x),
                                  static_cast<int16_t>(ysize - y - height),
                                  static_cast<int16_t>(x + width),
                                  static_cast<int16_t>(ysize - y));
    block_it.add_to_end(rect_block);
  }
  fclose(pdfp);
  tprintf("UZN file %s loaded.\n", name.c_str());
  return true;
}

◆ ReadAdaptedClass()

ADAPT_CLASS_STRUCT * tesseract::ReadAdaptedClass ( TFile * fp )

Read an adapted class description from file and return a ptr to the adapted class.

Parameters

fp	open file to read adapted class from

Returns: Ptr to new adapted class.

Note: Globals: none

Definition at line 186 of file adaptive.cpp.

                                                {
  // Rebuilds an ADAPT_CLASS_STRUCT from fp. The reads below must stay in this
  // exact order: raw class struct, permanent proto/config bit vectors, the
  // temporary protos, then one config per entry.
  // NOTE(review): none of the FRead() return values are checked, so a short
  // or corrupt file is not detected here.
  int NumTempProtos;
  int NumConfigs;
  int i;
  ADAPT_CLASS_STRUCT *Class;
 
  /* first read high level adapted class structure */
  // The struct is filled with raw bytes from the file; PermProtos,
  // PermConfigs and TempProtos are re-created below, and Config[i] is
  // re-created only for i < NumConfigs.
  Class = new ADAPT_CLASS_STRUCT;
  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
 
  /* then read in the definitions of the permanent protos and configs */
  Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
  Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
  fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
  fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
 
  /* then read in the list of temporary protos */
  fp->FRead(&NumTempProtos, sizeof(int), 1);
  Class->TempProtos = NIL_LIST;
  for (i = 0; i < NumTempProtos; i++) {
    auto TempProto = new TEMP_PROTO_STRUCT;
    fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
    // push_last keeps the protos in file order.
    Class->TempProtos = push_last(Class->TempProtos, TempProto);
  }
 
  /* then read in the adapted configs */
  // NOTE(review): NumConfigs is used to index Class->Config without a range
  // check - confirm it is bounded by the size of that array.
  fp->FRead(&NumConfigs, sizeof(int), 1);
  for (i = 0; i < NumConfigs; i++) {
    // The PermConfigs bit selects which kind of config is stored for slot i.
    if (test_bit(Class->PermConfigs, i)) {
      Class->Config[i].Perm = ReadPermConfig(fp);
    } else {
      Class->Config[i].Temp = ReadTempConfig(fp);
    }
  }
 
  return (Class);
 
} /* ReadAdaptedClass */

◆ ReadAllBoxes()

bool tesseract::ReadAllBoxes	(	int	target_page,
		bool	skip_blanks,
		const char *	filename,
		std::vector< TBOX > *	boxes,
		std::vector< std::string > *	texts,
		std::vector< std::string > *	box_texts,
		std::vector< int > *	pages
	)

Definition at line 76 of file boxread.cpp.

                                         {
  std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | std::ios::binary);
  if (input.fail()) {
    tprintf("Cannot read box data from '%s'.\n", BoxFileName(filename).c_str());
    tprintf("Does it exists?\n");
    return false;
  }
  std::vector<char> box_data(std::istreambuf_iterator<char>(input), {});
  if (box_data.empty()) {
    tprintf("No box data found in '%s'.\n", BoxFileName(filename).c_str());
    return false;
  }
  // Convert the array of bytes to a string, so it can be used by the parser.
  box_data.push_back('\0');
  return ReadMemBoxes(target_page, skip_blanks, &box_data[0],
                      /*continue_on_failure*/ true, boxes, texts, box_texts, pages);
}

◆ ReadCharDescription()

TESS_API CHAR_DESC_STRUCT * tesseract::ReadCharDescription	(	const FEATURE_DEFS_STRUCT &	FeatureDefs,
		FILE *	File
	)

Read a character description from File, and return a data structure containing this information. The data is formatted as follows:

  NumberOfSets
          ShortNameForSet1 Set1
          ShortNameForSet2 Set2
          ...

Globals:

none

Parameters

FeatureDefs	definitions of feature types/extractors
File	open text file to read character description from

Returns: Character description read from File.

Definition at line 172 of file featdefs.cpp.

                                                                                          {
  // Reads "NumberOfSets" followed by that many "ShortName FeatureSet" pairs
  // from File and stores each set in the slot of its feature type.
  // Aborts through ASSERT_HOST on a missing count, a count outside
  // [0, FeatureDefs.NumFeatureTypes], or a missing short name.
  int NumSetsToRead;
  char ShortName[FEAT_NAME_SIZE];
  int Type;
 
  ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1);
  ASSERT_HOST(NumSetsToRead >= 0);
  ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes);
 
  auto CharDesc = new CHAR_DESC_STRUCT(FeatureDefs);
  for (; NumSetsToRead > 0; NumSetsToRead--) {
    // Without this check a truncated file left ShortName uninitialised and
    // the lookup below ran on garbage.
    // NOTE(review): the %s conversion is still unbounded; a name of
    // FEAT_NAME_SIZE or more characters would overrun ShortName.
    ASSERT_HOST(tfscanf(File, "%s", ShortName) == 1);
    Type = ShortNameToFeatureType(FeatureDefs, ShortName);
    CharDesc->FeatureSets[Type] = ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]);
  }
  return CharDesc;
}

◆ ReadFeatureSet()

FEATURE_SET tesseract::ReadFeatureSet	(	FILE *	File,
		const FEATURE_DESC_STRUCT *	FeatureDesc
	)

Create a new feature set of the specified type and read in the features from File. The correct text representation for a feature set is an integer which specifies the number (N) of features in a set followed by a list of N feature descriptions.

Parameters

File	open text file to read new feature set from
FeatureDesc	specifies type of feature to read from File

Returns: New feature set read from File.

Definition at line 82 of file ocrfeatures.cpp.

                                                                               {
  // The set starts with its feature count, which must parse and be
  // non-negative.
  int NumFeatures;
  ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1);
  ASSERT_HOST(NumFeatures >= 0);

  // Read exactly NumFeatures features, adding each to the new set in turn.
  auto new_set = new FEATURE_SET_STRUCT(NumFeatures);
  for (int remaining = NumFeatures; remaining > 0; --remaining) {
    AddFeature(new_set, ReadFeature(File, FeatureDesc));
  }
  return new_set;
}

◆ ReadFile()

TESS_UNICHARSET_TRAINING_API std::string tesseract::ReadFile	(	const std::string &	filename,
		FileReader	reader
	)

Definition at line 63 of file lang_model_helpers.cpp.

                                                                 {
  if (filename.empty()) {
    return std::string();
  }
  std::vector<char> data;
  bool read_result;
  if (reader == nullptr) {
    read_result = LoadDataFromFile(filename.c_str(), &data);
  } else {
    read_result = (*reader)(filename.c_str(), &data);
  }
  if (read_result) {
    return std::string(&data[0], data.size());
  }
  tprintf("Failed to read data from: %s\n", filename.c_str());
  return std::string();
}

◆ ReadMemBoxes()

TESS_API bool tesseract::ReadMemBoxes	(	int	target_page,
		bool	skip_blanks,
		const char *	box_data,
		bool	continue_on_failure,
		std::vector< TBOX > *	boxes,
		std::vector< std::string > *	texts,
		std::vector< std::string > *	box_texts,
		std::vector< int > *	pages
	)

Definition at line 97 of file boxread.cpp.

                                                                          {
  std::string box_str(box_data);
  std::vector<std::string> lines = split(box_str, '\n');
  if (lines.empty()) {
    return false;
  }
  int num_boxes = 0;
  for (auto &line : lines) {
    int page = 0;
    std::string utf8_str;
    TBOX box;
    if (!ParseBoxFileStr(line.c_str(), &page, utf8_str, &box)) {
      if (continue_on_failure) {
        continue;
      } else {
        return false;
      }
    }
    if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) {
      continue;
    }
    if (target_page >= 0 && page != target_page) {
      continue;
    }
    if (boxes != nullptr) {
      boxes->push_back(box);
    }
    if (texts != nullptr) {
      texts->push_back(utf8_str);
    }
    if (box_texts != nullptr) {
      std::string full_text;
      MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
      box_texts->push_back(full_text);
    }
    if (pages != nullptr) {
      pages->push_back(page);
    }
    ++num_boxes;
  }
  return num_boxes > 0;
}

◆ ReadNextBox() [1/2]

TESS_API bool tesseract::ReadNextBox	(	int *	line_number,
		FILE *	box_file,
		std::string &	utf8_str,
		TBOX *	bounding_box
	)

Definition at line 153 of file boxread.cpp.

                                                                                            {
  return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box);
}

◆ ReadNextBox() [2/2]

TESS_API bool tesseract::ReadNextBox	(	int	target_page,
		int *	line_number,
		FILE *	box_file,
		std::string &	utf8_str,
		TBOX *	bounding_box
	)

Definition at line 160 of file boxread.cpp.

                                     {
  // Reads lines from box_file until one parses as a box on the wanted page.
  // Returns true with utf8_str/bounding_box filled in, or false at end of
  // file, in which case box_file has been closed.
  int page = 0;
  char buff[kBoxReadBufSize]; // boxfile read buffer

  while (fgets(buff, sizeof(buff) - 1, box_file)) {
    ++(*line_number);

    char *text = buff;
    const auto *bytes = reinterpret_cast<const unsigned char *>(text);
    const bool has_bom = bytes[0] == 0xef && bytes[1] == 0xbb && bytes[2] == 0xbf;
    if (has_bom) {
      text += 3; // Skip unicode file designation.
    }
    // Skip empty lines and lines that start with a blank (blank boxes).
    switch (*text) {
      case '\n':
      case '\0':
      case ' ':
      case '\t':
        continue;
      default:
        break;
    }
    if (!ParseBoxFileStr(text, &page, utf8_str, bounding_box)) {
      tprintf("Box file format error on line %i; ignored\n", *line_number);
      continue;
    }
    if (target_page < 0 || target_page == page) {
      return true; // Successfully read a box.
    }
    // Otherwise the box is not on the appropriate page: keep reading.
  }
  fclose(box_file);
  return false; // EOF
}

◆ ReadParamDesc()

PARAM_DESC * tesseract::ReadParamDesc	(	TFile *	fp,
		uint16_t	N
	)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters

fp	open text file to read N parameter descriptions from
N	number of parameter descriptions to read

Returns: Pointer to an array of parameter descriptors.

Note: Globals: None

Definition at line 134 of file clusttool.cpp.

                                                 {
  // Reads N lines of the form "<linear|circular> <essential|...> <min> <max>"
  // and returns them as a newly allocated array of N descriptors.
  const int kMaxLineSize = TOKENSIZE * 4;
  auto ParamDesc = new PARAM_DESC[N];
  for (int i = 0; i < N; i++) {
    char line[kMaxLineSize];
    ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
    std::istringstream stream(line);
    // Use "C" locale (needed for float values Min, Max).
    stream.imbue(std::locale::classic());
    PARAM_DESC &desc = ParamDesc[i];
    std::string linear_token;
    std::string essential_token;
    stream >> linear_token >> essential_token >> desc.Min >> desc.Max;
    ASSERT_HOST(!stream.fail());
    // Only the first letter of each keyword is inspected.
    desc.Circular = (linear_token[0] == 'c');
    desc.NonEssential = (essential_token[0] != 'e');
    // Derived values, computed once here.
    desc.Range = desc.Max - desc.Min;
    desc.HalfRange = desc.Range / 2;
    desc.MidRange = (desc.Max + desc.Min) / 2;
  }
  return ParamDesc;
}

◆ ReadPermConfig()

PERM_CONFIG_STRUCT * tesseract::ReadPermConfig ( TFile * fp )

Read a permanent configuration description from file and return a ptr to it.

Parameters

fp	open file to read permanent config from

Returns: Ptr to new permanent configuration description.

Note: Globals: none

Definition at line 262 of file adaptive.cpp.

                                              {
  // File layout: one byte holding the ambiguity count, that many UNICHAR_IDs,
  // then the font info id.
  auto *perm = new PERM_CONFIG_STRUCT;
  uint8_t ambig_count;
  fp->FRead(&ambig_count, sizeof(ambig_count), 1);

  // One extra slot holds the -1 that terminates the list.
  perm->Ambigs = new UNICHAR_ID[ambig_count + 1];
  fp->FRead(perm->Ambigs, sizeof(UNICHAR_ID), ambig_count);
  perm->Ambigs[ambig_count] = -1;

  fp->FRead(&perm->FontinfoId, sizeof(int), 1);
  return perm;
} /* ReadPermConfig */

◆ ReadPrototype()

PROTOTYPE * tesseract::ReadPrototype	(	TFile *	fp,
		uint16_t	N
	)

This routine reads a textual description of a prototype from the specified file.

Parameters

fp	open text file to read prototype from
N	number of dimensions used in prototype

Returns: Pointer to the newly read prototype, or nullptr if the prototype could not be parsed.

Note: Globals: None

Definition at line 168 of file clusttool.cpp.

                                                {
  // Reads one prototype: a header line "<significance> <style> <samples>",
  // then N mean values, then the variance(s) for the style. Returns the new
  // prototype, or nullptr when the header cannot be read or parsed or when
  // the style is neither spherical nor elliptical.
  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
  int SampleCount;
  int i;
 
  const int kMaxLineSize = TOKENSIZE * 4;
  // Start out empty: if FGets() fails, line is still printed in the error
  // message below and must not be read while uninitialised.
  char line[kMaxLineSize] = "";
  if (fp->FGets(line, kMaxLineSize) == nullptr ||
      sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", sig_token, shape_token,
             &SampleCount) != 3) {
    tprintf("Invalid prototype: %s\n", line);
    return nullptr;
  }
  auto Proto = new PROTOTYPE;
  Proto->Cluster = nullptr;
  // Only the first letter of each token is significant.
  Proto->Significant = (sig_token[0] == 's');
 
  switch (shape_token[0]) {
    case 's':
      Proto->Style = spherical;
      break;
    case 'e':
      Proto->Style = elliptical;
      break;
    case 'a':
      Proto->Style = automatic;
      break;
    default:
      // Unknown styles are reported and then read as elliptical.
      tprintf("Invalid prototype style specification:%s\n", shape_token);
      Proto->Style = elliptical;
  }
 
  ASSERT_HOST(SampleCount >= 0);
  Proto->NumSamples = SampleCount;
 
  Proto->Mean.resize(N);
  ReadNFloats(fp, N, &Proto->Mean[0]);
 
  switch (Proto->Style) {
    case spherical:
      // A single variance is shared by all N dimensions.
      ReadNFloats(fp, 1, &(Proto->Variance.Spherical));
      Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
      Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast<float>(N));
      Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
      Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
      Proto->Distrib.clear();
      break;
    case elliptical:
      // One variance per dimension; the total magnitude is the product of the
      // per-dimension magnitudes.
      Proto->Variance.Elliptical = new float[N];
      ReadNFloats(fp, N, Proto->Variance.Elliptical);
      Proto->Magnitude.Elliptical = new float[N];
      Proto->Weight.Elliptical = new float[N];
      Proto->TotalMagnitude = 1.0;
      for (i = 0; i < N; i++) {
        Proto->Magnitude.Elliptical[i] = 1.0f / sqrt(2.0f * M_PI * Proto->Variance.Elliptical[i]);
        Proto->Weight.Elliptical[i] = 1.0f / Proto->Variance.Elliptical[i];
        Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
      }
      Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
      Proto->Distrib.clear();
      break;
    default:
      // Reached for the automatic style, which has no variance layout here.
      delete Proto;
      tprintf("Invalid prototype style\n");
      return nullptr;
  }
  return Proto;
}

◆ ReadSampleSize()

uint16_t tesseract::ReadSampleSize ( TFile * fp )

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters

fp	open text file to read sample size from

Returns: Sample size

Note: Globals: None

Definition at line 114 of file clusttool.cpp.

                                   {
  // The sample size sits alone on one text line; a missing line, a line that
  // does not parse as an integer, or a value outside [0, MAXSAMPLESIZE]
  // aborts through ASSERT_HOST.
  const int kMaxLineSize = 100;
  char line[kMaxLineSize];
  int SampleSize = 0;

  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
  ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
  ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
  // The range check above precedes the narrowing to the return type.
  return static_cast<uint16_t>(SampleSize);
}

◆ ReadTempConfig()

TEMP_CONFIG_STRUCT * tesseract::ReadTempConfig ( TFile * fp )

Read a temporary configuration description from file and return a ptr to it.

Parameters

fp	open file to read temporary config from

Returns: Ptr to new temporary configuration description.

Note: Globals: none

Definition at line 285 of file adaptive.cpp.

                                              {
  // The fixed-size part of the config is read as raw bytes first.
  auto *temp = new TEMP_CONFIG_STRUCT;
  fp->FRead(temp, sizeof(TEMP_CONFIG_STRUCT), 1);

  // The proto bit vector is then allocated from the size just read and
  // filled from the file.
  temp->Protos = NewBitVector(temp->ProtoVectorSize * BITSINLONG);
  fp->FRead(temp->Protos, sizeof(uint32_t), temp->ProtoVectorSize);
  return temp;
} /* ReadTempConfig */

◆ ReadTrainingSamples()

TESS_COMMON_TRAINING_API void tesseract::ReadTrainingSamples	(	const FEATURE_DEFS_STRUCT &	feature_definitions,
		const char *	feature_name,
		int	max_samples,
		UNICHARSET *	unicharset,
		FILE *	file,
		LIST *	training_samples
	)

This routine reads training samples from a file and places them into the list pointed to by training_samples, a data structure which organizes the samples by FontName and CharName. Nothing is returned; the caller's list is updated in place.

Parameters

file	open text file to read samples from
feature_definitions
feature_name
max_samples
unicharset
training_samples

Definition at line 330 of file commontraining.cpp.

                                                 {
  // Reads every sample in file. Each sample is a header line whose second
  // token is the character label, followed by a character description. The
  // feature set named by feature_name is kept (at most max_samples per label
  // for this call when max_samples > 0) and the other sets are discarded.
  // Exits the process on a malformed header or an oversized unicharset.
  char buffer[2048];
  // Scratch for the label token. It is as large as buffer, so a token scanned
  // out of buffer always fits.
  char token[sizeof(buffer)];
  char unichar[UNICHAR_LEN + 1];
  LABELEDLIST char_sample;
  FEATURE_SET feature_samples;
  uint32_t feature_type = ShortNameToFeatureType(feature_definitions, feature_name);
 
  // Zero out the font_sample_count for all the classes.
  LIST it = *training_samples;
  iterate(it) {
    char_sample = reinterpret_cast<LABELEDLIST>(it->first_node());
    char_sample->font_sample_count = 0;
  }
 
  while (fgets(buffer, 2048, file) != nullptr) {
    if (buffer[0] == '\n') {
      continue;
    }
 
    // The label used to be scanned straight into unichar with an unbounded
    // %s and an unchecked result: a long token overran the buffer and a
    // header without a second token left unichar stale or uninitialised.
    if (sscanf(buffer, "%*s %s", token) != 1) {
      tprintf("Error: Missing unichar in training sample header: %s\n", buffer);
      exit(1);
    }
    size_t label_len = 0;
    while (label_len < static_cast<size_t>(UNICHAR_LEN) && token[label_len] != '\0') {
      unichar[label_len] = token[label_len];
      ++label_len;
    }
    if (token[label_len] != '\0') {
      tprintf("Error: Unichar in training sample header is too long: %s\n", token);
      exit(1);
    }
    unichar[label_len] = '\0';

    if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) {
      unicharset->unichar_insert(unichar);
      if (unicharset->size() > MAX_NUM_CLASSES) {
        tprintf(
            "Error: Size of unicharset in training is "
            "greater than MAX_NUM_CLASSES\n");
        exit(1);
      }
    }
    // Find the list for this label, creating it on first sight.
    char_sample = FindList(*training_samples, unichar);
    if (char_sample == nullptr) {
      char_sample = new LABELEDLISTNODE(unichar);
      *training_samples = push(*training_samples, char_sample);
    }
    auto char_desc = ReadCharDescription(feature_definitions, file);
    feature_samples = char_desc->FeatureSets[feature_type];
    if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
      char_sample->List = push(char_sample->List, feature_samples);
      char_sample->SampleCount++;
      char_sample->font_sample_count++;
    } else {
      delete feature_samples;
    }
    // The wanted set has been kept or deleted above, so delete only the other
    // sets here, and null every slot before char_desc itself is deleted.
    for (size_t i = 0; i < char_desc->NumFeatureSets; i++) {
      if (feature_type != i) {
        delete char_desc->FeatureSets[i];
      }
      char_desc->FeatureSets[i] = nullptr;
    }
    delete char_desc;
  }
} // ReadTrainingSamples

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses	(	std::vector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		int	percentile
	)

Definition at line 1612 of file paragraphs.cpp.

                                                                 {
  // Resets the hypotheses of rows [start, end) via SetUnknown() and re-splits
  // each row's left/right offset into a shared margin (the given percentile of
  // margin + indent over the rows that contain words) plus a per-row indent.
  // The sum margin + indent of every row is left unchanged.
  if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) {
    return;
  }
  // An empty range has nothing to recompute. Returning here also avoids
  // reading (*rows)[start] below when start is not a valid index
  // (e.g. start == end == rows->size()).
  if (start >= end) {
    return;
  }

  int lmin, lmax, rmin, rmax;
  lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_;
  rmin = rmax = (*rows)[start].rmargin_ + (*rows)[start].rindent_;
  // Pass 1: clear hypotheses and find the range of total offsets.
  for (int i = start; i < end; i++) {
    RowScratchRegisters &sr = (*rows)[i];
    sr.SetUnknown();
    if (sr.ri_->num_words == 0) {
      continue;
    }
    UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
    UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
  }
  // Pass 2: histogram the total offsets of the rows that contain words.
  STATS lefts(lmin, lmax);
  STATS rights(rmin, rmax);
  for (int i = start; i < end; i++) {
    RowScratchRegisters &sr = (*rows)[i];
    if (sr.ri_->num_words == 0) {
      continue;
    }
    lefts.add(sr.lmargin_ + sr.lindent_, 1);
    rights.add(sr.rmargin_ + sr.rindent_, 1);
  }
  // The percentile is clipped to [0, 100] and converted to a fraction once.
  const double fraction = ClipToRange(percentile, 0, 100) / 100.0;
  int ignorable_left = lefts.ile(fraction);
  int ignorable_right = rights.ile(fraction);
  // Pass 3: move every row (including rows without words) to the new margins,
  // compensating in the indent.
  for (int i = start; i < end; i++) {
    RowScratchRegisters &sr = (*rows)[i];
    int ldelta = ignorable_left - sr.lmargin_;
    sr.lmargin_ += ldelta;
    sr.lindent_ -= ldelta;
    int rdelta = ignorable_right - sr.rmargin_;
    sr.rmargin_ += rdelta;
    sr.rindent_ -= rdelta;
  }
}

◆ RefreshWordBlobsFromNewBlobs()

void tesseract::RefreshWordBlobsFromNewBlobs	(	BLOCK_LIST *	block_list,
		C_BLOB_LIST *	new_blobs,
		C_BLOB_LIST *	not_found_blobs
	)

Definition at line 474 of file ocrblock.cpp.

                                                                {
  // For every word of every text block in block_list, tries to build a
  // replacement word from new_blobs (WERD::ConstructWerdWithNewBlobs) and
  // swaps it in; words for which no replacement is produced are kept as-is.
  // new_blobs and not_found_blobs are only forwarded to that call here.
  // Now iterate over all the blobs in the segmentation_block_list_, and just
  // replace the corresponding c-blobs inside the werds.
  BLOCK_IT block_it(block_list);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK *block = block_it.data();
    if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
      continue; // Don't touch non-text blocks.
    }
    // Iterate over all rows in the block.
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ROW *row = row_it.data();
      // Iterate over all werds in the row.
      WERD_IT werd_it(row->word_list());
      // The row's words are rebuilt in this temporary list, in order.
      WERD_LIST new_words;
      WERD_IT new_words_it(&new_words);
      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
        // Every word is extracted from the row's list, so exactly one of
        // werd / new_werd ends up in new_words below.
        WERD *werd = werd_it.extract();
        WERD *new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, not_found_blobs);
        if (new_werd) {
          // Insert this new werd into the actual row's werd-list. Remove the
          // existing one.
          new_words_it.add_after_then_move(new_werd);
          delete werd;
        } else {
          // Reinsert the older word back, for lack of better options.
          // This is critical since dropping the words messes up segmentation:
          // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
          new_words_it.add_after_then_move(werd);
        }
      }
      // Get rid of the old word list & replace it with the new one.
      row->word_list()->clear();
      werd_it.move_to_first();
      werd_it.add_list_after(&new_words);
    }
  }
}

◆ reject_blanks()

void tesseract::reject_blanks ( WERD_RES * word )

Definition at line 182 of file reject.cpp.

                                   {
  // Flags every unichar of the best choice whose text starts with a space
  // (an unrecognised blob) as a tess failure in the word's reject map.
  // `blob_index` counts unichars; `byte_offset` is the position of the
  // current unichar's first byte inside the concatenated unichar string.
  int16_t blob_index = 0;
  int16_t byte_offset = 0;

  while (word->best_choice->unichar_string()[byte_offset] != '\0') {
    const bool is_blank = word->best_choice->unichar_string()[byte_offset] == ' ';
    if (is_blank) {
      // rej unrecognised blobs
      word->reject_map[blob_index].setrej_tess_failure();
    }
    // Step over the bytes of this unichar, then move on to the next one.
    byte_offset += word->best_choice->unichar_lengths()[blob_index];
    blob_index += 1;
  }
}

◆ reject_poor_matches()

void tesseract::reject_poor_matches ( WERD_RES * word )

Definition at line 208 of file reject.cpp.

                                         {
  // Marks spaces in the best choice as tess failures and every other unichar
  // whose certainty is below the computed threshold as a poor match.
  auto *choice = word->best_choice;
  const float min_certainty = compute_reject_threshold(choice);

  for (unsigned pos = 0; pos < choice->length(); ++pos) {
    if (choice->unichar_id(pos) == UNICHAR_SPACE) {
      word->reject_map[pos].setrej_tess_failure();
      continue;
    }
    if (choice->certainty(pos) < min_certainty) {
      word->reject_map[pos].setrej_poor_match();
    }
  }
}

◆ reject_whole_page()

void tesseract::reject_whole_page ( PAGE_RES_IT & page_res_it )

Definition at line 363 of file docqual.cpp.

                                                 {
  // Walk the page from its first word and document-reject every word.
  for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
    page_res_it.word()->reject_map.rej_word_doc_rej();
  }
  // Finally flag the page itself: whole page is rejected.
  page_res_it.page_res->rejected = true;
}

◆ remove_edgept()

void tesseract::remove_edgept ( EDGEPT * point )

Definition at line 199 of file split.cpp.

                                  {
  // Unlinks `point` from its doubly linked outline, lets the predecessor
  // span the gap, and deletes the point.
  EDGEPT *before = point->prev;
  EDGEPT *after = point->next;

  // Add point's steps onto prev's steps if they are from the same outline.
  const bool same_outline =
      before->src_outline != nullptr && before->src_outline == point->src_outline;
  if (same_outline) {
    before->step_count += point->step_count;
  }

  // Splice the neighbours together.
  after->prev = before;
  before->next = after;

  // The predecessor's vector now has to reach the successor.
  before->vec.x = after->pos.x - before->pos.x;
  before->vec.y = after->pos.y - before->pos.y;

  delete point;
}

◆ RemoveInsignificantProtos()

TESS_COMMON_TRAINING_API tesseract::LIST tesseract::RemoveInsignificantProtos	(	LIST	ProtoList,
		bool	KeepSigProtos,
		bool	KeepInsigProtos,
		int	N
	)

Definition at line 544 of file commontraining.cpp.

{
  // Builds a new list holding copies of the prototypes selected by
  // KeepSigProtos / KeepInsigProtos, frees the input list, and returns the
  // new list. N is the number of floats in each Elliptical array.

  // Returns a freshly allocated copy of an N-element array, or nullptr when
  // the source pointer is null.
  auto clone_elliptical = [N](const float *source) -> float * {
    if (source == nullptr) {
      return nullptr;
    }
    auto *copy = new float[N];
    for (int k = 0; k < N; k++) {
      copy[k] = source[k];
    }
    return copy;
  };

  LIST kept = NIL_LIST;
  auto cursor = ProtoList;
  iterate(cursor) {
    auto original = reinterpret_cast<PROTOTYPE *>(cursor->first_node());
    const bool keep = original->Significant ? KeepSigProtos : KeepInsigProtos;
    if (keep) {
      auto duplicate = new PROTOTYPE;
      duplicate->Mean = original->Mean;
      duplicate->Significant = original->Significant;
      duplicate->Style = original->Style;
      duplicate->NumSamples = original->NumSamples;
      // The copy is detached from any cluster and starts without distributions.
      duplicate->Cluster = nullptr;
      duplicate->Distrib.clear();

      duplicate->Variance.Elliptical = clone_elliptical(original->Variance.Elliptical);
      duplicate->Magnitude.Elliptical = clone_elliptical(original->Magnitude.Elliptical);
      duplicate->Weight.Elliptical = clone_elliptical(original->Weight.Elliptical);

      duplicate->TotalMagnitude = original->TotalMagnitude;
      duplicate->LogMagnitude = original->LogMagnitude;
      kept = push_last(kept, duplicate);
    }
  }
  // The originals are no longer needed once the selected ones are copied.
  FreeProtoList(&ProtoList);
  return kept;
} /* RemoveInsignificantProtos */

◆ render_blob()

void tesseract::render_blob	(	ScrollView *	window,
		TBLOB *	blob,
		ScrollView::Color	color
	)

Definition at line 71 of file render.cpp.

                                                                         {
  /* No outline */
  if (!blob) {
    return;
  }
 
  render_outline(window, blob->outlines, color);
}

◆ render_edgepts()

void tesseract::render_edgepts	(	ScrollView *	window,
		EDGEPT *	edgept,
		ScrollView::Color	color
	)

Definition at line 86 of file render.cpp.

                                                                               {
  if (!edgept) {
    return;
  }
 
  float x = edgept->pos.x;
  float y = edgept->pos.y;
  EDGEPT *this_edge = edgept;
 
  window->Pen(color);
  window->SetCursor(x, y);
  do {
    this_edge = this_edge->next;
    x = this_edge->pos.x;
    y = this_edge->pos.y;
    window->DrawTo(x, y);
  } while (edgept != this_edge);
}

◆ render_outline()

void tesseract::render_outline	(	ScrollView *	window,
		TESSLINE *	outline,
		ScrollView::Color	color
	)

Definition at line 111 of file render.cpp.

                                                                                  {
  /* No outline */
  if (!outline) {
    return;
  }
  /* Draw Compact outline */
  if (outline->loop) {
    render_edgepts(window, outline->loop, color);
  }
  /* Add on next outlines */
  render_outline(window, outline->next, color);
}

◆ RenderIntFeature()

TESS_API void tesseract::RenderIntFeature	(	ScrollView *	window,
		const INT_FEATURE_STRUCT *	Feature,
		ScrollView::Color	color
	)

This routine renders the specified feature into the given ScrollView window.

Parameters

window	to add feature rendering to
Feature	feature to be rendered
color	color to use for feature rendering

Returns: nothing (the function is declared void); the rendering of Feature is drawn directly into window.

Note: Globals: none

Definition at line 1500 of file intproto.cpp.

                                             {
  float X, Y, Dx, Dy, Length;
 
  window->Pen(color);
  assert(Feature != nullptr);
  assert(color != 0);
 
  X = Feature->X;
  Y = Feature->Y;
  Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE;
  // The -PI has no significant effect here, but the value of Theta is computed
  // using BinaryAnglePlusPi in intfx.cpp.
  Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI);
  Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI);
 
  window->SetCursor(X, Y);
  window->DrawTo(X + Dx, Y + Dy);
} /* RenderIntFeature */

◆ RenderIntProto()

void tesseract::RenderIntProto	(	ScrollView *	window,
		INT_CLASS_STRUCT *	Class,
		PROTO_ID	ProtoId,
		ScrollView::Color	color
	)

This routine extracts the parameters of the specified proto from the class description and draws a rendering of the proto into the given ScrollView window.

Parameters

window	ScrollView instance
Class	class that proto is contained in
ProtoId	id of proto to be rendered
color	color to render proto in

Globals: none

Returns: nothing (the function is declared void); the rendering of the proto is drawn directly into window.

Definition at line 1534 of file intproto.cpp.

                                           {
  INT_PROTO_STRUCT *Proto;
  int ProtoSetIndex;
  int ProtoWordIndex;
  float Length;
  int Xmin, Xmax, Ymin, Ymax;
  float X, Y, Dx, Dy;
  uint32_t ProtoMask;
  int Bucket;
 
  assert(ProtoId >= 0);
  assert(Class != nullptr);
  assert(ProtoId < Class->NumProtos);
  assert(color != 0);
  window->Pen(color);
 
  auto ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
  ProtoSetIndex = IndexForProto(ProtoId);
  Proto = &(ProtoSet->Protos[ProtoSetIndex]);
  Length = (Class->ProtoLengths[ProtoId] * GetPicoFeatureLength() * INT_CHAR_NORM_RANGE);
  ProtoMask = PPrunerMaskFor(ProtoId);
  ProtoWordIndex = PPrunerWordIndexFor(ProtoId);
 
  // find the x and y extent of the proto from the proto pruning table
  Xmin = Ymin = NUM_PP_BUCKETS;
  Xmax = Ymax = 0;
  for (Bucket = 0; Bucket < NUM_PP_BUCKETS; Bucket++) {
    if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_X][Bucket][ProtoWordIndex]) {
      UpdateRange(Bucket, &Xmin, &Xmax);
    }
 
    if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_Y][Bucket][ProtoWordIndex]) {
      UpdateRange(Bucket, &Ymin, &Ymax);
    }
  }
  X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE;
  Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE;
  // The -PI has no significant effect here, but the value of Theta is computed
  // using BinaryAnglePlusPi in intfx.cpp.
  Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);
  Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);
 
  window->SetCursor(X - Dx, Y - Dy);
  window->DrawTo(X + Dx, Y + Dy);
} /* RenderIntProto */

◆ restore_underlined_blobs()

void tesseract::restore_underlined_blobs ( TO_BLOCK * block )

Definition at line 32 of file underlin.cpp.

  {
  // Processes every blob in block->underlines: each one is removed from that
  // list, matched to a row, and chopped at the cells reported by
  // find_underlined_blobs. Pieces produced by the first split_to_blob call of
  // a cell and by the final right-hand split are collected as residual
  // underlines; pieces produced by the second call are inserted into the row.
  // At the end the residual underlines are added back to block->underlines.
  int16_t chop_coord;        // chop boundary
  TBOX blob_box;             // of underline
  BLOBNBOX *u_line;          // underline bit
  TO_ROW *row;               // best row for blob
  ICOORDELT_LIST chop_cells; // blobs to cut out
                             // real underlines
  BLOBNBOX_LIST residual_underlines;
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  ICOORDELT_IT cell_it = &chop_cells;
  // under lines
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT ru_it = &residual_underlines;
 
  if (block->get_rows()->empty()) {
    return; // Don't crash if there are no rows.
  }
  for (under_it.mark_cycle_pt(); !under_it.cycled_list(); under_it.forward()) {
    // The underline is taken out of block->underlines here.
    u_line = under_it.extract();
    blob_box = u_line->bounding_box();
    row = most_overlapping_row(block->get_rows(), u_line);
    if (row == nullptr) {
      // NOTE(review): u_line has already been extracted and is neither
      // deleted nor re-inserted on this path, and residuals collected so far
      // are not added back to block->underlines - looks like a leak; confirm.
      return; // Don't crash if there is no row.
    }
    find_underlined_blobs(u_line, &row->baseline, row->xheight,
                          row->xheight * textord_underline_offset, &chop_cells);
    cell_it.set_to_list(&chop_cells);
    for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
      // Each cell is an (x, y) pair used as the two chop coordinates.
      chop_coord = cell_it.data()->x();
      // Cells no wider than textord_fp_chop_error + 1 are discarded unchopped.
      if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) {
        split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines,
                      &right_coutlines);
        if (!left_coutlines.empty()) {
          ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        }
        chop_coord = cell_it.data()->y();
        // A null blob is passed from here on; the outline lists filled by
        // the previous call are passed again.
        split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines,
                      &right_coutlines);
        if (!left_coutlines.empty()) {
          row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        }
        u_line = nullptr; // no more blobs to add
      }
      delete cell_it.extract();
    }
    // Whatever is still in right_coutlines is split once more at the right
    // edge of the original underline box and kept as a residual underline.
    if (!right_coutlines.empty()) {
      split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, &left_coutlines,
                    &right_coutlines);
      if (!left_coutlines.empty()) {
        ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
      }
    }
    // u_line is null here if any cell was chopped (see above).
    delete u_line;
  }
  // Move the residual underlines into block->underlines.
  if (!ru_it.empty()) {
    ru_it.move_to_first();
    for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
      under_it.add_after_then_move(ru_it.extract());
    }
  }
}

◆ Reverse32()

void tesseract::Reverse32 ( void * ptr )

inline

Definition at line 196 of file helpers.h.

                                 {
  // Reverses the byte order of the 32-bit (four byte) value at `ptr`.
  constexpr int kBytesIn32Bits = 4;
  ReverseN(ptr, kBytesIn32Bits);
}

◆ ReverseN()

void tesseract::ReverseN	(	void *	ptr,
		int	num_bytes
	)

inline

Definition at line 184 of file helpers.h.

                                               {
  // Reverses, in place, the order of the `num_bytes` bytes starting at `ptr`.
  // Only the sizes 1, 2, 4 and 8 are accepted by the assertion.
  assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8);
  char *bytes = static_cast<char *>(ptr);
  // Swap the outermost pair and work inwards until the indices meet.
  for (int low = 0, high = num_bytes - 1; low < high; ++low, --high) {
    const char saved = bytes[low];
    bytes[low] = bytes[high];
    bytes[high] = saved;
  }
}

◆ RightWordAttributes()

TESS_API void tesseract::RightWordAttributes	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	werd,
		const std::string &	utf8,
		bool *	is_list,
		bool *	starts_idea,
		bool *	ends_idea
	)

Definition at line 477 of file paragraphs.cpp.

                                                                            {
  // Classifies the right-most word of a line: whether it looks like a list
  // item (which also counts as starting an idea) and whether it ends an idea.
  // All three outputs default to false.
  *is_list = false;
  *starts_idea = false;
  *ends_idea = false;

  const bool nothing_to_inspect = utf8.empty() || (werd != nullptr && werd->empty());
  if (nothing_to_inspect) { // Empty
    *ends_idea = true;
    return;
  }

  bool looks_like_list_item;
  bool ends_with_punctuation;
  if (unicharset && werd) { // We have a proper werd and unicharset so use it.
    looks_like_list_item = UniLikelyListItem(unicharset, werd);
    const UNICHAR_ID last_letter = werd->unichar_id(werd->length() - 1);
    ends_with_punctuation = unicharset->get_ispunctuation(last_letter);
  } else { // Assume utf8 is mostly ASCII
    looks_like_list_item = AsciiLikelyListItem(utf8);
    const int last_letter = utf8.back();
    ends_with_punctuation = IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter);
  }

  if (looks_like_list_item) {
    *is_list = true;
    *starts_idea = true;
  }
  if (ends_with_punctuation) {
    *ends_idea = true;
  }
}

◆ RoundUp()

int tesseract::RoundUp	(	int	n,
		int	block_size
	)

inline

Definition at line 99 of file helpers.h.

                                          {
  // Number of whole blocks needed to hold n (integer division, so the
  // numerator is bumped by block_size - 1 first), scaled back to a size.
  const int blocks_needed = (n + block_size - 1) / block_size;
  return block_size * blocks_needed;
}

◆ row_pitch_stats()

bool tesseract::row_pitch_stats	(	TO_ROW *	row,
		int32_t	maxwidth,
		bool	testing_on
	)

Definition at line 648 of file topitch.cpp.

  {
  // Clusters the horizontal gaps between the blobs of `row` and derives
  // initial estimates for the row's proportional (pr_nonsp / pr_space) and
  // fixed-pitch (fp_nonsp / fp_space) non-space and space sizes.
  // Returns false, leaving those fields untouched, when no gap below
  // maxwidth was recorded or when clustering yields no cluster.
  BLOBNBOX *blob;        // current blob
  int gap_index;         // current gap
  int32_t prev_x;        // end of prev blob
  int32_t cluster_count; // no of clusters
  int32_t prev_count;    // of clusters
  int32_t smooth_factor; // for smoothing stats
  TBOX blob_box;         // bounding box
  float lower, upper;    // cluster thresholds
                         // gap sizes
  float gaps[BLOCK_STATS_CLUSTERS];
  // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  STATS gap_stats(0, maxwidth - 1);
  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
  // clusters
 
  smooth_factor = static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
  // Collect the gap between each blob and the previous one, skipping blobs
  // flagged as joined to their predecessor and gaps of maxwidth or more.
  if (!blob_it.empty()) {
    prev_x = blob_it.data()->bounding_box().right();
    blob_it.forward();
    while (!blob_it.at_first()) {
      blob = blob_it.data();
      if (!blob->joined_to_prev()) {
        blob_box = blob->bounding_box();
        if (blob_box.left() - prev_x < maxwidth) {
          gap_stats.add(blob_box.left() - prev_x, 1);
        }
        prev_x = blob_box.right();
      }
      blob_it.forward();
    }
  }
  if (gap_stats.get_total() == 0) {
    return false;
  }
  cluster_count = 0;
  lower = row->xheight * words_initial_lower;
  upper = row->xheight * words_initial_upper;
  gap_stats.smooth(smooth_factor);
  // Re-run the clustering for as long as it keeps finding more clusters,
  // up to BLOCK_STATS_CLUSTERS.
  do {
    prev_count = cluster_count;
    cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop,
                                      BLOCK_STATS_CLUSTERS, cluster_stats);
  } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
  if (cluster_count < 1) {
    return false;
  }
  // Cluster k is read from cluster_stats[k + 1]; index 0 is not used here.
  for (gap_index = 0; gap_index < cluster_count; gap_index++) {
    gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
  }
  // get medians
  if (testing_on) {
    tprintf("cluster_count=%d:", cluster_count);
    for (gap_index = 0; gap_index < cluster_count; gap_index++) {
      tprintf(" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
    }
    tprintf("\n");
  }
  // From here on gaps[] is ordered by sort_floats, so it no longer lines up
  // with cluster_stats[].
  qsort(gaps, cluster_count, sizeof(float), sort_floats);
 
  // Try to find proportional non-space and space for row.
  lower = row->xheight * words_default_prop_nonspace;
  upper = row->xheight * textord_words_min_minspace;
  // Empty-bodied loop: advance gap_index past the leading gaps below `lower`.
  for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < lower; gap_index++) {
    ;
  }
  if (gap_index == 0) {
    if (testing_on) {
      tprintf("No clusters below nonspace threshold!!\n");
    }
    if (cluster_count > 1) {
      row->pr_nonsp = gaps[0];
      row->pr_space = gaps[1];
    } else {
      row->pr_nonsp = lower;
      row->pr_space = gaps[0];
    }
  } else {
    // The last gap below `lower` is the non-space estimate; the space
    // estimate is the first later gap that is not below `upper`.
    row->pr_nonsp = gaps[gap_index - 1];
    while (gap_index < cluster_count && gaps[gap_index] < upper) {
      gap_index++;
    }
    if (gap_index == cluster_count) {
      if (testing_on) {
        tprintf("No clusters above nonspace threshold!!\n");
      }
      row->pr_space = lower * textord_spacesize_ratioprop;
    } else {
      row->pr_space = gaps[gap_index];
    }
  }
 
  // Now try to find the fixed pitch space and non-space.
  upper = row->xheight * words_default_fixed_space;
  // Empty-bodied loop: advance gap_index past the leading gaps below `upper`.
  for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < upper; gap_index++) {
    ;
  }
  if (gap_index == 0) {
    if (testing_on) {
      tprintf("No clusters below space threshold!!\n");
    }
    row->fp_nonsp = upper;
    row->fp_space = gaps[0];
  } else {
    row->fp_nonsp = gaps[gap_index - 1];
    if (gap_index == cluster_count) {
      if (testing_on) {
        tprintf("No clusters above space threshold!!\n");
      }
      row->fp_space = row->xheight;
    } else {
      row->fp_space = gaps[gap_index];
    }
  }
  if (testing_on) {
    tprintf(
        "Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, "
        "fp_space=%g\n",
        row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
  }
  return true; // computed some stats
}

◆ row_words()

int32_t tesseract::row_words	(	TO_BLOCK *	block,
		TO_ROW *	row,
		int32_t	maxwidth,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 168 of file wordseg.cpp.

  {
  // Clusters the gaps between the blobs of `row` into at most 3 clusters and
  // uses the cluster medians to set row->min_space, max_nonspace,
  // space_threshold, space_size and kern_size.
  // Returns 0 with min_space and max_nonspace set to 0 when there is no usable
  // evidence; otherwise returns the total of cluster_stats[2].
  // `rotation` is not referenced in this body.
  bool testing_row;      // contains testpt
  bool prev_valid;       // if decent size
  int32_t prev_x;        // end of prev blob
  int32_t cluster_count; // no of clusters
  int32_t gap_index;     // which cluster
  int32_t smooth_factor; // for smoothing stats
  BLOBNBOX *blob;        // current blob
  float lower, upper;    // clustering parameters
  float gaps[3];         // gap clusters
  ICOORD testpt;
  TBOX blob_box; // bounding box
                 // iterator
  BLOBNBOX_IT blob_it = row->blob_list();
  STATS gap_stats(0, maxwidth - 1);
  STATS cluster_stats[4]; // clusters
 
  testpt = ICOORD(textord_test_x, textord_test_y);
  smooth_factor = static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
  //      if (testing_on)
  //              tprintf("Row smooth factor=%d\n",smooth_factor);
  prev_valid = false;
  prev_x = -INT32_MAX;
  testing_row = false;
  // First pass: find out whether any blob contains the test point. The blob
  // widths added to gap_stats here are discarded by the clear() that follows.
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    blob_box = blob->bounding_box();
    if (blob_box.contains(testpt)) {
      testing_row = true;
    }
    gap_stats.add(blob_box.width(), 1);
  }
  gap_stats.clear();
  // Second pass: collect the gaps between consecutive blobs that are not
  // joined to their predecessor, ignoring gaps of maxwidth or more.
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      blob_box = blob->bounding_box();
      if (prev_valid && blob_box.left() - prev_x < maxwidth) {
        gap_stats.add(blob_box.left() - prev_x, 1);
      }
      prev_valid = true;
      prev_x = blob_box.right();
    }
  }
  if (gap_stats.get_total() == 0) {
    row->min_space = 0; // no evidence
    row->max_nonspace = 0;
    return 0;
  }
  gap_stats.smooth(smooth_factor);
  lower = row->xheight * textord_words_initial_lower;
  upper = row->xheight * textord_words_initial_upper;
  cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats);
  // Retry with a narrower [lower, upper] band until two clusters are found or
  // the band can no longer shrink. Note that the new `lower` is computed from
  // the already updated `upper`.
  while (cluster_count < 2 && std::ceil(lower) < std::floor(upper)) {
    // shrink gap
    upper = (upper * 3 + lower) / 4;
    lower = (lower * 3 + upper) / 4;
    cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats);
  }
  if (cluster_count < 2) {
    row->min_space = 0; // no evidence
    row->max_nonspace = 0;
    return 0;
  }
  // Cluster k is read from cluster_stats[k + 1]; index 0 is not used here.
  for (gap_index = 0; gap_index < cluster_count; gap_index++) {
    gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
  }
  // get medians
  // Choose `lower` (kern estimate) and `upper` (space estimate) from the
  // cluster medians.
  if (cluster_count > 2) {
    if (testing_on && textord_show_initial_words) {
      tprintf("Row at %g has 3 sizes of gap:%g,%g,%g\n", row->intercept(),
              cluster_stats[1].ile(0.5), cluster_stats[2].ile(0.5), cluster_stats[3].ile(0.5));
    }
    lower = gaps[0];
    if (gaps[1] > lower) {
      upper = gaps[1]; // prefer most frequent
      if (upper < block->xheight * textord_words_min_minspace && gaps[2] > gaps[1]) {
        upper = gaps[2];
      }
    } else if (gaps[2] > lower && gaps[2] >= block->xheight * textord_words_min_minspace) {
      upper = gaps[2];
    } else if (lower >= block->xheight * textord_words_min_minspace) {
      upper = lower; // not nice
      lower = gaps[1];
      if (testing_on && textord_show_initial_words) {
        tprintf("Had to switch most common from lower to upper!!\n");
        gap_stats.print();
      }
    } else {
      row->min_space = 0; // no evidence
      row->max_nonspace = 0;
      return 0;
    }
  } else {
    // Exactly two clusters: the smaller median is the kern, the larger the
    // space.
    if (gaps[1] < gaps[0]) {
      if (testing_on && textord_show_initial_words) {
        tprintf("Had to switch most common from lower to upper!!\n");
        gap_stats.print();
      }
      lower = gaps[1];
      upper = gaps[0];
    } else {
      upper = gaps[1];
      lower = gaps[0];
    }
  }
  if (upper < block->xheight * textord_words_min_minspace) {
    row->min_space = 0; // no evidence
    row->max_nonspace = 0;
    return 0;
  }
  // A disagreement with the block-level estimates is only reported (when
  // debugging is on); it does not change the result.
  if (upper * 3 < block->min_space * 2 + block->max_nonspace ||
      lower * 3 > block->min_space * 2 + block->max_nonspace) {
    if (testing_on && textord_show_initial_words) {
      tprintf("Disagreement between block and row at %g!!\n", row->intercept());
      tprintf("Lower=%g, upper=%g, Stats:\n", lower, upper);
      gap_stats.print();
    }
  }
  // min_space and max_nonspace are each moved inwards from upper / lower by
  // the fraction textord_words_definite_spread of the distance between them.
  row->min_space =
      static_cast<int32_t>(ceil(upper - (upper - lower) * textord_words_definite_spread));
  row->max_nonspace =
      static_cast<int32_t>(floor(lower + (upper - lower) * textord_words_definite_spread));
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;
  if (testing_on && textord_show_initial_words) {
    if (testing_row) {
      tprintf("GAP STATS\n");
      gap_stats.print();
      tprintf("SPACE stats\n");
      cluster_stats[2].print_summary();
      tprintf("NONSPACE stats\n");
      cluster_stats[1].print_summary();
    }
    tprintf("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", row->intercept(), row->min_space,
            upper, row->max_nonspace, lower);
  }
  return cluster_stats[2].get_total();
}

◆ row_words2()

int32_t tesseract::row_words2	(	TO_BLOCK *	block,
		TO_ROW *	row,
		int32_t	maxwidth,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 321 of file wordseg.cpp.

  {
  // Clusters the gaps between the blobs of `row` and, using the block-level
  // max_nonspace as the dividing line, sets row->min_space, max_nonspace,
  // space_threshold, space_size and kern_size.
  // Returns 0 with min_space and max_nonspace set to 0 when no gaps or no
  // clusters are found, and 1 otherwise.
  // `rotation` is not referenced in this body.
  bool prev_valid;       // if decent size
  bool this_valid;       // current blob big enough
  int32_t prev_x;        // end of prev blob
  int32_t min_width;     // min interesting width
  int32_t valid_count;   // good gaps
  int32_t total_count;   // total gaps
  int32_t cluster_count; // no of clusters
  int32_t prev_count;    // previous cluster_count
  int32_t gap_index;     // which cluster
  int32_t smooth_factor; // for smoothing stats
  BLOBNBOX *blob;        // current blob
  float lower, upper;    // clustering parameters
  ICOORD testpt;
  TBOX blob_box; // bounding box
                 // iterator
  BLOBNBOX_IT blob_it = row->blob_list();
  STATS gap_stats(0, maxwidth - 1);
  // gap sizes
  float gaps[BLOCK_STATS_CLUSTERS];
  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
  // clusters
 
  // testpt is assigned here but not read anywhere else in this body.
  testpt = ICOORD(textord_test_x, textord_test_y);
  smooth_factor = static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
  //      if (testing_on)
  //              tprintf("Row smooth factor=%d\n",smooth_factor);
  prev_valid = false;
  prev_x = -INT16_MAX;
  // Always false, so the detailed stats dump near the end never runs.
  const bool testing_row = false;
  // min blob size
  min_width = static_cast<int32_t>(block->pr_space);
  total_count = 0;
  // First attempt: only count gaps between two consecutive blobs that are
  // both at least min_width wide.
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      blob_box = blob->bounding_box();
      this_valid = blob_box.width() >= min_width;
      if (this_valid && prev_valid && blob_box.left() - prev_x < maxwidth) {
        gap_stats.add(blob_box.left() - prev_x, 1);
      }
      total_count++; // count possibles
      prev_x = blob_box.right();
      prev_valid = this_valid;
    }
  }
  valid_count = gap_stats.get_total();
  // Too few gaps between wide blobs: start over and count the gaps between
  // all blobs regardless of width.
  if (valid_count < total_count * textord_words_minlarge) {
    gap_stats.clear();
    prev_x = -INT16_MAX;
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      blob = blob_it.data();
      if (!blob->joined_to_prev()) {
        blob_box = blob->bounding_box();
        if (blob_box.left() - prev_x < maxwidth) {
          gap_stats.add(blob_box.left() - prev_x, 1);
        }
        prev_x = blob_box.right();
      }
    }
  }
  if (gap_stats.get_total() == 0) {
    row->min_space = 0; // no evidence
    row->max_nonspace = 0;
    return 0;
  }
 
  cluster_count = 0;
  lower = block->xheight * words_initial_lower;
  upper = block->xheight * words_initial_upper;
  gap_stats.smooth(smooth_factor);
  // Re-run the clustering for as long as it keeps finding more clusters,
  // up to BLOCK_STATS_CLUSTERS.
  do {
    prev_count = cluster_count;
    cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop,
                                      BLOCK_STATS_CLUSTERS, cluster_stats);
  } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
  if (cluster_count < 1) {
    row->min_space = 0;
    row->max_nonspace = 0;
    return 0;
  }
  // Cluster k is read from cluster_stats[k + 1]; index 0 is not used here.
  for (gap_index = 0; gap_index < cluster_count; gap_index++) {
    gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
  }
  // get medians
  if (testing_on) {
    tprintf("cluster_count=%d:", cluster_count);
    for (gap_index = 0; gap_index < cluster_count; gap_index++) {
      tprintf(" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
    }
    tprintf("\n");
  }
 
  // Try to find proportional non-space and space for row.
  // Empty-bodied loop: stop at the first cluster median that is not above
  // the block's max_nonspace.
  for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] > block->max_nonspace;
       gap_index++) {
    ;
  }
  if (gap_index < cluster_count) {
    lower = gaps[gap_index]; // most frequent below
  } else {
    if (testing_on) {
      tprintf("No cluster below block threshold!, using default=%g\n", block->pr_nonsp);
    }
    lower = block->pr_nonsp;
  }
  // Empty-bodied loop: stop at the first cluster median that is above the
  // block's max_nonspace.
  for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] <= block->max_nonspace;
       gap_index++) {
    ;
  }
  if (gap_index < cluster_count) {
    upper = gaps[gap_index]; // most frequent above
  } else {
    if (testing_on) {
      tprintf("No cluster above block threshold!, using default=%g\n", block->pr_space);
    }
    upper = block->pr_space;
  }
  // min_space and max_nonspace are each moved inwards from upper / lower by
  // the fraction textord_words_definite_spread of the distance between them.
  row->min_space =
      static_cast<int32_t>(ceil(upper - (upper - lower) * textord_words_definite_spread));
  row->max_nonspace =
      static_cast<int32_t>(floor(lower + (upper - lower) * textord_words_definite_spread));
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;
  if (testing_on) {
    if (testing_row) {
      tprintf("GAP STATS\n");
      gap_stats.print();
      tprintf("SPACE stats\n");
      cluster_stats[2].print_summary();
      tprintf("NONSPACE stats\n");
      cluster_stats[1].print_summary();
    }
    tprintf("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", row->intercept(), row->min_space,
            upper, row->max_nonspace, lower);
  }
  return 1;
}

◆ RowsFitModel()

bool tesseract::RowsFitModel	(	const std::vector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		const ParagraphModel *	model
	)

Definition at line 1859 of file paragraphs.cpp.

                                               {
  // True when rows [start, end) read as one paragraph under `model`: row
  // `start` must be a valid first line and every later row a valid body line.
  if (!AcceptableRowArgs(0, 1, __func__, rows, start, end) ||
      !ValidFirstLine(rows, start, model)) {
    return false;
  }
  // Advance over the body lines; stopping early means one of them failed.
  int row = start + 1;
  while (row < end && ValidBodyLine(rows, row, model)) {
    ++row;
  }
  return row >= end;
}

◆ SaveDataToFile() [1/2]

bool tesseract::SaveDataToFile	(	const GenericVector< char > &	data,
		const char *	filename
	)

inline

Definition at line 254 of file genericvector.h.

                                                                                  {
  // Writes all of `data` to `filename` in binary mode ("wb").
  // Returns true only if the file could be opened, every byte was written and
  // the stream closed without error; returns false otherwise.
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // Do not evaluate &data[0] on an empty vector: there is no element 0 to take
  // the address of. An empty payload counts as a successful zero-byte write.
  bool result = data.empty() || fwrite(&data[0], 1, data.size(), fp) == data.size();
  // fclose flushes buffered output, so a failure here means bytes were lost.
  if (fclose(fp) != 0) {
    result = false;
  }
  return result;
}

◆ SaveDataToFile() [2/2]

TESS_API bool tesseract::SaveDataToFile	(	const std::vector< char > &	data,
		const char *	filename
	)

Definition at line 53 of file serialis.cpp.

                                                                       {
  // Writes all of `data` to `filename` in binary mode ("wb").
  // Returns true only if the file could be opened, every byte was written and
  // the stream closed without error; returns false otherwise.
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // data.data() is valid to call on an empty vector, unlike &data[0], which is
  // undefined behaviour when there are no elements. A zero-length write
  // returns 0, which equals data.size(), so an empty payload still succeeds.
  bool result = fwrite(data.data(), 1, data.size(), fp) == data.size();
  // fclose flushes buffered output, so a failure here means bytes were lost.
  if (fclose(fp) != 0) {
    result = false;
  }
  return result;
}

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos script_pos )

Definition at line 193 of file ratngs.cpp.

                                                         {
  // Short debug label for a script position. Any value not listed in the
  // switch keeps the fallback label.
  const char *label = "SP_UNKNOWN";
  switch (script_pos) {
    case SP_NORMAL:
      label = "NORM";
      break;
    case SP_SUBSCRIPT:
      label = "SUB";
      break;
    case SP_SUPERSCRIPT:
      label = "SUPER";
      break;
    case SP_DROPCAP:
      label = "DROPC";
      break;
  }
  return label;
}

◆ search()

LIST tesseract::search	(	LIST	list,
		void *	key,
		int_compare	is_equal
	)

Definition at line 211 of file oldlist.cpp.

                                                        {
  // Use the default comparator when the caller does not supply one.
  int_compare matches = is_equal;
  if (matches == nullptr) {
    matches = is_same;
  }

  // Advance through the list and return the position at which the comparator
  // accepts first_node() against key; NIL_LIST if it never does.
  iterate(list) {
    if ((*matches)(list->first_node(), key)) {
      return list;
    }
  }
  return NIL_LIST;
}

◆ segment_baseline()

bool tesseract::segment_baseline	(	TO_ROW *	row,
		TO_BLOCK *	block,
		int32_t &	segments,
		int32_t *	xstarts
	)

Definition at line 2083 of file makerow.cpp.

  {
  // Splits the blobs of `row` into baseline segments.
  // Outputs: `segments` receives the number of segments and
  // xstarts[0..segments] the x coordinates bounding them.
  // Returns true if, for any window position, the middle entry of the shift
  // window lay further than textord_spline_shift_fraction * block->line_size
  // from the straight line y = row->line_m() * x + row->line_c().
  // NOTE(review): xstarts is written without a bounds check here; presumably
  // the caller sizes it for the worst case - confirm.
  bool needs_curve; // needs curved line
  int blobcount;    // no of blobs
  int blobindex;    // current blob
  int last_state;   // above, on , below
  int state;        // of current blob
  float yshift;     // from baseline
  TBOX box;         // blob box
  TBOX new_box;     // new_it box
  float middle;     // xcentre of blob
                    // blobs
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX_IT new_it = blob_it; // front end
  SORTED_FLOATS yshifts;        // shifts from baseline
 
  needs_curve = false;
  box = box_next_pre_chopped(&blob_it);
  // The first segment always starts at the left edge of the first box.
  xstarts[0] = box.left();
  segments = 1;
  blobcount = row->blob_list()->length();
  if (textord_oldbl_debug) {
    tprintf("Segmenting baseline of %d blobs at (%d,%d)\n", blobcount, box.left(), box.bottom());
  }
  // Too few blobs to fill the window or to justify a spline: report a single
  // segment ending at the right edge of the last blob.
  if (blobcount <= textord_spline_medianwin || blobcount < textord_spline_minblobs) {
    blob_it.move_to_last();
    box = blob_it.data()->bounding_box();
    xstarts[1] = box.right();
    return false;
  }
  last_state = 0;
  new_it.mark_cycle_pt();
  // Prime the window: record the vertical offset from the fitted line for the
  // first textord_spline_medianwin boxes, keyed by their index.
  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
    new_box = box_next_pre_chopped(&new_it);
    middle = (new_box.left() + new_box.right()) / 2.0;
    yshift = new_box.bottom() - row->line_m() * middle - row->line_c();
    // record shift
    yshifts.add(yshift, blobindex);
    // The list wrapped before the window was full: keep the single segment.
    if (new_it.cycled_list()) {
      xstarts[1] = new_box.right();
      return false;
    }
  }
  // Advance the trailing iterator by half a window. blobcount is reused as the
  // loop counter, so from here on it no longer holds the list length; it
  // counts boxes since the last segment boundary.
  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++) {
    box = box_next_pre_chopped(&blob_it);
  }
  // Slide the window along the row until the leading iterator wraps round.
  do {
    new_box = box_next_pre_chopped(&new_it);
    // get middle one
    yshift = yshifts[textord_spline_medianwin / 2];
    // Classify the shift: +1 above, -1 below, 0 within the tolerance band.
    if (yshift > textord_spline_shift_fraction * block->line_size) {
      state = 1;
    } else if (-yshift > textord_spline_shift_fraction * block->line_size) {
      state = -1;
    } else {
      state = 0;
    }
    if (state != 0) {
      needs_curve = true;
    }
    //              tprintf("State=%d, prev=%d, shift=%g\n",
    //                      state,last_state,yshift);
    // Start a new segment on a state change, but only once the current one
    // already holds more than textord_spline_minblobs boxes.
    if (state != last_state && blobcount > textord_spline_minblobs) {
      xstarts[segments++] = box.left();
      blobcount = 0;
    }
    last_state = state;
    // Drop the entry that was added with key blobindex - window size, then
    // add the offset of the newly read box under key blobindex.
    yshifts.remove(blobindex - textord_spline_medianwin);
    box = box_next_pre_chopped(&blob_it);
    middle = (new_box.left() + new_box.right()) / 2.0;
    yshift = new_box.bottom() - row->line_m() * middle - row->line_c();
    yshifts.add(yshift, blobindex);
    blobindex++;
    blobcount++;
  } while (!new_it.cycled_list());
  // Close the final segment at the right edge of the last box read. If the
  // trailing run is too short (and it is not the only segment), overwrite the
  // previous boundary instead, merging the run into the preceding segment.
  if (blobcount > textord_spline_minblobs || segments == 1) {
    xstarts[segments] = new_box.right();
  } else {
    xstarts[--segments] = new_box.right();
  }
  if (textord_oldbl_debug) {
    tprintf("Made %d segments on row at (%d,%d)\n", segments, box.right(), box.bottom());
  }
  return needs_curve;
}

◆ segment_spline()

int tesseract::segment_spline	(	TBOX	blobcoords[],
		int	blobcount,
		int	xcoords[],
		int	ycoords[],
		int	degree,
		int	pointcount,
		int	xstarts[]
	)

Definition at line 1006 of file oldbasel.cpp.

  {
  // Chooses spline segment boundaries from the turning points of ycoords.
  // Local minima/maxima that differ from the previous opposite extreme by more
  // than TURNLIMIT are recorded as turning points (at most SPLINESIZE - 1 of
  // them); a boundary is then placed between each consecutive pair.
  // Outputs: xstarts[0] = xcoords[0] - 1, xstarts[1..ret-1] the interior
  // boundaries, xstarts[ret] = xcoords[pointcount - 1] + 1.
  // Returns the number of segments (1 when fewer than two turning points were
  // found, which is always the case when degree < 2 or pointcount <= 3).
  // blobcoords and blobcount are not referenced in this body.
  // NOTE(review): xcoords[0] and xcoords[pointcount - 1] are read before any
  // check on pointcount, so this assumes pointcount >= 1 - confirm at callers.
  int ptindex;                /*no along text line */
  int segment;                /*partition no */
  int lastmin, lastmax;       /*possible turn points */
  int turnpoints[SPLINESIZE]; /*good turning points */
  int turncount;              /*no of turning points */
  int max_x;                  // max specified coord
 
  xstarts[0] = xcoords[0] - 1; // leftmost defined pt
  max_x = xcoords[pointcount - 1] + 1;
  // Below degree 2 the turning-point search is disabled by zeroing pointcount.
  if (degree < 2) {
    pointcount = 0;
  }
  turncount = 0; /*no turning points yet */
  if (pointcount > 3) {
    ptindex = 1;
    lastmax = lastmin = 0; /*start with first one */
    // Scan interior points; lastmin/lastmax track the most recent candidate
    // extremes, and a candidate is committed to turnpoints once the opposite
    // extreme moves more than TURNLIMIT away from it.
    while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
      /*minimum */
      if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
          if (turncount == 0 || turnpoints[turncount - 1] != lastmax) {
            /*new max point */
            turnpoints[turncount++] = lastmax;
          }
          lastmin = ptindex; /*latest minimum */
        } else if (ycoords[ptindex] < ycoords[lastmin]) {
          lastmin = ptindex; /*lower minimum */
        }
      }
 
      /*maximum */
      if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
          if (turncount == 0 || turnpoints[turncount - 1] != lastmin) {
            /*new min point */
            turnpoints[turncount++] = lastmin;
          }
          lastmax = ptindex; /*latest maximum */
        } else if (ycoords[ptindex] > ycoords[lastmax]) {
          lastmax = ptindex; /*higher maximum */
        }
      }
      ptindex++;
    }
    // ptindex now indexes the point at which the scan stopped (the last point
    // unless the turnpoints array filled first); treat it as a possible final
    // extreme. Every append below is guarded so turncount stays below
    // SPLINESIZE.
    /*possible global min */
    if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT &&
        (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
      if (turncount < SPLINESIZE - 1) {
        /*2 more turns */
        turnpoints[turncount++] = lastmax;
      }
      if (turncount < SPLINESIZE - 1) {
        turnpoints[turncount++] = ptindex;
      }
    } else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
               /*possible global max */
               && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
      if (turncount < SPLINESIZE - 1) {
        /*2 more turns */
        turnpoints[turncount++] = lastmin;
      }
      if (turncount < SPLINESIZE - 1) {
        turnpoints[turncount++] = ptindex;
      }
    } else if (turncount > 0 && turnpoints[turncount - 1] == lastmin &&
               turncount < SPLINESIZE - 1) {
      // Last committed turn was a minimum: finish with the higher of the end
      // point and the pending maximum.
      if (ycoords[ptindex] > ycoords[lastmax]) {
        turnpoints[turncount++] = ptindex;
      } else {
        turnpoints[turncount++] = lastmax;
      }
    } else if (turncount > 0 && turnpoints[turncount - 1] == lastmax &&
               turncount < SPLINESIZE - 1) {
      // Last committed turn was a maximum: finish with the lower of the end
      // point and the pending minimum.
      if (ycoords[ptindex] < ycoords[lastmin]) {
        turnpoints[turncount++] = ptindex;
      } else {
        turnpoints[turncount++] = lastmin;
      }
    }
  }
 
  if (textord_oldbl_debug && turncount > 0) {
    tprintf("First turn is %d at (%d,%d)\n", turnpoints[0], xcoords[turnpoints[0]],
            ycoords[turnpoints[0]]);
  }
  // One boundary per consecutive pair of turning points. lastmax is reused
  // here to hold the y value halfway between the two turns.
  for (segment = 1; segment < turncount; segment++) {
    /*centre y coord */
    lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
 
    /* fix alg so that it works with both rising and falling sections */
    if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) {
      /*find rising y centre */
      for (ptindex = turnpoints[segment - 1] + 1;
           ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++) {
      }
    } else {
      /*find falling y centre */
      for (ptindex = turnpoints[segment - 1] + 1;
           ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++) {
      }
    }
 
    // Boundary = rounded mean of four x values: the two points around the
    // y-centre crossing and the two turning points themselves.
    /*centre x */
    xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] + xcoords[turnpoints[segment - 1]] +
                        xcoords[turnpoints[segment]] + 2) /
                       4;
    /*halfway between turns */
    if (textord_oldbl_debug) {
      tprintf("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", segment,
              turnpoints[segment], xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
              ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
    }
  }
 
  // segment is max(1, turncount) here; the final boundary is just past the
  // rightmost x coordinate.
  xstarts[segment] = max_x;
  return segment; /*no of splines */
}

◆ separate_underlines()

void tesseract::separate_underlines	(	TO_BLOCK *	block,
		float	gradient,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 1781 of file makerow.cpp.

                                          { // correct orientation
  // Moves wide blobs out of the rows of `block`: blobs wider than
  // block->line_size * textord_underline_width that test_underline() accepts
  // go to block->underlines; other such wide blobs that overlap more than
  // textord_max_blob_overlaps blobs in their row go to block->large_blobs.
  // `gradient` and `rotation` are combined into the rotation applied to each
  // candidate blob before testing; `testing_on` (together with
  // textord_show_final_rows) enables debug output.
  BLOBNBOX *blob;                           // current blob
  C_BLOB *rotated_blob;                     // rotated blob
  TO_ROW *row;                              // current row
  float length;                             // of g_vec
  TBOX blob_box;
  FCOORD blob_rotation; // inverse of rotation
  FCOORD g_vec;         // skew rotation
  BLOBNBOX_IT blob_it;  // iterator
                        // iterator
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT large_it = &block->large_blobs;
  TO_ROW_IT row_it = block->get_rows();
  // Height threshold passed to CountOverlaps, rounded to the nearest int.
  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction * block->line_size + 0.5);
 
  // length of vector
  length = std::sqrt(1 + gradient * gradient);
  // Unit vector built from the skew gradient.
  g_vec = FCOORD(1 / length, -gradient / length);
  // `rotation` with its y component negated, then rotated by g_vec.
  blob_rotation = FCOORD(rotation.x(), -rotation.y());
  blob_rotation.rotate(g_vec); // undoing everything
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row = row_it.data();
    // get blobs
    blob_it.set_to_list(row->blob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      blob = blob_it.data();
      blob_box = blob->bounding_box();
      // Only blobs wider than the underline width threshold are candidates.
      if (blob_box.width() > block->line_size * textord_underline_width) {
        ASSERT_HOST(blob->cblob() != nullptr);
        // Rotated copy used only for the test; it is deleted at the end of
        // this if-block on every path.
        rotated_blob = crotate_cblob(blob->cblob(), blob_rotation);
        if (test_underline(testing_on && textord_show_final_rows, rotated_blob,
                           static_cast<int16_t>(row->intercept()),
                           static_cast<int16_t>(block->line_size *
                                                (tesseract::CCStruct::kXHeightFraction +
                                                 tesseract::CCStruct::kAscenderFraction / 2.0f)))) {
          // Detach the blob from the row and append it to the underline list.
          under_it.add_after_then_move(blob_it.extract());
          if (testing_on && textord_show_final_rows) {
            tprintf("Underlined blob at:");
            rotated_blob->bounding_box().print();
            tprintf("Was:");
            blob_box.print();
          }
        } else if (CountOverlaps(blob->bounding_box(), min_blob_height, row->blob_list()) >
                   textord_max_blob_overlaps) {
          // Detach the blob from the row and append it to the large-blob list.
          large_it.add_after_then_move(blob_it.extract());
          if (testing_on && textord_show_final_rows) {
            // NOTE(review): this recount runs after the blob was extracted
            // from the row, so the printed value may differ from the one that
            // triggered the move - confirm whether that matters.
            tprintf("Large blob overlaps %d blobs at:",
                    CountOverlaps(blob_box, min_blob_height, row->blob_list()));
            blob_box.print();
          }
        }
        delete rotated_blob;
      }
    }
  }
}

◆ Serialize() [1/2]

template<typename T >

bool tesseract::Serialize	(	FILE *	fp,
		const std::vector< T > &	data
	)

Definition at line 236 of file helpers.h.

                                                   {
  // Layout: a 32-bit element count, followed by the elements. How each
  // element is written is chosen at compile time from T.
  const uint32_t count = data.size();
  if (fwrite(&count, sizeof(count), 1, fp) != 1) {
    return false;
  }
  if constexpr (std::is_class<T>::value) {
    // Class types serialize themselves.
    for (auto &element : data) {
      if (!element.Serialize(fp)) {
        return false;
      }
    }
  } else if constexpr (std::is_pointer<T>::value) {
    // Pointers: a one-byte non-null flag, then the pointee if present.
    for (auto &element : data) {
      uint8_t present = (element != nullptr);
      if (!Serialize(fp, &present)) {
        return false;
      }
      if (present && !element->Serialize(fp)) {
        return false;
      }
    }
  } else {
    // Everything else is written as one raw block of count * sizeof(T) bytes.
    if (count > 0 && fwrite(&data[0], sizeof(T), count, fp) != count) {
      return false;
    }
  }
  return true;
}

◆ Serialize() [2/2]

template<typename T >

bool tesseract::Serialize	(	FILE *	fp,
		const T *	data,
		size_t	n = `1`
	)

Definition at line 55 of file serialis.h.

                                                      {
  // Raw write of n objects of type T; succeeds only if all n were written.
  const size_t written = fwrite(data, sizeof(T), n, fp);
  return written == n;
}

◆ set_row_spaces()

void tesseract::set_row_spaces	(	TO_BLOCK *	block,
		FCOORD	rotation,
		bool	testing_on
	)

Definition at line 128 of file wordseg.cpp.

  {
  // For every row of `block` that is not fixed pitch (fixed_pitch == 0),
  // derive the word-spacing thresholds from the row's pr_space / pr_nonsp
  // values. With graphics enabled, the word decisions of each row are also
  // plotted when debugging is switched on.
  TO_ROW_IT row_it = block->get_rows();
  if (row_it.empty()) {
    return; // nothing to do for an empty block
  }

  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    TO_ROW *const row = row_it.data();
    if (row->fixed_pitch == 0) {
      // Distance moved in from each of pr_space and pr_nonsp to obtain the
      // definite-space and definite-nonspace limits.
      const auto margin = (row->pr_space - row->pr_nonsp) * textord_words_definite_spread;
      row->min_space = static_cast<int32_t>(ceil(row->pr_space - margin));
      row->max_nonspace = static_cast<int32_t>(floor(row->pr_nonsp + margin));
      if (testing_on && textord_show_initial_words) {
        tprintf("Assigning defaults %d non, %d space to row at %g\n", row->max_nonspace,
                row->min_space, row->intercept());
      }
      // The threshold sits midway between the two limits (integer division).
      row->space_threshold = (row->max_nonspace + row->min_space) / 2;
      row->space_size = row->pr_space;
      row->kern_size = row->pr_nonsp;
    }
#ifndef GRAPHICS_DISABLED
    if (textord_show_initial_words && testing_on) {
      plot_word_decisions(to_win, static_cast<int16_t>(row->fixed_pitch), row);
    }
#endif
  }
}

◆ SetAdaptiveThreshold()

void tesseract::SetAdaptiveThreshold ( float Threshold )

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth	(	Image	pix,
		BLOBNBOX *	blob
	)

Definition at line 68 of file tordmain.cpp.

                                                   {
  // Estimates the horizontal and vertical stroke widths of `blob` from a
  // distance transform of its region of `pix` and stores them on the blob via
  // set_horz_stroke_width() / set_vert_stroke_width(). A width of 0 is stored
  // for a direction that has too few samples.
  // Cut the blob rectangle into a Pix.
  int pix_height = pixGetHeight(pix);
  const TBOX &box = blob->bounding_box();
  int width = box.width();
  int height = box.height();
  // The y coordinate is flipped (pix_height - box.top()) when building the
  // clip box from the blob box.
  Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height);
  // NOTE(review): neither pix_blob nor dist_pix is checked before use;
  // presumably the clip box always lies inside pix - confirm.
  Image pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
  boxDestroy(&blob_pix_box);
  Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
  pix_blob.destroy();
  // Compute the stroke widths.
  uint32_t *data = pixGetData(dist_pix);
  int wpl = pixGetWpl(dist_pix);
  // Horizontal width of stroke.
  STATS h_stats(0, width);
  for (int y = 0; y < height; ++y) {
    uint32_t *pixels = data + y * wpl;
    // Three-value sliding window along the row: prev_pixel, pixel and
    // next_pixel are the values at x - 2, x - 1 and x (prev_pixel starts at 0
    // to stand for the position left of the row).
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(pixels, 0);
    for (int x = 1; x < width; ++x) {
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its vertical neighbours,
      // yet greater than its left neighbour.
      if (prev_pixel < pixel && (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          h_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && x + 1 < width && pixel > GET_DATA_BYTE(pixels, x + 1)) {
          // Double local max, so an even width.
          h_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }
  // Vertical width of stroke.
  STATS v_stats(0, height);
  for (int x = 0; x < width; ++x) {
    // Same sliding window, now down the column: `pixel` is the value in row
    // y - 1, which is why its side neighbours are read from pixels - wpl.
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(data, x);
    for (int y = 1; y < height; ++y) {
      uint32_t *pixels = data + y * wpl;
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its horizontal neighbours,
      // yet greater than its upper neighbour.
      if (prev_pixel < pixel && (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          v_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && y + 1 < height &&
                   pixel > GET_DATA_BYTE(pixels + wpl, x)) {
          // Double local max, so an even width.
          v_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }
  dist_pix.destroy();
  // Store the horizontal and vertical width in the blob, keeping both
  // widths if there is enough information, otherwise only the one with
  // the most samples.
  // If there are insufficient samples, store zero, rather than using
  // 2*area/perimeter, as the numbers that gives do not match the numbers
  // from the distance method.
  // "Enough" means at least (width + height) / 4 samples; the stored value is
  // ile(0.5f) of the collected widths.
  if (h_stats.get_total() >= (width + height) / 4) {
    blob->set_horz_stroke_width(h_stats.ile(0.5f));
    if (v_stats.get_total() >= (width + height) / 4) {
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    } else {
      blob->set_vert_stroke_width(0.0f);
    }
  } else {
    if (v_stats.get_total() >= (width + height) / 4 || v_stats.get_total() > h_stats.get_total()) {
      blob->set_horz_stroke_width(0.0f);
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    } else {
      // Neither direction has enough samples and vertical has no more than
      // horizontal: keep the horizontal value only if it has over 2 samples.
      blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f) : 0.0f);
      blob->set_vert_stroke_width(0.0f);
    }
  }
}

◆ SetPropertiesForInputFile()

TESS_UNICHARSET_TRAINING_API void tesseract::SetPropertiesForInputFile	(	const std::string &	script_dir,
		const std::string &	input_unicharset_file,
		const std::string &	output_unicharset_file,
		const std::string &	output_xheights_file
	)

Definition at line 184 of file unicharset_training_utils.cpp.

                                                                      {
  // Pipeline: load the input unicharset, fill in basic and script properties,
  // optionally write the x-heights file, then save the unicharset.
  const char *input_path = input_unicharset_file.c_str();
  const char *output_path = output_unicharset_file.c_str();

  // Step 1: read the unicharset to be annotated.
  UNICHARSET unicharset;
  unicharset.load_from_file(input_path);
  tprintf("Loaded unicharset of size %zu from file %s\n", unicharset.size(), input_path);

  // Step 2: basic properties first, then the per-script ones.
  tprintf("Setting unichar properties\n");
  SetupBasicProperties(true, false, &unicharset);
  tprintf("Setting script properties\n");
  SetScriptProperties(script_dir, &unicharset);

  // Step 3: the x-heights file is written only when a name was supplied.
  if (!output_xheights_file.empty()) {
    std::string xheights = GetXheightString(script_dir, unicharset);
    File::WriteStringToFileOrDie(xheights, output_xheights_file);
  }

  // Step 4: save the result.
  tprintf("Writing unicharset to file %s\n", output_path);
  unicharset.save_to_file(output_path);
}

◆ SetScriptProperties()

TESS_UNICHARSET_TRAINING_API void tesseract::SetScriptProperties	(	const std::string &	script_dir,
		UNICHARSET *	unicharset
	)

Definition at line 145 of file unicharset_training_utils.cpp.

                                                                              {
  // First pass: for each script id, try to load
  // "<script_dir>/<script name>.unicharset" and copy its properties over.
  for (int sid = 0; sid < unicharset->get_script_table_size(); ++sid) {
    const std::string script_file =
        script_dir + "/" + unicharset->get_script_from_script_id(sid) + ".unicharset";
    UNICHARSET script_set;
    if (script_set.load_from_file(script_file.c_str())) {
      unicharset->SetPropertiesFromOther(script_set);
      continue;
    }
    // A failed load is reported for every script id except common_sid() and
    // null_sid().
    const bool silent = sid == unicharset->common_sid() || sid == unicharset->null_sid();
    if (!silent) {
      tprintf("Failed to load script unicharset from:%s\n", script_file.c_str());
    }
  }

  // Second pass: warn about every entry from SPECIAL_UNICHAR_CODES_COUNT
  // onwards whose properties are still incomplete.
  for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset->size(); ++c) {
    if (!unicharset->PropertiesIncomplete(c)) {
      continue;
    }
    tprintf("Warning: properties incomplete for index %d = %s\n", c,
            unicharset->id_to_unichar(c));
  }
}

◆ SetupBasicProperties() [1/2]

TESS_UNICHARSET_TRAINING_API void tesseract::SetupBasicProperties	(	bool	report_errors,
		bool	decompose,
		UNICHARSET *	unicharset
	)

Definition at line 40 of file unicharset_training_utils.cpp.

                                                                                      {
  // For every entry of `unicharset`, sets the alpha/lower/upper/digit/
  // punctuation flags, script, other-case id, direction, mirror id and
  // normalized form, then calls post_load_setup() on the unicharset.
  //   report_errors: print a message when an other-case or mirror string is
  //                  not present in the unicharset.
  //   decompose:     selects kNFD instead of kNFC when normalizing.
  for (size_t unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
    // Convert any custom ligatures.
    const char *unichar_str = unicharset->id_to_unichar(unichar_id);
    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
      if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
        unichar_str = UNICHARSET::kCustomLigatures[i][0];
        break;
      }
    }
 
    // Convert the unichar to UTF32 representation
    std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);
 
    // Assume that if the property is true for any character in the string,
    // then it holds for the whole "character".
    bool unichar_isalpha = false;
    bool unichar_islower = false;
    bool unichar_isupper = false;
    bool unichar_isdigit = false;
    bool unichar_ispunct = false;
 
    for (char32 u_ch : uni_vector) {
      if (u_isalpha(u_ch)) {
        unichar_isalpha = true;
      }
      if (u_islower(u_ch)) {
        unichar_islower = true;
      }
      if (u_isupper(u_ch)) {
        unichar_isupper = true;
      }
      if (u_isdigit(u_ch)) {
        unichar_isdigit = true;
      }
      if (u_ispunct(u_ch)) {
        unichar_ispunct = true;
      }
    }
 
    unicharset->set_isalpha(unichar_id, unichar_isalpha);
    unicharset->set_islower(unichar_id, unichar_islower);
    unicharset->set_isupper(unichar_id, unichar_isupper);
    unicharset->set_isdigit(unichar_id, unichar_isdigit);
    unicharset->set_ispunctuation(unichar_id, unichar_ispunct);
 
    // The script is taken from the first code point only.
    // NOTE(review): uni_vector[0] is read without checking that uni_vector is
    // non-empty; presumably UTF8ToUTF32 never yields an empty vector for a
    // unicharset entry - confirm.
    tesseract::IcuErrorCode err;
    unicharset->set_script(unichar_id, uscript_getName(uscript_getScript(uni_vector[0], err)));
 
    const int num_code_points = uni_vector.size();
    // Obtain the lower/upper case if needed and record it in the properties.
    // Default: the entry is its own other case.
    unicharset->set_other_case(unichar_id, unichar_id);
    if (unichar_islower || unichar_isupper) {
      std::vector<char32> other_case(num_code_points, 0);
      for (int i = 0; i < num_code_points; ++i) {
        // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
        // However since they deal with UChars (so need a conversion function
        // from char32 or UTF8string) and require a meaningful locale string,
        // for now u_tolower()/u_toupper() are used.
        other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) : u_tolower(uni_vector[i]);
      }
      std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
      UNICHAR_ID other_case_id = unicharset->unichar_to_id(other_case_uch.c_str());
      if (other_case_id != INVALID_UNICHAR_ID) {
        unicharset->set_other_case(unichar_id, other_case_id);
      } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
        tprintf("Other case %s of %s is not in unicharset\n", other_case_uch.c_str(), unichar_str);
      }
    }
 
    // Set RTL property and obtain mirror unichar ID from ICU.
    std::vector<char32> mirrors(num_code_points, 0);
    for (int i = 0; i < num_code_points; ++i) {
      mirrors[i] = u_charMirror(uni_vector[i]);
      if (i == 0) { // set directionality to that of the 1st code point
        unicharset->set_direction(
            unichar_id, static_cast<UNICHARSET::Direction>(u_charDirection(uni_vector[i])));
      }
    }
    std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
    UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
    if (mirror_uch_id != INVALID_UNICHAR_ID) {
      unicharset->set_mirror(unichar_id, mirror_uch_id);
    } else if (report_errors) {
      tprintf("Mirror %s of %s is not in unicharset\n", mirror_uch.c_str(), unichar_str);
    }
 
    // Record normalized version of this unichar.
    // Entry 0 is never normalized, and the original string is also kept when
    // normalization fails or produces an empty string.
    std::string normed_str;
    if (unichar_id != 0 &&
        tesseract::NormalizeUTF8String(
            decompose ? tesseract::UnicodeNormMode::kNFD : tesseract::UnicodeNormMode::kNFC,
            tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone, unichar_str,
            &normed_str) &&
        !normed_str.empty()) {
      unicharset->set_normed(unichar_id, normed_str.c_str());
    } else {
      unicharset->set_normed(unichar_id, unichar_str);
    }
    // Sanity check: the recorded other-case id must index a valid entry.
    ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
  }
  unicharset->post_load_setup();
}

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties	(	bool	report_errors,
		UNICHARSET *	unicharset
	)

inline

Definition at line 38 of file unicharset_training_utils.h.

                                                                             {
  // Convenience overload: forwards to the three-argument version with
  // decomposition switched off.
  const bool decompose = false;
  SetupBasicProperties(report_errors, decompose, unicharset);
}

◆ SetUpForClustering()

TESS_COMMON_TRAINING_API tesseract::CLUSTERER * tesseract::SetUpForClustering	(	const FEATURE_DEFS_STRUCT &	FeatureDefs,
		LABELEDLIST	char_sample,
		const char *	program_feature_type
	)

This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.

Parameters

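char_sample	LABELEDLIST that holds all the feature information for a given character.
FeatureDefs	definitions of the feature types, in which program_feature_type is looked up.
program_feature_type	short name of the feature type whose parameters are entered into the clusterer.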

Returns: Pointer to new clusterer data structure.

Note: Globals: None

Definition at line 434 of file commontraining.cpp.

                                                                {
  // Look up the requested feature type and create a clusterer with one
  // dimension per parameter of that type.
  const int32_t desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
  const uint16_t N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
  CLUSTERER *Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);

  // Each feature slot of each feature set becomes one sample; all samples of
  // one feature set share a CharID, which increases by one per set.
  std::vector<float> Sample; // scratch buffer, sized on first use
  uint32_t CharID = 0;
  LIST FeatureList = char_sample->List;
  iterate(FeatureList) {
    auto FeatureSet = reinterpret_cast<FEATURE_SET>(FeatureList->first_node());
    for (int i = 0; i < FeatureSet->MaxNumFeatures; ++i) {
      if (Sample.empty()) {
        Sample.resize(N);
      }
      const auto &feature = FeatureSet->Features[i];
      for (int j = 0; j < N; ++j) {
        Sample[j] = feature->Params[j];
      }
      MakeSample(Clusterer, &Sample[0], CharID);
    }
    ++CharID;
  }
  return Clusterer;

} /* SetUpForClustering */

◆ SetUpForFloat2Int()

TESS_COMMON_TRAINING_API tesseract::CLASS_STRUCT * tesseract::SetUpForFloat2Int	(	const UNICHARSET &	unicharset,
		LIST	LabeledClassList
	)

Definition at line 631 of file commontraining.cpp.

                                                                                     {
  // Builds a new array of unicharset.size() CLASS_STRUCTs and fills the entry
  // for each labelled class in LabeledClassList with copies of that class's
  // prototypes and configurations. The font_set of each source class is moved
  // (not copied) into the new entry. Returns the new[]-allocated array.
  MERGE_CLASS MergeClass;
  CLASS_TYPE Class;
  int NumProtos;
  int NumConfigs;
  int NumWords;
  int i, j;
  float Values[3];
  PROTO_STRUCT *NewProto;
  PROTO_STRUCT *OldProto;
  BIT_VECTOR NewConfig;
  BIT_VECTOR OldConfig;
 
  //  printf("Float2Int ...\n");
 
  auto *float_classes = new CLASS_STRUCT[unicharset.size()];
  iterate(LabeledClassList) {
    UnicityTable<int> font_set;
    MergeClass = reinterpret_cast<MERGE_CLASS>(LabeledClassList->first_node());
    // NOTE(review): the id returned by unichar_to_id() is used as an array
    // index without a check; presumably every label is present in unicharset -
    // confirm.
    Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label.c_str())];
    NumProtos = MergeClass->Class->NumProtos;
    NumConfigs = MergeClass->Class->NumConfigs;
    // Take the font set out of the source class; it is handed to the new
    // class further down.
    font_set.move(&MergeClass->Class->font_set);
    Class->NumProtos = NumProtos;
    Class->MaxNumProtos = NumProtos;
    Class->Prototypes.resize(NumProtos);
    for (i = 0; i < NumProtos; i++) {
      NewProto = ProtoIn(Class, i);
      OldProto = ProtoIn(MergeClass->Class, i);
      // X, Y, Angle and Length are copied unchanged; A, B and C receive the
      // result of Normalize() applied to (X, Y, Angle).
      Values[0] = OldProto->X;
      Values[1] = OldProto->Y;
      Values[2] = OldProto->Angle;
      Normalize(Values);
      NewProto->X = OldProto->X;
      NewProto->Y = OldProto->Y;
      NewProto->Length = OldProto->Length;
      NewProto->Angle = OldProto->Angle;
      NewProto->A = Values[0];
      NewProto->B = Values[1];
      NewProto->C = Values[2];
    }
 
    Class->NumConfigs = NumConfigs;
    Class->MaxNumConfigs = NumConfigs;
    Class->font_set.move(&font_set);
    Class->Configurations.resize(NumConfigs);
    // Number of words that make up a bit vector of NumProtos bits.
    NumWords = WordsInVectorOfSize(NumProtos);
    for (i = 0; i < NumConfigs; i++) {
      // Each configuration gets a fresh bit vector, copied word by word.
      NewConfig = NewBitVector(NumProtos);
      OldConfig = MergeClass->Class->Configurations[i];
      for (j = 0; j < NumWords; j++) {
        NewConfig[j] = OldConfig[j];
      }
      Class->Configurations[i] = NewConfig;
    }
  }
  return float_classes;
} // SetUpForFloat2Int

◆ ShortNameToFeatureType()

TESS_API uint32_t tesseract::ShortNameToFeatureType	(	const FEATURE_DEFS_STRUCT &	FeatureDefs,
		const char *	ShortName
	)

Search through all features currently defined and return the feature type for the feature with the specified short name. Trap an error if the specified name is not found.

Globals:

none

Parameters

FeatureDefs	definitions of feature types/extractors
ShortName	short name of a feature type

Returns: Feature type which corresponds to ShortName.

Definition at line 203 of file featdefs.cpp.

                                                                                               {
  // Linear scan over the defined feature types for an exact name match.
  int type = 0;
  while (type < FeatureDefs.NumFeatureTypes) {
    if (strcmp(FeatureDefs.FeatureDesc[type]->ShortName, ShortName) == 0) {
      return static_cast<uint32_t>(type);
    }
    ++type;
  }
  // No feature type carries this short name: trap the error.
  ASSERT_HOST(!"Illegal short name for a feature");
  return 0;
}

◆ ShowMatchDisplay()

void tesseract::ShowMatchDisplay ( )

◆ SoftmaxInPlace()

template<typename T >

void tesseract::SoftmaxInPlace	(	int	n,
		T *	inout
	)

inline

Definition at line 181 of file functions.h.

                                            {
  // Replaces inout[0..n) with its softmax. Does nothing when n <= 0.
  if (n <= 0) {
    return;
  }
  // A limit on the negative range input to exp to guarantee non-zero output.
  const T kMaxSoftmaxActivation = 86;

  // Find the largest input; it is subtracted from every element so that all
  // arguments to exp are <= 0.
  T peak = inout[0];
  for (int i = 1; i < n; ++i) {
    if (inout[i] > peak) {
      peak = inout[i];
    }
  }

  // Exponentiate the clipped, shifted inputs in place and accumulate their sum.
  T sum = 0;
  for (int i = 0; i < n; ++i) {
    T value = inout[i] - peak;
    value = std::exp(ClipToRange(value, -kMaxSoftmaxActivation, static_cast<T>(0)));
    sum += value;
    inout[i] = value;
  }

  // Normalize, unless the sum is not positive.
  if (sum > 0) {
    for (int i = 0; i < n; ++i) {
      inout[i] /= sum;
    }
  }
}

◆ sort_cmp()

template<typename T >

int tesseract::sort_cmp	(	const void *	t1,
		const void *	t2
	)

Definition at line 269 of file genericvector.h.

                                             {
  // Three-way comparison for qsort-style callers: t1 and t2 point at T
  // objects, and only T's operator< is used.
  const T &lhs = *static_cast<const T *>(t1);
  const T &rhs = *static_cast<const T *>(t2);
  if (lhs < rhs) {
    return -1;
  }
  return (rhs < lhs) ? 1 : 0;
}

◆ sort_ptr_cmp()

template<typename T >

int tesseract::sort_ptr_cmp	(	const void *	t1,
		const void *	t2
	)

Definition at line 286 of file genericvector.h.

                                                 {
  // Three-way comparison for qsort-style callers: t1 and t2 point at T*
  // elements, so one extra dereference reaches the objects. Only T's
  // operator< is used.
  const T &lhs = **static_cast<T *const *>(t1);
  const T &rhs = **static_cast<T *const *>(t2);
  if (lhs < rhs) {
    return -1;
  }
  return (rhs < lhs) ? 1 : 0;
}

◆ SortByBoxBottom()

template<class BBC >

int tesseract::SortByBoxBottom	(	const void *	void1,
		const void *	void2
	)

Definition at line 449 of file bbgrid.h.

                                                          {
  // Each argument points at a stored BBC*; dereference once to get the element.
  const BBC *lhs = *static_cast<const BBC *const *>(void1);
  const BBC *rhs = *static_cast<const BBC *const *>(void2);
  // Compare bottom, then top, then left, then right: the first edge that
  // differs decides the result.
  if (const int by_bottom = lhs->bounding_box().bottom() - rhs->bounding_box().bottom()) {
    return by_bottom;
  }
  if (const int by_top = lhs->bounding_box().top() - rhs->bounding_box().top()) {
    return by_top;
  }
  if (const int by_left = lhs->bounding_box().left() - rhs->bounding_box().left()) {
    return by_left;
  }
  return lhs->bounding_box().right() - rhs->bounding_box().right();
}

◆ SortByBoxLeft()

template<class BBC >

int tesseract::SortByBoxLeft	(	const void *	void1,
		const void *	void2
	)

Definition at line 367 of file bbgrid.h.

                                                        {
  // Each argument points at a stored BBC*; dereference once to get the element.
  const BBC *lhs = *static_cast<const BBC *const *>(void1);
  const BBC *rhs = *static_cast<const BBC *const *>(void2);
  // Compare left, then right, then bottom, then top: the first edge that
  // differs decides the result.
  if (const int by_left = lhs->bounding_box().left() - rhs->bounding_box().left()) {
    return by_left;
  }
  if (const int by_right = lhs->bounding_box().right() - rhs->bounding_box().right()) {
    return by_right;
  }
  if (const int by_bottom = lhs->bounding_box().bottom() - rhs->bounding_box().bottom()) {
    return by_bottom;
  }
  return lhs->bounding_box().top() - rhs->bounding_box().top();
}

◆ SortByRating()

template<class BLOB_CHOICE >

int tesseract::SortByRating	(	const void *	void1,
		const void *	void2
	)

Definition at line 79 of file pieces.cpp.

                                                       {
  // Each argument points at a stored BLOB_CHOICE*; dereference once.
  const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE *const *>(void1);
  const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE *const *>(void2);
 
  // Descending by rating: the element with the smaller rating sorts later.
  if (p1->rating() < p2->rating()) {
    return 1;
  }
  if (p2->rating() < p1->rating()) {
    return -1;
  }
  // Equal ratings must compare equal; otherwise cmp(a, b) and cmp(b, a) would
  // both claim "less", which is not a consistent ordering for a sort.
  return 0;
}

◆ SortByUnicharID()

template<class BLOB_CHOICE >

int tesseract::SortByUnicharID	(	const void *	void1,
		const void *	void2
	)

Definition at line 71 of file pieces.cpp.

                                                          {
  // Each argument points at a stored BLOB_CHOICE*; dereference once.
  const BLOB_CHOICE *lhs = *static_cast<const BLOB_CHOICE *const *>(void1);
  const BLOB_CHOICE *rhs = *static_cast<const BLOB_CHOICE *const *>(void2);
 
  // Ascending by unichar id: the sign of the difference is the ordering.
  const auto lhs_id = lhs->unichar_id();
  const auto rhs_id = rhs->unichar_id();
  return lhs_id - rhs_id;
}

◆ SortRightToLeft()

template<class BBC >

int tesseract::SortRightToLeft	(	const void *	void1,
		const void *	void2
	)

Definition at line 408 of file bbgrid.h.

                                                          {
  // Each argument points at a stored BBC*; dereference once to get the element.
  const BBC *lhs = *static_cast<const BBC *const *>(void1);
  const BBC *rhs = *static_cast<const BBC *const *>(void2);
  // Right and left edges are compared with the operands swapped (larger x
  // first); bottom and top then break ties in ascending order.
  if (const int by_right = rhs->bounding_box().right() - lhs->bounding_box().right()) {
    return by_right;
  }
  if (const int by_left = rhs->bounding_box().left() - lhs->bounding_box().left()) {
    return by_left;
  }
  if (const int by_bottom = lhs->bounding_box().bottom() - rhs->bounding_box().bottom()) {
    return by_bottom;
  }
  return lhs->bounding_box().top() - rhs->bounding_box().top();
}

◆ SpanUTF8NotWhitespace()

TESS_UNICHARSET_TRAINING_API unsigned int tesseract::SpanUTF8NotWhitespace ( const char * text )

Definition at line 249 of file normstrngs.cpp.

                                                     {
  // Returns the number of bytes at the start of text that precede the first
  // whitespace character (or the whole length if there is none).
  // The length and the end iterator are computed once, instead of calling
  // strlen on every evaluation of the loop condition.
  const auto text_len = strlen(text);
  const UNICHAR::const_iterator text_end = UNICHAR::end(text, text_len);
  int n_notwhite = 0;
  for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_len); it != text_end; ++it) {
    if (IsWhitespace(*it)) {
      break;
    }
    // Count bytes, not characters.
    n_notwhite += it.utf8_len();
  }
  return n_notwhite;
}

◆ SpanUTF8Whitespace()

TESS_UNICHARSET_TRAINING_API unsigned int tesseract::SpanUTF8Whitespace ( const char * text )

Definition at line 237 of file normstrngs.cpp.

                                                  {
  // Returns the number of bytes occupied by the run of whitespace characters
  // at the start of text.
  // The length and the end iterator are computed once, instead of calling
  // strlen on every evaluation of the loop condition.
  const auto text_len = strlen(text);
  const UNICHAR::const_iterator text_end = UNICHAR::end(text, text_len);
  int n_white = 0;
  for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_len); it != text_end; ++it) {
    if (!IsWhitespace(*it)) {
      break;
    }
    // Count bytes, not characters.
    n_white += it.utf8_len();
  }
  return n_white;
}

◆ split()

const std::vector< std::string > tesseract::split	(	const std::string &	s,
		char	c
	)

inline

Definition at line 43 of file helpers.h.

                                                                    {
  // Splits s at every occurrence of c. Empty pieces (from leading, trailing or
  // repeated separators) are dropped.
  std::vector<std::string> pieces;
  std::string current;
  for (const char ch : s) {
    if (ch == c) {
      // Separator: flush the piece collected so far, if any.
      if (!current.empty()) {
        pieces.push_back(current);
        current.clear();
      }
      continue;
    }
    current += ch;
  }
  // Flush the final piece when the string does not end with a separator.
  if (!current.empty()) {
    pieces.push_back(current);
  }
  return pieces;
}

◆ split_stepped_spline()

bool tesseract::split_stepped_spline	(	QSPLINE *	baseline,
		float	jumplimit,
		int *	xcoords,
		int *	xstarts,
		int &	segments
	)

Definition at line 1139 of file oldbasel.cpp.

  {
  // Examines every interior segment of the spline. Where the step reported by
  // baseline->step() across a segment exceeds jumplimit (in magnitude), and
  // enough xcoords entries lie in the surrounding segments, two new split
  // positions are chosen and handed to insert_spline_point().
  // Returns true if at least one such insertion was made.
  bool doneany; // return value
  int segment;  /*partition no */
  int startindex, centreindex, endindex;
  float leftcoord, rightcoord;
  int leftindex, rightindex;
  float step; // spline step
 
  doneany = false;
  // startindex is never reset, so the xcoords scan resumes where the previous
  // segment left off.
  startindex = 0;
  for (segment = 1; segment < segments - 1; segment++) {
    // Step measured between the midpoints of the two neighbouring segments.
    step = baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
                          (xstarts[segment] + xstarts[segment + 1]) / 2.0);
    if (step < 0) {
      step = -step;
    }
    if (step > jumplimit) {
      // Locate the xcoords indices at the start of the previous segment, the
      // start of this segment, and the start of the next segment.
      while (xcoords[startindex] < xstarts[segment - 1]) {
        startindex++;
      }
      centreindex = startindex;
      while (xcoords[centreindex] < xstarts[segment]) {
        centreindex++;
      }
      endindex = centreindex;
      while (xcoords[endindex] < xstarts[segment + 1]) {
        endindex++;
      }
      if (segments >= SPLINESIZE) {
        if (textord_debug_baselines) {
          tprintf("Too many segments to resegment spline!!\n");
        }
      } else if (endindex - startindex >= textord_spline_medianwin * 3) {
        // Shift the centre so that at least 1.5 median windows of points lie
        // on each side of it.
        while (centreindex - startindex < textord_spline_medianwin * 3 / 2) {
          centreindex++;
        }
        while (endindex - centreindex < textord_spline_medianwin * 3 / 2) {
          centreindex--;
        }
        // Initial guesses: one third of the way in from each end, both by
        // index and by x coordinate.
        leftindex = (startindex + startindex + centreindex) / 3;
        rightindex = (centreindex + endindex + endindex) / 3;
        leftcoord = (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
        rightcoord = (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
        // Move leftindex towards leftcoord while keeping the required number
        // of points between it and startindex / centreindex.
        while (xcoords[leftindex] > leftcoord &&
               leftindex - startindex > textord_spline_medianwin) {
          leftindex--;
        }
        while (xcoords[leftindex] < leftcoord &&
               centreindex - leftindex > textord_spline_medianwin / 2) {
          leftindex++;
        }
        // Prefer the previous point if it is closer to leftcoord.
        if (xcoords[leftindex] - leftcoord > leftcoord - xcoords[leftindex - 1]) {
          leftindex--;
        }
        // Same adjustment for rightindex against rightcoord.
        while (xcoords[rightindex] > rightcoord &&
               rightindex - centreindex > textord_spline_medianwin / 2) {
          rightindex--;
        }
        while (xcoords[rightindex] < rightcoord &&
               endindex - rightindex > textord_spline_medianwin) {
          rightindex++;
        }
        if (xcoords[rightindex] - rightcoord > rightcoord - xcoords[rightindex - 1]) {
          rightindex--;
        }
        if (textord_debug_baselines) {
          tprintf("Splitting spline at %d with step %g at (%d,%d)\n", xstarts[segment],
                  baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
                                 (xstarts[segment] + xstarts[segment + 1]) / 2.0),
                  (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
                  (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
        }
        // The new split positions are the midpoints between each chosen point
        // and its predecessor; segments is passed by reference to the callee.
        insert_spline_point(xstarts, segment, (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
                            (xcoords[rightindex - 1] + xcoords[rightindex]) / 2, segments);
        doneany = true;
      } else if (textord_debug_baselines) {
        tprintf("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", startindex,
                centreindex, endindex, (int32_t)textord_spline_medianwin);
      }
    }
    //              else tprintf("Spline step at %d is %g\n",
    //                      xstarts[segment],
    //                      baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
    //                      (xstarts[segment]+xstarts[segment+1])/2.0));
  }
  return doneany;
}

◆ split_to_blob()

void tesseract::split_to_blob	(	BLOBNBOX *	blob,
		int16_t	chop_coord,
		float	pitch_error,
		C_OUTLINE_LIST *	left_coutlines,
		C_OUTLINE_LIST *	right_coutlines
	)

Definition at line 260 of file fpchop.cpp.

                                     {
  // Take the C_BLOB out of the BLOBNBOX via remove_cblob(), if a blob was given.
  C_BLOB *detached = (blob != nullptr) ? blob->remove_cblob() : nullptr;

  // Chop when right_coutlines is non-empty or there is a cblob to cut.
  const bool has_work = !right_coutlines->empty() || detached != nullptr;
  if (has_work) {
    fixed_chop_cblob(detached, chop_coord, pitch_error, left_coutlines, right_coutlines);
  }

  // The BLOBNBOX itself is always deleted here (deleting nullptr is a no-op).
  delete blob;
}

◆ StandardDeviation()

float tesseract::StandardDeviation	(	PROTOTYPE *	Proto,
		uint16_t	Dimension
	)

This routine returns the standard deviation of the prototype in the indicated dimension.

Parameters

Proto	prototype to return standard deviation of
Dimension	dimension whose stddev is to be returned

Returns: Standard deviation of Prototype in Dimension

Definition at line 1673 of file cluster.cpp.

                                                              {
  // Spherical: a single variance is used regardless of Dimension.
  if (Proto->Style == spherical) {
    return std::sqrt(Proto->Variance.Spherical);
  }
  // Elliptical: one variance per dimension.
  if (Proto->Style == elliptical) {
    return std::sqrt(Proto->Variance.Elliptical[Dimension]);
  }
  // Mixed: the handling depends on the distribution used in this dimension.
  if (Proto->Style == mixed) {
    const auto distribution = Proto->Distrib[Dimension];
    if (distribution == normal) {
      return std::sqrt(Proto->Variance.Elliptical[Dimension]);
    }
    if (distribution == uniform || distribution == D_random) {
      // The stored value is returned as is, with no square root taken.
      return Proto->Variance.Elliptical[Dimension];
    }
    if (distribution == DISTRIBUTION_COUNT) {
      ASSERT_HOST(!"Distribution count not allowed!");
    }
  }
  // Any other style or distribution yields zero.
  return 0.0f;
} // StandardDeviation

◆ start_seam_list()

void tesseract::start_seam_list	(	TWERD *	word,
		std::vector< SEAM * > *	seam_array
	)

Definition at line 262 of file seam.cpp.

                                                                 {
  // Start from an empty list, then add one SEAM per pair of adjacent blobs.
  seam_array->clear();
  TPOINT midpoint;

  for (unsigned next = 1; next < word->NumBlobs(); ++next) {
    const unsigned prev = next - 1;
    TBOX left_box = word->blobs[prev]->bounding_box();
    TBOX right_box = word->blobs[next]->bounding_box();
    // x: halfway between the first box's right edge and the second's left edge.
    midpoint.x = (left_box.right() + right_box.left()) / 2;
    // y: mean of the four bottom/top edges of the two boxes.
    midpoint.y = (left_box.bottom() + left_box.top() + right_box.bottom() + right_box.top()) / 4;
    seam_array->push_back(new SEAM(0.0f, midpoint));
  }
}

◆ StartParamDesc() [1/4]

MicroFeatureParams tesseract::StartParamDesc ( CharNormParams )

◆ StartParamDesc() [2/4]

MicroFeatureParams CharNormParams tesseract::StartParamDesc ( IntFeatParams )

◆ StartParamDesc() [3/4]

tesseract::StartParamDesc ( MicroFeatureParams )

◆ StartParamDesc() [4/4]

tesseract::StartParamDesc ( PicoFeatParams )

◆ StdSortByBoxLeft()

template<class BBC >

bool tesseract::StdSortByBoxLeft	(	const void *	void1,
		const void *	void2
	)

Definition at line 387 of file bbgrid.h.

                                                            {
  // Each argument points at a stored BBC*; dereference once to get the element.
  const BBC *lhs = *static_cast<const BBC *const *>(void1);
  const BBC *rhs = *static_cast<const BBC *const *>(void2);
  // "Less than" predicate on left, then right, then bottom, then top: the
  // first edge that differs decides.
  if (const int by_left = lhs->bounding_box().left() - rhs->bounding_box().left()) {
    return by_left < 0;
  }
  if (const int by_right = lhs->bounding_box().right() - rhs->bounding_box().right()) {
    return by_right < 0;
  }
  if (const int by_bottom = lhs->bounding_box().bottom() - rhs->bounding_box().bottom()) {
    return by_bottom < 0;
  }
  return lhs->bounding_box().top() < rhs->bounding_box().top();
}

◆ StdSortRightToLeft()

template<class BBC >

bool tesseract::StdSortRightToLeft	(	const void *	void1,
		const void *	void2
	)

Definition at line 428 of file bbgrid.h.

                                                              {
  // Each argument points at a stored BBC*; dereference once to get the element.
  const BBC *lhs = *static_cast<const BBC *const *>(void1);
  const BBC *rhs = *static_cast<const BBC *const *>(void2);
  // "Less than" predicate: right and left edges are compared with the operands
  // swapped (larger x first); bottom and top then break ties in ascending order.
  if (const int by_right = rhs->bounding_box().right() - lhs->bounding_box().right()) {
    return by_right < 0;
  }
  if (const int by_left = rhs->bounding_box().left() - lhs->bounding_box().left()) {
    return by_left < 0;
  }
  if (const int by_bottom = lhs->bounding_box().bottom() - rhs->bounding_box().bottom()) {
    return by_bottom < 0;
  }
  return lhs->bounding_box().top() < rhs->bounding_box().top();
}

◆ STILL_LINKED()

constexpr ERRCODE tesseract::STILL_LINKED	(	"Attempting to add an element with non nullptr links, to a list"
	)

constexpr

◆ STRING_PARAM_FLAG() [1/9]

tesseract::STRING_PARAM_FLAG	(	D	,
		""	,
		"Directory to write output files to"
	)

◆ STRING_PARAM_FLAG() [2/9]

tesseract::STRING_PARAM_FLAG	(	F	,
		"font_properties"	,
		"File listing font properties"
	)

◆ STRING_PARAM_FLAG() [3/9]

tesseract::STRING_PARAM_FLAG	(	fontconfig_tmpdir	,
		""	,
		""
	)

◆ STRING_PARAM_FLAG() [4/9]

tesseract::STRING_PARAM_FLAG	(	fonts_dir	,
		""	,
		""
	)

◆ STRING_PARAM_FLAG() [5/9]

tesseract::STRING_PARAM_FLAG	(	O	,
		""	,
		"File to write unicharset to"
	)

◆ STRING_PARAM_FLAG() [6/9]

tesseract::STRING_PARAM_FLAG	(	output_trainer	,
		""	,
		"File to write trainer to"
	)

◆ STRING_PARAM_FLAG() [7/9]

tesseract::STRING_PARAM_FLAG	(	test_ch	,
		""	,
		"UTF8 test character string"
	)

◆ STRING_PARAM_FLAG() [8/9]

tesseract::STRING_PARAM_FLAG	(	U	,
		"unicharset"	,
		"File to load unicharset from"
	)

◆ STRING_PARAM_FLAG() [9/9]

tesseract::STRING_PARAM_FLAG	(	X	,
		""	,
		"File listing font xheights"
	)

◆ STRING_VAR_H() [1/2]

tesseract::STRING_VAR_H ( editor_image_win_name )

◆ STRING_VAR_H() [2/2]

tesseract::STRING_VAR_H ( editor_word_name )

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel * model )

inline

Definition at line 69 of file paragraphs_internal.h.

                                                     {
  // A model is "strong" unless it is null or one of the two crown sentinels.
  const bool is_placeholder = model == nullptr || model == kCrownLeft || model == kCrownRight;
  return !is_placeholder;
}

◆ SumVectors()

void tesseract::SumVectors	(	int	n,
		const TFloat *	v1,
		const TFloat *	v2,
		const TFloat *	v3,
		const TFloat *	v4,
		const TFloat *	v5,
		TFloat *	sum
	)

inline

Definition at line 236 of file functions.h.

                                                                        {
  // Element-wise sum of the five input vectors, written front to back.
  int idx = 0;
  while (idx < n) {
    sum[idx] = v1[idx] + v2[idx] + v3[idx] + v4[idx] + v5[idx];
    ++idx;
  }
}

◆ Tanh()

TFloat tesseract::Tanh ( TFloat x )

inline

Definition at line 44 of file functions.h.

                             {
  // Table-driven tanh with linear interpolation between table entries.
  // Negative inputs are folded onto the positive half: tanh(-x) = -tanh(x).
  if (x < 0) {
    return -Tanh(-x);
  }
  x *= kScaleFactor;
  // Saturate in floating point BEFORE converting to an integer index.
  // Converting a value that does not fit in unsigned (very large x, or NaN) is
  // undefined behaviour and could otherwise produce an index inside the table.
  // The negated comparison also sends NaN down the saturating path.
  if (!(x < kTableSize - 1)) {
    return 1;
  }
  const auto index = static_cast<unsigned>(x);
  const TFloat tanh_i0 = TanhTable[index];
  const TFloat tanh_i1 = TanhTable[index + 1];
  // Linear interpolation.
  return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
}

◆ TEST() [1/88]

tesseract::TEST	(	CleanNamespaceTess	,
		DummyTest
	)

Definition at line 26 of file cleanapi_test.cc.

                                    {
  // Only constructs (and then destroys) a TessBaseAPI; there are no assertions.
  tesseract::TessBaseAPI api;
}

◆ TEST() [2/88]

tesseract::TEST	(	ConvertBasicLatinToFullwidthLatinTest	,
		DoesConvertBasicLatin
	)

Definition at line 451 of file stringrenderer_test.cc.

                                                                   {
  // Basic Latin letters, digits and punctuation are each expected to come
  // back as their fullwidth counterparts.
  const std::string kHalfAlpha = "ABCD";
  const std::string kFullAlpha = "ＡＢＣＤ";
  EXPECT_EQ(kFullAlpha, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfAlpha));
 
  const std::string kHalfDigit = "0123";
  const std::string kFullDigit = "０１２３";
  EXPECT_EQ(kFullDigit, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfDigit));
 
  const std::string kHalfSym = "()[]:;!?";
  const std::string kFullSym = "（）［］：；！？";
  EXPECT_EQ(kFullSym, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSym));
}

◆ TEST() [3/88]

tesseract::TEST	(	ConvertBasicLatinToFullwidthLatinTest	,
		DoesNotConvertFullwidthLatin
	)

Definition at line 465 of file stringrenderer_test.cc.

                                                                          {
  // Input that is already fullwidth is expected to come back unchanged.
  const std::string kFullAlpha = "ＡＢＣＤ";
  EXPECT_EQ(kFullAlpha, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullAlpha));
 
  const std::string kFullDigit = "０１２３";
  EXPECT_EQ(kFullDigit, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullDigit));
 
  const std::string kFullSym = "（）［］：；！？";
  EXPECT_EQ(kFullSym, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSym));
}

◆ TEST() [4/88]

tesseract::TEST	(	ConvertBasicLatinToFullwidthLatinTest	,
		DoesNotConvertNonLatin
	)

Definition at line 476 of file stringrenderer_test.cc.

                                                                    {
  // Halfwidth and fullwidth kana are both expected to come back unchanged.
  const std::string kHalfKana = "ｱｲｳｴｵ";
  const std::string kFullKana = "アイウエオ";
  EXPECT_EQ(kHalfKana, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfKana));
  EXPECT_EQ(kFullKana, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullKana));
}

◆ TEST() [5/88]

tesseract::TEST	(	ConvertBasicLatinToFullwidthLatinTest	,
		DoesNotConvertSpace
	)

Definition at line 483 of file stringrenderer_test.cc.

                                                                 {
  // Neither the ASCII space nor the fullwidth space is expected to change.
  const std::string kHalfSpace = " ";
  const std::string kFullSpace = "　";
  EXPECT_EQ(kHalfSpace, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSpace));
  EXPECT_EQ(kFullSpace, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSpace));
}

◆ TEST() [6/88]

tesseract::TEST	(	ConvertFullwidthLatinToBasicLatinTest	,
		DoesConvertFullwidthLatin
	)

Definition at line 492 of file stringrenderer_test.cc.

                                                                       {
  // Fullwidth letters, digits and punctuation are each expected to come back
  // as their Basic Latin counterparts.
  const std::string kHalfAlpha = "ABCD";
  const std::string kFullAlpha = "ＡＢＣＤ";
  EXPECT_EQ(kHalfAlpha, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullAlpha));
 
  const std::string kHalfDigit = "0123";
  const std::string kFullDigit = "０１２３";
  EXPECT_EQ(kHalfDigit, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullDigit));
 
  const std::string kHalfSym = "()[]:;!?";
  const std::string kFullSym = "（）［］：；！？";
  EXPECT_EQ(kHalfSym, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSym));
}

◆ TEST() [7/88]

tesseract::TEST	(	ConvertFullwidthLatinToBasicLatinTest	,
		DoesNotConvertBasicLatin
	)

Definition at line 506 of file stringrenderer_test.cc.

                                                                      {
  // Input that is already Basic Latin is expected to come back unchanged.
  const std::string kHalfAlpha = "ABCD";
  EXPECT_EQ(kHalfAlpha, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfAlpha));
 
  const std::string kHalfDigit = "0123";
  EXPECT_EQ(kHalfDigit, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfDigit));
 
  const std::string kHalfSym = "()[]:;!?";
  EXPECT_EQ(kHalfSym, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSym));
}

◆ TEST() [8/88]

tesseract::TEST	(	ConvertFullwidthLatinToBasicLatinTest	,
		DoesNotConvertNonLatin
	)

Definition at line 517 of file stringrenderer_test.cc.

                                                                    {
  // Halfwidth and fullwidth kana are both expected to come back unchanged.
  const std::string kHalfKana = "ｱｲｳｴｵ";
  const std::string kFullKana = "アイウエオ";
  EXPECT_EQ(kHalfKana, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfKana));
  EXPECT_EQ(kFullKana, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullKana));
}

◆ TEST() [9/88]

tesseract::TEST	(	ConvertFullwidthLatinToBasicLatinTest	,
		DoesNotConvertSpace
	)

Definition at line 524 of file stringrenderer_test.cc.

                                                                 {
  // Neither the ASCII space nor the fullwidth space is expected to change.
  const std::string kHalfSpace = " ";
  const std::string kFullSpace = "　";
  EXPECT_EQ(kHalfSpace, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSpace));
  EXPECT_EQ(kFullSpace, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSpace));
}

◆ TEST() [10/88]

tesseract::TEST	(	FileTest	,
		JoinPath
	)

Definition at line 20 of file fileio_test.cc.

                         {
  // A separator is added when the prefix lacks one ...
  EXPECT_EQ("/abc/def", File::JoinPath("/abc", "def"));
  // ... is not doubled when the prefix already ends with one ...
  EXPECT_EQ("/abc/def", File::JoinPath("/abc/", "def"));
  // ... and is not added at all for an empty prefix.
  EXPECT_EQ("def", File::JoinPath("", "def"));
}

◆ TEST() [11/88]

tesseract::TEST	(	InputBufferTest	,
		Read
	)

Definition at line 45 of file fileio_test.cc.

                            {
  // Writes a two-line text to a temporary file, reads it back through
  // InputBuffer, and checks the content line by line.
  const int kMaxBufSize = 128;
  char buffer[kMaxBufSize];
  auto s = "Hello\n world!";
  // s is much shorter than the buffer, so strncpy copies it whole and pads
  // the remainder with NULs.
  strncpy(buffer, s, kMaxBufSize);
  EXPECT_STREQ(s, buffer);
  FILE *fp = tmpfile();
  CHECK(fp != nullptr);
  // Only the strlen(s) text bytes are written; the terminating NUL is not.
  fwrite(buffer, strlen(s), 1, fp);
  rewind(fp);
 
  std::string str;
  auto input = std::make_unique<InputBuffer>(fp);
  EXPECT_TRUE(input->Read(&str));
  std::vector<std::string> lines = split(str, '\n');
  EXPECT_EQ(2, lines.size());
  EXPECT_EQ("Hello", lines[0]);
  EXPECT_EQ(" world!", lines[1]);
}

◆ TEST() [12/88]

tesseract::TEST	(	LangModelTest	,
		AddACharacter
	)

Definition at line 32 of file lang_model_test.cc.

                                   {
  // Builds a traineddata file from the English unicharset, adds the Rupee
  // sign to the unicharset, builds a second traineddata file, and checks that
  // the two encodings of kTestString agree apart from the renumbered null
  // label, and that only the second trainer can encode the Rupee sign.
  constexpr char kTestString[] = "Simple ASCII string to encode !@#$%&";
  constexpr char kTestStringRupees[] = "ASCII string with Rupee symbol ₹";
  // Setup the arguments.
  std::string script_dir = LANGDATA_DIR;
  std::string eng_dir = file::JoinPath(script_dir, "eng");
  std::string unicharset_path = TestDataNameToPath("eng_beam.unicharset");
  UNICHARSET unicharset;
  EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
  std::string version_str = "TestVersion";
  file::MakeTmpdir();
  std::string output_dir = FLAGS_test_tmpdir;
  LOG(INFO) << "Output dir=" << output_dir << "\n";
  std::string lang1 = "eng";
  bool pass_through_recoder = false;
  // If these reads fail, we get a warning message and an empty list of words.
  std::vector<std::string> words = split(ReadFile(file::JoinPath(eng_dir, "eng.wordlist")), '\n');
  EXPECT_GT(words.size(), 0);
  std::vector<std::string> puncs = split(ReadFile(file::JoinPath(eng_dir, "eng.punc")), '\n');
  EXPECT_GT(puncs.size(), 0);
  std::vector<std::string> numbers = split(ReadFile(file::JoinPath(eng_dir, "eng.numbers")), '\n');
  EXPECT_GT(numbers.size(), 0);
  bool lang_is_rtl = false;
  // Generate the traineddata file.
  EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang1,
                                pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
                                nullptr));
  // Init a trainer with it, and encode kTestString.
  std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
  LSTMTrainer trainer1;
  trainer1.InitCharSet(traineddata1);
  std::vector<int> labels1;
  EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
  std::string test1_decoded = trainer1.DecodeLabels(labels1);
  std::string test1_str(&test1_decoded[0], test1_decoded.length());
  LOG(INFO) << "Labels1=" << test1_str << "\n";
 
  // Add a new character to the unicharset and try again.
  int size_before = unicharset.size();
  unicharset.unichar_insert("₹");
  SetupBasicProperties(/*report_errors*/ true, /*decompose (NFD)*/ false, &unicharset);
  EXPECT_EQ(size_before + 1, unicharset.size());
  // Generate the traineddata file.
  std::string lang2 = "extended";
  EXPECT_EQ(EXIT_SUCCESS, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang2,
                                           pass_through_recoder, words, puncs, numbers, lang_is_rtl,
                                           nullptr, nullptr));
  // Init a trainer with it, and encode kTestString.
  std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
  LSTMTrainer trainer2;
  trainer2.InitCharSet(traineddata2);
  std::vector<int> labels2;
  EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
  std::string test2_decoded = trainer2.DecodeLabels(labels2);
  std::string test2_str(&test2_decoded[0], test2_decoded.length());
  LOG(INFO) << "Labels2=" << test2_str << "\n";
  // encode kTestStringRupees.
  std::vector<int> labels3;
  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
  std::string test3_decoded = trainer2.DecodeLabels(labels3);
  std::string test3_str(&test3_decoded[0], test3_decoded.length());
  LOG(INFO) << "labels3=" << test3_str << "\n";
  // Copy labels1 to a std::vector, renumbering the null char to match trainer2.
  // Since Tensor Flow's CTC implementation insists on having the null be the
  // last label, and we want to be compatible, null has to be renumbered when
  // we add a class.
  int null1 = trainer1.null_char();
  int null2 = trainer2.null_char();
  EXPECT_EQ(null1 + 1, null2);
  std::vector<int> labels1_v(labels1.size());
  for (unsigned i = 0; i < labels1.size(); ++i) {
    if (labels1[i] == null1) {
      labels1_v[i] = null2;
    } else {
      labels1_v[i] = labels1[i];
    }
  }
  EXPECT_THAT(labels1_v, testing::ElementsAreArray(&labels2[0], labels2.size()));
  // To make sure we are not cheating somehow, we can now encode the Rupee
  // symbol, which we could not do before.
  EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));
}

◆ TEST() [13/88]

tesseract::TEST	(	LangModelTest	,
		AddACharacterHindi
	)

Definition at line 117 of file lang_model_test.cc.

                                        {
  // Same scenario as the English variant, using the Hindi unicharset and
  // Hindi test strings: build a traineddata file, add the Rupee sign, build a
  // second one, and compare the encodings produced by the two trainers.
  constexpr char kTestString[] = "हिन्दी में एक लाइन लिखें";
  constexpr char kTestStringRupees[] = "हिंदी में रूपये का चिन्ह प्रयोग करें ₹१००.००";
  // Setup the arguments.
  std::string script_dir = LANGDATA_DIR;
  std::string hin_dir = file::JoinPath(script_dir, "hin");
  std::string unicharset_path = TestDataNameToPath("hin_beam.unicharset");
  UNICHARSET unicharset;
  EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
  std::string version_str = "TestVersion";
  file::MakeTmpdir();
  std::string output_dir = FLAGS_test_tmpdir;
  LOG(INFO) << "Output dir=" << output_dir << "\n";
  std::string lang1 = "hin";
  bool pass_through_recoder = false;
  // If these reads fail, we get a warning message and an empty list of words.
  std::vector<std::string> words = split(ReadFile(file::JoinPath(hin_dir, "hin.wordlist")), '\n');
  EXPECT_GT(words.size(), 0);
  std::vector<std::string> puncs = split(ReadFile(file::JoinPath(hin_dir, "hin.punc")), '\n');
  EXPECT_GT(puncs.size(), 0);
  std::vector<std::string> numbers = split(ReadFile(file::JoinPath(hin_dir, "hin.numbers")), '\n');
  EXPECT_GT(numbers.size(), 0);
  bool lang_is_rtl = false;
  // Generate the traineddata file.
  EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang1,
                                pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
                                nullptr));
  // Init a trainer with it, and encode kTestString.
  std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
  LSTMTrainer trainer1;
  trainer1.InitCharSet(traineddata1);
  std::vector<int> labels1;
  EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
  std::string test1_decoded = trainer1.DecodeLabels(labels1);
  std::string test1_str(&test1_decoded[0], test1_decoded.length());
  LOG(INFO) << "Labels1=" << test1_str << "\n";
 
  // Add a new character to the unicharset and try again.
  int size_before = unicharset.size();
  unicharset.unichar_insert("₹");
  SetupBasicProperties(/*report_errors*/ true, /*decompose (NFD)*/ false, &unicharset);
  EXPECT_EQ(size_before + 1, unicharset.size());
  // Generate the traineddata file.
  std::string lang2 = "extendedhin";
  EXPECT_EQ(EXIT_SUCCESS, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang2,
                                           pass_through_recoder, words, puncs, numbers, lang_is_rtl,
                                           nullptr, nullptr));
  // Init a trainer with it, and encode kTestString.
  std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
  LSTMTrainer trainer2;
  trainer2.InitCharSet(traineddata2);
  std::vector<int> labels2;
  EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
  std::string test2_decoded = trainer2.DecodeLabels(labels2);
  std::string test2_str(&test2_decoded[0], test2_decoded.length());
  LOG(INFO) << "Labels2=" << test2_str << "\n";
  // encode kTestStringRupees.
  std::vector<int> labels3;
  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
  std::string test3_decoded = trainer2.DecodeLabels(labels3);
  std::string test3_str(&test3_decoded[0], test3_decoded.length());
  LOG(INFO) << "labels3=" << test3_str << "\n";
  // Copy labels1 to a std::vector, renumbering the null char to match trainer2.
  // Since Tensor Flow's CTC implementation insists on having the null be the
  // last label, and we want to be compatible, null has to be renumbered when
  // we add a class.
  int null1 = trainer1.null_char();
  int null2 = trainer2.null_char();
  EXPECT_EQ(null1 + 1, null2);
  std::vector<int> labels1_v(labels1.size());
  for (unsigned i = 0; i < labels1.size(); ++i) {
    if (labels1[i] == null1) {
      labels1_v[i] = null2;
    } else {
      labels1_v[i] = labels1[i];
    }
  }
  EXPECT_THAT(labels1_v, testing::ElementsAreArray(&labels2[0], labels2.size()));
  // To make sure we are not cheating somehow, we can now encode the Rupee
  // symbol, which we could not do before.
  EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));
}

◆ TEST() [14/88]

tesseract::TEST	(	NormstrngsTest	,
		AllScriptsRegtest
	)

Definition at line 183 of file normstrngs_test.cc.

                                        {
  // Tests some valid text in a large number of scripts, some of which were
  // found to be rejected by an earlier version.
  const std::vector<std::pair<std::string, std::string>> kScriptText(
      {{"Arabic",
        " فكان منهم علقمة بن قيس ، و إبراهيم النخعي ، و الأسود بن"
        "توفي بالمدينة في هذه السنة وهو ابن مائة وعشرين سنة "
        "مجموعه هیچ اثری در فنون هنر و ادب و ترجمه، تقدیم پیشگاه ارجمند "
        "سازنده تاریخ نگاه میکرد و به اصطلاح انسان و فطرت انسانی را زیربنای"},
       {"Armenian",
        "անտիկ աշխարհի փիլիսոփաների կենսագրությունը, թե′ նրանց ուս-"
        "պատրաստւում է դալ (բուլամա): Կովկասում կաթից նաև պատ-"
        "Հոգաբարձութեան յղել այդ անձին յիմարութիւնը հաստա-"
        "գծերը եւ միջագծերը կը համրուին վարէն վեր:"},
       {"Bengali",
        "এসে দাঁড়ায় দাও খানি উঁচিয়ে নিয়ে । ঝরনার স্বচ্ছ জলে প্রতিবিম্বিত "
        "পাঠিয়ে, গোবিন্দ স্মরণ করে, নির্ভয়ে রওনা হয়েছিল। তাতে সে "
        "সুলতার। মনে পড়ে বিয়ের সময় বাবা এদের বাড়ি থেকে ঘুরে "
        "কিন্তু তারপর মাতৃহৃদয় কেমন করে আছে? কী"},
       {"Cyrillic",
        "достей, є ще нагороди й почесті, є хай і сумнівна, але слава, "
        "вып., 96б). Параўн. найсвятший у 1 знач., насвятейший у 1 знач., "
        "»Правді«, — гітлерівські окупанти винищували нижчі раси, після дру- "
        "І знов майдан зачорнів од народу. Всередині чоло-"},
       {"Devanagari",
        "डा॰ नै हात्तीमाथि चढेर त्यो भएनेर आइपुगे। राजालाई देखी "
        "बाबतीत लिहिणे ही  एक मोठीच जबाबदारी आहे. काकासाहेबांच्या कार्याचा "
        "प्रबंध, आधोगिक प्रबंध तथा बैंकिंग  एवम वाणिज्य आदि विषयों में "
        "चित्रकृती दिल्या. शंभराहून अधिक देश आज आपापले चित्रपट निर्माण करीत"},
       {"Greek",
        "Μέσα ένα τετράδιο είχα στριμώξει το πρώτο "
        "νον αξίως τού ευαγγελίου τού χριστού πολιτεύεσθε, ίνα "
        "οὐδεμία ὑπ' αὐτοῦ μνεία γίνεται τῶν οἰκείων χωρίων. "
        "είτα την φάσιν αυτήν ην ούτος εποιήσατο κατά του Μίκω-"},
       {"Gujarati",
        "ઉપહારગૃહે ને નાટ્યસ્થળે આ એ જ તેલ કડકડતું "
        "શકી. ભાવવધારો અટકાવી નથી શકી અને બેકારીને "
        "ત્યાં વાંકુથી પાછે  આવ્યો, ચોરીનો માલ સોંપવા ! "
        "કહી. એણે રેશમના કપડામાં વીંટી રાખેલ કુંવરીની છબી"},
       {"Gurmukhi",
        "ਯਾਦ ਰਹੇ ਕਿ ‘ਨਫਰਤ ’ ਦਾ ਵਿਸ਼ਾ ਕ੍ਰਾਤੀ ਨਹੀ ਹੈ ਅਤੇ ਕਵੀ ਦੀ ਇਹ "
        "ਮਹਾਂ ਨੰਦਾ ਕੋਲ ਇਕ ਚੀਜ਼ ਸੀ ਉਹ ਸੀ ਸਚ, ਕੋਰਾ ਸਚ, ਬੇਧਤ੍ਰਕ ਕਹਿੳ "
        "ਭੂਰਾ  ਸਾਨੂੰ  ਥੜਾ  ਚੰਗਾ  ਲਗਦਾ  ਸੀ ।  ਉਸ  ਦਾ  ਇਕ  ਪੈਰ  ਜਨਮ ਤੋ "
        "ਨੂੰ ਇਹ ਅਧਿਕਾਰ ਦਿੱਤਾ ਕਿ ਉਹ ਸਿੱਖ ਵਿਰੋਧ ਦਾ ਸੰਗਠਨ ਕਰੇ ਅਤੇ 3 ਸਤੰਬਰ,"},
       {"Hangul",
        "로 들어갔다. 이대통령은 아이젠하워 대통령의 뒷모습을 보면서 "
        "그것뿐인 줄 아요? 노름도 했다 캅니다. 빌어묵을 놈이 그러 "
        "의 가장 과학적 태도이며, 우리 역사를 가장 정확하게 학습할 수 있는 "
        "마르크스 레"
        "각하는 그는 그들의 식사보장을 위해 때때로 집에"},
       {"HanS",
        "大凡世界上的先生可 分 三 种： 第一种只会教书， 只会拿一 "
        "书像是探宝一样，在茶叶店里我买过西湖龙井﹑黄山毛峰﹑福建的铁观音﹑大红"
        " "
        "持 “左” 倾冒险主义的干部，便扣上 “富农 "
        "笑说：“我听说了，王总工程师也跟我说过了，只是工作忙，谁"},
       {"HanT",
        "叁、 銀行資產管理的群組分析模式 "
        "民國六十三年，申請就讀台灣大學歷史研究所，並從事著述，"
        "質言之﹐在社會結構中﹐性質﹑特徵﹑地位相類似的一羣人﹐由於 "
        "董橋，一九四二年生，福建晉江人，國立成功大學外"},
       {"Hebrew",
        " אֵ-לִי, אֵ-לִי, כֵּיַצד מְטַפְּסִים בְּקִירוֹת שֶׁל זְכוּכִי"
        " הראשון חוצה אותי שוב. אני בסיבוב הרביעי, הוא בטח מתחיל את"
        " ווערטער  געהאט,  אבער  דער  עיקר  איז  ניט  דאָס  וואָרט,  נאָר"
        " על גחלת היהדות המקורית בעירך, נתת צביון ואופי מיוחד"},
       {"Japanese",
        "は異民族とみなされていた。楚の荘王（前613〜前 "
        "を詳細に吟味する。実際の治療活動の領域は便宜上、(1)　障害者 "
        "困難性は多角企業の場合原則として部門別に判断されている.). "
        "☆ご希望の団体には見本をお送りします"},
       {"Kannada",
        "ಕೂಡ ಯುದ್ಧ ಮಾಡಿ ಜಯಪಡೆ. ನಂತರ ನಗರದೊಳಕ್ಕೆ ನಡೆ ಇದನ್ನು "
        "ಅಸಹ್ಯದೃಶ್ಯ ಯಾರಿಗಾದರೂ ನಾಚಿಕೆತರುವಂತಹದಾಗಿದೆ. ಆರೋಗ್ಯ ದೃಷ್ಟಿ "
        "ಯಾಗಲಿ, ಮೋಹನನಾಗಲಿ ಇಂಥ ಬಿಸಿಲಿನಲ್ಲಿ ಎಂದೂ ಬಹಳ ಹೊತ್ತು "
        "\"ಇದೆ...ಖಂಡಿತಾ ಇದೆ\" ಅಂದ ಮನಸ್ಸಿನಲ್ಲಿಯೇ ವಂದಿಸುತ್ತಾ,"},
       {"Khmer",
        "សិតសក់និងផ្លាស់សម្លៀកបំពាក់ពេលយប់ចេញ។ "
        "និយាយអំពីនគរនេះ ប្រាប់ដល់លោកទាំងមូលឲ្យដឹងច្បាស់លាស់អំពី "
        "កន្លះកាថាសម្រាប់ទន្ទេញឲ្យងាយចាំ បោះពុម្ពនៅក្នុងទ្រង់ទ្រាយបច្ចុប្បន្ន "
        "ឯកសារនេះបានផ្សព្វផ្សាយនៅក្នុងសន្និសីទ"},
       {"Lao",
        "ເອີຍ ! ຟັງສຽງຟ້າມັນຮ້ອງຮ່ວນ ມັນດັງໄກໆ ເອີຍ "
        "ໄດລຽງດູລາວມາດວບຄວາມລາບາກຫລາຍ; "
        "ບາງໄດ້ ເຈົ້າລອງສູ້ບໍ່ໄດ້ຈຶ່ງຫນີລົງມາວຽງຈັນ. "
        "ລົບອອກຈາກ 3 ເຫລືອ 1, ຂ້ອຍຂຽນ 1 (1)"},
       {"Latin",
        "režisoru, palīdzēja to manu domīgo, kluso Dzejas metru ielikt "
        "Ešte nedávno sa chcel mladý Novomeský „liečiť” "
        "tiivisia kysymyksiä, mistä seuraa, että spekula-   |   don luonteesta "
        "Grabiel Sanchez, yang bertani selama 120 tahun meninggal"},
       {"Malayalam",
        "അമൂർത്തചിത്രമായിരിക്കും.  ഛേ! ആ വീട്ടിലേക്ക്  അവളൊന്നിച്ച്  പോകേണ്ടതാ "
        "മൃഗങ്ങൾക്ക് എന്തെക്കിലും പറ്റിയാൽ മാത്രം ഞാനതു "
        "വെലക്ക് വേണമെങ്കിൽ തരാം. എന്തോ തരും?  പറ. "
        "എല്ലാം കഴിഞ്ഞ് സീനിയറിന്റെ അടുത്തു ചെന്ന് കാൽതൊട്ട"},
       {"Tamil",
        "பொருத்தமாகப் பாடினாள் நம் ஔவைப் பாட்டி. காவிரி "
        "உள்ளடக்கி  நிற்பது  விநோத  வார்த்தையின் அஃறிணை "
        "சூரிய   கிரஹண   சமயத்தில்   குருக்ஷேத்திரம்   செல்வது "
        "காலங்களில் வெளியே போகும்பொழுது, 'ஸார்', 'ஸார்',"},
       {"Telugu",
        "1892లో ఆమె 10వ సంవత్సరంలో గుంటూరు తాలూకా వేములాపాడు "
        "ఫండ్స్ చట్టము'నందు చేయబడెను. తరువాత క్రీ. శ. "
        "సంచారము చేయును.  మీరు ఇప్పుడే కాళకాలయమునకు "
        "ఎంతటి  సరళమైన  భాషలో  వ్రాశాడో  విశదమవుతుంది.   పైగా  ఆనాటి   భాష"},
       {"Thai",
        "อ้อ! กับนัง....แม่ยอดพระกลิ่น นั่นเอง ! หรับก็ย่อมจะรู้โดยชัดเจนว่า "
        "ถ้าตราบใดยังมีเรือปืนอยู่ใกล้ ๆ แล้ว  ตราบนั้น "
        "พระดำรินี้ ที่มีคตีทำกรวยหมากและธูปเทียน "
        "อันยานมีเรือเปนต้นฃ้ามยาก ฯ เพราะว่าแม่น้ำนั่นมีน้ำใสยิ่ง แม้เพียง"},
       {"Vietnamese",
        "vợ đến tai mụ hung thần Xăng-tô- mê-a. Mụ vô cùng "
        "chiếc xe con gấu chạy qua nhà. Nhưng thỉnh thoảng "
        "hòa hoãn với người Pháp để cho họ được dựng một ngôi nhà thờ nhỏ bằng "
        "Cặp câu đói súc tích mà sâu sắc, là lời chúc lời"}});
 
  for (const auto &p : kScriptText) {
    std::string normalized;
    EXPECT_TRUE(tesseract::NormalizeUTF8String(
        tesseract::UnicodeNormMode::kNFKC, tesseract::OCRNorm::kNormalize,
        tesseract::GraphemeNorm::kNormalize, p.second.c_str(), &normalized))
        << "Script=" << p.first << " text=" << p.second;
  }
}

◆ TEST() [15/88]

tesseract::TEST	(	NormstrngsTest	,
		BasicText
	)

Definition at line 31 of file normstrngs_test.cc.

                                {
  // Plain ASCII input must survive full normalization unchanged.
  const char *const input = "AbCd Ef";
  std::string normalized;
  const bool ok = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
                                      GraphemeNorm::kNormalize, input, &normalized);
  EXPECT_TRUE(ok);
  EXPECT_STREQ(input, normalized.c_str());
}

◆ TEST() [16/88]

tesseract::TEST	(	NormstrngsTest	,
		DandaOK
	)

Definition at line 176 of file normstrngs_test.cc.

                              {
  // Each danda is expected to come back untouched from
  // ExpectGraphemeModeResults, with counts of 1 in every mode.
  const std::string single_danda = "\u0964";
  ExpectGraphemeModeResults(single_danda, UnicodeNormMode::kNFC, 1, 1, 1, single_danda);
  const std::string double_danda = "\u0965";
  ExpectGraphemeModeResults(double_danda, UnicodeNormMode::kNFC, 1, 1, 1, double_danda);
}

◆ TEST() [17/88]

tesseract::TEST	(	NormstrngsTest	,
		DetectsCorrectText
	)

Definition at line 89 of file normstrngs_test.cc.

                                         {
  // The kEngText, kHinText and kKorText fixtures must each normalize
  // successfully and come back unchanged when OCR normalization is off.
  // The same output string is deliberately reused for all three calls.
  std::string normalized;

  bool ok = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
                                GraphemeNorm::kNormalize, kEngText, &normalized);
  EXPECT_TRUE(ok);
  EXPECT_STREQ(kEngText, normalized.c_str());

  ok = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
                           GraphemeNorm::kNormalize, kHinText, &normalized);
  EXPECT_TRUE(ok) << "Incorrect text: '" << kHinText << "'";
  EXPECT_STREQ(kHinText, normalized.c_str());

  ok = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
                           GraphemeNorm::kNormalize, kKorText, &normalized);
  EXPECT_TRUE(ok);
  EXPECT_STREQ(kKorText, normalized.c_str());
}

◆ TEST() [18/88]

tesseract::TEST	(	NormstrngsTest	,
		DetectsIncorrectText
	)

Definition at line 105 of file normstrngs_test.cc.

                                           {
  // Every entry of the two badly-formed word tables must be rejected.
  // A null output pointer is passed because only the verdict is checked.
  for (const auto &word : kBadlyFormedHinWords) {
    const bool accepted = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
                                              GraphemeNorm::kNormalize, word, nullptr);
    EXPECT_FALSE(accepted) << word;
  }
  for (const auto &word : kBadlyFormedThaiWords) {
    const bool accepted = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
                                              GraphemeNorm::kNormalize, word, nullptr);
    EXPECT_FALSE(accepted) << word;
  }
}

◆ TEST() [19/88]

tesseract::TEST	(	NormstrngsTest	,
		DigitOK
	)

Definition at line 171 of file normstrngs_test.cc.

                              {
  // U+0CEA (digit 4) is expected to come back unchanged, with counts of 1.
  const std::string digit_four = "\u0cea";
  ExpectGraphemeModeResults(digit_four, UnicodeNormMode::kNFC, 1, 1, 1, digit_four);
}

◆ TEST() [20/88]

tesseract::TEST	(	NormstrngsTest	,
		FullwidthToHalfwidth
	)

Definition at line 389 of file normstrngs_test.cc.

                                           {
  // Spot checks: a fullwidth letter, a fullwidth symbol and a currency sign.
  // U+FF21 -> U+0041 (Latin capital letter A)
  EXPECT_EQ('A', FullwidthToHalfwidth(0xFF21));
  // U+FF05 -> U+0025 (percent sign)
  EXPECT_EQ('%', FullwidthToHalfwidth(0xFF05));
  // U+FFE6 -> U+20A9 (won sign)
  EXPECT_EQ(0x20A9, FullwidthToHalfwidth(0xFFE6));

#if defined(MISSING_CODE) && defined(INCLUDE_TENSORFLOW)
  // Skipped because of missing UniLib::FullwidthToHalfwidth.
  // When enabled, compares against UniLib for every valid code point.
  const int32_t kFirstCodepoint = 33;
  const int32_t kLastCodepoint = 0x10FFFF;
  for (int32_t cp = kFirstCodepoint; cp <= kLastCodepoint; ++cp) {
    if (!IsValidCodepoint(cp))
      continue;
    char trace[80];
    snprintf(trace, sizeof(trace), "Failed at U+%x", cp);
    SCOPED_TRACE(trace);
    std::string utf8 = EncodeAsUTF8(cp);
    const std::string expected =
        UniLib::FullwidthToHalfwidth(utf8.c_str(), utf8.length(), true);
    EXPECT_EQ(expected, EncodeAsUTF8(FullwidthToHalfwidth(cp)));
  }
#endif
}

◆ TEST() [21/88]

tesseract::TEST	(	NormstrngsTest	,
		IsInterchangeValid
	)

Definition at line 353 of file normstrngs_test.cc.

                                         {
#ifdef INCLUDE_TENSORFLOW
  // Compare against UniLib for every code point from 33 up to U+10FFFF.
  const int32_t kFirstCodepoint = 33;
  const int32_t kLastCodepoint = 0x10FFFF;
  for (int32_t cp = kFirstCodepoint; cp <= kLastCodepoint; ++cp) {
    char trace[80];
    snprintf(trace, sizeof(trace), "Failed at U+%x", cp);
    SCOPED_TRACE(trace);
    EXPECT_EQ(UniLib::IsInterchangeValid(cp), IsInterchangeValid(cp));
  }
#else
  // No reference implementation is available in this configuration.
  GTEST_SKIP();
#endif
}

◆ TEST() [22/88]

tesseract::TEST	(	NormstrngsTest	,
		IsInterchangeValid7BitAscii
	)

Definition at line 370 of file normstrngs_test.cc.

                                                  {
#if defined(MISSING_CODE) && defined(INCLUDE_TENSORFLOW)
  // Compare against UniLib for every code point from 33 up to U+10FFFF.
  const int32_t kFirstCodepoint = 33;
  const int32_t kLastCodepoint = 0x10FFFF;
  for (int32_t cp = kFirstCodepoint; cp <= kLastCodepoint; ++cp) {
    char trace[80];
    snprintf(trace, sizeof(trace), "Failed at U+%x", cp);
    SCOPED_TRACE(trace);
    std::string utf8 = EncodeAsUTF8(cp);
    EXPECT_EQ(UniLib::IsInterchangeValid7BitAscii(utf8), IsInterchangeValid7BitAscii(cp));
  }
#else
  // Skipped because of missing UniLib::IsInterchangeValid7BitAscii.
  GTEST_SKIP();
#endif
}

◆ TEST() [23/88]

tesseract::TEST	(	NormstrngsTest	,
		IsWhitespace
	)

Definition at line 309 of file normstrngs_test.cc.

                                   {
  // ASCII whitespace: space (U+0020), tab, carriage return and newline.
  EXPECT_TRUE(IsWhitespace(' '));
  EXPECT_TRUE(IsWhitespace('\t'));
  EXPECT_TRUE(IsWhitespace('\r'));
  EXPECT_TRUE(IsWhitespace('\n'));
  // Every code point in U+2000 through U+200A must count as whitespace.
  const char32 kRangeFirst = 0x2000;
  const char32 kRangeLast = 0x200A;
  for (char32 cp = kRangeFirst; cp <= kRangeLast; ++cp) {
    char trace[80];
    snprintf(trace, sizeof(trace), "Failed at U+%x", cp);
    SCOPED_TRACE(trace);
    EXPECT_TRUE(IsWhitespace(cp));
  }
  // U+3000 is whitespace
  EXPECT_TRUE(IsWhitespace(0x3000));
  // ZWNBSP (U+FEFF) is not considered a space.
  EXPECT_FALSE(IsWhitespace(0xFEFF));
}

◆ TEST() [24/88]

tesseract::TEST	(	NormstrngsTest	,
		JoinersStayInArabic
	)

Definition at line 165 of file normstrngs_test.cc.

                                          {
  // U+0628 separated by U+200C and then U+200D; the string is expected to
  // come back untouched, with counts of 5, 5 and 2.
  const std::string joined = "\u0628\u200c\u0628\u200d\u0628";
  ExpectGraphemeModeResults(joined, UnicodeNormMode::kNFC, 5, 5, 2, joined);
}

◆ TEST() [25/88]

tesseract::TEST	(	NormstrngsTest	,
		LigatureText
	)

Definition at line 39 of file normstrngs_test.cc.

                                   {
  // Ligatures must be expanded into their component letters.
  std::string expanded;

  const char *const two_byte_ligature = "ĳ"; // U+0133 (ĳ) -> ij
  bool ok = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
                                GraphemeNorm::kNormalize, two_byte_ligature, &expanded);
  EXPECT_TRUE(ok);
  EXPECT_STREQ("ij", expanded.c_str());

  const char *const three_byte_ligature = "ﬁnds"; // U+FB01 (ﬁ) -> fi
  ok = NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
                           GraphemeNorm::kNormalize, three_byte_ligature, &expanded);
  EXPECT_TRUE(ok);
  EXPECT_STREQ("finds", expanded.c_str());
}

◆ TEST() [26/88]

tesseract::TEST	(	NormstrngsTest	,
		NoLonelyJoiners
	)

Definition at line 127 of file normstrngs_test.cc.

                                      {
  // A zero-width joiner (U+200D) directly after 'x' has nothing valid to join,
  // so segmentation must succeed and drop it.
  std::string str = "x\u200d\u0d06\u0d34\u0d02";
  std::vector<std::string> glyphs;
  // Returns true, but the joiner is gone.
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal check: the element accesses below would read out of bounds if the
  // segmentation produced fewer than three glyphs.
  ASSERT_EQ(glyphs.size(), 3u);
  EXPECT_EQ(glyphs[0], std::string("x"));
  EXPECT_EQ(glyphs[1], std::string("\u0d06"));
  EXPECT_EQ(glyphs[2], std::string("\u0d34\u0d02"));
}

◆ TEST() [27/88]

tesseract::TEST	(	NormstrngsTest	,
		NoLonelyJoinersNonAlpha
	)

Definition at line 153 of file normstrngs_test.cc.

                                              {
  // Joiners around a '+' are dropped, leaving only the plus sign.
  const std::string with_plus = "\u200d+\u200c\u200d";
  ExpectGraphemeModeResults(with_plus, UnicodeNormMode::kNFC, 1, 1, 1, std::string("+"));

  // Without the plus, the string consists of joiners only and is invalid.
  const std::string joiners_only = "\u200d\u200c\u200d";
  std::string result;
  const bool accepted =
      NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                          joiners_only.c_str(), &result);
  EXPECT_FALSE(accepted) << PrintString32WithUnicodes(result);
}

◆ TEST() [28/88]

tesseract::TEST	(	NormstrngsTest	,
		NoLonelyJoinersPlus
	)

Definition at line 140 of file normstrngs_test.cc.

                                          {
  // A zero-width joiner (U+200D) followed by '+' has nothing valid to join,
  // so segmentation must succeed and drop it.
  std::string str = "\u0d2a\u200d+\u0d2a\u0d4b";
  std::vector<std::string> glyphs;
  // Returns true, but the joiner is gone.
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal check: the element accesses below would read out of bounds if the
  // segmentation produced fewer than three glyphs.
  ASSERT_EQ(glyphs.size(), 3u);
  EXPECT_EQ(glyphs[0], std::string("\u0d2a"));
  EXPECT_EQ(glyphs[1], std::string("+"));
  EXPECT_EQ(glyphs[2], std::string("\u0d2a\u0d4b"));
}

◆ TEST() [29/88]

tesseract::TEST	(	NormstrngsTest	,
		NonIndicTextDoesntBreakIndicRules
	)

Definition at line 118 of file normstrngs_test.cc.

                                                        {
  // Ordinary Latin text must pass through grapheme normalization unchanged.
  const std::string latin = "Here's some latin text.";
  std::string normalized;
  const bool ok =
      NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                          latin.c_str(), &normalized);
  EXPECT_TRUE(ok) << PrintString32WithUnicodes(latin);
  EXPECT_EQ(normalized, latin);
}

◆ TEST() [30/88]

tesseract::TEST	(	NormstrngsTest	,
		OcrSpecificNormalization
	)

Definition at line 52 of file normstrngs_test.cc.

                                               {
  // Each case pairs an input with the text expected after OCR normalization.
  struct Case {
    const char *input;
    const char *ocr_normalized;
  };
  const Case kCases[] = {
      {"‘Hi", "'Hi"},  // U+2018 (‘) -> U+0027 (')
      {"“Hi", "\"Hi"}, // U+201C (“) -> U+0022 (")
      {"Hi—", "Hi-"},  // U+2014 (—) -> U+002D (-)
  };
  std::string result;

  // With OCR normalization the typographic characters become ASCII.
  for (const Case &c : kCases) {
    EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
                                    GraphemeNorm::kNormalize, c.input, &result));
    EXPECT_STREQ(c.ocr_normalized, result.c_str());
  }

  // Without the ocr normalization, these changes are not made.
  for (const Case &c : kCases) {
    EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
                                    GraphemeNorm::kNormalize, c.input, &result));
    EXPECT_STREQ(c.input, result.c_str());
  }
}

◆ TEST() [31/88]

tesseract::TEST	(	NormstrngsTest	,
		SpanUTF8NotWhitespace
	)

Definition at line 335 of file normstrngs_test.cc.

                                            {
  // Local multi-byte samples; the expected spans below are the values the
  // test requires for the leading non-whitespace run of each.
  const char hindi[] = "पिताने विवाह";
  const char korean[] = "이는 것으로 다시 넣을";
  const char mixed[] = "والفكر 123 والصراع abc";

  // Empty input or leading whitespace gives a span of zero.
  EXPECT_EQ(0, SpanUTF8NotWhitespace(""));
  EXPECT_EQ(0, SpanUTF8NotWhitespace(" abc"));
  EXPECT_EQ(0, SpanUTF8NotWhitespace("\rabc"));
  EXPECT_EQ(0, SpanUTF8NotWhitespace("\tabc"));
  EXPECT_EQ(0, SpanUTF8NotWhitespace("\nabc"));
  // Otherwise the span stops at the first whitespace character.
  EXPECT_EQ(3, SpanUTF8NotWhitespace("abc def"));
  EXPECT_EQ(18, SpanUTF8NotWhitespace(hindi));
  EXPECT_EQ(6, SpanUTF8NotWhitespace(korean));
  EXPECT_EQ(12, SpanUTF8NotWhitespace(mixed));
}

◆ TEST() [32/88]

tesseract::TEST	(	NormstrngsTest	,
		SpanUTF8Whitespace
	)

Definition at line 328 of file normstrngs_test.cc.

                                         {
  // Only the leading run of whitespace is counted.
  const char *const only_whitespace = " \t\r\n";
  const char *const whitespace_then_text = " \t\r\nabc";
  const char *const text_first = "abc \t\r\nabc";
  const char *const empty = "";

  EXPECT_EQ(4, SpanUTF8Whitespace(only_whitespace));
  EXPECT_EQ(4, SpanUTF8Whitespace(whitespace_then_text));
  EXPECT_EQ(0, SpanUTF8Whitespace(text_first));
  EXPECT_EQ(0, SpanUTF8Whitespace(empty));
}

◆ TEST() [33/88]

tesseract::TEST	(	OutputBufferTest	,
		WriteString
	)

Definition at line 26 of file fileio_test.cc.

                                    {
  // Writes two strings through an OutputBuffer backed by a temporary file,
  // then reads the file back and checks the concatenated contents.
  const int kMaxBufSize = 128;
  // Value-initialized so the bytes read back are always NUL-terminated.
  char buffer[kMaxBufSize] = {};
  FILE *fp = tmpfile();
  CHECK(fp != nullptr);

  auto output = std::make_unique<OutputBuffer>(fp);
  output->WriteString("Hello ");
  output->WriteString("world!");

  rewind(fp);
  const char *s = "Hello world!";
  // fread returns the number of complete items read; anything other than 1
  // means the file holds less than the expected text.
  EXPECT_EQ(1u, fread(buffer, strlen(s), 1, fp));
  EXPECT_STREQ(s, buffer);
}

◆ TEST() [34/88]

tesseract::TEST	(	ParagraphsTest	,
		IndexPageTest
	)

Definition at line 724 of file paragraphs_test.cc.

                                    {
  // Feeds the kNewZealandIndex fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kNewZealandIndex, countof(kNewZealandIndex));
}

◆ TEST() [35/88]

tesseract::TEST	(	ParagraphsTest	,
		ListItemsIdentified
	)

Definition at line 206 of file paragraphs_test.cc.

                                          {
  using tesseract::AsciiLikelyListItem;

  // Roman numerals, lettered and numbered markers, with or without
  // punctuation or brackets, must all be accepted as list items.
  EXPECT_TRUE(AsciiLikelyListItem("iii"));
  EXPECT_TRUE(AsciiLikelyListItem("A."));
  EXPECT_TRUE(AsciiLikelyListItem("B."));
  EXPECT_TRUE(AsciiLikelyListItem("C."));
  EXPECT_TRUE(AsciiLikelyListItem("1."));
  EXPECT_TRUE(AsciiLikelyListItem("2."));
  EXPECT_TRUE(AsciiLikelyListItem("3."));
  EXPECT_TRUE(AsciiLikelyListItem("1"));
  EXPECT_TRUE(AsciiLikelyListItem("2"));
  EXPECT_TRUE(AsciiLikelyListItem("3"));
  EXPECT_TRUE(AsciiLikelyListItem("[[1]]"));
  EXPECT_TRUE(AsciiLikelyListItem("A-1."));
  EXPECT_TRUE(AsciiLikelyListItem("A-2"));
  EXPECT_TRUE(AsciiLikelyListItem("(A)(i)"));

  // Ordinary words must be rejected, even when they end in a period.
  EXPECT_FALSE(AsciiLikelyListItem("The"));
  EXPECT_FALSE(AsciiLikelyListItem("first"));
  EXPECT_FALSE(AsciiLikelyListItem("house"));
  EXPECT_FALSE(AsciiLikelyListItem("Oregonian."));
  EXPECT_FALSE(AsciiLikelyListItem("on."));
}

◆ TEST() [36/88]

tesseract::TEST	(	ParagraphsTest	,
		NotDistractedBySourceCode
	)

Definition at line 603 of file paragraphs_test.cc.

                                                {
  // Feeds the kTextWithSourceCode fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kTextWithSourceCode, countof(kTextWithSourceCode));
}

◆ TEST() [37/88]

tesseract::TEST	(	ParagraphsTest	,
		NotOverlyAggressiveWithBlockQuotes
	)

Definition at line 687 of file paragraphs_test.cc.

                                                         {
  // Feeds the kOldManAndSea fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kOldManAndSea, countof(kOldManAndSea));
}

◆ TEST() [38/88]

tesseract::TEST	(	ParagraphsTest	,
		TestComplexPage1
	)

Definition at line 408 of file paragraphs_test.cc.

                                       {
  // Feeds the kComplexPage1 fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kComplexPage1, countof(kComplexPage1));
}

◆ TEST() [39/88]

tesseract::TEST	(	ParagraphsTest	,
		TestComplexPage2
	)

Definition at line 451 of file paragraphs_test.cc.

                                       {
  // Feeds the kComplexPage2 fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kComplexPage2, countof(kComplexPage2));
}

◆ TEST() [40/88]

tesseract::TEST	(	ParagraphsTest	,
		TestCrownParagraphDetection
	)

Definition at line 275 of file paragraphs_test.cc.

                                                  {
  // Feeds the kCrownedParagraph fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kCrownedParagraph, countof(kCrownedParagraph));
}

◆ TEST() [41/88]

tesseract::TEST	(	ParagraphsTest	,
		TestFewCluesWithCrown
	)

Definition at line 258 of file paragraphs_test.cc.

                                            {
  // Feeds the kFewCluesWithCrown fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kFewCluesWithCrown, countof(kFewCluesWithCrown));
}

◆ TEST() [42/88]

tesseract::TEST	(	ParagraphsTest	,
		TestRightAlignedParagraph
	)

Definition at line 342 of file paragraphs_test.cc.

                                                {
  // Feeds the kRightAligned fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kRightAligned, countof(kRightAligned));
}

◆ TEST() [43/88]

tesseract::TEST	(	ParagraphsTest	,
		TestSimpleParagraphDetection
	)

Definition at line 243 of file paragraphs_test.cc.

                                                   {
  // Feeds the kTwoSimpleParagraphs fixture and its countof() size to the
  // shared TestParagraphDetection helper.
  TestParagraphDetection(kTwoSimpleParagraphs, countof(kTwoSimpleParagraphs));
}

◆ TEST() [44/88]

tesseract::TEST	(	ParagraphsTest	,
		TestSingleFullPageContinuation
	)

Definition at line 315 of file paragraphs_test.cc.

                                                     {
  // Runs paragraph detection with one pre-built ParagraphModel already in the
  // model list, then compares row ownership against the fixture.
  const TextAndModel *expected_rows = kSingleFullPageContinuation;
  int row_count = countof(kSingleFullPageContinuation);
  std::vector<RowInfo> row_infos;
  std::vector<PARA *> row_owners;
  PARA_LIST paragraphs;
  std::vector<ParagraphModel *> models;
  models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));

  MakeAsciiRowInfos(expected_rows, row_count, &row_infos);
  tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);
  EvaluateParagraphDetection(expected_rows, row_count, row_owners);

  // The test deletes every model left in the list.
  for (size_t i = 0; i < models.size(); ++i) {
    delete models[i];
  }
}

◆ TEST() [45/88]

tesseract::TEST	(	ParagraphsTest	,
		TestSplitsOutLeaderLines
	)

Definition at line 568 of file paragraphs_test.cc.

                                               {
  // Feeds the kTableOfContents fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kTableOfContents, countof(kTableOfContents));
}

◆ TEST() [46/88]

tesseract::TEST	(	ParagraphsTest	,
		TestStrayLineInBlock
	)

Definition at line 470 of file paragraphs_test.cc.

                                           {
  // Feeds the complete kSubtleCrown fixture (all countof() entries) to the
  // shared TestParagraphDetection helper.
  TestParagraphDetection(kSubtleCrown, countof(kSubtleCrown));
}

◆ TEST() [47/88]

tesseract::TEST	(	ParagraphsTest	,
		TestSubtleCrown
	)

Definition at line 466 of file paragraphs_test.cc.

                                      {
  // Feeds the kSubtleCrown fixture to TestParagraphDetection with a size of
  // countof() - 1, i.e. one less than the full fixture size.
  TestParagraphDetection(kSubtleCrown, countof(kSubtleCrown) - 1);
}

◆ TEST() [48/88]

tesseract::TEST	(	ParagraphsTest	,
		TestTinyParagraphs
	)

Definition at line 364 of file paragraphs_test.cc.

                                         {
  // Feeds the kTinyParagraphs fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kTinyParagraphs, countof(kTinyParagraphs));
}

◆ TEST() [49/88]

tesseract::TEST	(	ParagraphsTest	,
		TestUnlvInsurance
	)

Definition at line 543 of file paragraphs_test.cc.

                                        {
  // Feeds the kUnlvRep3AO fixture and its countof() size to the shared
  // TestParagraphDetection helper.
  TestParagraphDetection(kUnlvRep3AO, countof(kUnlvRep3AO));
}

◆ TEST() [50/88]

tesseract::TEST	(	ParagraphsText	,
		TestRealFlushLeftParagraphs
	)

Definition at line 294 of file paragraphs_test.cc.

                                                  {
  // Feeds the kFlushLeftParagraphs fixture and its countof() size to the
  // shared TestParagraphDetection helper.
  TestParagraphDetection(kFlushLeftParagraphs, countof(kFlushLeftParagraphs));
}

◆ TEST() [51/88]

tesseract::TEST	(	QRSequenceGenerator	,
		GetBinaryReversedInteger
	)

Definition at line 30 of file qrsequence_test.cc.

                                                    {
  // For a range of 8, index i must map to the listed value, which is i with
  // its three bits reversed.
  const int kRangeSize = 8;
  const int kExpected[kRangeSize] = {0, 4, 2, 6, 1, 5, 3, 7};
  TestableQRSequenceGenerator generator(kRangeSize);
  int index = 0;
  for (const int expected : kExpected) {
    EXPECT_EQ(expected, generator.GetBinaryReversedInteger(index));
    ++index;
  }
}

◆ TEST() [52/88]

tesseract::TEST	(	QuickTest	,
		ClassicProgressReporting
	)

Definition at line 148 of file progress_test.cc.

                                          {
  // Runs ClassicProgressTester on phototest.tif with the "eng" language and
  // the tessdata directory whose name is TESSDATA_DIR with "_fast" appended.
  ClassicProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", "eng");
}

◆ TEST() [53/88]

tesseract::TEST	(	QuickTest	,
		NewProgressReporting
	)

Definition at line 152 of file progress_test.cc.

                                      {
  // Runs NewProgressTester on phototest.tif with the "eng" language and the
  // tessdata directory whose name is TESSDATA_DIR with "_fast" appended.
  NewProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", "eng");
}

◆ TEST() [54/88]

tesseract::TEST	(	TesseractInstanceTest	,
		TestMultipleTessInstances
	)

Definition at line 313 of file baseapi_test.cc.

                                                       {
  // Checks that several TessBaseAPI instances can coexist: each language is
  // first verified on its own, then every pair of languages is run with two
  // live instances and both results are compared to the ground truth.

  // langs is a nullptr-terminated array; count its entries.
  int num_langs = 0;
  while (langs[num_langs] != nullptr) {
    ++num_langs;
  }

  const std::string kTessdataPath = TESSDATA_DIR;

  // Preload images and verify that OCR is correct on them individually.
  std::vector<Image > pix(num_langs);
  for (int i = 0; i < num_langs; ++i) {
    std::string tracestring = "Single instance test with lang = ";
    tracestring += langs[i];
    SCOPED_TRACE(tracestring);
    std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
    pix[i] = pixRead(path.c_str());
    QCHECK(pix[i] != nullptr) << "Could not read " << path;

    tesseract::TessBaseAPI tess;
    EXPECT_EQ(0, tess.Init(kTessdataPath.c_str(), langs[i]));
    std::string ocr_result = GetCleanedTextResult(&tess, pix[i]);
    EXPECT_STREQ(gt_text[i], ocr_result.c_str());
  }

  // Process the images in all pairwise combinations of associated languages.
  std::string ocr_result[2];
  for (int i = 0; i < num_langs; ++i) {
    for (int j = i + 1; j < num_langs; ++j) {
      tesseract::TessBaseAPI tess1, tess2;
      // Check Init here as in the single-instance loop, so an initialization
      // failure is reported as such rather than as an OCR mismatch.
      EXPECT_EQ(0, tess1.Init(kTessdataPath.c_str(), langs[i]));
      EXPECT_EQ(0, tess2.Init(kTessdataPath.c_str(), langs[j]));

      ocr_result[0] = GetCleanedTextResult(&tess1, pix[i]);
      ocr_result[1] = GetCleanedTextResult(&tess2, pix[j]);

      EXPECT_FALSE(strcmp(gt_text[i], ocr_result[0].c_str()) ||
                   strcmp(gt_text[j], ocr_result[1].c_str()))
          << "OCR failed on language pair " << langs[i] << "-" << langs[j];
    }
  }

  // Release the preloaded images.
  for (int i = 0; i < num_langs; ++i) {
    pix[i].destroy();
  }
}

◆ TEST() [55/88]

tesseract::TEST	(	TesseractInstanceTest	,
		TestMultipleTessInstanceVariables
	)

Definition at line 360 of file baseapi_test.cc.

                                                               {
  // Sets different int, bool, string and double parameter values on two
  // TessBaseAPI instances and checks that each instance reads back its own
  // values, and that an unknown parameter name is not readable.
  std::string illegal_name = "an_illegal_name";
  std::string langs[2] = {"eng", "hin"};
  std::string int_param_name = "tessedit_pageseg_mode";
  int int_param[2] = {1, 2};
  std::string int_param_str[2] = {"1", "2"};
  std::string bool_param_name = "tessedit_ambigs_training";
  bool bool_param[2] = {false, true};
  std::string bool_param_str[2] = {"F", "T"};
  std::string str_param_name = "tessedit_char_blacklist";
  std::string str_param[2] = {"abc", "def"};
  std::string double_param_name = "segment_penalty_dict_frequent_word";
  std::string double_param_str[2] = {"0.01", "2"};
  double double_param[2] = {0.01, 2};

  const std::string kTessdataPath = TESSDATA_DIR;

  tesseract::TessBaseAPI tess1, tess2;
  // First configure both instances, so that the read-back loop below would
  // notice any value leaking from one instance into the other.
  for (int i = 0; i < 2; ++i) {
    tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
    // Report an initialization failure directly instead of as a cascade of
    // parameter mismatches.
    EXPECT_EQ(0, api->Init(kTessdataPath.c_str(), langs[i].c_str()));
    api->SetVariable(illegal_name.c_str(), "none");
    api->SetVariable(int_param_name.c_str(), int_param_str[i].c_str());
    api->SetVariable(bool_param_name.c_str(), bool_param_str[i].c_str());
    api->SetVariable(str_param_name.c_str(), str_param[i].c_str());
    api->SetVariable(double_param_name.c_str(), double_param_str[i].c_str());
  }
  for (int i = 0; i < 2; ++i) {
    tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
    EXPECT_FALSE(api->GetStringVariable(illegal_name.c_str()));
    int intvar;
    EXPECT_TRUE(api->GetIntVariable(int_param_name.c_str(), &intvar));
    EXPECT_EQ(int_param[i], intvar);
    bool boolvar;
    EXPECT_TRUE(api->GetBoolVariable(bool_param_name.c_str(), &boolvar));
    EXPECT_EQ(bool_param[i], boolvar);
    EXPECT_STREQ(str_param[i].c_str(), api->GetStringVariable(str_param_name.c_str()));
    double doublevar;
    EXPECT_TRUE(api->GetDoubleVariable(double_param_name.c_str(), &doublevar));
    // The value was parsed from text, so compare with floating-point
    // tolerance rather than bit-exact equality.
    EXPECT_DOUBLE_EQ(double_param[i], doublevar);
  }
}

◆ TEST() [56/88]

tesseract::TEST	(	UnicharsetTest	,
		Basics
	)

Definition at line 29 of file unicharset_test.cc.

                             {
  // This test verifies basic insertion, unichar_to_id, and encode.
  // The size is 4 after the first insertion, and 'a' encodes as id 3 below.
  UNICHARSET u;
  u.unichar_insert("a");
  EXPECT_EQ(u.size(), 4);
  u.unichar_insert("f");
  EXPECT_EQ(u.size(), 5);
  u.unichar_insert("i");
  EXPECT_EQ(u.size(), 6);
  // The fi ligature is NOT added because it can be encoded with a cleanup as f
  // then i.
  u.unichar_insert("\ufb01");
  EXPECT_EQ(u.size(), 6);
  u.unichar_insert("e");
  EXPECT_EQ(u.size(), 7);
  u.unichar_insert("n");
  EXPECT_EQ(u.size(), 8);
  EXPECT_EQ(u.unichar_to_id("f"), 4);
  EXPECT_EQ(u.unichar_to_id("i"), 5);
  // The fi ligature has no valid id.
  EXPECT_EQ(u.unichar_to_id("\ufb01"), INVALID_UNICHAR_ID);
  // The fi pair has no valid id.
  EXPECT_EQ(u.unichar_to_id("fi"), INVALID_UNICHAR_ID);
  std::vector<int> labels;
  EXPECT_TRUE(u.encode_string("affine", true, &labels, nullptr, nullptr));
  // Match on labels directly: copying through &labels[0] is redundant and is
  // undefined behaviour if the vector is empty.
  EXPECT_THAT(labels, ElementsAreArray({3, 4, 4, 5, 7, 6}));
  // With the fi ligature encoding fails without a pre-cleanup.
  std::string lig_str = "af\ufb01ne";
  EXPECT_FALSE(u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr));
  lig_str = u.CleanupString(lig_str.c_str());
  EXPECT_TRUE(u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr));
  EXPECT_THAT(labels, ElementsAreArray({3, 4, 4, 5, 7, 6}));
}

◆ TEST() [57/88]

tesseract::TEST	(	UnicharsetTest	,
		Multibyte
	)

Definition at line 65 of file unicharset_test.cc.

                                {
  // This test verifies basic insertion, unichar_to_id, and encode.
  // The difference from Basic above is that now we are testing multi-byte
  // unicodes instead of single byte.
  UNICHARSET u;
  // Insert some Arabic letters.
  u.unichar_insert("\u0627");
  EXPECT_EQ(u.size(), 4);
  u.unichar_insert("\u062c");
  EXPECT_EQ(u.size(), 5);
  u.unichar_insert("\u062f");
  EXPECT_EQ(u.size(), 6);
  u.unichar_insert("\ufb01"); // fi ligature is added as fi pair.
  EXPECT_EQ(u.size(), 7);
  u.unichar_insert("\u062b");
  EXPECT_EQ(u.size(), 8);
  u.unichar_insert("\u0635");
  EXPECT_EQ(u.size(), 9);
  EXPECT_EQ(u.unichar_to_id("\u0627"), 3);
  EXPECT_EQ(u.unichar_to_id("\u062c"), 4);
  // The first two bytes of this string is \u0627, which matches id 3;
  EXPECT_EQ(u.unichar_to_id("\u0627\u062c", 2), 3);
  EXPECT_EQ(u.unichar_to_id("\u062f"), 5);
  // Individual f and i are not present, but they are there as a pair.
  EXPECT_EQ(u.unichar_to_id("f"), INVALID_UNICHAR_ID);
  EXPECT_EQ(u.unichar_to_id("i"), INVALID_UNICHAR_ID);
  EXPECT_EQ(u.unichar_to_id("fi"), 6);
  // The fi ligature is findable.
  EXPECT_EQ(u.unichar_to_id("\ufb01"), 6);
  std::vector<int> labels;
  EXPECT_TRUE(
      u.encode_string("\u0627\u062c\u062c\u062f\u0635\u062b", true, &labels, nullptr, nullptr));
  // Match on labels directly: copying through &labels[0] is redundant and is
  // undefined behaviour if the vector is empty.
  EXPECT_THAT(labels, ElementsAreArray({3, 4, 4, 5, 8, 7}));
  // With the fi ligature the fi is picked out.
  std::vector<char> lengths;
  // Initialized so a failed encode cannot leave an indeterminate value for
  // the comparison below.
  unsigned encoded_length = 0;
  std::string src_str = "\u0627\u062c\ufb01\u0635\u062b";
  // src_str has to be pre-cleaned for lengths to be correct.
  std::string cleaned = u.CleanupString(src_str.c_str());
  EXPECT_TRUE(u.encode_string(cleaned.c_str(), true, &labels, &lengths, &encoded_length));
  EXPECT_EQ(encoded_length, cleaned.size());
  // Build from iterators so an empty lengths vector is handled safely.
  std::string len_str(lengths.begin(), lengths.end());
  // Each of the five encoded units is expected to be two bytes long.
  EXPECT_STREQ(len_str.c_str(), "\002\002\002\002\002");
  EXPECT_THAT(labels, ElementsAreArray({3, 4, 6, 8, 7}));
}

◆ TEST() [58/88]

tesseract::TEST	(	UnicharsetTest	,
		MultibyteBigrams
	)

Definition at line 113 of file unicharset_test.cc.

                                       {
  // This test verifies insertion of multi-byte unichars made of two code
  // points (bigrams), and that the resulting ids survive a save/load round
  // trip through an in-memory TFile.
  UNICHARSET u;
  // Insert some Kannada letters (U+0C9C, U+0CAD) and the virama sign (U+0CCD).
  u.unichar_insert("\u0c9c");
  EXPECT_EQ(u.size(), 4);
  u.unichar_insert("\u0cad");
  EXPECT_EQ(u.size(), 5);
  u.unichar_insert("\u0ccd\u0c9c");
  EXPECT_EQ(u.size(), 6);
  u.unichar_insert("\u0ccd");
  EXPECT_EQ(u.size(), 7);
  // By default the encodable bigram is NOT added.
  u.unichar_insert("\u0ccd\u0cad");
  EXPECT_EQ(u.size(), 7);
  // It is added if we force it to be.
  u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue);
  EXPECT_EQ(u.size(), 8);
  // Serialize into a memory buffer and load it back into a second set.
  // Each step is checked so an I/O failure is not misreported as an id
  // mismatch.
  std::vector<char> data;
  tesseract::TFile fp;
  fp.OpenWrite(&data);
  EXPECT_TRUE(u.save_to_file(&fp));
  // data.data() stays well-defined even if nothing was written.
  EXPECT_TRUE(fp.Open(data.data(), data.size()));
  UNICHARSET v;
  EXPECT_TRUE(v.load_from_file(&fp, false));
  EXPECT_EQ(v.unichar_to_id("\u0c9c"), 3);
  EXPECT_EQ(v.unichar_to_id("\u0cad"), 4);
  EXPECT_EQ(v.unichar_to_id("\u0ccd\u0c9c"), 5);
  EXPECT_EQ(v.unichar_to_id("\u0ccd"), 6);
  EXPECT_EQ(v.unichar_to_id("\u0ccd\u0cad"), 7);
}

◆ TEST() [59/88]

tesseract::TEST	(	UnicharsetTest	,
		OldStyle
	)

Definition at line 147 of file unicharset_test.cc.

                               {
  // Loads a legacy unicharset that still carries the fi/fl ligatures and
  // checks that loading keeps every entry.
  UNICHARSET legacy_set;
  const std::string unicharset_path = file::JoinPath(TESTDATA_DIR, "eng.unicharset");
  LOG(INFO) << "Filename=" << unicharset_path;
  EXPECT_TRUE(legacy_set.load_from_file(unicharset_path.c_str()));
  // The reference file is expected to hold 111 entries.
  EXPECT_EQ(legacy_set.size(), 111);
}

◆ TEST() [60/88]

tesseract::TEST	(	UnicharTest	,
		Conversion
	)

Definition at line 18 of file unichar_test.cc.

                              {
  // Round-trips one string through UTF-8 -> UTF-32 -> UTF-8. The sample holds
  // one code each of 1, 2, 3 and 4 UTF-8 bytes so every length is exercised.
  const std::vector<char32> kUTF32Src = {'a', 0x5d0, 0xca4, 0x2a714};
  const char *kUTF8Src = "a\u05d0\u0ca4\U0002a714";
  // Decoding must yield exactly the expected code points...
  const std::vector<char32> decoded = UNICHAR::UTF8ToUTF32(kUTF8Src);
  EXPECT_THAT(decoded, testing::ElementsAreArray(kUTF32Src));
  // ...and encoding them again must reproduce the original bytes.
  const std::string reencoded = UNICHAR::UTF32ToUTF8(decoded);
  EXPECT_STREQ(kUTF8Src, reencoded.c_str());
}

◆ TEST() [61/88]

tesseract::TEST	(	UnicharTest	,
		InvalidText
	)

Definition at line 31 of file unichar_test.cc.

                               {
  // Both conversion directions must reject malformed input by returning an
  // empty result.
  // 0x200000 lies above U+10FFFF, the largest Unicode code point.
  const std::vector<char32> kInvalidUTF32 = {'a', ' ', 0x200000, 'x'};
  // "\200" is the byte 0x80, which cannot start a UTF-8 sequence.
  const char *kInvalidUTF8 = "a b\200d string";
  // Invalid utf8 decodes to an empty vector.
  const std::vector<char32> decoded = UNICHAR::UTF8ToUTF32(kInvalidUTF8);
  EXPECT_TRUE(decoded.empty());
  // Invalid utf32 encodes to an empty string.
  const std::string encoded = UNICHAR::UTF32ToUTF8(kInvalidUTF32);
  EXPECT_TRUE(encoded.empty());
}

◆ TEST() [62/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		ExplicitViramaNonJoiner
	)

Definition at line 121 of file validate_grapheme_test.cc.

                                                    {
  // A virama followed by ZWNJ: combined mode yields two graphemes with TA as
  // the second; glyph-split mode yields three pieces and keeps the ZWNJ
  // attached to the virama.
  std::string str = "\u0d15\u0d4d\u200c\u0d24"; // KA Virama ZWNJ Ta
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size check: indexing glyphs below would be out of bounds otherwise.
  ASSERT_EQ(glyphs.size(), 2);
  EXPECT_EQ(glyphs[1], std::string("\u0d24"));
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs))
      << PrintString32WithUnicodes(str);
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[1], std::string("\u0d4d\u200c"));
}

◆ TEST() [63/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		HalfFormJoiner
	)

Definition at line 67 of file validate_grapheme_test.cc.

                                           {
  // Virama followed by ZWJ: combined mode keeps the whole string as one
  // grapheme; glyph-split mode yields two pieces, the first being
  // KA + virama + ZWJ.
  std::string str = "\u0d15\u0d4d\u200d\u0d24"; // KA Virama ZWJ Ta
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size checks: glyphs[0] must not be read from an empty vector.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs))
      << PrintString32WithUnicodes(str);
  ASSERT_EQ(glyphs.size(), 2) << PrintStringVectorWithUnicodes(glyphs);
  EXPECT_EQ(glyphs[0], std::string("\u0d15\u0d4d\u200d"));
}

◆ TEST() [64/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		MultipleSyllablesAreNotASingleGrapheme
	)

Definition at line 18 of file validate_grapheme_test.cc.

                                                                   {
  // Several syllables in one string must be segmented into separate
  // graphemes rather than merged into one.
  std::string str = "\u0c15\u0c3f\u0c15\u0c0e"; // KA - dep I - KA - ind E.
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // It made 3 graphemes. The check is fatal so the indexing below stays in
  // bounds.
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[0], std::string("\u0c15\u0c3f"));
  EXPECT_EQ(glyphs[1], std::string("\u0c15"));
  EXPECT_EQ(glyphs[2], std::string("\u0c0e"));
}

◆ TEST() [65/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		NoLonelyJoinersQuote
	)

Definition at line 154 of file validate_grapheme_test.cc.

                                                 {
  // A ZWJ that follows the closing quote has nothing to join. The call still
  // succeeds, the stray ZWJ is absent from the output, and the virama before
  // the quote comes out terminated by a ZWNJ.
  std::string str = "'\u0d24\u0d23\u0d32\u0d4d'\u200d";
  std::vector<std::string> glyphs;
  // Returns true, but the joiner is gone.
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size check: glyphs[0..4] are read below.
  ASSERT_EQ(glyphs.size(), 5);
  EXPECT_EQ(glyphs[0], std::string("'"));
  EXPECT_EQ(glyphs[1], std::string("\u0d24"));
  EXPECT_EQ(glyphs[2], std::string("\u0d23"));
  EXPECT_EQ(glyphs[3], std::string("\u0d32\u0d4d\u200c"));
  EXPECT_EQ(glyphs[4], std::string("'"));
}

◆ TEST() [66/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		OpenConjunctNonJoiner
	)

Definition at line 99 of file validate_grapheme_test.cc.

                                                  {
  // ZWNJ placed before the virama: accepted for the Malayalam string (one
  // grapheme combined, three pieces when glyph-split) but rejected for the
  // same pattern written with Telugu code points.
  std::string str = "\u0d15\u200c\u0d4d\u0d24"; // KA ZWNJ Virama Ta
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size checks keep the indexing below in bounds.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs))
      << PrintString32WithUnicodes(str);
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[1], std::string("\u200c\u0d4d"));
  // Malayalam only, so not allowed in Telugu.
  str = "\u0c15\u200c\u0c4d\u0c24"; // KA ZWNJ Virama Ta
  EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                            GraphemeNormMode::kCombined, true, str.c_str(),
                                            &glyphs))
      << PrintString32WithUnicodes(str);
}

◆ TEST() [67/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		SimpleCV
	)

Definition at line 41 of file validate_grapheme_test.cc.

                                     {
  // A consonant followed by one dependent vowel must come back as a single
  // grapheme equal to the input.
  std::string str = "\u0cb9\u0cbf"; // HA I
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size check: glyphs[0] must not be read from an empty vector.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
}

◆ TEST() [68/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		SingleConsonantOK
	)

Definition at line 31 of file validate_grapheme_test.cc.

                                              {
  // A lone consonant is valid and must come back as a single grapheme equal
  // to the input.
  std::string str = "\u0cb9"; // HA
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size check: glyphs[0] must not be read from an empty vector.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
}

◆ TEST() [69/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		SubscriptConjunct
	)

Definition at line 51 of file validate_grapheme_test.cc.

                                              {
  // Consonant + virama + consonant + vowel: one grapheme in combined mode;
  // three pieces in glyph-split mode, the middle one being virama + KA.
  std::string str = "\u0cb9\u0ccd\u0c95\u0cbf"; // HA Virama KA I
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size checks keep the indexing below in bounds.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs))
      << PrintString32WithUnicodes(str);
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[1], std::string("\u0ccd\u0c95"));
}

◆ TEST() [70/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		ThaiGraphemes
	)

Definition at line 137 of file validate_grapheme_test.cc.

                                          {
  // This is a single grapheme unless in glyph split mode, where the three
  // code points come out as three pieces with the base character first.
  std::string str = "\u0e14\u0e38\u0e4a";
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size checks: glyphs[0] must not be read from an empty vector.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs))
      << PrintString32WithUnicodes(str);
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[0], std::string("\u0e14"));
}

◆ TEST() [71/88]

tesseract::TEST	(	ValidateGraphemeTest	,
		TraditionalConjunctJoiner
	)

Definition at line 83 of file validate_grapheme_test.cc.

                                                      {
  // ZWJ placed before the virama: one grapheme in combined mode; three pieces
  // in glyph-split mode, the middle one being ZWJ + virama.
  std::string str = "\u0d15\u200d\u0d4d\u0d24"; // KA ZWJ Virama Ta
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
      << PrintString32WithUnicodes(str);
  // Fatal size checks keep the indexing below in bounds.
  ASSERT_EQ(glyphs.size(), 1);
  EXPECT_EQ(glyphs[0], str);
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs))
      << PrintString32WithUnicodes(str);
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[1], std::string("\u200d\u0d4d"));
}

◆ TEST() [72/88]

tesseract::TEST	(	ValidateIndicTest	,
		AddsJoinerToTerminalVirama
	)

Definition at line 28 of file validate_indic_test.cc.

                                                    {
  // A virama at the end of the string is expected to be normalized to
  // virama + ZWNJ, and feeding the already-normalized form back in must give
  // the same result.
  const std::string terminated = "\u0c15\u0c4d\u200c"; // KA - virama - ZWNJ
  const std::string bare = "\u0c15\u0c4d";              // KA - virama
  ExpectGraphemeModeResults(bare, UnicodeNormMode::kNFC, 3, 2, 1, terminated);
  // Same result if we started with the normalized string.
  ExpectGraphemeModeResults(terminated, UnicodeNormMode::kNFC, 3, 2, 1, terminated);
}

◆ TEST() [73/88]

tesseract::TEST	(	ValidateIndicTest	,
		MatrasFollowConsonantsNotVowels
	)

Definition at line 102 of file validate_indic_test.cc.

                                                         {
  // A dependent vowel sign after an independent vowel must be rejected,
  // while the same kind of sign after a consonant is accepted unchanged.
  std::string dest;
  const std::string after_vowel = "\u0c05\u0c47"; // A EE
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   after_vowel.c_str(), &dest))
      << PrintString32WithUnicodes(after_vowel);
  const std::string after_consonant = "\u0c1e\u0c3e"; // NYA AA
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  after_consonant.c_str(), &dest))
      << PrintString32WithUnicodes(after_consonant);
  EXPECT_EQ(dest, after_consonant);
}

◆ TEST() [74/88]

tesseract::TEST	(	ValidateIndicTest	,
		Nukta
	)

Definition at line 128 of file validate_indic_test.cc.

                               {
  // Nukta before virama is valid: glyph-split mode yields three pieces, the
  // last being virama + HA. With the two marks swapped, the expected output
  // is the correctly ordered string.
  std::string str = "\u0c95\u0cbc\u0ccd\u0cb9"; // KA Nukta Virama HA
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs));
  // Fatal size check: glyphs[2] is read below.
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[2], std::string("\u0ccd\u0cb9"));
  // Swapped Nukta and Virama are not allowed, but NFC normalization fixes it.
  std::string str2 = "\u0c95\u0ccd\u0cbc\u0cb9"; // KA Virama Nukta HA
  ExpectGraphemeModeResults(str2, UnicodeNormMode::kNFC, 4, 3, 1, str);
}

◆ TEST() [75/88]

tesseract::TEST	(	ValidateIndicTest	,
		OnlyOneDependentVowel
	)

Definition at line 37 of file validate_indic_test.cc.

                                               {
  // Two dependent vowels on the same consonant must make normalization fail.
  const std::string two_vowels = "\u0d15\u0d3e\u0d42"; // KA AA UU
  std::string normalized;
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   two_vowels.c_str(), &normalized))
      << PrintString32WithUnicodes(two_vowels);
}

◆ TEST() [76/88]

tesseract::TEST	(	ValidateIndicTest	,
		OnlyOneVowelModifier
	)

Definition at line 53 of file validate_indic_test.cc.

                                              {
  // Checks three vowel-modifier cases: one modifier after an explicit virama
  // is accepted, two different modifiers are rejected, and Malayalam's
  // doubled anusvara is accepted.
  std::string result;

  const std::string virama_then_modifier = "\u0c26\u0c4d\u0c01"; // DA virama candrabindu
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  virama_then_modifier.c_str(), &result));
  // It made 1 grapheme of 4 chars, by terminating the explicit virama.
  EXPECT_EQ(std::string("\u0c26\u0c4d\u200c\u0c01"), result);

  const std::string two_modifiers = "\u0995\u0983\u0981"; // KA visarga candrabindu
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   two_modifiers.c_str(), &result));

  // Exception: Malayalam allows multiple anusvara.
  const std::string double_anusvara = "\u0d15\u0d02\u0d02"; // KA Anusvara Anusvara
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  double_anusvara.c_str(), &result));
  EXPECT_EQ(double_anusvara, result);
}

◆ TEST() [77/88]

tesseract::TEST	(	ValidateIndicTest	,
		SinhalaRakaransaya
	)

Definition at line 142 of file validate_indic_test.cc.

                                            {
  // Virama + ZWJ + Rayanna after a consonant: normalization leaves the string
  // unchanged, glyph-split mode keeps the three code points together as the
  // second piece, and a dependent vowel may follow.
  std::string str = "\u0d9a\u0dca\u200d\u0dbb"; // KA Virama ZWJ Rayanna
  std::string dest;
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  str.c_str(), &dest))
      << PrintString32WithUnicodes(str);
  EXPECT_EQ(dest, str);
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs));
  // Fatal size check: glyphs[1] is read below.
  ASSERT_EQ(glyphs.size(), 2);
  EXPECT_EQ(glyphs[1], std::string("\u0dca\u200d\u0dbb"));
  // Can be followed by a dependent vowel.
  str += "\u0dd9"; // E
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  str.c_str(), &dest))
      << PrintString32WithUnicodes(str);
  EXPECT_EQ(dest, str);
}

◆ TEST() [78/88]

tesseract::TEST	(	ValidateIndicTest	,
		SinhalaRepaya
	)

Definition at line 184 of file validate_indic_test.cc.

                                       {
  // Rayanna + virama + ZWJ before a consonant: combined mode attaches the
  // sequence to the following MA (two graphemes); glyph-split mode keeps
  // Rayanna + virama + ZWJ as its own piece (three pieces).
  std::string str = "\u0d9a\u0dbb\u0dca\u200d\u0db8"; // KA Rayanna Virama ZWJ MA
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kCombined, true, str.c_str(),
                                           &glyphs));
  // Fatal size checks keep the indexing below in bounds.
  ASSERT_EQ(glyphs.size(), 2);
  EXPECT_EQ(glyphs[1], std::string("\u0dbb\u0dca\u200d\u0db8"));
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs));
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[1], std::string("\u0dbb\u0dca\u200d"));
}

◆ TEST() [79/88]

tesseract::TEST	(	ValidateIndicTest	,
		SinhalaSpecials
	)

Definition at line 199 of file validate_indic_test.cc.

                                         {
  // Sinhala has some exceptions from the usual rules. Two strings that chain
  // virama + ZWJ sequences are split in glyph-split mode and every piece is
  // checked.
  std::string str = "\u0dc0\u0d9c\u0dca\u200d\u0dbb\u0dca\u200d\u0dbb\u0dca\u200d";
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs));
  // Fatal size check: glyphs[0..4] are read below.
  ASSERT_EQ(glyphs.size(), 5) << PrintStringVectorWithUnicodes(glyphs);
  EXPECT_EQ(glyphs[0], std::string("\u0dc0"));
  EXPECT_EQ(glyphs[1], std::string("\u0d9c"));
  EXPECT_EQ(glyphs[2], std::string("\u0dca\u200d\u0dbb"));
  EXPECT_EQ(glyphs[3], std::string("\u0dca\u200d"));
  EXPECT_EQ(glyphs[4], std::string("\u0dbb\u0dca\u200d"));
  str = "\u0dc3\u0dbb\u0dca\u200d\u0dbb\u0dca\u200d\u0dcf";
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs));
  // Fatal size check: glyphs[0..3] are read below.
  ASSERT_EQ(glyphs.size(), 4) << PrintStringVectorWithUnicodes(glyphs);
  EXPECT_EQ(glyphs[0], std::string("\u0dc3"));
  EXPECT_EQ(glyphs[1], std::string("\u0dbb\u0dca\u200d"));
  EXPECT_EQ(glyphs[2], std::string("\u0dbb\u0dca\u200d"));
  EXPECT_EQ(glyphs[3], std::string("\u0dcf"));
}

◆ TEST() [80/88]

tesseract::TEST	(	ValidateIndicTest	,
		SinhalaYansaya
	)

Definition at line 163 of file validate_indic_test.cc.

                                        {
  // Virama + ZWJ + Yayanna after a consonant: normalization leaves the string
  // unchanged, also with a dependent vowel appended, and glyph-split mode
  // keeps virama + ZWJ + Yayanna together as the middle of three pieces.
  std::string str = "\u0d9a\u0dca\u200d\u0dba"; // KA Virama ZWJ Yayanna
  std::string dest;
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  str.c_str(), &dest))
      << PrintString32WithUnicodes(str);
  EXPECT_EQ(dest, str);
  // Can be followed by a dependent vowel.
  str += "\u0ddd"; // OO
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  str.c_str(), &dest))
      << PrintString32WithUnicodes(str);
  EXPECT_EQ(dest, str);
  std::vector<std::string> glyphs;
  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                           GraphemeNormMode::kGlyphSplit, true, str.c_str(),
                                           &glyphs));
  // Fatal size check: glyphs[1] is read below.
  ASSERT_EQ(glyphs.size(), 3);
  EXPECT_EQ(glyphs[1], std::string("\u0dca\u200d\u0dba"));
}

◆ TEST() [81/88]

tesseract::TEST	(	ValidateIndicTest	,
		SubGraphemes
	)

Definition at line 116 of file validate_indic_test.cc.

                                      {
  // A dependent vowel sign on its own is rejected when grapheme
  // normalization is on, but passes through unchanged when it is off.
  const std::string lone_matra = "\u0d3e"; // AA
  std::string normalized;
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   lone_matra.c_str(), &normalized))
      << PrintString32WithUnicodes(lone_matra);
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNone,
                                  lone_matra.c_str(), &normalized))
      << PrintString32WithUnicodes(lone_matra);
  EXPECT_EQ(normalized, lone_matra);
}

◆ TEST() [82/88]

tesseract::TEST	(	ValidateIndicTest	,
		VowelModifierMustBeLast
	)

Definition at line 80 of file validate_indic_test.cc.

                                                 {
  // A vowel modifier placed before the dependent vowel is rejected; the same
  // code points with the modifier last are accepted unchanged.
  std::string dest;
  const std::string modifier_first = "\u0c28\u0c02\u0c3f"; // NA Sunna I
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   modifier_first.c_str(), &dest))
      << PrintString32WithUnicodes(modifier_first);
  // Swap c02/c3f and all is ok.
  const std::string modifier_last = "\u0c28\u0c3f\u0c02"; // NA I Sunna
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                  modifier_last.c_str(), &dest))
      << PrintString32WithUnicodes(modifier_last);
  EXPECT_EQ(dest, modifier_last);
}

◆ TEST() [83/88]

tesseract::TEST	(	ValidateKhmerTest	,
		BadKhmerWords
	)

Definition at line 31 of file validate_khmer_test.cc.

                                       {
  // Each of these malformed Khmer strings must make grapheme normalization
  // fail.
  std::string result;
  // Multiple dependent vowels not allowed
  const std::string two_vowels = "\u1796\u17b6\u17b7";
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   two_vowels.c_str(), &result));
  // Multiple shifters not allowed
  const std::string two_shifters = "\u1798\u17c9\u17ca";
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   two_shifters.c_str(), &result));
  // Multiple signs not allowed
  const std::string two_signs = "\u1780\u17b6\u17cb\u17cd";
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   two_signs.c_str(), &result));
}

◆ TEST() [84/88]

tesseract::TEST	(	ValidateKhmerTest	,
		GoodKhmerWords
	)

Definition at line 19 of file validate_khmer_test.cc.

                                        {
  // Four well-formed Khmer words; each is expected to normalize to itself.
  // NOTE(review): the three integers are presumably the expected piece
  // counts for the different grapheme modes - confirm against
  // ExpectGraphemeModeResults.
  const std::string word1 = "ព័ត៏មានប្លែកៗ";
  ExpectGraphemeModeResults(word1, UnicodeNormMode::kNFC, 13, 12, 7, word1);
  const std::string word2 = "ទំនុកច្រៀង";
  ExpectGraphemeModeResults(word2, UnicodeNormMode::kNFC, 10, 9, 5, word2);
  const std::string word3 = "កាលីហ្វូញ៉ា";
  ExpectGraphemeModeResults(word3, UnicodeNormMode::kNFC, 11, 10, 4, word3);
  const std::string word4 = "ចាប់ពីផ្លូវ";
  ExpectGraphemeModeResults(word4, UnicodeNormMode::kNFC, 11, 10, 5, word4);
}

◆ TEST() [85/88]

tesseract::TEST	(	ValidateMyanmarTest	,
		BadMyanmarWords
	)

Definition at line 27 of file validate_myanmar_test.cc.

                                           {
  // Two malformed Myanmar words: segmentation and grapheme-normalizing
  // conversion must both fail, while conversion with grapheme normalization
  // switched off must succeed and return the input unchanged.
  std::vector<std::string> glyphs;
  std::string result;

  const std::string first_word = "က်န္းမာေရး";
  EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                            GraphemeNormMode::kCombined, true, first_word.c_str(),
                                            &glyphs));
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   first_word.c_str(), &result));
  // It works if the grapheme normalization is turned off.
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNone,
                                  first_word.c_str(), &result));
  EXPECT_EQ(first_word, result);

  const std::string second_word = "ခုႏွစ္";
  EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
                                            GraphemeNormMode::kGlyphSplit, true,
                                            second_word.c_str(), &glyphs));
  EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
                                   second_word.c_str(), &result));
  // It works if the grapheme normalization is turned off.
  EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNone,
                                  second_word.c_str(), &result));
  EXPECT_EQ(second_word, result);
}

◆ TEST() [86/88]

tesseract::TEST	(	ValidateMyanmarTest	,
		GoodMyanmarWords
	)

Definition at line 19 of file validate_myanmar_test.cc.

                                            {
  // Two well-formed Myanmar phrases (each ends with a space); each is
  // expected to normalize to itself.
  // NOTE(review): the three integers are presumably the expected piece
  // counts for the different grapheme modes - confirm against
  // ExpectGraphemeModeResults.
  const std::string first_phrase = "လျှာကသိသည် "; // No viramas in this one.
  ExpectGraphemeModeResults(first_phrase, UnicodeNormMode::kNFC, 11, 11, 5, first_phrase);
  const std::string second_phrase = "တုန္လႈပ္မႈ ";
  ExpectGraphemeModeResults(second_phrase, UnicodeNormMode::kNFC, 11, 9, 4, second_phrase);
}

◆ TEST() [87/88]

tesseract::TEST	(	ValidatorTest	,
		Idempotency
	)

Definition at line 49 of file validator_test.cc.

                                 {
  // Validating already-validated output must not change it. Two inputs with
  // stray joiners around a quote are cleaned once, the result is cleaned
  // again, and both passes must agree.
  std::vector<char32> str1({0xd24, 0xd23, 0xd32, 0xd4d, '\'', 0x200d, 0x200c, 0x200d, 0x200c});
  std::vector<char32> str2({0xd24, 0xd23, 0xd32, 0xd4d, 0x200c, 0x200d, 0x200c, 0x200d, '\''});
  std::vector<std::vector<char32>> result1, result2, result3, result4;
  // The first pass is asserted fatally, together with a non-empty result,
  // because result1[0] is fed into the second pass.
  ASSERT_TRUE(
      Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, str1, &result1));
  ASSERT_FALSE(result1.empty());
  EXPECT_TRUE(Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, result1[0],
                                                 &result2));
  // Equal sizes are asserted fatally so result2[0] is known to exist.
  ASSERT_EQ(result1.size(), result2.size());
  EXPECT_THAT(result2[0], testing::ElementsAreArray(result1[0]));
  ASSERT_TRUE(
      Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, str2, &result3));
  ASSERT_FALSE(result3.empty());
  EXPECT_TRUE(Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, result3[0],
                                                 &result4));
  ASSERT_EQ(result3.size(), result4.size());
  EXPECT_THAT(result4[0], testing::ElementsAreArray(result3[0]));
}

◆ TEST() [88/88]

tesseract::TEST	(	ValidatorTest	,
		MostFrequentViramaScript
	)

Definition at line 28 of file validator_test.cc.

                                              {
  // The most frequent virama script should come out correct, despite
  // distractions from other scripts. Every input below contains the single
  // Telugu code point 0xc05.
  EXPECT_EQ(ViramaScript::kTelugu, TestableValidator::TestableMostFrequentViramaScript({0xc05}));
  // It is still Telugu surrounded by Latin.
  EXPECT_EQ(ViramaScript::kTelugu,
            TestableValidator::TestableMostFrequentViramaScript({'a', 0xc05, 'b', 'c'}));
  // But the lone Telugu character is outvoted when surrounded by three
  // Devanagari characters...
  EXPECT_EQ(ViramaScript::kDevanagari,
            TestableValidator::TestableMostFrequentViramaScript({0x905, 0xc05, 0x906, 0x907}));
  // ...or by three Kannada characters...
  EXPECT_EQ(ViramaScript::kKannada,
            TestableValidator::TestableMostFrequentViramaScript({0xc85, 0xc05, 0xc86, 0xc87}));
  // ...or by three Bengali characters.
  EXPECT_EQ(ViramaScript::kBengali,
            TestableValidator::TestableMostFrequentViramaScript({0x985, 0xc05, 0x986, 0x987}));
  // Danda and double Danda don't count as Devanagari, as they are common.
  EXPECT_EQ(ViramaScript::kTelugu,
            TestableValidator::TestableMostFrequentViramaScript({0x964, 0xc05, 0x965, 0x965}));
}

◆ TEST_F() [1/229]

tesseract::TEST_F	(	ApplyBoxTest	,
		ItalicCharLevel
	)

Definition at line 115 of file applybox_test.cc.

                                      {
  // Checks trainingital.tif against the word truth text and trainingital.box.
  // NOTE(review): the trailing `false` presumably selects character-level
  // (not line-level) box application - confirm against VerifyBoxesAndText.
  VerifyBoxesAndText("trainingital.tif", kTruthTextWords, "trainingital.box", false);
}

◆ TEST_F() [2/229]

tesseract::TEST_F	(	ApplyBoxTest	,
		ItalLineLevel
	)

Definition at line 125 of file applybox_test.cc.

                                    {
  // Checks trainingitalline.tif against the line truth text; the expected
  // boxes are shared with the character-level test (trainingital.box).
  // NOTE(review): the trailing `true` presumably selects line-level box
  // application - confirm against VerifyBoxesAndText.
  VerifyBoxesAndText("trainingitalline.tif", kTruthTextLine, "trainingital.box", true);
}

◆ TEST_F() [3/229]

tesseract::TEST_F	(	ApplyBoxTest	,
		TimesCharLevel
	)

Definition at line 110 of file applybox_test.cc.

                                     {
  // Checks trainingtimes.tif against the word truth text and
  // trainingtimes.box.
  // NOTE(review): the trailing `false` presumably selects character-level
  // (not line-level) box application - confirm against VerifyBoxesAndText.
  VerifyBoxesAndText("trainingtimes.tif", kTruthTextWords, "trainingtimes.box", false);
}

◆ TEST_F() [4/229]

tesseract::TEST_F	(	ApplyBoxTest	,
		TimesLineLevel
	)

Definition at line 120 of file applybox_test.cc.

                                     {
  // Checks trainingtimesline.tif against the line truth text; the expected
  // boxes are shared with the character-level test (trainingtimes.box).
  // NOTE(review): the trailing `true` presumably selects line-level box
  // application - confirm against VerifyBoxesAndText.
  VerifyBoxesAndText("trainingtimesline.tif", kTruthTextLine, "trainingtimes.box", true);
}

◆ TEST_F() [5/229]

tesseract::TEST_F	(	BaseapiThreadTest	,
		TestAll
	)

Definition at line 221 of file baseapi_thread_test.cc.

                                   {
  // Schedules num_langs_ * FLAGS_reps VerifyTextResult jobs on the pool and
  // waits for them. The whole body is compiled only with INCLUDE_TENSORFLOW;
  // otherwise the test is empty.
#ifdef INCLUDE_TENSORFLOW
  const int n = num_langs_ * FLAGS_reps;
  ResetPool();
  for (int i = 0; i < n; ++i) {
    // A null API pointer is passed here, unlike TestRecognition.
    // NOTE(review): presumably VerifyTextResult then creates its own
    // instance - confirm. pix_ is indexed by i while langs_/gt_text_ wrap
    // with i % num_langs_, so pix_ is assumed to hold at least n entries.
    pool_->Schedule(std::bind(VerifyTextResult, nullptr, pix_[i], langs_[i % num_langs_],
                              gt_text_[i % num_langs_]));
  }
  WaitForPoolWorkers();
#endif
}

◆ TEST_F() [6/229]

tesseract::TEST_F	(	BaseapiThreadTest	,
		TestBasicSanity
	)

Definition at line 179 of file baseapi_thread_test.cc.

                                           {
  // Single-threaded baseline: for every language, initialise a fresh API
  // instance, read the cleaned text for the matching image and compare it
  // with the ground truth.
  for (int i = 0; i < num_langs_; ++i) {
    TessBaseAPI tess;
    InitTessInstance(&tess, langs_[i]);
    std::string ocr_text;
    GetCleanedText(&tess, pix_[i], ocr_text);
    // Non-fatal gtest comparison: a mismatch is recorded with both strings
    // and the remaining languages are still checked, instead of the whole
    // test binary aborting.
    EXPECT_STREQ(gt_text_[i].c_str(), ocr_text.c_str()) << "Failed with lang = " << langs_[i];
  }
}

◆ TEST_F() [7/229]

tesseract::TEST_F	(	BaseapiThreadTest	,
		TestInit
	)

Definition at line 190 of file baseapi_thread_test.cc.

                                    {
  // Runs InitTessInstance on num_langs_ * FLAGS_reps API objects through the
  // pool, cycling through the languages. Compiled only with
  // INCLUDE_TENSORFLOW; otherwise the test is empty.
#ifdef INCLUDE_TENSORFLOW
  const int n = num_langs_ * FLAGS_reps;
  ResetPool();
  // The vector is sized up front, so &tess[i] stays valid while jobs run.
  std::vector<TessBaseAPI> tess(n);
  for (int i = 0; i < n; ++i) {
    pool_->Schedule(std::bind(InitTessInstance, &tess[i], langs_[i % num_langs_]));
  }
  WaitForPoolWorkers();
#endif
}

◆ TEST_F() [8/229]

tesseract::TEST_F	(	BaseapiThreadTest	,
		TestRecognition
	)

Definition at line 203 of file baseapi_thread_test.cc.

                                           {
  // Initialises every API instance on the calling thread first, then
  // schedules one VerifyTextResult job per instance on the pool. Compiled
  // only with INCLUDE_TENSORFLOW; otherwise the test is empty.
#ifdef INCLUDE_TENSORFLOW
  const int n = num_langs_ * FLAGS_reps;
  std::vector<TessBaseAPI> tess(n);
  // Initialize api instances in a single thread.
  for (int i = 0; i < n; ++i) {
    InitTessInstance(&tess[i], langs_[i % num_langs_]);
  }
 
  ResetPool();
  for (int i = 0; i < n; ++i) {
    // Each job gets its own pre-initialised instance. pix_ is indexed by i,
    // so it is assumed to hold at least n entries.
    pool_->Schedule(std::bind(VerifyTextResult, &tess[i], pix_[i], langs_[i % num_langs_],
                              gt_text_[i % num_langs_]));
  }
  WaitForPoolWorkers();
#endif
}

◆ TEST_F() [9/229]

tesseract::TEST_F	(	BitVectorTest	,
		Primes
	)

Definition at line 116 of file bitvector_test.cc.

                              {
  // Fills a BitVector via ComputePrimes and checks it with TestPrimes, then
  // repeats the check on a copy-constructed vector, an assigned vector and a
  // vector round-tripped through a file.
  BitVector map;
  ComputePrimes(&map);
  TestPrimes(map);
  // It still works if we use the copy constructor.
  BitVector map2(map);
  TestPrimes(map2);
  // Or if we assign it.
  BitVector map3;
  map3 = map;
  TestPrimes(map3);
  // Test file i/o too.
  std::string filename = OutputNameToPath("primesbitvector");
  FILE *fp = fopen(filename.c_str(), "wb");
  // ASSERT (not EXPECT) so a failed open aborts before fp is used.
  ASSERT_TRUE(fp != nullptr);
  EXPECT_TRUE(map.Serialize(fp));
  fclose(fp);
  // Re-open the same file for reading and load it into a fresh vector.
  fp = fopen(filename.c_str(), "rb");
  ASSERT_TRUE(fp != nullptr);
  BitVector read_map;
  // NOTE(review): the leading `false` is presumably a byte-swap flag -- confirm
  // against BitVector::DeSerialize.
  EXPECT_TRUE(read_map.DeSerialize(false, fp));
  fclose(fp);
  TestPrimes(read_map);
}

◆ TEST_F() [10/229]

tesseract::TEST_F	(	BitVectorTest	,
		SetAll
	)

Definition at line 142 of file bitvector_test.cc.

                              {
  // Construct a vector with the sized constructor (argument 42) and verify,
  // via TestAll, the expected uniform value after construction, after
  // SetAllTrue() and after SetAllFalse().
  BitVector bits(42);
  TestAll(bits, false);

  bits.SetAllTrue();
  TestAll(bits, true);

  bits.SetAllFalse();
  TestAll(bits, false);
}

◆ TEST_F() [11/229]

tesseract::TEST_F	(	BitVectorTest	,
		TestNextSetBit
	)

Definition at line 155 of file bitvector_test.cc.

                                      {
  // For spacings 1 through 5, lay a pattern over the range [0, 256) with
  // SetBitPattern and validate the resulting vector with ExpectCorrectBits.
  // The same vector object is reused across iterations.
  const int kMaxSpacing = 5;
  BitVector bits;
  for (int step = 1; step <= kMaxSpacing; step++) {
    SetBitPattern(0, 256, step, &bits);
    ExpectCorrectBits(bits);
  }
}

◆ TEST_F() [12/229]

tesseract::TEST_F	(	BitVectorTest	,
		TestNumSetBits
	)

Definition at line 165 of file bitvector_test.cc.

                                      {
  // For every start value 0..255, apply SetBitPattern to the one-wide range
  // [start, start + 1) with spacing 1 and validate with ExpectCorrectBits.
  // The same vector object is reused across iterations.
  BitVector bits;
  for (int start = 0; start < 256; start++) {
    const int end = start + 1;
    SetBitPattern(start, end, 1, &bits);
    ExpectCorrectBits(bits);
  }
}

◆ TEST_F() [13/229]

tesseract::TEST_F	(	ColPartitionTest	,
		IsInSameColumnAsBorders
	)

Definition at line 44 of file colpartition_test.cc.

                                                  {
  // Checks IsInSameColumnAs at range borders: ranges that share an end column
  // are expected to match, ranges with no common column are not.
  TestableColPartition a, b, c, d;
  a.SetColumnRange(0, 1);
  b.SetColumnRange(1, 2);
  c.SetColumnRange(2, 3);
  d.SetColumnRange(4, 5);

  // a and b meet at column 1; checked in both directions.
  EXPECT_TRUE(a.IsInSameColumnAs(b));
  EXPECT_TRUE(b.IsInSameColumnAs(a));
  // c (2..3) and d (4..5) have no column in common.
  EXPECT_FALSE(c.IsInSameColumnAs(d));
  EXPECT_FALSE(d.IsInSameColumnAs(c));
  EXPECT_FALSE(a.IsInSameColumnAs(d));
}

◆ TEST_F() [14/229]

tesseract::TEST_F	(	ColPartitionTest	,
		IsInSameColumnAsPartialOverlap
	)

Definition at line 67 of file colpartition_test.cc.

                                                         {
  // Ranges 3..8 and 6..10 overlap only partially (columns 6..8); the
  // relation is expected to hold in both directions.
  TestableColPartition a, b;
  a.SetColumnRange(3, 8);
  b.SetColumnRange(6, 10);

  EXPECT_TRUE(a.IsInSameColumnAs(b));
  EXPECT_TRUE(b.IsInSameColumnAs(a));
}

◆ TEST_F() [15/229]

tesseract::TEST_F	(	ColPartitionTest	,
		IsInSameColumnAsReflexive
	)

Definition at line 35 of file colpartition_test.cc.

                                                    {
  // A partition is expected to be in the same column as itself, both for a
  // two-column range (1..2) and for a single-column range (3..3).
  TestableColPartition a, b;
  a.SetColumnRange(1, 2);
  b.SetColumnRange(3, 3);

  EXPECT_TRUE(a.IsInSameColumnAs(a));
  EXPECT_TRUE(b.IsInSameColumnAs(b));
}

◆ TEST_F() [16/229]

tesseract::TEST_F	(	ColPartitionTest	,
		IsInSameColumnAsSuperset
	)

Definition at line 58 of file colpartition_test.cc.

                                                   {
  // Range 4..7 lies entirely inside range 2..8; the relation is expected to
  // hold in both directions.
  TestableColPartition a, b;
  a.SetColumnRange(4, 7);
  b.SetColumnRange(2, 8);

  EXPECT_TRUE(a.IsInSameColumnAs(b));
  EXPECT_TRUE(b.IsInSameColumnAs(a));
}

◆ TEST_F() [17/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		ExitsWithErrorOnInvalidFlag
	)

Definition at line 66 of file commandlineflags_test.cc.

                                                          {
  // Death test: parsing an unknown flag must terminate the process with exit
  // code 1 and produce output matching "ERROR: Non-existent flag".
  const char *argv[] = {"", "--test_nonexistent_flag"};
  EXPECT_EXIT(TestParser(countof(argv), argv), ::testing::ExitedWithCode(1),
              "ERROR: Non-existent flag");
}

◆ TEST_F() [18/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		ParseBoolFlags
	)

Definition at line 121 of file commandlineflags_test.cc.

                                             {
  // Exercises the accepted spellings of a boolean flag: "=true"/"=1",
  // "=false"/"=0", a bare flag (implied true), and an empty value (error).
  // Every argv array is passed with countof() so argc cannot drift from the
  // array contents if an entry is added or removed.
  const char *argv[] = {"", "--foo_bool=true", "--bar_bool=1"};
  // Preset to the opposite value so the checks below prove the parser wrote it.
  FLAGS_foo_bool.set_value(false);
  FLAGS_bar_bool.set_value(false);
  TestParser(countof(argv), argv);
  // Verify changed value
  EXPECT_TRUE(FLAGS_foo_bool);
  EXPECT_TRUE(FLAGS_bar_bool);

  const char *inv_argv[] = {"", "--foo_bool=false", "--bar_bool=0"};
  FLAGS_foo_bool.set_value(true);
  FLAGS_bar_bool.set_value(true);
  TestParser(countof(inv_argv), inv_argv);
  // Verify changed value
  EXPECT_FALSE(FLAGS_foo_bool);
  EXPECT_FALSE(FLAGS_bar_bool);

  // A bare boolean flag with no value is expected to mean true.
  const char *arg_implied_true[] = {"", "--bar_bool"};
  FLAGS_bar_bool.set_value(false);
  TestParser(countof(arg_implied_true), arg_implied_true);
  EXPECT_TRUE(FLAGS_bar_bool);

  // "--bar_bool=" with nothing after '=' must exit with code 1.
  const char *arg_missing_val[] = {"", "--bar_bool="};
  EXPECT_EXIT(TestParser(countof(arg_missing_val), arg_missing_val),
              ::testing::ExitedWithCode(1), "ERROR");
}

◆ TEST_F() [19/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		ParseDoubleFlags
	)

Definition at line 91 of file commandlineflags_test.cc.

                                               {
  // Checks double flags given as "--flag=value" and as "--flag value", then
  // checks that a missing value and an empty "=" value both exit with code 1.
  // Every argv array is passed with countof() so argc cannot drift from the
  // array contents.
  const char *argv[] = {"", "--foo_double=3.14", "--bar_double", "1.2"};
  TestParser(countof(argv), argv);

  EXPECT_EQ(3.14, FLAGS_foo_double);
  EXPECT_EQ(1.2, FLAGS_bar_double);

  // Flag given as the last argument with no value following it.
  const char *arg_no_value[] = {"", "--bar_double"};
  EXPECT_EXIT(TestParser(countof(arg_no_value), arg_no_value), ::testing::ExitedWithCode(1),
              "ERROR");

  // '=' present but nothing after it.
  const char *arg_bad_format[] = {"", "--bar_double="};
  EXPECT_EXIT(TestParser(countof(arg_bad_format), arg_bad_format), ::testing::ExitedWithCode(1),
              "ERROR");
}

◆ TEST_F() [20/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		ParseIntegerFlags
	)

Definition at line 72 of file commandlineflags_test.cc.

                                                {
  // Checks integer flags given as "--flag=value" and as "--flag value"
  // (including a negative value), then three malformed inputs that must each
  // exit with code 1.
  const char *argv[] = {"", "--foo_int=3", "--bar_int", "-4"};
  TestParser(countof(argv), argv);
  EXPECT_EQ(3, FLAGS_foo_int);
  EXPECT_EQ(-4, FLAGS_bar_int);

  // Flag given as the last argument with no value following it.
  const char *arg_no_value[] = {"", "--bar_int"};
  EXPECT_EXIT(TestParser(countof(arg_no_value), arg_no_value), ::testing::ExitedWithCode(1),
              "ERROR");

  // The token following --bar_int is another flag, not an integer.
  const char *arg_invalid_value[] = {"", "--bar_int", "--foo_int=3"};
  EXPECT_EXIT(TestParser(countof(arg_invalid_value), arg_invalid_value),
              ::testing::ExitedWithCode(1), "ERROR");

  // '=' present but nothing after it.
  const char *arg_bad_format[] = {"", "--bar_int="};
  EXPECT_EXIT(TestParser(countof(arg_bad_format), arg_bad_format), ::testing::ExitedWithCode(1),
              "ERROR");
}

◆ TEST_F() [21/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		ParseOldFlags
	)

Definition at line 147 of file commandlineflags_test.cc.

                                            {
  // Checks that the single-dash form "-q text" sets FLAGS_q.
  // The first assertion requires FLAGS_q to be empty on entry.
  EXPECT_STREQ("", FLAGS_q.c_str());
  const char *argv[] = {"", "-q", "text"};
  TestParser(countof(argv), argv);
  EXPECT_STREQ("text", FLAGS_q.c_str());
}

◆ TEST_F() [22/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		ParseStringFlags
	)

Definition at line 105 of file commandlineflags_test.cc.

                                               {
  // Checks string flags given as "--flag=value" and as "--flag value", that a
  // missing value exits with code 1, and that "--flag=" is accepted and
  // yields the empty string.
  // Every argv array is passed with countof() so argc cannot drift from the
  // array contents.
  const char *argv[] = {"", "--foo_string=abc", "--bar_string", "def"};
  TestParser(countof(argv), argv);

  EXPECT_STREQ("abc", FLAGS_foo_string.c_str());
  EXPECT_STREQ("def", FLAGS_bar_string.c_str());

  // Flag given as the last argument with no value following it.
  const char *arg_no_value[] = {"", "--bar_string"};
  EXPECT_EXIT(TestParser(countof(arg_no_value), arg_no_value), ::testing::ExitedWithCode(1),
              "ERROR");

  // Preset to a non-empty value so the final check proves the parser
  // overwrote it with "".
  FLAGS_bar_string.set_value("bar");
  const char *arg_empty_string[] = {"", "--bar_string="};
  TestParser(countof(arg_empty_string), arg_empty_string);
  EXPECT_STREQ("", FLAGS_bar_string.c_str());
}

◆ TEST_F() [23/229]

tesseract::TEST_F	(	CommandlineflagsTest	,
		RemoveFlags
	)

Definition at line 44 of file commandlineflags_test.cc.

                                          {
  // Calls ParseCommandLineFlags directly with its last argument set to true
  // and checks that the flag and its value are removed from argc/argv,
  // leaving the program name and the two positional arguments.
  const char *const_argv[] = {"Progname", "--foo_int", "3", "file1.h", "file2.h"};
  int argc = countof(const_argv);
  // The parser takes char ***; constness is cast away from the literal array.
  char **argv = const_cast<char **>(const_argv);
  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);

  // argv should be rearranged to look like { "Progname", "file1.h", "file2.h" }
  EXPECT_EQ(3, argc);
  EXPECT_STREQ("Progname", argv[0]);
  EXPECT_STREQ("file1.h", argv[1]);
  EXPECT_STREQ("file2.h", argv[2]);
}

◆ TEST_F() [24/229]

tesseract::TEST_F	(	DawgTest	,
		TestDawgConversion
	)

Definition at line 85 of file dawg_test.cc.

                                     {
  // Delegates to the fixture's TestDawgRoundTrip helper using the English
  // unicharset and the cleaned English frequency word list.
  TestDawgRoundTrip("eng.unicharset", "eng.wordlist.clean.freq");
}

◆ TEST_F() [25/229]

tesseract::TEST_F	(	DawgTest	,
		TestMatching
	)

Definition at line 89 of file dawg_test.cc.

                               {
  // Builds a Trie containing the single word " '" (space, apostrophe) and
  // checks prefix versus whole-word lookups for both " " and " '".
  UNICHARSET unicharset;
  unicharset.load_from_file(file::JoinPath(TESTING_DIR, "eng.unicharset").c_str());
  tesseract::Trie trie(tesseract::DAWG_TYPE_WORD, "basic_dawg", NGRAM_PERM, unicharset.size(), 0);
  WERD_CHOICE space_apos(" '", unicharset);
  trie.add_word_to_dawg(space_apos);

  // " " is a proper prefix of the only stored word.
  WERD_CHOICE space(" ", unicharset);

  // partial match ok - then good!
  EXPECT_TRUE(trie.prefix_in_dawg(space, false));
  // require complete match - not present.
  EXPECT_FALSE(trie.word_in_dawg(space));
  EXPECT_FALSE(trie.prefix_in_dawg(space, true));

  // partial or complete match ok for full word:
  EXPECT_TRUE(trie.prefix_in_dawg(space_apos, false));
  EXPECT_TRUE(trie.word_in_dawg(space_apos));
  EXPECT_TRUE(trie.prefix_in_dawg(space_apos, true));
}

◆ TEST_F() [26/229]

tesseract::TEST_F	(	DENORMTest	,
		Multiple
	)

Definition at line 83 of file denorm_test.cc.

                             {
  // Chains two DENORMs: denorm2 is set up with a rotation and with &denorm
  // passed as its third argument, then transforms through each are checked
  // with ExpectCorrectTransform.
  DENORM denorm;
  denorm.SetupNormalization(nullptr, nullptr, nullptr, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f,
                            static_cast<float>(kBlnBaselineOffset));

  DENORM denorm2;
  FCOORD rotation90(0.0f, 1.0f);
  denorm2.SetupNormalization(nullptr, &rotation90, &denorm, 128.0f, 128.0f, 0.5f, 0.25f, 0.0f,
                             0.0f);
  TPOINT pt1(1050, 2000);
  TPOINT result1(100, kBlnBaselineOffset);
  // NOTE(review): the trailing bool is presumably a "local only" switch --
  // confirm against ExpectCorrectTransform.
  ExpectCorrectTransform(denorm, pt1, result1, true);
  ExpectCorrectTransform(denorm, pt1, result1, false);
  TPOINT result2(kBlnBaselineOffset / 4, -14);
  // With true the input is result1 (already mapped by denorm); with false the
  // input is the original pt1. Both are expected to yield result2.
  ExpectCorrectTransform(denorm2, result1, result2, true);
  ExpectCorrectTransform(denorm2, pt1, result2, false);
}

◆ TEST_F() [27/229]

tesseract::TEST_F	(	DENORMTest	,
		NoRotations
	)

Definition at line 51 of file denorm_test.cc.

                                {
  // Sets up a single DENORM with no rotation (null second argument) and no
  // predecessor (null third argument), then checks two point mappings with
  // ExpectCorrectTransform, each with the final flag both true and false.
  DENORM denorm;
  denorm.SetupNormalization(nullptr, nullptr, nullptr, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f,
                            static_cast<float>(kBlnBaselineOffset));
  TPOINT pt1(1100, 2000);
  TPOINT result1(200, kBlnBaselineOffset);
  ExpectCorrectTransform(denorm, pt1, result1, true);
  ExpectCorrectTransform(denorm, pt1, result1, false);
  TPOINT pt2(900, 2100);
  TPOINT result2(-200, 300 + kBlnBaselineOffset);
  ExpectCorrectTransform(denorm, pt2, result2, true);
  ExpectCorrectTransform(denorm, pt2, result2, false);
}

◆ TEST_F() [28/229]

tesseract::TEST_F	(	DENORMTest	,
		WithRotations
	)

Definition at line 66 of file denorm_test.cc.

                                  {
  // Same setup values as the no-rotation case, but with the rotation vector
  // (0, 1) supplied; checks the two resulting point mappings with
  // ExpectCorrectTransform, each with the final flag both true and false.
  DENORM denorm;
  FCOORD rotation90(0.0f, 1.0f);
  denorm.SetupNormalization(nullptr, &rotation90, nullptr, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f,
                            static_cast<float>(kBlnBaselineOffset));

  TPOINT pt1(1100, 2000);
  TPOINT result1(0, 200 + kBlnBaselineOffset);
  ExpectCorrectTransform(denorm, pt1, result1, true);
  ExpectCorrectTransform(denorm, pt1, result1, false);
  TPOINT pt2(900, 2100);
  TPOINT result2(-300, kBlnBaselineOffset - 200);
  ExpectCorrectTransform(denorm, pt2, result2, true);
  ExpectCorrectTransform(denorm, pt2, result2, false);
}

◆ TEST_F() [29/229]

tesseract::TEST_F	(	EquationFinderTest	,
		CheckSeedBlobsCount
	)

Definition at line 330 of file equationdetect_test.cc.

                                                {
  // Populates four fake partitions with different mixes of math / digit /
  // total blobs via AddMathDigitBlobs and checks RunCheckSeedBlobsCount on
  // each. All four partitions share the same bounding box.
  TBOX box(0, 950, 999, 999);
  ColPartition *part1 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  ColPartition *part2 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  ColPartition *part3 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  ColPartition *part4 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);

  // Part 1: 8 math, 0 digit, 20 total.
  equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part1));

  // Part 2: 1 math, 8 digit, 20 total.
  equation_det_->AddMathDigitBlobs(1, 8, 20, part2);
  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part2));

  // Part 3: 3 math, 8 digit, 20 total.
  equation_det_->AddMathDigitBlobs(3, 8, 20, part3);
  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part3));

  // Part 4: 0 math, 0 digit, 8 total.
  equation_det_->AddMathDigitBlobs(0, 0, 8, part4);
  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part4));

  // Release memory.
  part1->DeleteBoxes();
  delete (part1);
  part2->DeleteBoxes();
  delete (part2);
  part3->DeleteBoxes();
  delete (part3);
  part4->DeleteBoxes();
  delete (part4);
}

◆ TEST_F() [30/229]

tesseract::TEST_F	(	EquationFinderTest	,
		ComputeCPsSuperBBox
	)

Definition at line 404 of file equationdetect_test.cc.

                                                {
  // Inserts fake partitions into a grid in three stages and, after each
  // stage, passes the expected union box to TestComputeCPsSuperBBox.
  // NOTE(review): pix is handed to SetPixBinary and not destroyed in this
  // body; presumably ownership is taken elsewhere -- confirm.
  Image pix = pixCreate(1001, 1001, 1);
  equation_det_->SetPixBinary(pix);
  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));

  TBOX box1(0, 0, 999, 99);
  ColPartition *part1 = ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  TBOX box2(0, 100, 499, 199);
  ColPartition *part2 = ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  TBOX box3(500, 100, 999, 199);
  ColPartition *part3 = ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  TBOX box4(0, 200, 999, 299);
  ColPartition *part4 = ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  TBOX box5(0, 900, 999, 999);
  ColPartition *part5 = ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);

  // Add part1->part3 into part_grid and test.
  part_grid.InsertBBox(true, true, part1);
  part_grid.InsertBBox(true, true, part2);
  part_grid.InsertBBox(true, true, part3);
  TBOX super_box(0, 0, 999, 199);
  equation_det_->TestComputeCPsSuperBBox(super_box, &part_grid);

  // Add part4 and test.
  part_grid.InsertBBox(true, true, part4);
  TBOX super_box2(0, 0, 999, 299);
  equation_det_->TestComputeCPsSuperBBox(super_box2, &part_grid);

  // Add part5 and test.
  part_grid.InsertBBox(true, true, part5);
  TBOX super_box3(0, 0, 999, 999);
  equation_det_->TestComputeCPsSuperBBox(super_box3, &part_grid);

  // Release memory.
  part1->DeleteBoxes();
  delete (part1);
  part2->DeleteBoxes();
  delete (part2);
  part3->DeleteBoxes();
  delete (part3);
  part4->DeleteBoxes();
  delete (part4);
  part5->DeleteBoxes();
  delete (part5);
}

◆ TEST_F() [31/229]

tesseract::TEST_F	(	EquationFinderTest	,
		ComputeForegroundDensity
	)

Definition at line 364 of file equationdetect_test.cc.

                                                     {
  // Builds a 1-bit image whose first height/2 rows are set and checks the
  // density reported by RunComputeForegroundDensity for three boxes: one
  // expected to be empty, one straddling the midline, one expected to be full.
  // Create the pix with top half foreground, bottom half background.
  int width = 1024, height = 768;
  Image pix = pixCreate(width, height, 1);
  pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
  // NOTE(review): box1 starts at y = 0 yet expects density 0, while the raster
  // op sets image rows starting at 0; TBOX y is therefore presumably measured
  // from the bottom of the image -- confirm.
  TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
      box3(100, height - 40, 140, height);
  equation_det_->SetPixBinary(pix);

  // Verify
  EXPECT_NEAR(0.0, equation_det_->RunComputeForegroundDensity(box1), 0.0001f);
  EXPECT_NEAR(0.5, equation_det_->RunComputeForegroundDensity(box2), 0.0001f);
  EXPECT_NEAR(1.0, equation_det_->RunComputeForegroundDensity(box3), 0.0001f);
}

◆ TEST_F() [32/229]

tesseract::TEST_F	(	EquationFinderTest	,
		CountAlignment
	)

Definition at line 379 of file equationdetect_test.cc.

                                           {
  // Sample positions: three at 1, one at 100 and two at 200. The queries
  // below probe exact hits, nearby values and values far from any sample.
  std::vector<int> vec{1, 1, 1, 100, 200, 200};

  // Exact positions: the count equals the number of identical samples.
  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 1));
  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 100));
  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 200));

  // Positions one or two units away are expected to count the same samples.
  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 3));
  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 99));
  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 202));

  // Positions 50 units from the nearest sample are expected to count nothing.
  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 150));
  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 50));
  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 250));
}

◆ TEST_F() [33/229]

tesseract::TEST_F	(	EquationFinderTest	,
		EstimateTypeForUnichar
	)

Definition at line 231 of file equationdetect_test.cc.

                                                   {
  // Checks the BSTT_* type returned by RunEstimateTypeForUnichar for letters,
  // punctuation, digits and math symbols.
  // Test abc characters.
  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("a"));
  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("c"));

  // Test punctuation characters.
  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("'"));
  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar(","));

  // Test digits.
  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("1"));
  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("4"));
  // "|" is expected to be typed as a digit too (presumably because it is
  // easily confused with "1" -- confirm against the implementation).
  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("|"));

  // Test math symbols.
  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("("));
  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("+"));
}

◆ TEST_F() [34/229]

tesseract::TEST_F	(	EquationFinderTest	,
		IdentifySpecialText
	)

Definition at line 180 of file equationdetect_test.cc.

                                                {
  // Runs RunIdentifySpecialText over every blob found in equ_gt1.tif and
  // checks the per-type counts against expected values within a tolerance.
  // The test is skipped unless ENABLE_IdentifySpecialText_TEST is non-zero.
#if !ENABLE_IdentifySpecialText_TEST
  GTEST_SKIP();
#else // TODO: missing equ_gt1.tif
  // Load Image.
  std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
  Image pix_binary = pixRead(imagefile.c_str());
  // The image must load and be 1 bit deep.
  CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);

  // Get components.
  BLOCK_LIST blocks;
  TO_BLOCK_LIST to_blocks;
  AddPageBlock(pix_binary, &blocks);
  Textord *textord = tesseract_->mutable_textord();
  textord->find_components(pix_binary, &blocks, &to_blocks);

  // Identify special texts from to_blocks.
  TO_BLOCK_IT to_block_it(&to_blocks);
  // Maps special_text_type() value -> number of blobs with that type.
  std::map<int, int> stt_count;
  for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list(); to_block_it.forward()) {
    TO_BLOCK *to_block = to_block_it.data();
    BLOBNBOX_IT blob_it(&(to_block->blobs));
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      BLOBNBOX *blob = blob_it.data();
      // blob->set_special_text_type(BSTT_NONE);
      equation_det_->RunIdentifySpecialText(blob, 0);
      // Ensure the key exists with count 0, then increment it.
      tensorflow::gtl::InsertIfNotPresent(&stt_count, blob->special_text_type(), 0);
      stt_count[blob->special_text_type()]++;
    }
  }

  // Verify the number, but allow a range of +/- kCountRange before squealing.
  const int kCountRange = 3;
  EXPECT_GE(39 + kCountRange, stt_count[BSTT_NONE]);
  EXPECT_LE(39 - kCountRange, stt_count[BSTT_NONE]);

  // if you count all the subscripts etc, there are ~45 italic chars.
  EXPECT_GE(45 + kCountRange, stt_count[BSTT_ITALIC]);
  EXPECT_LE(45 - kCountRange, stt_count[BSTT_ITALIC]);
  EXPECT_GE(41 + kCountRange, stt_count[BSTT_DIGIT]);
  EXPECT_LE(41 - kCountRange, stt_count[BSTT_DIGIT]);
  EXPECT_GE(50 + kCountRange, stt_count[BSTT_MATH]);
  EXPECT_LE(50 - kCountRange, stt_count[BSTT_MATH]);
  EXPECT_GE(10 + kCountRange, stt_count[BSTT_UNCLEAR]);
  EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);

  // Release memory.
  pix_binary.destroy();
#endif
}

◆ TEST_F() [35/229]

tesseract::TEST_F	(	EquationFinderTest	,
		IsIndented
	)

Definition at line 250 of file equationdetect_test.cc.

                                       {
  // Inserts five fake partitions into a grid and checks the indent type that
  // RunIsIndented reports for each of them.
  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));

  // Create five ColPartitions:
  // part 1: ************
  // part 2:   *********
  // part 3: *******
  // part 4:   *****
  //
  // part 5:   ********
  TBOX box1(0, 950, 999, 999);
  ColPartition *part1 = ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  part_grid.InsertBBox(true, true, part1);
  TBOX box2(300, 920, 900, 940);
  ColPartition *part2 = ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  part_grid.InsertBBox(true, true, part2);
  TBOX box3(0, 900, 600, 910);
  ColPartition *part3 = ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  part_grid.InsertBBox(true, true, part3);
  TBOX box4(300, 890, 600, 899);
  ColPartition *part4 = ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  part_grid.InsertBBox(true, true, part4);
  // box5 sits roughly 380 units below box4, far from the other four.
  TBOX box5(300, 500, 900, 510);
  ColPartition *part5 = ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  part_grid.InsertBBox(true, true, part5);

  // Test
  // part1 should be no indent.
  EXPECT_EQ(EquationDetect::NO_INDENT, equation_det_->RunIsIndented(&part_grid, part1));
  // part2 should be left indent in terms of part1.
  EXPECT_EQ(EquationDetect::LEFT_INDENT, equation_det_->RunIsIndented(&part_grid, part2));
  // part3 should be right indent.
  EXPECT_EQ(EquationDetect::RIGHT_INDENT, equation_det_->RunIsIndented(&part_grid, part3));
  // part4 should be both indented.
  EXPECT_EQ(EquationDetect::BOTH_INDENT, equation_det_->RunIsIndented(&part_grid, part4));
  // part5 should be no indent because it is too far from part1.
  EXPECT_EQ(EquationDetect::NO_INDENT, equation_det_->RunIsIndented(&part_grid, part5));

  // Release memory.
  part1->DeleteBoxes();
  delete (part1);
  part2->DeleteBoxes();
  delete (part2);
  part3->DeleteBoxes();
  delete (part3);
  part4->DeleteBoxes();
  delete (part4);
  part5->DeleteBoxes();
  delete (part5);
}

◆ TEST_F() [36/229]

tesseract::TEST_F	(	EquationFinderTest	,
		IsNearSmallNeighbor
	)

Definition at line 301 of file equationdetect_test.cc.

                                                {
  // Checks RunIsNearSmallNeighbor on pairs drawn from four boxes. The call is
  // not symmetric in its two arguments, so several pairs are tested in both
  // orders.
  // Create four tboxes:
  //          part 1, part 2
  //           *****   *****
  // part 3:   *****
  //
  // part 4: *****************
  TBOX box1(0, 950, 499, 999);
  // box2 is one unit shorter than box1 (top 998 versus 999).
  TBOX box2(500, 950, 999, 998);
  TBOX box3(0, 900, 499, 949);
  TBOX box4(0, 550, 499, 590);

  // Test
  // box2 should be box1's near neighbor but not vice versa.
  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box2));
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box1));
  // box1 and box3 should be near neighbors of each other.
  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box3));
  // NOTE(review): the next assertion checks (box2, box3), which is repeated
  // two lines below; given the comment above, the reverse pair (box3, box1)
  // was presumably intended here -- confirm before changing.
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
  // box2 and box3 should not be near neighbors of each other.
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box2));

  // box4 should not be the near neighbor of any one.
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box1, box4));
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box4));
  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box4));
}

◆ TEST_F() [37/229]

tesseract::TEST_F	(	EquationFinderTest	,
		SplitCPHor
	)

Definition at line 484 of file equationdetect_test.cc.

                                       {
  // Checks RunSplitCPHor on an empty partition, a one-blob partition and a
  // six-blob partition that is expected to split into three pieces.
  TBOX box(0, 0, 999, 99);
  ColPartition *part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  // Start from a partition with no boxes and a median width of 10.
  part->DeleteBoxes();
  part->set_median_width(10);
  std::vector<ColPartition *> parts_splitted;

  // Test an empty part.
  equation_det_->RunSplitCPHor(part, &parts_splitted);
  EXPECT_TRUE(parts_splitted.empty());
  // Test with one blob.
  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);

  equation_det_->RunSplitCPHor(part, &parts_splitted);
  EXPECT_EQ(1, parts_splitted.size());
  EXPECT_TRUE(TBOX(0, 0, 10, 50) == parts_splitted[0]->bounding_box());

  // Add more blob and test.
  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
  // The same output vector is reused; the size check below expects exactly
  // three entries, not three plus the one from the previous call.
  equation_det_->RunSplitCPHor(part, &parts_splitted);

  // Verify.
  EXPECT_EQ(3, parts_splitted.size());
  EXPECT_TRUE(TBOX(0, 0, 30, 60) == parts_splitted[0]->bounding_box());
  EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
  EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());

  // The split partitions are deleted here by the test.
  for (auto part_splitted : parts_splitted) {
    delete part_splitted;
  }
  part->DeleteBoxes();
  delete (part);
}

◆ TEST_F() [38/229]

tesseract::TEST_F	(	EquationFinderTest	,
		SplitCPHorLite
	)

Definition at line 450 of file equationdetect_test.cc.

                                           {
  // Checks RunSplitCPHorLite, which reports the split as plain TBOX values,
  // on an empty partition, a one-blob partition and a six-blob partition that
  // is expected to split into three boxes.
  TBOX box(0, 0, 999, 99);
  ColPartition *part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  // Start from a partition with no boxes and a median width of 10.
  part->DeleteBoxes();
  part->set_median_width(10);
  std::vector<TBOX> splitted_boxes;

  // Test an empty part.
  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
  EXPECT_TRUE(splitted_boxes.empty());

  // Test with one blob.
  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
  EXPECT_EQ(1, splitted_boxes.size());
  EXPECT_TRUE(TBOX(0, 0, 10, 50) == splitted_boxes[0]);

  // Add more blob and test.
  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
  // The same output vector is reused; the size check below expects exactly
  // three entries, not three plus the one from the previous call.
  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
  // Verify.
  EXPECT_EQ(3, splitted_boxes.size());
  EXPECT_TRUE(TBOX(0, 0, 30, 60) == splitted_boxes[0]);
  EXPECT_TRUE(TBOX(100, 0, 140, 45) == splitted_boxes[1]);
  EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);

  part->DeleteBoxes();
  delete (part);
}

◆ TEST_F() [39/229]

tesseract::TEST_F	(	EuroText	,
		FastLatinOCR
	)

Definition at line 105 of file apiexample_test.cc.

                               {
  // Runs OCRTester on eurotext.tif against eurotext.txt, using the tessdata
  // directory whose path is TESSDATA_DIR with "_fast" appended and the
  // "script/Latin" model.
  OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt", TESSDATA_DIR "_fast",
            "script/Latin");
}

◆ TEST_F() [40/229]

tesseract::TEST_F	(	FontUtilsTest	,
		DoesDetectMissingFonts
	)

Definition at line 220 of file pango_font_info_test.cc.

                                              {
  // Checks that FontUtils::IsAvailableFont returns false for four font names
  // that the test font set is not expected to provide.
  // Only bold italic face is available.
  EXPECT_FALSE(FontUtils::IsAvailableFont("Arial"));
  // Don't have a ttf for the Courier family.
  EXPECT_FALSE(FontUtils::IsAvailableFont("Courier"));
  // Pango "synthesizes" the italic font from the available Verdana Regular and
  // includes it in its list, but it is not really loadable.
  EXPECT_FALSE(FontUtils::IsAvailableFont("Verdana Italic"));
  // We have "Dejavu Sans Ultra-Light" but not its medium weight counterpart.
  EXPECT_FALSE(FontUtils::IsAvailableFont("DejaVu Sans"));
}

◆ TEST_F() [41/229]

tesseract::TEST_F	(	FontUtilsTest	,
		DoesFailToSelectFont
	)

Definition at line 276 of file pango_font_info_test.cc.

                                            {
  // Text mixing Devanagari and Arabic script in one string; SelectFont is
  // expected to return false for it.
  const char kMixedScriptText[] = "पिताने विवाह की | والفكر والصراع";
  std::vector<std::string> graphemes;
  std::string selected_font;
  EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, strlen(kMixedScriptText), &selected_font,
                                     &graphemes));
}

◆ TEST_F() [42/229]

tesseract::TEST_F	(	FontUtilsTest	,
		DoesFindAvailableFonts
	)

Definition at line 210 of file pango_font_info_test.cc.

                                              {
  // Checks that FontUtils::IsAvailableFont returns true for font names the
  // test font set is expected to provide.
  EXPECT_TRUE(FontUtils::IsAvailableFont("Arial Bold Italic"));
  EXPECT_TRUE(FontUtils::IsAvailableFont("Verdana"));
  EXPECT_TRUE(FontUtils::IsAvailableFont("DejaVu Sans Ultra-Light"));

  // Test that we can support font name convention for Pango v1.30.2 even when
  // we are running an older version.
  // Note the trailing comma in the name passed below.
  EXPECT_TRUE(FontUtils::IsAvailableFont("Times New Roman,"));
}

◆ TEST_F() [43/229]

tesseract::TEST_F	(	FontUtilsTest	,
		DoesListAvailableFonts
	)

Definition at line 232 of file pango_font_info_test.cc.

                                              {
  // Checks that ListAvailableFonts returns exactly kExpectedFontNames, in
  // order, and that every returned name can be parsed back by
  // PangoFontInfo::ParseFontDescriptionName.
  const std::vector<std::string> &fonts = FontUtils::ListAvailableFonts();
  EXPECT_THAT(fonts, ::testing::ElementsAreArray(kExpectedFontNames));
  for (auto &font : fonts) {
    PangoFontInfo font_info;
    EXPECT_TRUE(font_info.ParseFontDescriptionName(font));
  }
}

◆ TEST_F() [44/229]

tesseract::TEST_F	(	FontUtilsTest	,
		DoesSelectFont
	)

Definition at line 262 of file pango_font_info_test.cc.

                                      {
  // For each sample text, SelectFont must succeed and return both a non-empty
  // font name and a non-empty grapheme list.
  // The two arrays are parallel and nullptr-terminated; the loop stops at the
  // terminator of kLangText.
  const char *kLangText[] = {kArabicText, kEngText, kHinText, kKorText, nullptr};
  const char *kLangNames[] = {"Arabic", "English", "Hindi", "Korean", nullptr};
  for (int i = 0; kLangText[i] != nullptr; ++i) {
    // Tags any failure inside this iteration with the language name.
    SCOPED_TRACE(kLangNames[i]);
    std::vector<std::string> graphemes;
    std::string selected_font;
    EXPECT_TRUE(
        FontUtils::SelectFont(kLangText[i], strlen(kLangText[i]), &selected_font, &graphemes));
    EXPECT_TRUE(selected_font.size());
    EXPECT_TRUE(graphemes.size());
  }
}

◆ TEST_F() [45/229]

tesseract::TEST_F	(	HeapTest	,
		DoublePtrTest
	)

Definition at line 186 of file heap_test.cc.

                                {
  // Checks that copying or assigning a connected DoublePtr moves the
  // connection to the destination and leaves the source disconnected.
  DoublePtr ptr1;
  DoublePtr ptr2;
  ptr1.Connect(&ptr2);
  // Check that the correct copy constructor is used.
  DoublePtr ptr3(ptr1);
  // ptr3's partner must point back at ptr3, and ptr1 must now be unlinked.
  EXPECT_EQ(&ptr3, ptr3.OtherEnd()->OtherEnd());
  EXPECT_TRUE(ptr1.OtherEnd() == nullptr);
  // Check that the correct operator= is used.
  ptr1 = ptr3;
  // The link has moved back to ptr1, and ptr3 is now unlinked.
  EXPECT_EQ(&ptr1, ptr1.OtherEnd()->OtherEnd());
  EXPECT_TRUE(ptr3.OtherEnd() == nullptr);
}

◆ TEST_F() [46/229]

tesseract::TEST_F	(	HeapTest	,
		MixedTest
	)

Definition at line 94 of file heap_test.cc.

                            {
  // Interleaves pushes and pops: load the test data, drop the first five
  // entries from both containers, load the test data a second time, and
  // require the heap and the vector to agree afterwards.
  const int kNumToDrop = 5;
  KDVector sorted_copy;
  GenericHeap<IntKDPair> queue;

  // First load into both containers.
  PushTestData(&queue, &sorted_copy);

  // Remove the five leading entries from each side. The heap pops them one at
  // a time; the vector is sorted first and then loses its first five elements.
  std::sort(sorted_copy.begin(), sorted_copy.end());
  for (int popped = 0; popped < kNumToDrop; popped++) {
    queue.Pop(nullptr);
  }
  sorted_copy.erase(sorted_copy.begin(), sorted_copy.begin() + kNumToDrop);

  // Second load on top of what remains.
  PushTestData(&queue, &sorted_copy);

  // Both containers must still describe the same multiset of entries.
  VerifyHeapVectorMatch(&queue, &sorted_copy);
}

◆ TEST_F() [47/229]

tesseract::TEST_F	(	HeapTest	,
		PopWorstTest
	)

Definition at line 113 of file heap_test.cc.

                               {
  // Checks that PopWorst removes the entry with key 65536 and data 6, and
  // that the heap still matches the vector once that entry is dropped from
  // the vector as well.
  GenericHeap<IntKDPair> heap;
  KDVector v;
  // Push the test data onto both the heap and the KDVector.
  PushTestData(&heap, &v);
  // Get the worst element off the heap.
  IntKDPair pair;
  heap.PopWorst(&pair);
  EXPECT_EQ(pair.key(), 65536);
  EXPECT_EQ(pair.data(), 6);
  // Sort and remove the worst element from the vector.
  std::sort(v.begin(), v.end());
  // After sorting, the element to drop is the last one.
  v.resize(v.size() - 1);
  // After that they should still match!
  VerifyHeapVectorMatch(&heap, &v);
}

◆ TEST_F() [48/229]

tesseract::TEST_F	(	HeapTest	,
		RevalueTest
	)

Definition at line 132 of file heap_test.cc.

                              {
  // Changes keys of entries that are already inside the heap, calls Reshuffle
  // on each changed entry, and checks that popping the heap still follows the
  // sorted order of the mirrored vector.
  // Here the data element of the pair is a DoublePtr, which links the entries
  // in the vector and heap, and we test a MAX heap.
  typedef KDPairDec<int, DoublePtr> PtrPair;
  GenericHeap<PtrPair> heap;
  std::vector<PtrPair> v;
  // Push the test data onto both the heap and the vector.
  for (int i : test_data) {
    PtrPair h_pair;
    h_pair.key() = i;
    PtrPair v_pair;
    v_pair.key() = i;
    // Link the two copies before they are handed to their containers.
    h_pair.data().Connect(&v_pair.data());
    heap.Push(&h_pair);
    v.push_back(v_pair);
  }
  // Test changes both ways. Index 0 is 8, so change it to -1.
  v[0].key() = -1;
  // v[0].data().OtherEnd() is a pointer to the data element in the appropriate
  // heap entry, wherever it may be. We can change its value via that pointer.
  // Without Reshuffle, that would be a terribly bad thing to do, as it violates
  // the heap invariant, making the heap corrupt.
  // NOTE(review): the cast treats the DoublePtr address as the address of the
  // enclosing PtrPair, which presumably relies on the pair's member layout --
  // confirm against KDPairDec.
  auto *pair_ptr = reinterpret_cast<PtrPair *>(v[0].data().OtherEnd());
  pair_ptr->key() = v[0].key();
  heap.Reshuffle(pair_ptr);
  // Index 1 is 1. Change to 32767.
  v[1].key() = 32767;
  pair_ptr = reinterpret_cast<PtrPair *>(v[1].data().OtherEnd());
  pair_ptr->key() = v[1].key();
  heap.Reshuffle(pair_ptr);
  // After the changes, popping the heap should still match the sorted order
  // of the vector.
  std::sort(v.begin(), v.end());
  // Sorted order is descending by key: the first key exceeds the last.
  EXPECT_GT(v[0].key(), v.back().key());
  for (auto &i : v) {
    EXPECT_EQ(i.key(), heap.PeekTop().key());
    EXPECT_FALSE(heap.empty());
    heap.Pop(nullptr);
  }
  EXPECT_TRUE(heap.empty());
}

◆ TEST_F() [49/229]

tesseract::TEST_F	(	HeapTest	,
		SortTest
	)

Definition at line 81 of file heap_test.cc.

                           {
  // A freshly constructed heap must report empty and have the same size as an
  // empty KDVector.
  GenericHeap<IntKDPair> heap;
  EXPECT_TRUE(heap.empty());
  KDVector v;
  EXPECT_EQ(heap.size(), v.size());
  // Push the test data onto both the heap and the KDVector.
  PushTestData(&heap, &v);
  // Compare the heap contents against the vector with the shared helper.
  VerifyHeapVectorMatch(&heap, &v);
}

◆ TEST_F() [50/229]

tesseract::TEST_F	(	ImagedataTest	,
		CachesMultiDocs
	)

Definition at line 90 of file imagedata_test.cc.

                                       {
  // Checks that a DocumentCache can hold several DocumentData objects and that
  // both caching strategies return pages in the expected order.
  // Page count of each fake document.
  const std::vector<int> kNumPages = {6, 5, 7};
  const size_t num_docs = kNumPages.size();
  std::vector<std::vector<std::string>> page_texts;
  std::vector<std::string> filenames;
  for (size_t doc = 0; doc < num_docs; ++doc) {
    page_texts.emplace_back();
    filenames.push_back(MakeFakeDoc(kNumPages[doc], doc, &page_texts.back()));
  }
  // Load the same files into one cache per strategy, then read pages back by
  // serial number and compare against the stored page texts.
  DocumentCache robin_cache(8000000);
  robin_cache.LoadDocuments(filenames, tesseract::CS_ROUND_ROBIN, nullptr);
  DocumentCache serial_cache(8000000);
  serial_cache.LoadDocuments(filenames, tesseract::CS_SEQUENTIAL, nullptr);
  for (int p = 0; p <= 21; ++p) {
    LOG(INFO) << "Page " << p;
    const ImageData *robin_data = robin_cache.GetPageBySerial(p);
    const ImageData *serial_data = serial_cache.GetPageBySerial(p);
    CHECK(robin_data != nullptr);
    CHECK(serial_data != nullptr);
    // Round robin: consecutive serials cycle through the documents, and the
    // page index wraps at that document's page count.
    const int robin_doc = p % num_docs;
    const int robin_page = p / num_docs % kNumPages[robin_doc];
    const std::string &robin_truth = page_texts[robin_doc][robin_page];
    EXPECT_STREQ(robin_truth.c_str(), robin_data->transcription().c_str());
    // Sequential: the serial is split using the page count of document 0, and
    // the page index wraps at the selected document's page count.
    const int serial_doc = p / kNumPages[0] % num_docs;
    const int serial_page = p % kNumPages[0] % kNumPages[serial_doc];
    const std::string &serial_truth = page_texts[serial_doc][serial_page];
    EXPECT_STREQ(serial_truth.c_str(), serial_data->transcription().c_str());
  }
}

◆ TEST_F() [51/229]

tesseract::TEST_F	(	ImagedataTest	,
		CachesProperly
	)

Definition at line 60 of file imagedata_test.cc.

                                      {
  // This test verifies that Imagedata can be stored in a DocumentData and a
  // collection of them is cached correctly given limited memory.
  // Number of pages to put in the fake document.
  const int kNumPages = 12;
  // Allowances to read the document. Big enough for 1, 3, 0, all pages.
  // The trailing 0 is a sentinel that ends the allowance loop below.
  const int kMemoryAllowances[] = {2000000, 4000000, 1000000, 100000000, 0};
  // Order in which to read the pages, with some sequential and some seeks.
  // The trailing -1 is a sentinel that ends the page-read loop below.
  const int kPageReadOrder[] = {0, 1, 2, 3, 8, 4, 5, 6, 7, 11, 10, 9, -1};

  std::vector<std::string> page_texts;
  std::string filename = MakeFakeDoc(kNumPages, 0, &page_texts);
  // Now try getting it back with different memory allowances and check that
  // the pages can still be read.
  for (int m = 0; kMemoryAllowances[m] > 0; ++m) {
    DocumentData read_doc("My document");
    EXPECT_TRUE(read_doc.LoadDocument(filename.c_str(), 0, kMemoryAllowances[m], nullptr));
    LOG(ERROR) << "Allowance = " << kMemoryAllowances[m];
    // Read the pages in a specific order.
    for (int p = 0; kPageReadOrder[p] >= 0; ++p) {
      int page = kPageReadOrder[p];
      const ImageData *imagedata = read_doc.GetPage(page);
      // Fatal assertion: the next statement dereferences imagedata, so a
      // missing page must end the test here rather than crash the binary.
      ASSERT_NE(nullptr, imagedata);
      // Check that this is the right page.
      EXPECT_STREQ(page_texts[page].c_str(), imagedata->transcription().c_str());
    }
  }
}

◆ TEST_F() [52/229]

tesseract::TEST_F	(	IndexMapBiDiTest	,
		ManyToOne
	)

Definition at line 101 of file indexmapbidi_test.cc.

                                    {
  // Reproduces the example from the comment on CompleteMerges: five sparse
  // indices are mapped, two pairs are merged, and three compact indices remain.
  IndexMapBiDi map;
  map.Init(13, false);
  const int kMappedSparse[] = {2, 4, 7, 9, 11};
  for (int sparse_index : kMappedSparse) {
    map.SetMap(sparse_index, true);
  }
  map.Setup();
  // Merge 2 with 9, and 4 with 11, then finalise the merges.
  map.Merge(map.SparseToCompact(2), map.SparseToCompact(9));
  map.Merge(map.SparseToCompact(4), map.SparseToCompact(11));
  map.CompleteMerges();
  // Sizes after merging.
  EXPECT_EQ(3, map.CompactSize());
  EXPECT_EQ(13, map.SparseSize());
  // 4 and 11 share compact index 1, which maps back to sparse index 4.
  EXPECT_EQ(1, map.SparseToCompact(4));
  EXPECT_EQ(4, map.CompactToSparse(1));
  EXPECT_EQ(1, map.SparseToCompact(11));
}

◆ TEST_F() [53/229]

tesseract::TEST_F	(	IndexMapBiDiTest	,
		Primes
	)

Definition at line 74 of file indexmapbidi_test.cc.

                                 {
  // Build the primes map and check it directly.
  IndexMapBiDi map;
  ComputePrimes(&map);
  TestPrimes(map);
  // A copy held in another IndexMapBiDi must pass the same checks.
  IndexMapBiDi bidi_copy;
  bidi_copy.CopyFrom(map);
  TestPrimes(bidi_copy);
  // So must a copy held in the IndexMap base class.
  IndexMap base_copy;
  base_copy.CopyFrom(map);
  TestPrimes(base_copy);
  // Round-trip through a file: serialize, read back, and check again.
  const std::string filename = OutputNameToPath("primesmap");
  FILE *writer = fopen(filename.c_str(), "wb");
  CHECK(writer != nullptr);
  EXPECT_TRUE(map.Serialize(writer));
  fclose(writer);
  FILE *reader = fopen(filename.c_str(), "rb");
  CHECK(reader != nullptr);
  IndexMapBiDi restored;
  EXPECT_TRUE(restored.DeSerialize(false, reader));
  fclose(reader);
  TestPrimes(restored);
}

◆ TEST_F() [54/229]

tesseract::TEST_F	(	IntFeatureMapTest	,
		Exhaustive
	)

Definition at line 42 of file intfeaturemap_test.cc.

                                      {
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because IntFeatureSpace is missing.
  GTEST_SKIP();
#else
  // Exhaustively indexes every (x, y, theta) feature value, checks that the
  // mapping covers every bucket, checks the offset maps, and then repeats the
  // mapping checks after deleting two map features.
  IntFeatureSpace space;
  space.Init(kXBuckets, kYBuckets, kThetaBuckets);
  IntFeatureMap map;
  map.Init(space);
  int total_size = kIntFeatureExtent * kIntFeatureExtent * kIntFeatureExtent;
  auto features = std::make_unique<INT_FEATURE_STRUCT[]>(total_size);
  // Fill the features with every value.
  for (int y = 0; y < kIntFeatureExtent; ++y) {
    for (int x = 0; x < kIntFeatureExtent; ++x) {
      for (int theta = 0; theta < kIntFeatureExtent; ++theta) {
        int f_index = (y * kIntFeatureExtent + x) * kIntFeatureExtent + theta;
        features[f_index].X = x;
        features[f_index].Y = y;
        features[f_index].Theta = theta;
      }
    }
  }
  std::vector<int> index_features;
  map.IndexAndSortFeatures(features.get(), total_size, &index_features);
  EXPECT_EQ(total_size, index_features.size());
  int total_buckets = kXBuckets * kYBuckets * kThetaBuckets;
  std::vector<int> map_features;
  int misses = map.MapIndexedFeatures(index_features, &map_features);
  EXPECT_EQ(0, misses);
  EXPECT_EQ(total_buckets, map_features.size());
  ExpectContiguous(map_features, 0, total_buckets);
  EXPECT_EQ(total_buckets, map.compact_size());
  EXPECT_EQ(total_buckets, map.sparse_size());

  // Every offset should be within dx, dy, dtheta of the start point.
  int dx = kIntFeatureExtent / kXBuckets + 1;
  int dy = kIntFeatureExtent / kYBuckets + 1;
  int dtheta = kIntFeatureExtent / kThetaBuckets + 1;
  int bad_offsets = 0;
  for (int index = 0; index < total_buckets; ++index) {
    for (int dir = -tesseract::kNumOffsetMaps; dir <= tesseract::kNumOffsetMaps; ++dir) {
      int offset_index = map.OffsetFeature(index, dir);
      if (dir == 0) {
        // A zero offset must map a feature to itself.
        EXPECT_EQ(index, offset_index);
      } else if (offset_index >= 0) {
        INT_FEATURE_STRUCT f = map.InverseIndexFeature(index);
        INT_FEATURE_STRUCT f2 = map.InverseIndexFeature(offset_index);
        EXPECT_TRUE(f.X != f2.X || f.Y != f2.Y || f.Theta != f2.Theta);
        EXPECT_LE(abs(f.X - f2.X), dx);
        EXPECT_LE(abs(f.Y - f2.Y), dy);
        // Theta is compared on a circle of length kIntFeatureExtent, so take
        // the shorter way round.
        int theta_delta = abs(f.Theta - f2.Theta);
        if (theta_delta > kIntFeatureExtent / 2) {
          theta_delta = kIntFeatureExtent - theta_delta;
        }
        EXPECT_LE(theta_delta, dtheta);
      } else {
        // A negative offset index is counted as a bad offset; only the total
        // is checked below.
        ++bad_offsets;
      }
    }
  }
  EXPECT_LE(bad_offsets, (kXBuckets + kYBuckets) * kThetaBuckets);

  // To test the mapping further, delete the 1st and last map feature, and
  // test again.
  map.DeleteMapFeature(0);
  map.DeleteMapFeature(total_buckets - 1);
  map.FinalizeMapping(nullptr);
  map.IndexAndSortFeatures(features.get(), total_size, &index_features);
  // Has no effect on index features.
  EXPECT_EQ(total_size, index_features.size());
  misses = map.MapIndexedFeatures(index_features, &map_features);
  int expected_misses = (kIntFeatureExtent / kXBuckets) * (kIntFeatureExtent / kYBuckets) *
                        (kIntFeatureExtent / kThetaBuckets + 1);
  expected_misses += (kIntFeatureExtent / kXBuckets) * (kIntFeatureExtent / kYBuckets + 1) *
                     (kIntFeatureExtent / kThetaBuckets);
  EXPECT_EQ(expected_misses, misses);
  EXPECT_EQ(total_buckets - 2, map_features.size());
  ExpectContiguous(map_features, 0, total_buckets - 2);
  EXPECT_EQ(total_buckets - 2, map.compact_size());
  EXPECT_EQ(total_buckets, map.sparse_size());
#endif
}

◆ TEST_F() [55/229]

tesseract::TEST_F	(	IntSimdMatrixTest	,
		AVX2
	)

Definition at line 125 of file intsimdmatrix_test.cc.

                                {
#if defined(HAVE_AVX2)
  // Compiled with HAVE_AVX2: still skip when SIMDDetect reports that AVX2 is
  // not available at run time.
  if (!SIMDDetect::IsAVX2Available()) {
    GTEST_LOG_(INFO) << "No AVX2 found! Not tested!";
    GTEST_SKIP();
  }
  // Run the shared comparison against the AVX2 matrix implementation.
  ExpectEqualResults(IntSimdMatrix::intSimdMatrixAVX2);
#else
  // Compiled without HAVE_AVX2: there is nothing to test, so skip.
  GTEST_LOG_(INFO) << "AVX2 unsupported! Not tested!";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [56/229]

tesseract::TEST_F	(	IntSimdMatrixTest	,
		C
	)

Definition at line 105 of file intsimdmatrix_test.cc.

                             {
  // Aggregate-initialised matrix descriptor: a null first member and 1 for the
  // four remaining members.
  // NOTE(review): presumably the null member selects the generic (non-SIMD)
  // code path; confirm against the IntSimdMatrix definition.
  static const IntSimdMatrix matrix = {nullptr, 1, 1, 1, 1};
  ExpectEqualResults(matrix);
}

◆ TEST_F() [57/229]

tesseract::TEST_F	(	IntSimdMatrixTest	,
		SSE
	)

Definition at line 111 of file intsimdmatrix_test.cc.

                               {
#if defined(HAVE_SSE4_1)
  // Compiled with HAVE_SSE4_1: still skip when SIMDDetect reports that SSE is
  // not available at run time.
  if (!SIMDDetect::IsSSEAvailable()) {
    GTEST_LOG_(INFO) << "No SSE found! Not tested!";
    GTEST_SKIP();
  }
  // Run the shared comparison against the SSE matrix implementation.
  ExpectEqualResults(IntSimdMatrix::intSimdMatrixSSE);
#else
  // Compiled without HAVE_SSE4_1: there is nothing to test, so skip.
  GTEST_LOG_(INFO) << "SSE unsupported! Not tested!";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [58/229]

tesseract::TEST_F	(	LayoutTest	,
		ArraySizeTest
	)

Definition at line 191 of file layout_test.cc.

                                  {
  // Count the entries of kPolyBlockNames up to the terminating empty string
  // and check that the count equals PT_COUNT.
  int name_count = 0;
  while (kPolyBlockNames[name_count][0] != '\0') {
    ++name_count;
  }
  EXPECT_EQ(name_count, PT_COUNT);
}

◆ TEST_F() [59/229]

tesseract::TEST_F	(	LayoutTest	,
		HebrewOrderingAndSkew
	)

Definition at line 214 of file layout_test.cc.

                                          {
  // First pass: recognise the Hebrew image with the "eng" language.
  SetImage("hebrew.png", "eng");
  EXPECT_EQ(api_.Recognize(nullptr), 0);
  tesseract::MutableIterator *eng_it = api_.GetMutableIterator();
  // In eng mode, block order should not be RTL.
  VerifyRoughBlockOrder(false, eng_it);
  VerifyTotalContainment(1, eng_it);
  delete eng_it;
  // Second pass: the same image with the "heb" language.
  SetImage("hebrew.png", "heb");
  EXPECT_EQ(api_.Recognize(nullptr), 0);
  tesseract::MutableIterator *heb_it = api_.GetMutableIterator();
  // In heb mode, block order should be RTL.
  VerifyRoughBlockOrder(true, heb_it);
  // And blobs should still be fully contained.
  VerifyTotalContainment(-1, heb_it);
  delete heb_it;
}

◆ TEST_F() [60/229]

tesseract::TEST_F	(	LayoutTest	,
		UNLV8087_054
	)

Definition at line 201 of file layout_test.cc.

                                 {
  // Recognise the 8087_054.3B.tif image with "eng" and compare the block text
  // order against the expected kStrings8087_054 / kBlocks8087_054 tables.
  SetImage("8087_054.3B.tif", "eng");
  // Just run recognition.
  EXPECT_EQ(api_.Recognize(nullptr), 0);
  // Check iterator position.
  tesseract::ResultIterator *it = api_.GetIterator();
  VerifyBlockTextOrder(kStrings8087_054, kBlocks8087_054, it);
  // The iterator is deleted here by the test.
  delete it;
}

◆ TEST_F() [61/229]

tesseract::TEST_F	(	LigatureTableTest	,
		DoesFillLigatureTables
	)

Definition at line 54 of file ligature_table_test.cc.

                                                  {
  // Both lookup tables of the fixture's ligature table must be non-empty.
  EXPECT_GT(lig_table_->norm_to_lig_table().size(), 0);
  EXPECT_GT(lig_table_->lig_to_norm_table().size(), 0);
}

◆ TEST_F() [62/229]

tesseract::TEST_F	(	LigatureTableTest	,
		TestCustomLigatures
	)

Definition at line 84 of file ligature_table_test.cc.

                                               {
  // Pairs of (plain text, text with custom ligature code points). Even
  // indices hold the plain form, odd indices the ligated form.
  const char *kTestCases[] = {
      "act",       "a\uE003",
      "publiſh",   "publi\uE006",
      "ſince",     "\uE007nce",
      "aſleep",    "a\uE008eep",
      "neceſſary", "nece\uE009ary",
  };
  for (size_t i = 0; i < countof(kTestCases); i += 2) {
    const char *plain = kTestCases[i];
    const char *ligated = kTestCases[i + 1];
    // Adding ligatures yields the ligated form; both removal functions
    // restore the plain form.
    EXPECT_STREQ(ligated, lig_table_->AddLigatures(plain, nullptr).c_str());
    EXPECT_STREQ(plain, lig_table_->RemoveLigatures(ligated).c_str());
    EXPECT_STREQ(plain, lig_table_->RemoveCustomLigatures(ligated).c_str());
  }
}

◆ TEST_F() [63/229]

tesseract::TEST_F	(	ListTest	,
		TestCLIST
	)

Definition at line 50 of file list_test.cc.

                            {
  // Builds a CLIST of ListSize elements, walks it forwards checking values and
  // position predicates, then extracts and deletes every element.
  Clst_CLIST list;
  EXPECT_TRUE(list.empty());
  EXPECT_EQ(list.length(), 0);
  auto it = CLIST_ITERATOR(&list);
  // Append elements whose values are 0 .. ListSize - 1.
  for (unsigned i = 0; i < ListSize; i++) {
    auto *lst = new Clst(i);
    it.add_to_end(lst);
  }
  EXPECT_TRUE(!list.empty());
  EXPECT_EQ(list.length(), ListSize);
  it.move_to_first();
  // n counts the elements visited so far and is the value expected next.
  unsigned n = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // Only the first visited element may report at_first().
    EXPECT_TRUE(n == 0 || !it.at_first());
    auto *lst = reinterpret_cast<Clst *>(it.data());
    EXPECT_EQ(lst->value, n);
    n++;
    // The element visited last (n == ListSize after the increment) must
    // report at_last().
    EXPECT_TRUE(n != ListSize || it.at_last());
  }
  // Step one element further and advance n with it, so that the extraction
  // loop below expects value n % ListSize first.
  it.forward();
  n++;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // extract() hands the element back to the test, which deletes it.
    auto *lst = reinterpret_cast<Clst *>(it.extract());
    EXPECT_EQ(lst->value, n % ListSize);
    n++;
    delete lst;
  }
  // TODO: add more tests for CLIST
}

◆ TEST_F() [64/229]

tesseract::TEST_F	(	ListTest	,
		TestELIST
	)

Definition at line 81 of file list_test.cc.

                            {
  // Builds an ELIST of ListSize elements, walks it forwards checking values
  // and position predicates, then extracts and deletes every element.
  Elst_LIST list;
  EXPECT_TRUE(list.empty());
  EXPECT_EQ(list.length(), 0);
  auto it = ELIST_ITERATOR(&list);
  // Append elements whose values are 0 .. ListSize - 1.
  for (unsigned i = 0; i < ListSize; i++) {
    auto *elst = new Elst(i);
    it.add_to_end(elst);
  }
  EXPECT_TRUE(!list.empty());
  EXPECT_EQ(list.length(), ListSize);
  it.move_to_first();
  // n counts the elements visited so far and is the value expected next.
  unsigned n = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // Only the first visited element may report at_first().
    EXPECT_TRUE(n == 0 || !it.at_first());
    auto *elst = reinterpret_cast<Elst *>(it.data());
    EXPECT_EQ(elst->value, n);
    n++;
    // The element visited last (n == ListSize after the increment) must
    // report at_last().
    EXPECT_TRUE(n != ListSize || it.at_last());
  }
  // Step one element further and advance n with it, so that the extraction
  // loop below expects value n % ListSize first.
  it.forward();
  n++;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // extract() hands the element back to the test, which deletes it.
    auto *elst = reinterpret_cast<Elst *>(it.extract());
    EXPECT_EQ(elst->value, n % ListSize);
    n++;
    delete elst;
  }
  // TODO: add more tests for ELIST
}

◆ TEST_F() [65/229]

tesseract::TEST_F	(	ListTest	,
		TestELIST2
	)

Definition at line 112 of file list_test.cc.

                             {
  // Builds an ELIST2 of ListSize elements, walks it forwards and then
  // backwards checking values, and finally extracts and deletes every element.
  Elst2_LIST list;
  EXPECT_TRUE(list.empty());
  EXPECT_EQ(list.length(), 0);
  auto it = ELIST2_ITERATOR(&list);
  // Append elements whose values are 0 .. ListSize - 1.
  for (unsigned i = 0; i < ListSize; i++) {
    auto *lst = new Elst2(i);
    it.add_to_end(lst);
  }
  EXPECT_TRUE(!list.empty());
  EXPECT_EQ(list.length(), ListSize);
  it.move_to_first();
  // n is the value expected at the iterator's current position.
  unsigned n = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // Only the first visited element may report at_first().
    EXPECT_TRUE(n == 0 || !it.at_first());
    auto *lst = reinterpret_cast<Elst2 *>(it.data());
    EXPECT_EQ(lst->value, n);
    n++;
    // The element visited last (n == ListSize after the increment) must
    // report at_last().
    EXPECT_TRUE(n != ListSize || it.at_last());
  }
  // Step back one element and decrement n with it before walking backwards.
  it.backward();
  n--;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.backward()) {
    auto *lst = reinterpret_cast<Elst2 *>(it.data());
    EXPECT_EQ(lst->value, n);
    // n is unsigned, so the decrement after the final element wraps around;
    // the n++ below undoes it.
    n--;
  }
  // Step forward one element and increment n with it, so that the extraction
  // loop below expects value n % ListSize first.
  it.forward();
  n++;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // extract() hands the element back to the test, which deletes it.
    auto *lst = reinterpret_cast<Elst2 *>(it.extract());
    EXPECT_EQ(lst->value, n % ListSize);
    n++;
    delete lst;
  }
  // TODO: add more tests for ELIST2
}

◆ TEST_F() [66/229]

tesseract::TEST_F	(	LLSQTest	,
		BasicLines
	)

Definition at line 68 of file linlsq_test.cc.

                             {
  // Feeds points into an LLSQ accumulator step by step and checks the fitted
  // line and vector after each change, using the ExpectCorrectLine and
  // ExpectCorrectVector helpers with the tolerance given as the last argument.
  LLSQ llsq;
  // Two points on the diagonal y = x.
  llsq.add(1.0, 1.0);
  llsq.add(2.0, 2.0);
  ExpectCorrectLine(llsq, 1.0, 0.0, 0.0, 1.0, 1e-6);
  // Expected direction is the unit vector along the diagonal.
  float half_root_2 = sqrt(2.0) / 2.0f;
  ExpectCorrectVector(llsq, FCOORD(1.5f, 1.5f), FCOORD(half_root_2, half_root_2), 1e-6);
  // Replace (2,2) with three other points.
  llsq.remove(2.0, 2.0);
  llsq.add(1.0, 2.0);
  llsq.add(10.0, 1.0);
  llsq.add(-8.0, 1.0);
  // The point at 1,2 pulls the result away from what would otherwise be a
  // perfect fit to a horizontal line by 0.25 unit, with rms error of 0.433.
  ExpectCorrectLine(llsq, 0.0, 1.25, 0.433, 0.0, 1e-2);
  ExpectCorrectVector(llsq, FCOORD(1.0f, 1.25f), FCOORD(1.0f, 0.0f), 1e-3);
  // Add the point (1,2) again, this time with a third argument of 10.0.
  llsq.add(1.0, 2.0, 10.0);
  // With a heavy weight, the point at 1,2 pulls the line nearer.
  ExpectCorrectLine(llsq, 0.0, 1.786, 0.41, 0.0, 1e-2);
  ExpectCorrectVector(llsq, FCOORD(1.0f, 1.786f), FCOORD(1.0f, 0.0f), 1e-3);
}

◆ TEST_F() [67/229]

tesseract::TEST_F	(	LLSQTest	,
		RmsOrthWorksAsIntended
	)

Definition at line 104 of file linlsq_test.cc.

                                         {
  // A fixed set of five points, verified against four different directions.
  std::vector<FCOORD> pts = {
      FCOORD(0.56, 0.95), FCOORD(0.09, 0.09), FCOORD(0.13, 0.77),
      FCOORD(0.16, 0.83), FCOORD(0.45, 0.79),
  };
  VerifyRmsOrth(pts, FCOORD(1, 0));
  VerifyRmsOrth(pts, FCOORD(1, 1));
  VerifyRmsOrth(pts, FCOORD(1, 2));
  VerifyRmsOrth(pts, FCOORD(2, 1));
}

◆ TEST_F() [68/229]

tesseract::TEST_F	(	LLSQTest	,
		Vectors
	)

Definition at line 90 of file linlsq_test.cc.

                          {
  // Checks the mean point and direction reported for points arranged along
  // the line x = 1.
  LLSQ llsq;
  // Two points exactly on x = 1, symmetric about y = 0.
  llsq.add(1.0, 1.0);
  llsq.add(1.0, -1.0);
  ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-6);
  // Four more points slightly off the line on either side; the same result is
  // expected, with a looser tolerance (1e-3).
  llsq.add(0.9, -2.0);
  llsq.add(1.1, -3.0);
  llsq.add(0.9, 2.0);
  llsq.add(1.10001, 3.0);
  ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-3);
}

◆ TEST_F() [69/229]

tesseract::TEST_F	(	LoadLang	,
		engBest
	)

Definition at line 555 of file loadlang_test.cc.

                          {
  // Runs LangLoader for "eng" on the directory named TESSDATA_DIR plus "_best".
  LangLoader("eng", TESSDATA_DIR "_best");
}

◆ TEST_F() [70/229]

tesseract::TEST_F	(	LoadLang	,
		engBestInt
	)

Definition at line 558 of file loadlang_test.cc.

                             {
  // Runs LangLoader for "eng" on TESSDATA_DIR itself (no suffix).
  LangLoader("eng", TESSDATA_DIR);
}

◆ TEST_F() [71/229]

tesseract::TEST_F	(	LoadLang	,
		engFast
	)

Definition at line 552 of file loadlang_test.cc.

                          {
  // Runs LangLoader for "eng" on the directory named TESSDATA_DIR plus "_fast".
  LangLoader("eng", TESSDATA_DIR "_fast");
}

◆ TEST_F() [72/229]

tesseract::TEST_F	(	LoadLang	,
		kmrBest
	)

Definition at line 566 of file loadlang_test.cc.

                          {
  // Runs LangLoader for "kmr" on the directory named TESSDATA_DIR plus "_best".
  LangLoader("kmr", TESSDATA_DIR "_best");
}

◆ TEST_F() [73/229]

tesseract::TEST_F	(	LoadLang	,
		kmrFast
	)

Definition at line 563 of file loadlang_test.cc.

                          {
  // Runs LangLoader for "kmr" on the directory named TESSDATA_DIR plus "_fast".
  LangLoader("kmr", TESSDATA_DIR "_fast");
}

◆ TEST_F() [74/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		BasicTest
	)

Definition at line 29 of file lstm_test.cc.

                                   {
  // Trains two networks in turn, checks each against its own error bound, and
  // checks that the LSTM ends with a lower error than the convolutional net.
  // A Convolver sliding window classifier without LSTM.
  SetupTrainer(
      "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
      "Ct1,1,64O1c1]",
      "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false, 2e-4, false, "eng");
  // The convolver gets four times the base iteration count.
  double non_lstm_err = TrainIterations(kTrainerIterations * 4);
  EXPECT_LT(non_lstm_err, 98);
  LOG(INFO) << "********** Expected  < 98 ************\n";
 
  // A basic single-layer, single direction LSTM.
  SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);
  // The LSTM gets twice the base iteration count.
  double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(lstm_uni_err, 86);
  LOG(INFO) << "********** Expected  < 86 ************\n";
  // Beats the convolver. (Although it does have a lot more weights, it still
  // iterates faster.)
  EXPECT_LT(lstm_uni_err, non_lstm_err);
}

◆ TEST_F() [75/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		BidiTest
	)

Definition at line 59 of file lstm_test.cc.

                                  {
  // Trains a bidirectional LSTM, checks its error bound, then runs the
  // int-mode check.
  // A basic single-layer, bi-di 1d LSTM.
  SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);
  double lstm_bi_err = TrainIterations(kTrainerIterations);
  EXPECT_LT(lstm_bi_err, 75);
  LOG(INFO) << "********** Expected   < 75 ************\n";
  // Int mode training is dead, so convert the trained network to int and check
  // that its error rate is close to the float version.
  TestIntMode(kTrainerIterations);
}

◆ TEST_F() [76/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		ColorTest
	)

Definition at line 50 of file lstm_test.cc.

                                   {
  // Trains the model named "2D-color-lstm" and checks its error bound. The
  // network spec is not a single-layer net: it contains an L2xy16 layer
  // followed by an Lbx100 layer.
  // NOTE(review): the "3" in "[1,32,0,3" is presumably the input depth
  // (3 colour channels); confirm against the network spec documentation.
  SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2D-color-lstm", true, true);
  double lstm_uni_err = TrainIterations(kTrainerIterations);
  EXPECT_LT(lstm_uni_err, 85);
  //  EXPECT_GT(lstm_uni_err, 66);
  LOG(INFO) << "********** Expected  < 85 ************\n";
}

◆ TEST_F() [77/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		ConvertModel
	)

Definition at line 67 of file lstmtrainer_test.cc.

                                      {
  // Converts a fra model to the deu charset, saves it as deu.traineddata in
  // the test tmpdir, and checks that the saved model recognises phototest.tif.
  // Setup a trainer with a deu charset.
  LSTMTrainer deu_trainer;
  deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
  // Load the fra traineddata, strip out the model, and save to a tmp file.
  TessdataManager mgr;
  std::string fra_data = file::JoinPath(TESSDATA_DIR "_best", "fra.traineddata");
  CHECK(mgr.Init(fra_data.c_str()));
  LOG(INFO) << "Load " << fra_data << "\n";
  file::MakeTmpdir();
  std::string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
  CHECK(mgr.ExtractToFile(model_path.c_str()));
  LOG(INFO) << "Extract " << model_path << "\n";
  // Load the fra model into the deu_trainer, and save the converted model.
  CHECK(deu_trainer.TryLoadingCheckpoint(model_path.c_str(), fra_data.c_str()));
  LOG(INFO) << "Checkpoint load for " << model_path << " and " << fra_data << "\n";
  std::string deu_data = file::JoinPath(FLAGS_test_tmpdir, "deu.traineddata");
  CHECK(deu_trainer.SaveTraineddata(deu_data.c_str()));
  LOG(INFO) << "Save " << deu_data << "\n";
  // Now run the saved model on phototest. (See BasicTesseractTest in
  // baseapi_test.cc).
  TessBaseAPI api;
  // Init reports failure through its return value; stop here if the freshly
  // saved model cannot be loaded instead of running recognition on an
  // uninitialised API.
  ASSERT_EQ(0, api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY));
  Image src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
  CHECK(src_pix);
  api.SetImage(src_pix);
  // GetUTF8Text returns a char array that the unique_ptr releases.
  std::unique_ptr<char[]> result(api.GetUTF8Text());
  std::string truth_text;
  CHECK_OK(
      file::GetContents(TestingNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));

  EXPECT_STREQ(truth_text.c_str(), result.get());
  src_pix.destroy();
}

◆ TEST_F() [78/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		DeterminismTest
	)

Definition at line 109 of file lstm_test.cc.

                                         {
  // Training the same network twice must give the same errors, and so must
  // continuing from a saved training dump versus continuing in place.
  const char *const kNetSpec = "[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]";
  const char *const kModelName = "2-D-2-layer-lstm";

  // Run A: train, record the errors, and keep a dump of the trainer state.
  SetupTrainerEng(kNetSpec, kModelName, false, false);
  double train_err_a = TrainIterations(kTrainerIterations);
  double act_error_a = trainer_->ActivationError();
  double char_error_a = trainer_->CharError();
  std::vector<char> dump_a;
  EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, *trainer_, &dump_a));

  // Run B: a fresh trainer with identical settings must match run A.
  SetupTrainerEng(kNetSpec, kModelName, false, false);
  double train_err_b = TrainIterations(kTrainerIterations);
  double act_error_b = trainer_->ActivationError();
  double char_error_b = trainer_->CharError();
  EXPECT_FLOAT_EQ(train_err_a, train_err_b);
  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
  EXPECT_FLOAT_EQ(char_error_a, char_error_b);

  // Continue run B in place for some more iterations.
  train_err_b = TrainIterations(kTrainerIterations / 3);
  act_error_b = trainer_->ActivationError();
  char_error_b = trainer_->CharError();

  // Restore run A's dump into a new trainer and continue it by the same
  // number of iterations; the results must match the continued run B.
  SetupTrainerEng(kNetSpec, kModelName, false, false);
  EXPECT_TRUE(trainer_->ReadTrainingDump(dump_a, *trainer_));
  train_err_a = TrainIterations(kTrainerIterations / 3);
  act_error_a = trainer_->ActivationError();
  char_error_a = trainer_->CharError();
  EXPECT_FLOAT_EQ(train_err_a, train_err_b);
  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
  EXPECT_FLOAT_EQ(char_error_a, char_error_b);
  LOG(INFO) << "********** *** ************\n";
}

◆ TEST_F() [79/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodeDecodeBothTestKor
	)

Definition at line 41 of file lstm_recode_test.cc.

                                                 {
  // Runs the shared encode/decode helper for "kor" on a Korean sentence.
  TestEncodeDecodeBoth("kor", "한국어 위키백과에 오신 것을 환영합니다!");
}

◆ TEST_F() [80/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodedSoftmaxTest
	)

Definition at line 175 of file lstm_test.cc.

                                            {
  // Trains the "LE96" network for twice the base iteration count, checks its
  // error bound, then runs the int-mode check.
  // LSTM with a built-in encoded softmax can beat the external softmax.
  SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);
  double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(lstm_sm_err, 62.0);
  LOG(INFO) << "********** Expected   < 62 ************\n";
  // Check that it works in int mode too.
  TestIntMode(kTrainerIterations);
}

◆ TEST_F() [81/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodesEng
	)

Definition at line 18 of file lstmtrainer_test.cc.

                                    {
  // Runs the shared encode/decode helper for "eng" on an English sentence
  // that includes quotes and punctuation.
  TestEncodeDecodeBoth("eng", "The quick brown 'fox' jumps over: the lazy dog!");
}

◆ TEST_F() [82/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodesKan
	)

Definition at line 22 of file lstmtrainer_test.cc.

                                    {
  // Runs the shared encode/decode helper for "kan" on a Kannada sentence.
  TestEncodeDecodeBoth("kan", "ಫ್ರಬ್ರವರಿ ತತ್ವಾಂಶಗಳೆಂದರೆ ಮತ್ತು ಜೊತೆಗೆ ಕ್ರಮವನ್ನು");
}

◆ TEST_F() [83/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodesKor
	)

Definition at line 26 of file lstmtrainer_test.cc.

                                    {
  // Runs the shared encode/decode helper for "kor" on a Korean sentence.
  TestEncodeDecodeBoth("kor", "이는 것으로 다시 넣을 수는 있지만 선택의 의미는");
}

◆ TEST_F() [84/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		MapCoder
	)

Definition at line 30 of file lstmtrainer_test.cc.

                                  {
  // Checks that MapRecoder produces a mapping that turns fra labels into the
  // deu labels for the same string.
  LSTMTrainer fra_trainer;
  fra_trainer.InitCharSet(TestDataNameToPath("fra/fra.traineddata"));
  LSTMTrainer deu_trainer;
  deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
  // A string that uses characters common to French and German.
  std::string kTestStr = "The quick brown 'fox' jumps over: the lazy dog!";
  std::vector<int> deu_labels;
  EXPECT_TRUE(deu_trainer.EncodeString(kTestStr.c_str(), &deu_labels));
  // The french trainer cannot decode them correctly.
  std::string badly_decoded = fra_trainer.DecodeLabels(deu_labels);
  std::string bad_str(&badly_decoded[0], badly_decoded.length());
  LOG(INFO) << "bad_str fra=" << bad_str << "\n";
  EXPECT_NE(kTestStr, bad_str);
  // Encode the string as fra.
  std::vector<int> fra_labels;
  EXPECT_TRUE(fra_trainer.EncodeString(kTestStr.c_str(), &fra_labels));
  // The loop below indexes deu_labels with positions taken from fra_labels,
  // so the two encodings must have the same length; stop here otherwise
  // rather than read past the end of deu_labels.
  ASSERT_EQ(deu_labels.size(), fra_labels.size());
  // Use the mapper to compute what the labels are as deu.
  std::vector<int> mapping =
      fra_trainer.MapRecoder(deu_trainer.GetUnicharset(), deu_trainer.GetRecoder());
  std::vector<int> mapped_fra_labels(fra_labels.size(), -1);
  for (unsigned i = 0; i < fra_labels.size(); ++i) {
    mapped_fra_labels[i] = mapping[fra_labels[i]];
    // NOTE(review): the failure messages index kTestStr by label position,
    // which assumes one label per character of kTestStr — confirm.
    EXPECT_NE(-1, mapped_fra_labels[i]) << "i=" << i << ", ch=" << kTestStr[i];
    EXPECT_EQ(mapped_fra_labels[i], deu_labels[i])
        << "i=" << i << ", ch=" << kTestStr[i] << " has deu label=" << deu_labels[i]
        << ", but mapped to " << mapped_fra_labels[i];
  }
  // The german trainer can now decode them correctly.
  std::string decoded = deu_trainer.DecodeLabels(mapped_fra_labels);
  std::string ok_str(&decoded[0], decoded.length());
  LOG(INFO) << "ok_str deu=" << ok_str << "\n";
  EXPECT_EQ(kTestStr, ok_str);
}

◆ TEST_F() [85/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		RecodeTestKor
	)

Definition at line 29 of file lstm_recode_test.cc.

                                       {
  // Trains the "kor-recode" model on the Korean unicharset and lstmf data and
  // checks its error bound.
  // NOTE(review): the first boolean argument is true here; given the model
  // name it presumably enables recoding — confirm against SetupTrainer.
  // A basic single-layer, bi-di 1d LSTM on Korean.
  SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
               "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
  double kor_recode_err = TrainIterations(kTrainerIterations);
  EXPECT_LT(kor_recode_err, 60);
  LOG(INFO) << "********** Expected  < 60 ************\n";
}

◆ TEST_F() [86/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		RecodeTestKorBase
	)

Definition at line 19 of file lstm_recode_test.cc.

                                           {
  // Trains the "kor-full" model on the Korean unicharset and lstmf data for
  // twice the base iteration count and checks its error bound.
  // NOTE(review): the first boolean argument is false here; given the model
  // name it presumably disables recoding — confirm against SetupTrainer.
  // A basic single-layer, bi-di 1d LSTM on Korean.
  SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
               "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
  double kor_full_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(kor_full_err, 88);
  //  EXPECT_GT(kor_full_err, 85);
  LOG(INFO) << "********** Expected  < 88 ************\n";
}

◆ TEST_F() [87/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		SoftmaxBaselineTest
	)

Definition at line 143 of file lstm_test.cc.

                                             {
  // Trains the "Lfx96" network for twice the base iteration count, checks its
  // error bound, then runs the int-mode check twice and compares the second
  // result against a small delta.
  // A basic single-layer, single direction LSTM.
  SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);
  double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(lstm_uni_err, 60);
  //  EXPECT_GT(lstm_uni_err, 48);
  LOG(INFO) << "********** Expected  < 60 ************\n";
  // Check that it works in int mode too.
  TestIntMode(kTrainerIterations);
  // If we run TestIntMode again, it tests that int_mode networks can
  // serialize and deserialize correctly.
  double delta = TestIntMode(kTrainerIterations);
  // The two tests (both of int mode this time) should be almost identical.
  LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";
  EXPECT_LT(delta, 0.01);
}

◆ TEST_F() [88/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		SoftmaxTest
	)

Definition at line 163 of file lstm_test.cc.

                                     {
  // Trains the "LS96" network for twice the base iteration count, checks its
  // error bound, then runs the int-mode check.
  // LSTM with a built-in softmax can beat the external softmax.
  SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);
  double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(lstm_sm_err, 49.0);
  LOG(INFO) << "********** Expected  < 49 ************\n";
  // Check that it works in int mode too.
  TestIntMode(kTrainerIterations);
}

◆ TEST_F() [89/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		SpeedTest
	)

Definition at line 98 of file lstm_test.cc.

                                   {
  // Sets up a network and trains it for the base iteration count. The
  // returned error is discarded and nothing is asserted.
  SetupTrainerEng(
      "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
      "O1c1]",
      "2-D-2-layer-lstm", false, true);
  TrainIterations(kTrainerIterations);
  LOG(INFO) << "********** *** ************\n";
}

◆ TEST_F() [90/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		Test2D
	)

Definition at line 72 of file lstm_test.cc.

                                {
  // Trains the 2-D network for 1.5 times the base iteration count, checks its
  // error bound, then runs the int-mode check.
  // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
                  false);
  double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
  EXPECT_LT(lstm_2d_err, 98);
  //  EXPECT_GT(lstm_2d_err, 90);
  LOG(INFO) << "********** Expected  < 98 ************\n";
  // Int mode training is dead, so convert the trained network to int and check
  // that its error rate is close to the float version.
  TestIntMode(kTrainerIterations);
}

◆ TEST_F() [91/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		TestAdam
	)

Definition at line 87 of file lstm_test.cc.

                                  {
  // Same 2-layer network as the plain 2-D test (2-D feature-extracting LSTM at
  // the bottom), but with the last SetupTrainerEng flag switched on.
  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
                  true);
  double adam_error = TrainIterations(kTrainerIterations);
  EXPECT_LT(adam_error, 70);
  LOG(INFO) << "********** Expected   < 70 ************\n";
  // Finish with the int-mode check.
  TestIntMode(kTrainerIterations);
}

◆ TEST_F() [92/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		TestLayerAccess
	)

Definition at line 186 of file lstm_test.cc.

                                         {
  // Build a 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom,
  // then verify that every layer can be enumerated and looked up by id.
  SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm", false, false);
  // How many layers the enumeration is expected to yield.
  const size_t kNumLayers = 8;
  // Ids expected from the enumeration, in order.
  const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};
  // Names expected for each of those ids.
  const char *kLayerNames[kNumLayers] = {"Input",  "Convolve",  "ConvNL", "Maxpool",
                                         "Lfys32", "Lbx128LTR", "Lbx128", "Output"};
  // Weight counts expected for each layer.
  const int kNumWeights[kNumLayers] = {0,
                                       0,
                                       16 * (25 + 1),
                                       0,
                                       32 * (4 * (32 + 16 + 1)),
                                       128 * (4 * (128 + 32 + 1)),
                                       128 * (4 * (128 + 32 + 1)),
                                       112 * (2 * 128 + 1)};

  auto layer_ids = trainer_->EnumerateLayers();
  EXPECT_EQ(kNumLayers, layer_ids.size());
  // Bounded by both sizes so a count mismatch cannot cause an out-of-range read.
  for (unsigned idx = 0; idx < kNumLayers && idx < layer_ids.size(); ++idx) {
    EXPECT_STREQ(kLayerIds[idx], layer_ids[idx].c_str());
    EXPECT_STREQ(kLayerNames[idx], trainer_->GetLayer(layer_ids[idx])->name().c_str());
    EXPECT_EQ(kNumWeights[idx], trainer_->GetLayer(layer_ids[idx])->num_weights());
  }
}

◆ TEST_F() [93/229]

tesseract::TEST_F	(	LSTMTrainerTest	,
		TestSquashed
	)

Definition at line 18 of file lstm_squashed_test.cc.

                                      {
  // Network: small convolution/maxpool, then a Squashed feature-extracting
  // LSTM, then a second LSTM on top.
  // The flags mirror how this spec is normally trained: recoding and adam
  // both enabled.
  SetupTrainerEng("[1,32,0,1 Ct3,3,16 Mp3,3 Lfys48 Lbx96 O1c1]", "SQU-2-layer-lstm",
                  /*recode*/ true, /*adam*/ true);
  double squashed_error = TrainIterations(kTrainerIterations * 3 / 2);
  EXPECT_LT(squashed_error, 80);
  LOG(INFO) << "********** < 80 ************\n";
  TestIntMode(kTrainerIterations);
}

◆ TEST_F() [94/229]

tesseract::TEST_F	(	MatrixTest	,
		RotatingTranspose_0_2
	)

Definition at line 120 of file matrix_test.cc.

                                          {
  // Rotating transpose with arguments (0, 2) of the fixture tensor src_.
  GENERIC_2D_ARRAY<int> transposed;
  src_.RotatingTranspose(dims_, kNumDims_, 0, 2, &transposed);
  // Re-dimension to 2 columns so the checks below can index by (row, col).
  transposed.ResizeNoInit(kInputSize_ / 2, 2);
  // Expected layout of the result:
  // output tensor=[[[[0, 1][24, 25][48, 49][72, 73][96, 97]]
  //                 [[2, 3][26, 27][50, 51][74, 75][98, 99]]
  //                 [[4, 5][28, 29][52, 53][76, 77][100, 101]]]
  //                [[[6, 7]...
  EXPECT_EQ(0, transposed(0, 0));
  EXPECT_EQ(1, transposed(0, 1));
  EXPECT_EQ(24, transposed(1, 0));
  EXPECT_EQ(25, transposed(1, 1));
  EXPECT_EQ(96, transposed(4, 0));
  EXPECT_EQ(97, transposed(4, 1));
  EXPECT_EQ(2, transposed(5, 0));
  EXPECT_EQ(6, transposed(15, 0));
}

◆ TEST_F() [95/229]

tesseract::TEST_F	(	MatrixTest	,
		RotatingTranspose_1_3
	)

Definition at line 99 of file matrix_test.cc.

                                          {
  // Rotating transpose with arguments (1, 3) of the fixture tensor src_.
  GENERIC_2D_ARRAY<int> transposed;
  src_.RotatingTranspose(dims_, kNumDims_, 1, 3, &transposed);
  // Re-dimension to 4 columns so the checks below can index by (row, col).
  transposed.ResizeNoInit(kInputSize_ / 4, 4);
  // Expected layout of the result:
  // output tensor=[[[[0, 6, 12, 18][1, 7, 13, 19]]
  //                 [[2, 8, 14, 20][3, 9, 15, 21]]
  //                 [[4, 10, 16, 22][5, 11, 17, 23]]]
  //                [[[24, 30, 36, 42]...
  EXPECT_EQ(0, transposed(0, 0));
  EXPECT_EQ(6, transposed(0, 1));
  EXPECT_EQ(1, transposed(1, 0));
  EXPECT_EQ(2, transposed(2, 0));
  EXPECT_EQ(3, transposed(3, 0));
  EXPECT_EQ(4, transposed(4, 0));
  EXPECT_EQ(5, transposed(5, 0));
  EXPECT_EQ(24, transposed(6, 0));
  EXPECT_EQ(30, transposed(6, 1));
}

◆ TEST_F() [96/229]

tesseract::TEST_F	(	MatrixTest	,
		RotatingTranspose_2_0
	)

Definition at line 77 of file matrix_test.cc.

                                          {
  // Rotating transpose with arguments (2, 0) of the fixture tensor src_.
  GENERIC_2D_ARRAY<int> transposed;
  src_.RotatingTranspose(dims_, kNumDims_, 2, 0, &transposed);
  // Re-dimension to 2 columns so the checks below can index by (row, col).
  transposed.ResizeNoInit(kInputSize_ / 2, 2);
  // Expected layout of the result:
  // output tensor=[[[[0, 1][6, 7][12, 13][18, 19]]
  //                 [[24, 25][30, 31][36, 37][42, 43]]
  //                 [[48, 49][54, 55][60, 61][66, 67]]
  //                 [[72, 73][78, 79][84, 85][90, 91]]
  //                 [[96, 97][102, 103][108, 109][114, 115]]]
  //                [[[2,3]...
  EXPECT_EQ(0, transposed(0, 0));
  EXPECT_EQ(1, transposed(0, 1));
  EXPECT_EQ(6, transposed(1, 0));
  EXPECT_EQ(7, transposed(1, 1));
  EXPECT_EQ(24, transposed(4, 0));
  EXPECT_EQ(25, transposed(4, 1));
  EXPECT_EQ(30, transposed(5, 0));
  EXPECT_EQ(2, transposed(20, 0));
}

◆ TEST_F() [97/229]

tesseract::TEST_F	(	MatrixTest	,
		RotatingTranspose_3_1
	)

Definition at line 58 of file matrix_test.cc.

                                          {
  // Rotating transpose with arguments (3, 1) of the fixture tensor src_.
  GENERIC_2D_ARRAY<int> transposed;
  src_.RotatingTranspose(dims_, kNumDims_, 3, 1, &transposed);
  // Re-dimension to 3 columns so the checks below can index by (row, col).
  transposed.ResizeNoInit(kInputSize_ / 3, 3);
  // Expected layout of the result:
  // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
  //                 [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
  //                [[[24, 26, 28]...
  EXPECT_EQ(0, transposed(0, 0));
  EXPECT_EQ(2, transposed(0, 1));
  EXPECT_EQ(4, transposed(0, 2));
  EXPECT_EQ(6, transposed(1, 0));
  EXPECT_EQ(1, transposed(4, 0));
  EXPECT_EQ(24, transposed(8, 0));
  EXPECT_EQ(26, transposed(8, 1));
  EXPECT_EQ(25, transposed(12, 0));
}

◆ TEST_F() [98/229]

tesseract::TEST_F	(	NetworkioTest	,
		CopyWithXReversal
	)

Definition at line 142 of file networkio_test.cc.

                                         {
  // Checks NetworkIO::CopyWithXReversal against a hard-coded table of expected
  // values. Only compiled when INCLUDE_TENSORFLOW is defined; otherwise the
  // test is skipped.
#ifdef INCLUDE_TENSORFLOW
  NetworkIO nio;
  SetupNetworkIO(&nio);
  NetworkIO copy;
  copy.CopyWithXReversal(nio);
  StrideMap::Index index(copy.stride_map());
  // next_t: next timestep not yet examined; pos: count of indexed positions
  // visited so far (also the cursor into expected_values).
  int next_t = 0;
  int pos = 0;
  std::vector<int> expected_values = {3,  2,  1,  0,  7,  6,  5,  4,  11, 10, 9,
                                      8,  16, 15, 14, 13, 12, 21, 20, 19, 18, 17,
                                      26, 25, 24, 23, 22, 31, 30, 29, 28, 27};
  do {
    int t = index.t();
    // The indexed values match the expected values.
    // Feature 0 carries the value and feature 1 its negation.
    int value = copy.i(t)[0];
    EXPECT_EQ(value, expected_values[pos]);
    value = copy.i(t)[1];
    EXPECT_EQ(value, -expected_values[pos]);
    // When we skip t values, the data is always 0.
    while (next_t < t) {
      EXPECT_EQ(copy.i(next_t)[0], 0) << "Failure t = " << next_t;
      EXPECT_EQ(copy.i(next_t)[1], 0) << "Failure t = " << next_t;
      ++next_t;
    }
    ++pos;
    ++next_t;
  } while (index.Increment());
  // 32 indexed positions were visited, and the walk ended at timestep 40.
  EXPECT_EQ(pos, 32);
  EXPECT_EQ(next_t, 40);
#else
  LOG(INFO) << "Skip test because of missing xla::Array2D";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [99/229]

tesseract::TEST_F	(	NetworkioTest	,
		CopyWithXYTranspose
	)

Definition at line 179 of file networkio_test.cc.

                                           {
  // Checks NetworkIO::CopyWithXYTranspose against a hard-coded table of
  // expected values. Only compiled when INCLUDE_TENSORFLOW is defined;
  // otherwise the test is skipped.
#ifdef INCLUDE_TENSORFLOW
  NetworkIO nio;
  SetupNetworkIO(&nio);
  NetworkIO copy;
  copy.CopyWithXYTranspose(nio);
  StrideMap::Index index(copy.stride_map());
  // next_t: next timestep not yet examined; pos: count of indexed positions
  // visited so far (also the cursor into expected_values).
  int next_t = 0;
  int pos = 0;
  std::vector<int> expected_values = {0,  4,  8,  1,  5,  9,  2,  6,  10, 3,  7,
                                      11, 12, 17, 22, 27, 13, 18, 23, 28, 14, 19,
                                      24, 29, 15, 20, 25, 30, 16, 21, 26, 31};
  do {
    int t = index.t();
    // The indexed values match the expected values.
    // Feature 0 carries the value and feature 1 its negation.
    int value = copy.i(t)[0];
    EXPECT_EQ(value, expected_values[pos]);
    value = copy.i(t)[1];
    EXPECT_EQ(value, -expected_values[pos]);
    // When we skip t values, the data is always 0.
    while (next_t < t) {
      EXPECT_EQ(copy.i(next_t)[0], 0);
      EXPECT_EQ(copy.i(next_t)[1], 0);
      ++next_t;
    }
    ++pos;
    ++next_t;
  } while (index.Increment());
  // 32 indexed positions were visited, and the walk ended at timestep 40.
  EXPECT_EQ(pos, 32);
  EXPECT_EQ(next_t, 40);
#else
  LOG(INFO) << "Skip test because of missing xla::Array2D";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [100/229]

tesseract::TEST_F	(	NetworkioTest	,
		CopyWithYReversal
	)

Definition at line 105 of file networkio_test.cc.

                                         {
  // Checks NetworkIO::CopyWithYReversal against a hard-coded table of expected
  // values. Only compiled when INCLUDE_TENSORFLOW is defined; otherwise the
  // test is skipped.
#ifdef INCLUDE_TENSORFLOW
  NetworkIO nio;
  SetupNetworkIO(&nio);
  NetworkIO copy;
  copy.CopyWithYReversal(nio);
  StrideMap::Index index(copy.stride_map());
  // next_t: next timestep not yet examined; pos: count of indexed positions
  // visited so far (also the cursor into expected_values).
  int next_t = 0;
  int pos = 0;
  std::vector<int> expected_values = {8,  9,  10, 11, 4,  5,  6,  7,  0,  1,  2,
                                      3,  27, 28, 29, 30, 31, 22, 23, 24, 25, 26,
                                      17, 18, 19, 20, 21, 12, 13, 14, 15, 16};
  do {
    int t = index.t();
    // The indexed values match the expected values.
    // Feature 0 carries the value and feature 1 its negation.
    int value = copy.i(t)[0];
    EXPECT_EQ(value, expected_values[pos]);
    value = copy.i(t)[1];
    EXPECT_EQ(value, -expected_values[pos]);
    // When we skip t values, the data is always 0.
    while (next_t < t) {
      EXPECT_EQ(copy.i(next_t)[0], 0) << "Failure t = " << next_t;
      EXPECT_EQ(copy.i(next_t)[1], 0) << "Failure t = " << next_t;
      ++next_t;
    }
    ++pos;
    ++next_t;
  } while (index.Increment());
  // 32 indexed positions were visited, and the walk ended at timestep 40.
  EXPECT_EQ(pos, 32);
  EXPECT_EQ(next_t, 40);
#else
  LOG(INFO) << "Skip test because of missing xla::Array2D";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [101/229]

tesseract::TEST_F	(	NetworkioTest	,
		InitWithZeroFill
	)

Definition at line 65 of file networkio_test.cc.

                                        {
  // Pre-fills a NetworkIO with pixels, re-initializes it through
  // SetupNetworkIO, zeroes the invalid elements and then checks that indexed
  // positions count up from 0 while skipped timesteps read as 0. Only compiled
  // when INCLUDE_TENSORFLOW is defined; otherwise the test is skipped.
#ifdef INCLUDE_TENSORFLOW
  NetworkIO nio;
  nio.Resize2d(true, 32, 2);
  int width = nio.Width();
  // Write something into both features of every timestep first.
  for (int t = 0; t < width; ++t) {
    nio.SetPixel(t, 0, 0, 0.0f, 128.0f);
    nio.SetPixel(t, 1, 0, 0.0f, 128.0f);
  }
  // The initialization will wipe out all previously set values.
  SetupNetworkIO(&nio);
  nio.ZeroInvalidElements();
  StrideMap::Index index(nio.stride_map());
  // next_t: next timestep not yet examined; pos: count of indexed positions
  // visited so far, which is also the value expected at that position.
  int next_t = 0;
  int pos = 0;
  do {
    int t = index.t();
    // The indexed values just increase monotonically.
    int value = nio.i(t)[0];
    EXPECT_EQ(value, pos);
    value = nio.i(t)[1];
    EXPECT_EQ(value, -pos);
    // When we skip t values, the data is always 0.
    while (next_t < t) {
      EXPECT_EQ(nio.i(next_t)[0], 0);
      EXPECT_EQ(nio.i(next_t)[1], 0);
      ++next_t;
    }
    ++pos;
    ++next_t;
  } while (index.Increment());
  // 32 indexed positions were visited, and the walk ended at timestep 40.
  EXPECT_EQ(pos, 32);
  EXPECT_EQ(next_t, 40);
#else
  LOG(INFO) << "Skip test because of missing xla::Array2D";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [102/229]

tesseract::TEST_F	(	NthItemTest	,
		BoringTest
	)

Definition at line 61 of file nthitem_test.cc.

                                {
  KDVector v;
  // Heavily duplicated keys: five 8s followed by four 7s, each tagged with its
  // original position.
  int keys[] = {8, 8, 8, 8, 8, 7, 7, 7, 7};
  for (size_t k = 0; k < countof(keys); ++k) {
    IntKDPair entry(keys[k], k);
    v.push_back(entry);
  }
  // In sorted order, position 3 holds a 7 and position 4 holds an 8.
  size_t nth = 3;
  std::nth_element(v.begin(), v.begin() + nth, v.end());
  EXPECT_EQ(7, v[nth].key());
  nth = 4;
  std::nth_element(v.begin(), v.begin() + nth, v.end());
  EXPECT_EQ(8, v[nth].key());
  // Smallest item: a 7.
  nth = 0;
  std::nth_element(v.begin(), v.begin() + nth, v.end());
  EXPECT_EQ(7, v[nth].key());
  // Largest item: an 8.
  nth = v.size() - 1;
  std::nth_element(v.begin(), v.begin() + nth, v.end());
  EXPECT_EQ(8, v[nth].key());
}

◆ TEST_F() [103/229]

tesseract::TEST_F	(	NthItemTest	,
		EqualTest
	)

Definition at line 104 of file nthitem_test.cc.

                               {
  // Start from the shared fixture data.
  KDVector v;
  PushTestData(&v);
  // One more 8 is appended, which shifts the median to 7.
  IntKDPair extra(8, 13);
  v.push_back(extra);
  // Select the median element.
  size_t median_pos = v.size() / 2;
  std::nth_element(v.begin(), v.begin() + median_pos, v.end());
  // Key 7 occurs at original indices 4 and 12; either may land on the median.
  EXPECT_EQ(7, v[median_pos].key());
  EXPECT_TRUE(v[median_pos].data() == 4 || v[median_pos].data() == 12);
}

◆ TEST_F() [104/229]

tesseract::TEST_F	(	NthItemTest	,
		GeneralTest
	)

Definition at line 44 of file nthitem_test.cc.

                                 {
  // Start from the shared fixture data.
  KDVector v;
  PushTestData(&v);
  // Selecting position 0 yields the minimum key, -32767.
  size_t nth = 0;
  std::nth_element(v.begin(), v.begin() + nth, v.end());
  EXPECT_EQ(-32767, v[nth].key());
  // Selecting the last position yields the maximum key, 65536.
  nth = v.size() - 1;
  std::nth_element(v.begin(), v.begin() + nth, v.end());
  EXPECT_EQ(65536, v[nth].key());
}

◆ TEST_F() [105/229]

tesseract::TEST_F	(	NthItemTest	,
		UniqueTest
	)

Definition at line 91 of file nthitem_test.cc.

                                {
  // Start from the shared fixture data.
  KDVector v;
  PushTestData(&v);
  // Select the median element.
  size_t median_pos = v.size() / 2;
  std::nth_element(v.begin(), v.begin() + median_pos, v.end());
  // The median key is 6, and it was originally stored at index 11.
  EXPECT_EQ(6, v[median_pos].key());
  EXPECT_EQ(11, v[median_pos].data());
}

◆ TEST_F() [106/229]

tesseract::TEST_F	(	PageSegModeTest	,
		WordTest
	)

Definition at line 87 of file pagesegmode_test.cc.

                                  {
  // Runs recognition on fixed rectangles of segmodeimg.tif under several page
  // segmentation modes and checks the recognized text. The test is skipped
  // when the image file is not present.
  std::string filename = file::JoinPath(TESTING_DIR, "segmodeimg.tif");
  if (!file_exists(filename.c_str())) {
    LOG(INFO) << "Skip test because of missing " << filename << '\n';
    GTEST_SKIP();
  } else {
    SetImage(filename.c_str());
    // Test various rectangles around the inverse page number.
    VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1419, 264, 69, 34);
    VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1411, 252, 78, 62);
    VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1396, 218, 114, 102);
    // Test a random pair of words as a line
    VerifyRectText(tesseract::PSM_SINGLE_LINE, "What should", 237, 393, 256, 36);
  #ifdef DISABLED_LEGACY_ENGINE
    // Skip check as LSTM mode adds a space.
    LOG(INFO) << "Skip `Whatshould` test in LSTM Mode\n";
  #else
    // Test a random pair of words as a word
    VerifyRectText(tesseract::PSM_SINGLE_WORD, "Whatshould", 237, 393, 256, 36);
  #endif
    // Test single block mode.
    VerifyRectText(tesseract::PSM_SINGLE_BLOCK, "both the\nfrom the", 237, 450, 172, 94);
    // But doesn't work in line or word mode.
    NotRectText(tesseract::PSM_SINGLE_LINE, "both the\nfrom the", 237, 450, 172, 94);
    NotRectText(tesseract::PSM_SINGLE_WORD, "both the\nfrom the", 237, 450, 172, 94);
  }
}

◆ TEST_F() [107/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		CanDropUncoveredChars
	)

Definition at line 154 of file pango_font_info_test.cc.

                                                 {
  // Checks that DropUncoveredChars removes a character the font cannot render
  // and reports how many were dropped, while leaving joiner characters alone.
  font_info_.ParseFontDescriptionName("Verdana 12");
  // Verdana cannot render the "ff" ligature
  std::string word = "oﬀice";
  EXPECT_EQ(1, font_info_.DropUncoveredChars(&word));
  EXPECT_EQ("oice", word);

  // Don't drop non-letter characters like word joiners.
  const char *kJoiners[] = {
      "\u2060", // U+2060 (WJ)
      "\u200C", // U+200C (ZWNJ)
      "\u200D"  // U+200D (ZWJ)
  };
  // Each joiner on its own must survive unchanged with a drop count of 0.
  for (auto &kJoiner : kJoiners) {
    word = kJoiner;
    EXPECT_EQ(0, font_info_.DropUncoveredChars(&word));
    EXPECT_STREQ(kJoiner, word.c_str());
  }
}

◆ TEST_F() [108/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		CannotRenderInvalidString
	)

Definition at line 145 of file pango_font_info_test.cc.

                                                     {
  // None of the badly formed words in the nullptr-terminated table may be
  // reported as renderable with the "Lohit Hindi 12" description.
  font_info_.ParseFontDescriptionName("Lohit Hindi 12");
  int word_idx = 0;
  while (kBadlyFormedHinWords[word_idx] != nullptr) {
    EXPECT_FALSE(font_info_.CanRenderString(kBadlyFormedHinWords[word_idx],
                                            strlen(kBadlyFormedHinWords[word_idx])))
        << "Can render " << kBadlyFormedHinWords[word_idx];
    ++word_idx;
  }
}

◆ TEST_F() [109/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		CannotRenderUncoveredString
	)

Definition at line 140 of file pango_font_info_test.cc.

                                                       {
  // With the "Verdana 12" description, the Korean sample text must be
  // reported as not renderable.
  font_info_.ParseFontDescriptionName("Verdana 12");
  EXPECT_FALSE(font_info_.CanRenderString(kKorText, strlen(kKorText)));
}

◆ TEST_F() [110/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		CanRenderLigature
	)

Definition at line 131 of file pango_font_info_test.cc.

                                             {
  // With the "Arab 12" description, both a short two-letter Arabic string and
  // the longer Arabic sample text must be reported as renderable.
  font_info_.ParseFontDescriptionName("Arab 12");
  const char kArabicLigature[] = "لا";
  EXPECT_TRUE(font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature)));

  // Marker on stdout separating the two checks.
  printf("Next word\n");
  EXPECT_TRUE(font_info_.CanRenderString(kArabicText, strlen(kArabicText)));
}

◆ TEST_F() [111/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		CanRenderString
	)

Definition at line 120 of file pango_font_info_test.cc.

                                           {
  // Pairs each sample text with a font description and expects
  // CanRenderString to succeed for all three.
  // English sample with Verdana.
  font_info_.ParseFontDescriptionName("Verdana 12");
  EXPECT_TRUE(font_info_.CanRenderString(kEngText, strlen(kEngText)));

  // Korean sample with UnBatang.
  font_info_.ParseFontDescriptionName("UnBatang 12");
  EXPECT_TRUE(font_info_.CanRenderString(kKorText, strlen(kKorText)));

  // Hindi sample with Lohit Hindi.
  font_info_.ParseFontDescriptionName("Lohit Hindi 12");
  EXPECT_TRUE(font_info_.CanRenderString(kHinText, strlen(kHinText)));
}

◆ TEST_F() [112/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		DoesGetSpacingProperties
	)

Definition at line 111 of file pango_font_info_test.cc.

                                                    {
  // Checks that spacing properties can be fetched for an upper-case and a
  // lower-case glyph and that each reports a positive advance.
  EXPECT_TRUE(font_info_.ParseFontDescriptionName("Arial Italic 12"));
  // Start from 0: EXPECT_TRUE is non-fatal, so if GetSpacingProperties fails
  // the EXPECT_GT below still runs and must not read an indeterminate value.
  int x_bearing = 0;
  int x_advance = 0;
  EXPECT_TRUE(font_info_.GetSpacingProperties("A", &x_bearing, &x_advance));
  EXPECT_GT(x_advance, 0);
  // Reset so a failure on the second glyph cannot be masked by the value left
  // over from the first one.
  x_bearing = 0;
  x_advance = 0;
  EXPECT_TRUE(font_info_.GetSpacingProperties("a", &x_bearing, &x_advance));
  EXPECT_GT(x_advance, 0);
}

◆ TEST_F() [113/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		DoesParseFontDescriptionName
	)

Definition at line 84 of file pango_font_info_test.cc.

                                                        {
  // Parses three font description strings and checks the size and family
  // name extracted from each.
  // Style words ("Bold Italic") are not part of the family name.
  EXPECT_TRUE(font_info_.ParseFontDescriptionName("Arial Bold Italic 12"));
  EXPECT_EQ(12, font_info_.font_size());
  EXPECT_EQ("Arial", font_info_.family_name());

  EXPECT_TRUE(font_info_.ParseFontDescriptionName("Verdana 10"));
  EXPECT_EQ(10, font_info_.font_size());
  EXPECT_EQ("Verdana", font_info_.family_name());

  // A description without a size; only the family name is checked, and it
  // keeps its embedded space.
  EXPECT_TRUE(font_info_.ParseFontDescriptionName("DejaVu Sans Ultra-Light"));
  EXPECT_EQ("DejaVu Sans", font_info_.family_name());
}

◆ TEST_F() [114/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		DoesParseMissingFonts
	)

Definition at line 97 of file pango_font_info_test.cc.

                                                 {
  // Parsing a description must succeed and yield size and family name even
  // when the described face or family is not available.
  // Font family one of whose faces exists but this one doesn't.
  EXPECT_TRUE(font_info_.ParseFontDescriptionName("Arial Italic 12"));
  EXPECT_EQ(12, font_info_.font_size());
  EXPECT_EQ("Arial", font_info_.family_name());

  // Font family that doesn't exist in testdata. It will still parse the
  // description name. But without the file, it will not be able to populate
  // some font family details, like is_monospace().
  EXPECT_TRUE(font_info_.ParseFontDescriptionName("Georgia 10"));
  EXPECT_EQ(10, font_info_.font_size());
  EXPECT_EQ("Georgia", font_info_.family_name());
}

◆ TEST_F() [115/229]

tesseract::TEST_F	(	PangoFontInfoTest	,
		TestNonDefaultConstructor
	)

Definition at line 78 of file pango_font_info_test.cc.

                                                     {
  // Constructing directly from a description string must give the same size
  // and family name that parsing the string would.
  PangoFontInfo described_font("Arial Bold Italic 12");
  EXPECT_EQ(12, described_font.font_size());
  EXPECT_EQ("Arial", described_font.family_name());
}

◆ TEST_F() [116/229]

tesseract::TEST_F	(	ParamsModelTest	,
		TestEngParamsModelIO
	)

Definition at line 66 of file params_model_test.cc.

                                              {
  // Runs the params-model round-trip helper on "eng.params_model". The test
  // is skipped when DISABLED_LEGACY_ENGINE is defined.
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because ParamsModel::LoadFromFp is missing.
  GTEST_SKIP();
#else
  TestParamsModelRoundTrip("eng.params_model");
#endif
}

◆ TEST_F() [117/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DISABLED_ChiDictionary
	)

Definition at line 439 of file recodebeam_test.cc.

                                               {
  LOG(INFO) << "Testing zh_hans dictionary" << "\n";
  LoadUnicharset("zh_hans.unicharset");
  GENERIC_2D_ARRAY<float> outputs =
      GenerateSyntheticOutputs(kZHTops, kZHTopScores, kZH2nds, kZH2ndScores, nullptr);
  PointerVector<WERD_RES> words;
  // First pass, no dictionary: every character comes out as its own word and
  // each word's permuter is the top choice.
  ExpectCorrect(outputs, "实学储啬投学生", nullptr, &words);
  EXPECT_EQ(7, words.size());
  for (int word_idx = 0; word_idx < words.size(); ++word_idx) {
    EXPECT_EQ(TOP_CHOICE_PERM, words[word_idx]->best_choice->permuter());
  }
  // Second pass, with the dictionary loaded.
  LoadDict("zh_hans");
  ExpectCorrect(outputs, "实学储啬投学生", &lstm_dict_, &words);
  // How many words the dictionary pass should produce.
  const int kNumWords = 5;
  // The text of each expected word.
  const char *kWords[kNumWords] = {"实学", "储", "啬", "投", "学生"};
  // The permuter expected for each word.
  const int kWordPerms[kNumWords] = {SYSTEM_DAWG_PERM, TOP_CHOICE_PERM, TOP_CHOICE_PERM,
                                     TOP_CHOICE_PERM, SYSTEM_DAWG_PERM};
  EXPECT_EQ(kNumWords, words.size());
  // Bounded by both counts so a size mismatch cannot index out of range.
  for (int word_idx = 0; word_idx < kNumWords && word_idx < words.size(); ++word_idx) {
    EXPECT_STREQ(kWords[word_idx], words[word_idx]->best_choice->unichar_string().c_str());
    EXPECT_EQ(kWordPerms[word_idx], words[word_idx]->best_choice->permuter());
  }
}

◆ TEST_F() [118/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DISABLED_EngDictionary
	)

Definition at line 422 of file recodebeam_test.cc.

                                               {
  LOG(INFO) << "Testing eng dictionary" << "\n";
  LoadUnicharset("eng_beam.unicharset");
  GENERIC_2D_ARRAY<float> outputs =
      GenerateSyntheticOutputs(kGWRTops, kGWRTopScores, kGWR2nds, kGWR2ndScores, nullptr);
  // Without a dictionary the expected text is simply the concatenation of the
  // top choices (the table is nullptr-terminated).
  std::string top_choice_text;
  for (int pos = 0; kGWRTops[pos] != nullptr; ++pos) {
    top_choice_text += kGWRTops[pos];
  }
  PointerVector<WERD_RES> words;
  ExpectCorrect(outputs, top_choice_text, nullptr, &words);
  // With the dictionary loaded the expected text changes.
  LoadDict("eng_beam");
  ExpectCorrect(outputs, "Gets words right.", &lstm_dict_, &words);
}

◆ TEST_F() [119/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DISABLED_MultiCodeSequences
	)

Definition at line 471 of file recodebeam_test.cc.

                                                    {
  // Decodes synthetic outputs built from the Vietnamese top/second-choice
  // tables and expects the normalized form of a fixed truth string, with no
  // dictionary.
  LOG(INFO) << "Testing duplicates in multi-code sequences"
            << "\n";
  LoadUnicharset("vie.d.unicharset");
  tesseract::SetupBasicProperties(false, true, &ccutil_.unicharset);
  // Unlike the dictionary tests, a random source is passed to the generator.
  TRand random;
  GENERIC_2D_ARRAY<float> outputs =
      GenerateSyntheticOutputs(kViTops, kViTopScores, kVi2nds, kVi2ndScores, &random);
  PointerVector<WERD_RES> words;
  // The truth text is normalized (NFKC, OCR normalization, no grapheme
  // normalization) before comparison.
  std::string truth_str;
  tesseract::NormalizeUTF8String(tesseract::UnicodeNormMode::kNFKC, tesseract::OCRNorm::kNormalize,
                                 tesseract::GraphemeNorm::kNone, "vậy tội", &truth_str);
  ExpectCorrect(outputs, truth_str, nullptr, &words);
}

◆ TEST_F() [120/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DoesChinese
	)

Definition at line 334 of file recodebeam_test.cc.

                                    {
  // Traditional Chinese first.
  LOG(INFO) << "Testing chi_tra" << "\n";
  LoadUnicharset("chi_tra.unicharset");
  // The transcription is every id from the first non-special unichar up to
  // (not including) kNumChars; decoding easy outputs must reproduce it.
  std::vector<int> labels;
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
  // Then the same procedure for Simplified Chinese.
  LOG(INFO) << "Testing chi_sim" << "\n";
  LoadUnicharset("chi_sim.unicharset");
  labels.clear();
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
}

◆ TEST_F() [121/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DoesEnglish
	)

Definition at line 409 of file recodebeam_test.cc.

                                    {
  LOG(INFO) << "Testing eng" << "\n";
  LoadUnicharset("eng.unicharset");
  // The transcription is every id from the first non-special unichar up to
  // (not including) kNumChars; decoding easy outputs must reproduce it.
  std::vector<int> labels;
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
}

◆ TEST_F() [122/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DoesJapanese
	)

Definition at line 357 of file recodebeam_test.cc.

                                     {
  LOG(INFO) << "Testing jpn" << "\n";
  LoadUnicharset("jpn.unicharset");
  // The transcription is every id from the first non-special unichar up to
  // (not including) kNumChars; decoding easy outputs must reproduce it.
  std::vector<int> labels;
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
}

◆ TEST_F() [123/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DoesKannada
	)

Definition at line 383 of file recodebeam_test.cc.

                                    {
  LOG(INFO) << "Testing kan" << "\n";
  LoadUnicharset("kan.unicharset");
  // The transcription is every id from the first non-special unichar up to
  // (not including) kNumChars; decoding easy outputs must reproduce it.
  std::vector<int> labels;
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
}

◆ TEST_F() [124/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DoesKorean
	)

Definition at line 370 of file recodebeam_test.cc.

                                   {
  LOG(INFO) << "Testing kor" << "\n";
  LoadUnicharset("kor.unicharset");
  // The transcription is every id from the first non-special unichar up to
  // (not including) kNumChars; decoding easy outputs must reproduce it.
  std::vector<int> labels;
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
}

◆ TEST_F() [125/229]

tesseract::TEST_F	(	RecodeBeamTest	,
		DoesMarathi
	)

Definition at line 396 of file recodebeam_test.cc.

                                    {
  LOG(INFO) << "Testing mar" << "\n";
  LoadUnicharset("mar.unicharset");
  // The transcription is every id from the first non-special unichar up to
  // (not including) kNumChars; decoding easy outputs must reproduce it.
  std::vector<int> labels;
  for (int id = SPECIAL_UNICHAR_CODES_COUNT; id < kNumChars; ++id) {
    labels.push_back(id);
  }
  GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(labels, kPadding);
  ExpectCorrect(outputs, labels);
}

◆ TEST_F() [126/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		ComplexTest
	)

Definition at line 348 of file resultiterator_test.cc.

                                        {
  // Runs layout analysis on 8087_054.3B.tif and checks how closely the page
  // can be rebuilt from the resulting iterator.
  SetImage("8087_054.3B.tif");
  // Just run layout analysis.
  PageIterator *it = api_.AnalyseLayout();
  // Fatal check: the iterator is handed to VerifyRebuilds below, so continuing
  // with a null pointer would turn a clean failure into a crash.
  ASSERT_TRUE(it != nullptr);
  // The images should rebuild almost perfectly.
  VerifyRebuilds(2073, 2073, 2080, 2081, 2090, it);
  delete it;
}

◆ TEST_F() [127/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		DISABLED_NonNullChoicesTest
	)

Definition at line 537 of file resultiterator_test.cc.

                                                        {
  // Recognizes 5318c4b679264.jpg and walks every word, every symbol in the
  // word and every choice for the symbol, requiring that no choice text is
  // null.
  SetImage("5318c4b679264.jpg");
  // The text itself is not inspected; the call is made before asking for the
  // iterator and its buffer is released right away.
  char *result = api_.GetUTF8Text();
  delete[] result;
  ResultIterator *r_it = api_.GetIterator();
  // Fatal check: every statement below dereferences r_it.
  ASSERT_TRUE(r_it != nullptr);
  // Iterate over the words.
  do {
    char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
    if (word_str != nullptr) {
      LOG(INFO) << "Word " << word_str << ":\n";
      // Work on a copy so stepping through symbols does not move r_it.
      ResultIterator s_it = *r_it;
      do {
        tesseract::ChoiceIterator c_it(s_it);
        do {
          const char *char_str = c_it.GetUTF8Text();
          if (char_str == nullptr) {
            LOG(INFO) << "Null char choice"
                      << "\n";
          } else {
            LOG(INFO) << "Char choice " << char_str << "\n";
          }
          CHECK(char_str != nullptr);
        } while (c_it.Next());
        // Stop at the last symbol of the current word rather than running on
        // into the next word.
      } while (!s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) &&
               s_it.Next(tesseract::RIL_SYMBOL));
      delete[] word_str;
    }
  } while (r_it->Next(tesseract::RIL_WORD));
  delete r_it;
}

◆ TEST_F() [128/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		DualStartTextlineOrderTest
	)

Definition at line 478 of file resultiterator_test.cc.

                                                       {
  // Eight words: left-to-right on the left, right-to-left on the right, with
  // neutrals in between.
  const StrongScriptDirection word_dirs[] = {dL, dL, dN, dL, dN, dR, dR, dR};
  // Expected order in a left-to-right context: words 0-4 in order, then the
  // trailing three as a reversed minor run.
  int ltr_order[] = {
      0, 1, 2, 3, 4, ResultIterator::kMinorRunStart, 7, 6, 5, ResultIterator::kMinorRunEnd};
  // Expected order in a right-to-left context: words 7-4 first, then the
  // leading four as a minor run.
  int rtl_order[] = {7, 6, 5, 4, ResultIterator::kMinorRunStart,
                     0, 1, 2, 3, ResultIterator::kMinorRunEnd};

  ExpectTextlineReadingOrder(true, word_dirs, countof(word_dirs), ltr_order,
                             countof(ltr_order));
  ExpectTextlineReadingOrder(false, word_dirs, countof(word_dirs), rtl_order,
                             countof(rtl_order));
}

◆ TEST_F() [129/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		EasyTest
	)

Definition at line 258 of file resultiterator_test.cc.

                                     {
  // End-to-end check on phototest.tif: layout analysis, image rebuilds from
  // both iterator kinds, text rebuild, the baseline of the first line, and
  // the per-word confidence and font attributes.
  SetImage("phototest.tif");
  // Just run layout analysis.
  PageIterator *p_it = api_.AnalyseLayout();
  // Fatal check: p_it is dereferenced immediately below.
  ASSERT_TRUE(p_it != nullptr);
  // Check iterator position.
  EXPECT_TRUE(p_it->IsAtBeginningOf(tesseract::RIL_BLOCK));
  // This should be a single block.
  EXPECT_FALSE(p_it->Next(tesseract::RIL_BLOCK));
  EXPECT_FALSE(p_it->IsAtBeginningOf(tesseract::RIL_BLOCK));

  // The images should rebuild almost perfectly.
  LOG(INFO) << "Verifying image rebuilds 1 (pageiterator)"
            << "\n";
  VerifyRebuilds(10, 10, 0, 0, 0, p_it);
  delete p_it;

  char *result = api_.GetUTF8Text();
  ocr_text_ = result;
  delete[] result;
  ResultIterator *r_it = api_.GetIterator();
  // Fatal check: r_it is dereferenced throughout the rest of the test.
  ASSERT_TRUE(r_it != nullptr);
  // The images should rebuild almost perfectly.
  LOG(INFO) << "Verifying image rebuilds 2a (resultiterator)"
            << "\n";
  VerifyRebuilds(8, 8, 0, 0, 40, r_it, tesseract::RIL_WORD);
  // Test the text.
  LOG(INFO) << "Verifying text rebuilds 1 (resultiterator)"
            << "\n";
  VerifyAllText(ocr_text_, r_it);

  // The images should rebuild almost perfectly.
  LOG(INFO) << "Verifying image rebuilds 2b (resultiterator)"
            << "\n";
  VerifyRebuilds(8, 8, 0, 0, 40, r_it, tesseract::RIL_WORD);

  r_it->Begin();
  // Test baseline of the first line.
  // Zero-initialized so the checks below never read indeterminate values if
  // Baseline() leaves the outputs untouched.
  int x1 = 0, y1 = 0, x2 = 0, y2 = 0;
  r_it->Baseline(tesseract::RIL_TEXTLINE, &x1, &y1, &x2, &y2);
  LOG(INFO) << "Baseline ("
     << x1 << ',' << y1 << ")->(" << x2 << ',' << y2 << ")\n";
  // Make sure we have a decent vector.
  EXPECT_GE(x2, x1 + 400);
  // The point 200,116 should be very close to the baseline.
  // (x3,y3) is the vector from (x1,y1) to (200,116)
  int x3 = 200 - x1;
  int y3 = 116 - y1;
  x2 -= x1;
  y2 -= y1;
  // The cross product (x2,y2)x(x3,y3) should be small.
  int product = x2 * y3 - x3 * y2;
  EXPECT_LE(abs(product), x2);

  // Test font attributes for each word.
  do {
    float confidence = r_it->Confidence(tesseract::RIL_WORD);
#ifndef DISABLED_LEGACY_ENGINE
    // Initialized so the expectations below stay well-defined even if
    // WordFontAttributes() returns without filling the outputs.
    int pointsize = 0, font_id = 0;
    bool bold = false, italic = false, underlined = false, monospace = false, serif = false,
         smallcaps = false;
    const char *font = r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
                                                &smallcaps, &pointsize, &font_id);
    EXPECT_GE(confidence, 80.0f);
#endif
    char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);

#ifdef DISABLED_LEGACY_ENGINE
    LOG(INFO) << "Word " << word_str << ", conf " << confidence << "\n";
#else
    // Streaming a null const char* is undefined behavior, so substitute a
    // placeholder when no font name is available.
    LOG(INFO) << "Word " << word_str << " in font " << (font != nullptr ? font : "(null)")
      << ", id " << font_id << ", size " << pointsize
      << ", conf " << confidence << "\n";
#endif // def DISABLED_LEGACY_ENGINE
    delete[] word_str;
#ifndef DISABLED_LEGACY_ENGINE
    EXPECT_FALSE(bold);
    EXPECT_FALSE(italic);
    EXPECT_FALSE(underlined);
    EXPECT_FALSE(monospace);
    EXPECT_FALSE(serif);
    // The text is about 31 pixels high.  Above we say the source is 200 ppi,
    // which translates to:
    // 31 pixels / textline * (72 pts / inch) / (200 pixels / inch) = 11.16 pts
    EXPECT_GE(pointsize, 11.16 - 1.50);
    EXPECT_LE(pointsize, 11.16 + 1.50);
#endif // ndef DISABLED_LEGACY_ENGINE
  } while (r_it->Next(tesseract::RIL_WORD));
  delete r_it;
}

◆ TEST_F() [130/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		GreyTest
	)

Definition at line 359 of file resultiterator_test.cc.

                                     {
  // Runs layout analysis only (no recognition) on 8087_054.3G.tif and hands
  // the resulting page iterator to VerifyRebuilds.
  SetImage("8087_054.3G.tif");
  // Just run layout analysis.
  PageIterator *it = api_.AnalyseLayout();
  // Fatal check: without an iterator there is nothing to verify, and the
  // helper below would otherwise be handed a null pointer.
  ASSERT_TRUE(it != nullptr);
  // The images should rebuild almost perfectly.
  VerifyRebuilds(600, 600, 600, 600, 600, it);
  delete it;
}

◆ TEST_F() [131/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		LeftwardTextlineOrderTest
	)

Definition at line 493 of file resultiterator_test.cc.

                                                      {
  // Per-word direction codes for an eight-word line, listed by word index.
  const StrongScriptDirection word_dirs[] = {dL, dL, dN, dL, dN, dN, dL, dL};

  // Helper called with `true` (the LTR-context case): the expected order is
  // simply left to right, nothing fancy.
  int expected_in_ltr[] = {0, 1, 2, 3, 4, 5, 6, 7};
  ExpectTextlineReadingOrder(true, word_dirs, countof(word_dirs), expected_in_ltr,
                             countof(expected_in_ltr));

  // Helper called with `false` (the RTL-context case): in the strange event
  // that this line shows up in an RTL paragraph, the whole thing is still
  // presumed to be one LTR run, bracketed by the minor-run markers.
  int expected_in_rtl[] = {ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7,
                           ResultIterator::kMinorRunEnd};
  ExpectTextlineReadingOrder(false, word_dirs, countof(word_dirs), expected_in_rtl,
                             countof(expected_in_rtl));
}

◆ TEST_F() [132/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		NonNullConfidencesTest
	)

Definition at line 569 of file resultiterator_test.cc.

                                                   {
  // Recognizes trainingitalline.tif as a single block, then walks every word
  // and every symbol inside it, requiring non-null symbol text and logging the
  // per-symbol confidence.
  //  SetImage("line6.tiff");
  SetImage("trainingitalline.tif");
  api_.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
  // Force recognition so we can use the result iterator.
  // We don't care about the return from GetUTF8Text.
  char *result = api_.GetUTF8Text();
  delete[] result;
  ResultIterator *r_it = api_.GetIterator();
  // Fatal check: every statement below dereferences the iterator.
  ASSERT_TRUE(r_it != nullptr);
  // Iterate over the words.
  do {
    char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
    if (word_str != nullptr) {
      EXPECT_FALSE(r_it->Empty(tesseract::RIL_WORD));
      EXPECT_FALSE(r_it->Empty(tesseract::RIL_SYMBOL));
      // Walk the symbols on a copy so the word-level position is not disturbed.
      ResultIterator s_it = *r_it;
      do {
        const char *char_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
        CHECK(char_str != nullptr);
        float confidence = s_it.Confidence(tesseract::RIL_SYMBOL);
        LOG(INFO) << "Char " << char_str << " has confidence " << confidence << "\n";
        delete[] char_str;
        // Stop at the last symbol of this word instead of running into the next one.
      } while (!s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) &&
               s_it.Next(tesseract::RIL_SYMBOL));
      delete[] word_str;
    } else {
      LOG(INFO) << "Empty word found"
                << "\n";
    }
  } while (r_it->Next(tesseract::RIL_WORD));
  delete r_it;
}

◆ TEST_F() [133/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		RightwardTextlineOrderTest
	)

Definition at line 510 of file resultiterator_test.cc.

                                                       {
  // Per-word direction codes for an eight-word line, listed by word index.
  const StrongScriptDirection word_dirs[] = {dR, dR, dN, dR, dN, dN, dR, dR};
  // Helper called with `false` (the RTL-context case): the expected order is
  // plain right-to-left, i.e. strictly descending word index.
  int descending_order[] = {7, 6, 5, 4, 3, 2, 1, 0};
  ExpectTextlineReadingOrder(false, word_dirs, countof(word_dirs), descending_order,
                             countof(descending_order));
}

◆ TEST_F() [134/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		SmallCapDropCapTest
	)

Definition at line 370 of file resultiterator_test.cc.

                                                {
  // Recognizes 8071_093.3B.tif and counts, word by word, how many words are
  // flagged smallcaps and how many symbols are flagged dropcap, then checks
  // those counts against fixed bounds.
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test as LSTM mode does not recognize smallcaps & dropcaps attributes.
  GTEST_SKIP();
#else
  SetImage("8071_093.3B.tif");
  // The text itself is not needed; the call is made before GetIterator().
  char *result = api_.GetUTF8Text();
  delete[] result;
  ResultIterator *r_it = api_.GetIterator();
  // Iterate over the words.
  int found_dropcaps = 0;
  int found_smallcaps = 0;
  int false_positives = 0;
  do {
    bool bold, italic, underlined, monospace, serif, smallcaps;
    int pointsize, font_id;
    // Only `smallcaps` is used below; the other outputs are required by the call.
    r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps,
                             &pointsize, &font_id);
    char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
    if (word_str != nullptr) {
      LOG(INFO) << "Word " << word_str
        << " is " << (smallcaps ? "SMALLCAPS" : "Normal") << "\n";
      // Queried at the word position, i.e. before any symbol-level stepping.
      if (r_it->SymbolIsDropcap()) {
        ++found_dropcaps;
      }
      // Words expected to be reported as smallcaps.
      if (strcmp(word_str, "SHE") == 0 || strcmp(word_str, "MOPED") == 0 ||
          strcmp(word_str, "RALPH") == 0 || strcmp(word_str, "KINNEY") == 0 || // Not working yet.
          strcmp(word_str, "BENNETT") == 0) {
        EXPECT_TRUE(smallcaps) << word_str;
        ++found_smallcaps;
      } else {
        // Any other word flagged smallcaps counts as a false positive.
        if (smallcaps) {
          ++false_positives;
        }
      }
      // No symbol other than the first of any word should be dropcap.
      ResultIterator s_it(*r_it);
      // Step through the remaining symbols until the next word begins.
      while (s_it.Next(tesseract::RIL_SYMBOL) && !s_it.IsAtBeginningOf(tesseract::RIL_WORD)) {
        if (s_it.SymbolIsDropcap()) {
          char *sym_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
          LOG(ERROR) << "Symbol " << sym_str << " of word " << word_str << " is dropcap";
          delete[] sym_str;
        }
        EXPECT_FALSE(s_it.SymbolIsDropcap());
      }
      delete[] word_str;
    }
  } while (r_it->Next(tesseract::RIL_WORD));
  delete r_it;
  EXPECT_EQ(1, found_dropcaps);
  // NOTE(review): as written this asserts 4 >= found_smallcaps, i.e. an upper
  // bound on matched words, while five words are listed above - confirm intent.
  EXPECT_GE(4, found_smallcaps);
  EXPECT_LE(false_positives, 3);
#endif // DISABLED_LEGACY_ENGINE
}

◆ TEST_F() [135/229]

tesseract::TEST_F	(	ResultIteratorTest	,
		TextlineOrderSanityCheck
	)

Definition at line 518 of file resultiterator_test.cc.

                                                     {
  // Exhaustive check: every 7-word sequence of the four direction codes is
  // passed to VerifySaneTextlineOrder, which is expected to confirm that the
  // output contains each of the indices 0..6 exactly once.
  const int kNumWords = 7;
  const int kNumCombos = 1 << (2 * kNumWords); // 4 ^ 7 combinations
  StrongScriptDirection word_dirs[kNumWords];
  for (int combo = 0; combo < kNumCombos; ++combo) {
    // Word w takes the w-th base-4 digit of combo (two bits per word).
    for (int w = 0; w < kNumWords; ++w) {
      word_dirs[w] = static_cast<StrongScriptDirection>((combo >> (2 * w)) & 3);
    }
    // Check the same line in both paragraph contexts.
    VerifySaneTextlineOrder(true, word_dirs, kNumWords);
    VerifySaneTextlineOrder(false, word_dirs, kNumWords);
  }
}

◆ TEST_F() [136/229]

tesseract::TEST_F	(	ScanutilsTest	,
		DoesScanf
	)

Definition at line 24 of file scanutils_test.cc.

                                 {
  // This test verifies that tfscanf does Scanf the same as stdio fscanf.
  // There are probably a gazillion more test cases that could be added, but
  // these brought the tesseract and unittest test results in line.
  //
  // The same file is opened twice and read in lock-step, once with fscanf and
  // once with tfscanf; return values and converted values must agree.
  std::string filename = file::JoinPath(TESTDATA_DIR, "scanftest.txt");
  FILE *fp1 = fopen(filename.c_str(), "r");
  if (fp1 == nullptr) {
    std::cout << "Failed to open file " << filename << '\n';
    GTEST_SKIP();
  }
  FILE *fp2 = fopen(filename.c_str(), "r");
  if (fp2 == nullptr) {
    std::cout << "Failed to open file " << filename << '\n';
    fclose(fp1);
    GTEST_SKIP();
  }
  // The file contains this:
  // 42.5 17 0.001000 -0.001000
  // 0 1 123 -123 0x100
  // abcdefghijklmnopqrstuvwxyz
  // abcdefghijklmnopqrstuvwxyz
  // MF 25 6.25e-2 0.5e5 -1e+4
  // 42 MF 25 6.25e-2 0.5
  // 24
  const int kNumFloats = 4;
  // All destination buffers are zero-initialized so that a failed or short
  // conversion never leads to a comparison of indeterminate values.
  float f1[kNumFloats] = {0.0f}, f2[kNumFloats] = {0.0f};
  int r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]);
  int r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]);
  EXPECT_EQ(r1, kNumFloats);
  EXPECT_EQ(r2, kNumFloats);
  if (r1 == r2) {
    // Only the values that were actually converted are compared.
    for (int i = 0; i < r1; ++i) {
      EXPECT_FLOAT_EQ(f1[i], f2[i]);
    }
  }
  const int kNumInts = 5;
  int i1[kNumInts] = {0}, i2[kNumInts] = {0};
  r1 = fscanf(fp1, "%d %d %d %d %i", &i1[0], &i1[1], &i1[2], &i1[3], &i1[4]);
  r2 = tfscanf(fp2, "%d %d %d %d %i", &i2[0], &i2[1], &i2[2], &i2[3], &i2[4]);
  EXPECT_EQ(r1, kNumInts);
  EXPECT_EQ(r2, kNumInts);
  if (r1 == r2) {
    // Bounded by the conversion count, matching the float loop above.
    for (int i = 0; i < r1; ++i) {
      EXPECT_EQ(i1[i], i2[i]);
    }
  }
  const int kStrLen = 1024;
  char s1[kStrLen] = "";
  char s2[kStrLen] = "";
  r1 = fscanf(fp1, "%1023s", s1);
  r2 = tfscanf(fp2, "%1023s", s2);
  EXPECT_EQ(r1, r2);
  EXPECT_STREQ(s1, s2);
  EXPECT_EQ(26u, strlen(s2));
  // A width of 20 must split the second alphabet line.
  r1 = fscanf(fp1, "%20s", s1);
  r2 = tfscanf(fp2, "%20s", s2);
  EXPECT_EQ(r1, r2);
  EXPECT_STREQ(s1, s2);
  EXPECT_EQ(20u, strlen(s2));
  // Now read the rest of the alphabet.
  r1 = fscanf(fp1, "%1023s", s1);
  r2 = tfscanf(fp2, "%1023s", s2);
  EXPECT_EQ(r1, r2);
  EXPECT_STREQ(s1, s2);
  EXPECT_EQ(6u, strlen(s2));
  r1 = fscanf(fp1, "%1023s", s1);
  r2 = tfscanf(fp2, "%1023s", s2);
  EXPECT_EQ(r1, r2);
  EXPECT_STREQ(s1, s2);
  EXPECT_EQ(2u, strlen(s2));
  r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]);
  r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]);
  EXPECT_EQ(r1, r2);
  if (r1 == r2) {
    // Skip stale values left over from the first float read if this one fell short.
    for (int i = 0; i < r1; ++i) {
      EXPECT_FLOAT_EQ(f1[i], f2[i]);
    }
  }
  // Test the * for field suppression.
  r1 = fscanf(fp1, "%d %*s %*d %*f %*f", &i1[0]);
  r2 = tfscanf(fp2, "%d %*s %*d %*f %*f", &i2[0]);
  EXPECT_EQ(r1, r2);
  EXPECT_EQ(i1[0], i2[0]);
  // We should still see the next value and no phantoms.
  r1 = fscanf(fp1, "%d %1023s", &i1[0], s1);
  r2 = tfscanf(fp2, "%d %1023s", &i2[0], s2);
  EXPECT_EQ(r1, r2);
  EXPECT_EQ(1, r2);
  EXPECT_EQ(i1[0], i2[0]);
  fclose(fp2);
  fclose(fp1);
}

◆ TEST_F() [137/229]

tesseract::TEST_F	(	ShapeTableTest	,
		FullTest
	)

Definition at line 129 of file shapetable_test.cc.

                                 {
  // Builds a ShapeTable from three single-unichar shapes, compares it against
  // the combined fixture shape, then merges the three entries and checks the
  // master-shape bookkeeping and AppendMasterShapes.
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because Shape is missing.
  GTEST_SKIP();
#else
  Shape shape1;
  Setup352(101, &shape1);
  // Build a shape table with the same data, but in separate shapes.
  UNICHARSET unicharset;
  unicharset.unichar_insert(" ");
  // Insert "class1" .. "class10" after the space entry.
  for (int i = 1; i <= 10; ++i) {
    char class_str[20];
    snprintf(class_str, sizeof(class_str), "class%d", i);
    unicharset.unichar_insert(class_str);
  }
  ShapeTable st(unicharset);
  // Each AddShape call is expected to return the next index: 0, 1, 2.
  EXPECT_EQ(0, st.AddShape(3, 101));
  EXPECT_EQ(1, st.AddShape(5, 101));
  EXPECT_EQ(2, st.AddShape(2, 101));
  EXPECT_EQ(3, st.NumShapes());
  Expect352(101, shape1);
  // Adding the combined shape is expected to land at index 3.
  EXPECT_EQ(3, st.AddShape(shape1));
  for (int i = 0; i < 3; ++i) {
    EXPECT_FALSE(st.MutableShape(i)->IsEqualUnichars(&shape1));
  }
  EXPECT_TRUE(st.MutableShape(3)->IsEqualUnichars(&shape1));
  EXPECT_TRUE(st.AnyMultipleUnichars());
  // After removing the combined shape no multi-unichar entry should remain.
  st.DeleteShape(3);
  EXPECT_FALSE(st.AnyMultipleUnichars());
 
  // Now merge to make a single shape like shape1.
  EXPECT_EQ(1, st.MasterUnicharCount(0));
  st.MergeShapes(0, 1);
  EXPECT_EQ(3, st.MergedUnicharCount(1, 2));
  st.MergeShapes(1, 2);
  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(3, st.MasterUnicharCount(i));
    // Master font count is the sum of all the font counts in the shape, not
    // the actual number of different fonts in the shape.
    EXPECT_EQ(3, st.MasterFontCount(i));
  }
  EXPECT_EQ(0, st.MasterDestinationIndex(1));
  EXPECT_EQ(0, st.MasterDestinationIndex(2));
  // Copy only the master shapes into a fresh table and compare with shape1.
  ShapeTable st2;
  st2.AppendMasterShapes(st, nullptr);
  EXPECT_EQ(1, st.NumMasterShapes());
  EXPECT_EQ(1, st2.NumShapes());
  EXPECT_TRUE(st2.MutableShape(0)->IsEqualUnichars(&shape1));
  EXPECT_TRUE(st2.AnyMultipleUnichars());
#endif
}

◆ TEST_F() [138/229]

tesseract::TEST_F	(	ShapeTest	,
		AddShapeTest
	)

Definition at line 94 of file shapetable_test.cc.

                                {
  // Adds one Shape into another and checks the subset relations and the
  // unichar/font membership of the result.
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because Shape is missing.
  GTEST_SKIP();
#else
  // The receiving shape starts out as the standard fixture content.
  Shape target;
  Setup352(101, &target);
  Expect352(101, target);

  // A second shape that only partly overlaps the first.
  Shape extra;
  extra.AddToShape(3, 101); // Duplicates an entry of the first shape.
  extra.AddToShape(5, 110); // Different font to the first shape.
  extra.AddToShape(7, 101); // Different unichar to the first shape.

  // Before merging, neither shape is a subset of the other.
  EXPECT_FALSE(target.IsSubsetOf(extra));
  EXPECT_FALSE(extra.IsSubsetOf(target));

  // Fold the second shape into the first.
  target.AddShape(extra);

  // After merging, only the added shape is a subset of the result.
  EXPECT_FALSE(target.IsSubsetOf(extra));
  EXPECT_TRUE(extra.IsSubsetOf(target));
  EXPECT_EQ(4, target.size());
  EXPECT_FALSE(target.ContainsUnichar(1));
  EXPECT_TRUE(target.ContainsUnicharAndFont(5, 101));
  EXPECT_TRUE(target.ContainsUnicharAndFont(5, 110));
  EXPECT_FALSE(target.ContainsUnicharAndFont(3, 110));
  EXPECT_FALSE(target.ContainsUnicharAndFont(7, 110));
  EXPECT_FALSE(target.IsEqualUnichars(&extra));
#endif
}

◆ TEST_F() [139/229]

tesseract::TEST_F	(	ShapeTest	,
		BasicTest
	)

Definition at line 62 of file shapetable_test.cc.

                             {
  // Checks the fixture shape content, then round-trips it through a file
  // (Serialize / DeSerialize) and verifies the copy matches the original.
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because Shape is missing.
  GTEST_SKIP();
#else
  Shape shape1;
  EXPECT_EQ(0, shape1.size());
  Setup352(101, &shape1);
  Expect352(101, shape1);
  // It should still work after file I/O.
  std::string filename = TmpNameToPath("shapefile");
  FILE *fp = fopen(filename.c_str(), "wb");
  ASSERT_TRUE(fp != nullptr);
  EXPECT_TRUE(shape1.Serialize(fp));
  // fclose flushes buffered output, so a failure here means the file on disk
  // may be incomplete and reading it back would be meaningless.
  ASSERT_EQ(0, fclose(fp));
  TFile tfp;
  // Fatal checks: the comparisons below only make sense on a loaded shape.
  ASSERT_TRUE(tfp.Open(filename.c_str(), nullptr));
  Shape shape2;
  ASSERT_TRUE(shape2.DeSerialize(&tfp));
  Expect352(101, shape2);
  // They should be subsets of each other.
  EXPECT_TRUE(shape1.IsSubsetOf(shape2));
  EXPECT_TRUE(shape2.IsSubsetOf(shape1));
  // They should be equal unichars.
  EXPECT_TRUE(shape1.IsEqualUnichars(&shape2));
  // and still pass afterwards.
  Expect352(101, shape1);
  Expect352(101, shape2);
#endif
}

◆ TEST_F() [140/229]

tesseract::TEST_F	(	STATSTest	,
		BasicStats
	)

Definition at line 37 of file stats_test.cc.

                              {
  // Spot-checks the histogram held in the fixture member stats_: overall
  // total, the mode, and the count stored in pile 2.
  const auto total = stats_.get_total();
  EXPECT_EQ(37, total);
  const auto mode = stats_.mode();
  EXPECT_EQ(2, mode);
  const auto pile_two = stats_.pile_count(2);
  EXPECT_EQ(12, pile_two);
}

◆ TEST_F() [141/229]

tesseract::TEST_F	(	STATSTest	,
		InitStats
	)

Definition at line 43 of file stats_test.cc.

                             {
  // A default-constructed STATS must report zero for total, mode and pile 2.
  STATS fresh;
  const auto total = fresh.get_total();
  EXPECT_EQ(0, total);
  const auto mode = fresh.mode();
  EXPECT_EQ(0, mode);
  const auto pile_two = fresh.pile_count(2);
  EXPECT_EQ(0, pile_two);
}

◆ TEST_F() [142/229]

tesseract::TEST_F	(	STATSTest	,
		TopNModes
	)

Definition at line 51 of file stats_test.cc.

                             {
  // Asks the fixture histogram for its top three modes and checks the mean
  // (key) and total count (data) of each.
  std::vector<tesseract::KDPairInc<float, int> > modes;
  int num_modes = stats_.top_n_modes(3, modes);
  // Fatal checks: modes[0..2] are indexed below, so stop here rather than
  // read past the end of the vector if fewer modes came back.
  ASSERT_EQ(3, num_modes);
  ASSERT_GE(modes.size(), 3u);
  // Mode0 is 12 1 1 = 14 total count with a mean of 2 3/14.
  EXPECT_FLOAT_EQ(2.0f + 3.0f / 14, modes[0].key());
  EXPECT_EQ(14, modes[0].data());
  // Mode 1 is 2 10 1 = 13 total count with a mean of 5 12/13.
  EXPECT_FLOAT_EQ(5.0f + 12.0f / 13, modes[1].key());
  EXPECT_EQ(13, modes[1].data());
  // Mode 2 is 4 1 1 = 6 total count with a mean of 13.5.
  EXPECT_FLOAT_EQ(13.5f, modes[2].key());
  EXPECT_EQ(6, modes[2].data());
}

◆ TEST_F() [143/229]

tesseract::TEST_F	(	StridemapTest	,
		Indexing
	)

Definition at line 63 of file stridemap_test.cc.

                                {
  // This test verifies that with a batch of arrays of different sizes, the
  // iteration index each of them in turn, without going out of bounds.
#ifdef INCLUDE_TENSORFLOW
  // Four arrays of differing height/width; the third SetupArray argument of
  // each call equals the running element count of the arrays before it.
  std::vector<std::unique_ptr<xla::Array2D<int>>> arrays;
  arrays.push_back(SetupArray(3, 4, 0));
  arrays.push_back(SetupArray(4, 5, 12));
  arrays.push_back(SetupArray(4, 4, 32));
  arrays.push_back(SetupArray(3, 5, 48));
  // Collect (height, width) of every array to describe the batch.
  std::vector<std::pair<int, int>> h_w_sizes;
  for (size_t i = 0; i < arrays.size(); ++i) {
    h_w_sizes.emplace_back(arrays[i].get()->height(), arrays[i].get()->width());
  }
  StrideMap stride_map;
  stride_map.SetStride(h_w_sizes);
  StrideMap::Index index(stride_map);
  // Forward pass: pos counts the positions visited so far.
  int pos = 0;
  do {
    EXPECT_GE(index.t(), pos);
    // The array element addressed by the index must equal the visit count.
    EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
              pos);
    // IsLast must agree with the actual extent in each dimension.
    EXPECT_EQ(index.IsLast(FD_BATCH), index.index(FD_BATCH) == arrays.size() - 1);
    EXPECT_EQ(index.IsLast(FD_HEIGHT),
              index.index(FD_HEIGHT) == arrays[index.index(FD_BATCH)]->height() - 1);
    EXPECT_EQ(index.IsLast(FD_WIDTH),
              index.index(FD_WIDTH) == arrays[index.index(FD_BATCH)]->width() - 1);
    EXPECT_TRUE(index.IsValid());
    ++pos;
  } while (index.Increment());
  LOG(INFO) << "pos=" << pos;
  // Backward pass: start at the last position and count pos back down.
  index.InitToLast();
  do {
    --pos;
    EXPECT_GE(index.t(), pos);
    EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
              pos);
    // AddOffset is exercised on a copy, which is reset from index before each probe.
    StrideMap::Index copy(index);
    // Since a change in batch index changes the height and width, it isn't
    // necessarily true that the position is still valid, even when changing
    // to another valid batch index.
    if (index.IsLast(FD_BATCH)) {
      EXPECT_FALSE(copy.AddOffset(1, FD_BATCH));
    }
    copy = index;
    EXPECT_EQ(index.IsLast(FD_HEIGHT), !copy.AddOffset(1, FD_HEIGHT));
    copy = index;
    EXPECT_EQ(index.IsLast(FD_WIDTH), !copy.AddOffset(1, FD_WIDTH));
    copy = index;
    if (index.index(FD_BATCH) == 0) {
      EXPECT_FALSE(copy.AddOffset(-1, FD_BATCH));
    }
    copy = index;
    EXPECT_EQ(index.index(FD_HEIGHT) == 0, !copy.AddOffset(-1, FD_HEIGHT));
    copy = index;
    EXPECT_EQ(index.index(FD_WIDTH) == 0, !copy.AddOffset(-1, FD_WIDTH));
    // Offsets of +/-10 exceed every array dimension set up above.
    copy = index;
    EXPECT_FALSE(copy.AddOffset(10, FD_WIDTH));
    copy = index;
    EXPECT_FALSE(copy.AddOffset(-10, FD_HEIGHT));
    EXPECT_TRUE(index.IsValid());
  } while (index.Decrement());
#else
  LOG(INFO) << "Skip test because of missing xla::Array2D";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [144/229]

tesseract::TEST_F	(	StridemapTest	,
		Scaling
	)

Definition at line 130 of file stridemap_test.cc.

                               {
  // This test verifies that with a batch of arrays of different sizes, the
  // scaling/reduction functions work as expected.
#ifdef INCLUDE_TENSORFLOW
  std::vector<std::unique_ptr<xla::Array2D<int>>> arrays;
  arrays.push_back(SetupArray(3, 4, 0));  // 0-11
  arrays.push_back(SetupArray(4, 5, 12)); // 12-31
  arrays.push_back(SetupArray(4, 4, 32)); // 32-47
  arrays.push_back(SetupArray(3, 5, 48)); // 48-62
  std::vector<std::pair<int, int>> h_w_sizes;
  for (size_t i = 0; i < arrays.size(); ++i) {
    h_w_sizes.emplace_back(arrays[i].get()->height(), arrays[i].get()->width());
  }
  StrideMap stride_map;
  stride_map.SetStride(h_w_sizes);

  // Scale x by 2, keeping y the same.
  const std::vector<int> values_x2 = {0,  1,  4,  5,  8,  9,  12, 13, 17, 18, 22, 23, 27, 28,
                                      32, 33, 36, 37, 40, 41, 44, 45, 48, 49, 53, 54, 58, 59};
  StrideMap test_map(stride_map);
  test_map.ScaleXY(2, 1);
  // The index is built from test_map once and reused (via InitToFirst) after
  // every later change to test_map.
  StrideMap::Index index(test_map);

  // Walks `index` from its current position to the end, comparing the array
  // element addressed at each step with the matching entry of `expected`, and
  // finally checks that exactly expected.size() positions were visited.
  // Stops early instead of indexing past the end of `expected` if the map
  // yields more positions than anticipated.
  auto expect_sequence = [&arrays, &index](const std::vector<int> &expected) {
    size_t pos = 0;
    do {
      if (pos >= expected.size()) {
        ADD_FAILURE() << "StrideMap yielded more than " << expected.size() << " positions";
        return;
      }
      EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
                expected[pos]);
      ++pos;
    } while (index.Increment());
    EXPECT_EQ(pos, expected.size());
  };
  expect_sequence(values_x2);

  test_map = stride_map;
  // Scale y by 2, keeping x the same.
  const std::vector<int> values_y2 = {0,  1,  2,  3,  12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                                      32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51, 52};
  test_map.ScaleXY(1, 2);
  index.InitToFirst();
  expect_sequence(values_y2);

  test_map = stride_map;
  // Scale x and y by 2.
  const std::vector<int> values_xy2 = {0, 1, 12, 13, 17, 18, 32, 33, 36, 37, 48, 49};
  test_map.ScaleXY(2, 2);
  index.InitToFirst();
  expect_sequence(values_xy2);

  test_map = stride_map;
  // Reduce Width to 1.
  const std::vector<int> values_x_to_1 = {0, 4, 8, 12, 17, 22, 27, 32, 36, 40, 44, 48, 53, 58};
  test_map.ReduceWidthTo1();
  index.InitToFirst();
  expect_sequence(values_x_to_1);
#else
  LOG(INFO) << "Skip test because of missing xla::Array2D";
  GTEST_SKIP();
#endif
}

◆ TEST_F() [145/229]

tesseract::TEST_F	(	StringRendererTest	,
		ArabicBoxcharsInLTROrder
	)

Definition at line 205 of file stringrenderer_test.cc.

                                                     {
  // Renders a five-codepoint Arabic word and checks that the box-file text,
  // concatenated in box order, is the exact reverse of the input.
  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
  Image pix = nullptr;
  // Arabic letters should be in decreasing x-coordinates
  const char kArabicWord[] = "\u0644\u0627\u0641\u0643\u0631";
  const std::string kRevWord = "\u0631\u0643\u0641\u0627\u0644";
  renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);

  // Decode the box string to recover the text of each box.
  const std::string box_data = renderer_->GetBoxesStr();
  EXPECT_FALSE(box_data.empty());
  std::vector<std::string> box_texts;
  EXPECT_TRUE(
      ReadMemBoxes(0, false, box_data.c_str(), false, nullptr, &box_texts, nullptr, nullptr));

  // Join the box texts in the order they were read.
  std::string joined;
  for (size_t i = 0; i < box_texts.size(); ++i) {
    joined.append(box_texts[i].c_str());
  }
  // The string should come out perfectly reversed, despite there being a
  // ligature.
  EXPECT_EQ(joined, kRevWord);
  // Just to prove there was a ligature, the number of texts is less than the
  // number of unicodes.
  EXPECT_LT(box_texts.size(), 5);
  pix.destroy();
}

◆ TEST_F() [146/229]

tesseract::TEST_F	(	StringRendererTest	,
		DISABLED_DoesDropUncoveredChars
	)

Definition at line 431 of file stringrenderer_test.cc.

                                                            {
  // With drop_uncovered_chars enabled, a word containing a character the font
  // cannot render should still consume the whole input while producing boxes
  // only for the remaining characters.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 500, 200);
  renderer_->set_drop_uncovered_chars(true);
  const std::string input = "oﬀice";
  const std::string expected_boxes = "oice";
  const char *input_utf8 = input.c_str();
  const size_t input_len = input.length();

  // The font must be unable to handle the raw input.
  EXPECT_FALSE(renderer_->font().CanRenderString(input_utf8, input_len));
  EXPECT_FALSE(renderer_->font().CoversUTF8Text(input_utf8, input_len));

  Image pix = nullptr;
  int offset = renderer_->RenderToImage(input_utf8, input_len, &pix);
  // The returned offset must cover the full input, dropped character included.
  EXPECT_EQ(input.length(), offset);
  pix.destroy();

  const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
  ASSERT_EQ(expected_boxes.length(), boxes.size());
  for (size_t k = 0; k < boxes.size(); ++k) {
    EXPECT_EQ(std::string(1, expected_boxes[k]), boxes[k]->ch());
  }
}

◆ TEST_F() [147/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesClearBoxes
	)

Definition at line 295 of file stringrenderer_test.cc.

                                           {
  // Renders the same text twice with ClearBoxes() in between; the box count
  // after the second render must equal the count after the first.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  const size_t text_len = strlen(kEngText);
  Image pix = nullptr;

  // First render establishes the per-page box count.
  EXPECT_EQ(text_len, renderer_->RenderToImage(kEngText, text_len, &pix));
  pix.destroy();
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  const int boxes_after_first = renderer_->GetBoxes().size();

  // Second render after clearing must not accumulate on top of the first.
  renderer_->ClearBoxes();
  EXPECT_EQ(text_len, renderer_->RenderToImage(kEngText, text_len, &pix));
  pix.destroy();
  EXPECT_EQ(boxes_after_first, renderer_->GetBoxes().size());
}

◆ TEST_F() [148/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesHandleNewlineCharacters
	)

Definition at line 153 of file stringrenderer_test.cc.

                                                        {
  // Renders text padded with newlines and checks that the boxes carry only
  // the non-newline characters, in order.
  const char kRawText[] = "\n\n\n A \nB \nC \n\n\n";
  const char kStrippedText[] = " A B C "; // text with newline chars removed
  const size_t raw_len = strlen(kRawText);

  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  Image pix = nullptr;
  EXPECT_EQ(raw_len, renderer_->RenderToImage(kRawText, raw_len, &pix));
  EXPECT_TRUE(pix != nullptr);

  const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
  // 3 characters + 4 spaces => 7 boxes
  EXPECT_EQ(7, boxes.size());
  const bool count_matches = (boxes.size() == 7);
  if (count_matches) {
    // Compare each box against the corresponding stripped character.
    size_t k = 0;
    while (k < boxes.size()) {
      EXPECT_EQ(std::string(1, kStrippedText[k]), boxes[k]->ch());
      ++k;
    }
  }
  DisplayClusterBoxes(pix);
  pix.destroy();
}

◆ TEST_F() [149/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesKeepAllImageBoxes
	)

Definition at line 274 of file stringrenderer_test.cc.

                                                  {
  // Renders the same text twice without clearing: boxes must accumulate, with
  // each batch of boxes tagged with the page (render) it came from.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  Image pix = nullptr;
  int num_boxes_per_page = 0;
  const int kNumTrials = 2;
  for (int i = 0; i < kNumTrials; ++i) {
    EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
    EXPECT_TRUE(pix != nullptr);
    pix.destroy();
    EXPECT_GT(renderer_->GetBoxes().size(), 0);
    if (!num_boxes_per_page) {
      // The first render defines how many boxes one page produces.
      num_boxes_per_page = renderer_->GetBoxes().size();
    } else {
      // Fatal check: the loop below indexes boxes up to (i + 1) * per-page
      // count, so a shorter vector must stop the test rather than be read
      // out of bounds.
      ASSERT_EQ(static_cast<size_t>((i + 1) * num_boxes_per_page), renderer_->GetBoxes().size());
    }
    // Every box added by render i must carry page number i.
    for (int j = i * num_boxes_per_page; j < (i + 1) * num_boxes_per_page; ++j) {
      EXPECT_EQ(i, renderer_->GetBoxes()[j]->page());
    }
  }
}

◆ TEST_F() [150/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesLigatureTextForRendering
	)

Definition at line 309 of file stringrenderer_test.cc.

                                                         {
  // With add_ligatures enabled, rendering must still report the full input
  // length as consumed. The box-level ligature checks are compiled out.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  renderer_->set_add_ligatures(true);
  const size_t text_len = strlen(kEngNonLigatureText);
  Image pix = nullptr;
  const auto consumed = renderer_->RenderToImage(kEngNonLigatureText, text_len, &pix);
  EXPECT_EQ(text_len, consumed);
  pix.destroy();
#if 0 // not with NFC normalization
  // There should be one less box than letters due to the 'fi' ligature.
  EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
  // The output box text should be ligatured.
  EXPECT_STREQ("ﬁ", renderer_->GetBoxes()[0]->ch().c_str());
#endif
}

◆ TEST_F() [151/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesNotRenderWordJoiner
	)

Definition at line 415 of file stringrenderer_test.cc.

                                                    {
  // Inserts word joiners into a string, renders it, and checks that the boxes
  // match the original string character by character with no joiner among them.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 500, 200);
  const std::string word = "A- -B C-D A BC";
  const std::string joined_word = StringRenderer::InsertWordJoiners(word);
  const std::string kWordJoinerUTF8 = "\u2060";

  Image pix = nullptr;
  renderer_->RenderToImage(joined_word.c_str(), joined_word.length(), &pix);
  pix.destroy();

  const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
  // Fatal check: the loop below indexes `word` by box position.
  ASSERT_EQ(word.length(), boxes.size());
  for (size_t pos = 0; pos < boxes.size(); ++pos) {
    const std::string &rendered = boxes[pos]->ch();
    EXPECT_NE(kWordJoinerUTF8, rendered);
    EXPECT_EQ(word.substr(pos, 1), rendered);
  }
}

◆ TEST_F() [152/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesOutputBoxcharsInReadingOrder
	)

Definition at line 230 of file stringrenderer_test.cc.

                                                             {
  // Checks box ordering by x-coordinate for RTL, LTR and mixed-direction text.
  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
  Image pix = nullptr;

  // Arabic: consecutive boxes must have strictly decreasing x.
  const char kArabicWord[] = "والفكر";
  renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  // The same reference is reused after ClearBoxes() and re-rendering below.
  const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
  for (size_t cur = 1; cur < boxes.size(); ++cur) {
    const size_t prev = cur - 1;
    EXPECT_GT(boxes[prev]->box()->x, boxes[cur]->box()->x) << boxes[prev]->ch();
  }
  pix.destroy();

  // English: consecutive boxes must have strictly increasing x.
  const char kEnglishWord[] = "Google";
  renderer_->ClearBoxes();
  renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix);
  EXPECT_EQ(boxes.size(), strlen(kEnglishWord));
  for (size_t cur = 1; cur < boxes.size(); ++cur) {
    const size_t prev = cur - 1;
    EXPECT_LT(boxes[prev]->box()->x, boxes[cur]->box()->x) << boxes[prev]->ch();
  }
  pix.destroy();

  // Mixed text: Latin letters and digits ascend, Arabic letters descend.
  renderer_->ClearBoxes();
  renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix);
  EXPECT_LT(FindBoxCharXCoord(boxes, "a"), FindBoxCharXCoord(boxes, "b"));
  EXPECT_LT(FindBoxCharXCoord(boxes, "1"), FindBoxCharXCoord(boxes, "2"));
  EXPECT_GT(FindBoxCharXCoord(boxes, "و"), FindBoxCharXCoord(boxes, "ر"));
  pix.destroy();
}

◆ TEST_F() [153/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderAllFontsToImage
	)

Definition at line 395 of file stringrenderer_test.cc.

                                                      {
  // Repeatedly calls RenderAllFontsToImage on the not-yet-consumed tail of
  // kEngText until the accumulated offset covers the whole string.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 1200, 1200);
  const size_t total_len = strlen(kEngText);
  size_t offset = 0;
  std::string font_used;
  do {
    Image pix = nullptr;
    font_used.clear();
    const char *remaining = kEngText + offset;
    offset += renderer_->RenderAllFontsToImage(1.0, remaining, strlen(remaining),
                                               &font_used, &pix);
    // Image and font name are only checked while text is still left to render.
    if (offset < total_len) {
      EXPECT_TRUE(pix != nullptr);
      EXPECT_STRNE("", font_used.c_str());
    }
    if (FLAGS_display) {
      pixDisplay(pix, 0, 0);
    }
    pix.destroy();
  } while (offset < total_len);
}

◆ TEST_F() [154/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderLigatures
	)

Definition at line 173 of file stringrenderer_test.cc.

                                                {
  // Renders the Arabic lam-alef pair and expects it to come back as a single
  // box whose text equals the input; then renders a longer mixed string purely
  // for (optional) display.
  renderer_ = std::make_unique<StringRenderer>("Arab 12", 600, 250);
  const char kArabicLigature[] = "لا";

  Image pix = nullptr;
  EXPECT_EQ(strlen(kArabicLigature),
            renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
  EXPECT_EQ(1, boxes.size());
  // The size checks above are non-fatal, so guard the element access: an empty
  // box list must produce a test failure, not an out-of-bounds read. A guard
  // (rather than ASSERT_*) keeps the pix.destroy() below reachable.
  if (!boxes.empty()) {
    EXPECT_TRUE(boxes[0]->box() != nullptr);
    EXPECT_STREQ(kArabicLigature, boxes[0]->ch().c_str());
  }
  DisplayClusterBoxes(pix);
  pix.destroy();

  renderer_ = std::make_unique<StringRenderer>("Arab 12", 600, 250);
  const char kArabicMixedText[] = "والفكر والصراع 1234,\nوالفكر لا والصراع";
  renderer_->RenderToImage(kArabicMixedText, strlen(kArabicMixedText), &pix);
  DisplayClusterBoxes(pix);
  pix.destroy();
}

◆ TEST_F() [155/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderToImage
	)

Definition at line 82 of file stringrenderer_test.cc.

                                              {
  // Smoke test: render sample text in several scripts, each with a fresh
  // renderer, and check that the whole input is consumed and boxes are produced.
  Image pix = nullptr;

  // English
  const size_t eng_len = strlen(kEngText);
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  EXPECT_EQ(eng_len, renderer_->RenderToImage(kEngText, eng_len, &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();

  // Korean
  const size_t kor_len = strlen(kKorText);
  renderer_ = std::make_unique<StringRenderer>("UnBatang 10", 600, 600);
  EXPECT_EQ(kor_len, renderer_->RenderToImage(kKorText, kor_len, &pix));
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();

  // Hindi
  const size_t hin_len = strlen(kHinText);
  renderer_ = std::make_unique<StringRenderer>("Lohit Hindi 10", 600, 600);
  EXPECT_EQ(hin_len, renderer_->RenderToImage(kHinText, hin_len, &pix));
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();

  // RTL text
  const size_t ara_len = strlen(kArabicText);
  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
  EXPECT_EQ(ara_len, renderer_->RenderToImage(kArabicText, ara_len, &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();

  // Mixed direction Arabic + english text
  const size_t mixed_len = strlen(kMixedText);
  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
  EXPECT_EQ(mixed_len, renderer_->RenderToImage(kMixedText, mixed_len, &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();
}

◆ TEST_F() [156/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderToImageWithUnderline
	)

Definition at line 120 of file stringrenderer_test.cc.

                                                           {
  // Renders kEngText three times with different underline probabilities and
  // checks each time that the full text is consumed and boxes are produced.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  const size_t text_len = strlen(kEngText);
  Image pix = nullptr;

  // Pass 1: underline all words but NOT intervening spaces.
  renderer_->set_underline_start_prob(1.0);
  renderer_->set_underline_continuation_prob(0);
  EXPECT_EQ(text_len, renderer_->RenderToImage(kEngText, text_len, &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();
  renderer_->ClearBoxes();

  // Pass 2: underline all words AND intervening spaces.
  renderer_->set_underline_start_prob(1.0);
  renderer_->set_underline_continuation_prob(1.0);
  EXPECT_EQ(text_len, renderer_->RenderToImage(kEngText, text_len, &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();
  renderer_->ClearBoxes();

  // Pass 3: underline words and intervening spaces with 0.5 probability.
  renderer_->set_underline_start_prob(0.5);
  renderer_->set_underline_continuation_prob(0.5);
  EXPECT_EQ(text_len, renderer_->RenderToImage(kEngText, text_len, &pix));
  EXPECT_TRUE(pix != nullptr);
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();
}

◆ TEST_F() [157/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderVerticalText
	)

Definition at line 262 of file stringrenderer_test.cc.

                                                   {
  // Korean sample rendered with vertical text enabled: the whole input must be
  // consumed and at least one box produced.
  renderer_ = std::make_unique<StringRenderer>("UnBatang 10", 600, 600);
  renderer_->set_vertical_text(true);
  const size_t text_len = strlen(kKorText);
  Image pix = nullptr;
  EXPECT_EQ(text_len, renderer_->RenderToImage(kKorText, text_len, &pix));
  EXPECT_GT(renderer_->GetBoxes().size(), 0);
  DisplayClusterBoxes(pix);
  pix.destroy();
}

◆ TEST_F() [158/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderWordBoxes
	)

Definition at line 345 of file stringrenderer_test.cc.

                                                {
  // In word-box mode the renderer is expected to emit one box per word with a
  // box-less " " entry between consecutive words.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  renderer_->set_output_word_boxes(true);
  Image pix = nullptr;
  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
  pix.destroy();
  // Verify #boxchars = #words + #spaces
  std::vector<std::string> words = split(kEngText, ' ');
  const int kNumSpaces = words.size() - 1;
  const int kExpectedNumBoxes = words.size() + kNumSpaces;
  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
  // Fatal on mismatch: the loop below indexes `words` by box position, so a
  // surplus of boxes would otherwise read past the end of `words`.
  ASSERT_EQ(kExpectedNumBoxes, boxchars.size());
  // Verify content of words and spaces
  for (size_t i = 0; i < boxchars.size(); i += 2) {
    EXPECT_EQ(words[i / 2], boxchars[i]->ch());
    // `i + 1 < size` avoids the unsigned wrap-around of `size - 1` and matches
    // the multi-line variant of this test.
    if (i + 1 < boxchars.size()) {
      EXPECT_EQ(" ", boxchars[i + 1]->ch());
      EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
    }
  }
}

◆ TEST_F() [159/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRenderWordBoxesFromMultiLineText
	)

Definition at line 367 of file stringrenderer_test.cc.

                                                                 {
  // Word-box mode on text containing a newline: expects one box per word with
  // a box-less " " entry between consecutive words, including across the line
  // break.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  renderer_->set_output_word_boxes(true);
  Image pix = nullptr;
  const char kMultilineText[] = "the quick brown fox\njumps over the lazy dog";
  // Pass the length of the text actually being rendered. The previous code
  // passed strlen(kEngText), which is only correct if the two strings happen
  // to have the same length.
  EXPECT_EQ(strlen(kMultilineText),
            renderer_->RenderToImage(kMultilineText, strlen(kMultilineText), &pix));
  pix.destroy();
  // Verify #boxchars = #words + #separators (spaces and newlines)
  std::vector<std::string> words;
  for (auto &line : split(kMultilineText, '\n')) {
    for (auto &word : split(line, ' ')) {
      words.push_back(word);
    }
  }
  const int kNumSeparators = words.size() - 1;
  const int kExpectedNumBoxes = words.size() + kNumSeparators;
  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
  // Fatal on mismatch: the loop below indexes `words` by box position, so a
  // surplus of boxes would otherwise read past the end of `words`.
  ASSERT_EQ(kExpectedNumBoxes, boxchars.size());
  // Verify content of words and separators
  for (size_t i = 0; i < boxchars.size(); i += 2) {
    EXPECT_EQ(words[i / 2], boxchars[i]->ch());
    if (i + 1 < boxchars.size()) {
      EXPECT_EQ(" ", boxchars[i + 1]->ch());
      EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
    }
  }
}

◆ TEST_F() [160/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesRetainInputLigatureForRendering
	)

Definition at line 324 of file stringrenderer_test.cc.

                                                                {
  // Input that already contains a ligature must be rendered with the ligature
  // kept as a single box.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  Image pix = nullptr;
  EXPECT_EQ(strlen(kEngLigatureText),
            renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText), &pix));
  pix.destroy();
  // There should be one fewer box than letters due to the 'fi' ligature.
  EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
  // The size check above is non-fatal; stop here rather than index an empty
  // vector. pix has already been released, so returning early leaks nothing.
  ASSERT_FALSE(renderer_->GetBoxes().empty());
  // The output box text should be ligatured.
  EXPECT_STREQ("\uFB01", renderer_->GetBoxes()[0]->ch().c_str());
}

◆ TEST_F() [161/229]

tesseract::TEST_F	(	StringRendererTest	,
		DoesStripUnrenderableWords
	)

Definition at line 336 of file stringrenderer_test.cc.

                                                       {
  // Verdana should only be able to render the english letters and numbers in
  // the mixed text, so stripping must report a positive count and leave only
  // those parts behind.
  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
  std::string stripped(kMixedText);
  const auto strip_result = renderer_->StripUnrenderableWords(&stripped);
  EXPECT_GT(strip_result, 0);
  EXPECT_EQ(" 123  abc", stripped);
}

◆ TEST_F() [162/229]

tesseract::TEST_F	(	StructuredTableTest	,
		CountHorizontalIntersectionsAll
	)

Definition at line 260 of file tablerecog_test.cc.

                                                             {
  // Two partitions side by side with different vertical extents; probe several
  // y values and check how many partitions each horizontal probe reports.
  const TBOX table_bounds(0, 0, 1000, 1000);
  table_->set_bounding_box(table_bounds);
  InsertPartition(0, 3, 100, 10);
  InsertPartition(110, 5, 200, 16);

  EXPECT_EQ(0, table_->CountHorizontalIntersections(0));
  EXPECT_EQ(1, table_->CountHorizontalIntersections(4));
  EXPECT_EQ(2, table_->CountHorizontalIntersections(8));
  EXPECT_EQ(1, table_->CountHorizontalIntersections(12));
  EXPECT_EQ(0, table_->CountHorizontalIntersections(20));
}

◆ TEST_F() [163/229]

tesseract::TEST_F	(	StructuredTableTest	,
		CountVerticalIntersectionsAll
	)

Definition at line 245 of file tablerecog_test.cc.

                                                           {
  // Two stacked partitions of different widths; probe several x values and
  // check how many partitions each vertical probe reports.
  const TBOX table_bounds(0, 0, 1000, 1000);
  table_->set_bounding_box(table_bounds);
  InsertPartition(0, 0, 100, 10);
  InsertPartition(1, 12, 43, 21);

  // Expected count is 2 for these probes.
  EXPECT_EQ(2, table_->CountVerticalIntersections(4));
  EXPECT_EQ(2, table_->CountVerticalIntersections(20));
  EXPECT_EQ(2, table_->CountVerticalIntersections(40));
  // Expected count is 1 for these probes.
  EXPECT_EQ(1, table_->CountVerticalIntersections(50));
  EXPECT_EQ(1, table_->CountVerticalIntersections(60));
  EXPECT_EQ(1, table_->CountVerticalIntersections(80));
  EXPECT_EQ(1, table_->CountVerticalIntersections(95));
  // Expected count is 0 for these probes.
  EXPECT_EQ(0, table_->CountVerticalIntersections(104));
  EXPECT_EQ(0, table_->CountVerticalIntersections(150));
}

◆ TEST_F() [164/229]

tesseract::TEST_F	(	StructuredTableTest	,
		FindWhitespacedColumnsBasic
	)

Definition at line 313 of file tablerecog_test.cc.

                                                         {
  // Runs whitespace-based column detection over the fixture's partitions and
  // checks the resulting column boundaries via ExpectCellX.
  InsertPartitions();
  table_->set_bounding_box(TBOX(0, 0, 500, 800));
  table_->FindWhitespacedColumns();
  table_->ExpectCellX(1, 25, 25, 475, 499);
}

◆ TEST_F() [165/229]

tesseract::TEST_F	(	StructuredTableTest	,
		FindWhitespacedColumnsSorted
	)

Definition at line 321 of file tablerecog_test.cc.

                                                          {
  // Runs whitespace-based column detection over the fixture's partitions and
  // checks the x boundaries via ExpectSortedX.
  InsertPartitions();
  table_->set_bounding_box(TBOX(0, 0, 500, 800));
  table_->FindWhitespacedColumns();
  table_->ExpectSortedX();
}

◆ TEST_F() [166/229]

tesseract::TEST_F	(	StructuredTableTest	,
		VerifyLinedTableBasicPass
	)

Definition at line 272 of file tablerecog_test.cc.

                                                       {
  // Injects a regular grid of cell boundaries, adds the fixture's lines and
  // in-line cells, and expects lined-table verification to succeed.
  const int kFirstY = 10, kLastY = 50, kStepY = 10;
  const int kFirstX = 100, kLastX = 450, kStepX = 50;
  for (int row_y = kFirstY; row_y <= kLastY; row_y += kStepY) {
    table_->InjectCellY(row_y);
  }
  for (int col_x = kFirstX; col_x <= kLastX; col_x += kStepX) {
    table_->InjectCellX(col_x);
  }
  InsertLines();
  InsertCellsInLines();
  table_->set_bounding_box(line_box_);
  EXPECT_TRUE(table_->VerifyLinedTableCells());
}

◆ TEST_F() [167/229]

tesseract::TEST_F	(	StructuredTableTest	,
		VerifyLinedTableHorizontalFail
	)

Definition at line 285 of file tablerecog_test.cc.

                                                            {
  // Same grid as the passing case plus one extra partition (101,11)-(299,19);
  // lined-table verification is expected to fail.
  const int kFirstY = 10, kLastY = 50, kStepY = 10;
  const int kFirstX = 100, kLastX = 450, kStepX = 50;
  for (int row_y = kFirstY; row_y <= kLastY; row_y += kStepY) {
    table_->InjectCellY(row_y);
  }
  for (int col_x = kFirstX; col_x <= kLastX; col_x += kStepX) {
    table_->InjectCellX(col_x);
  }
  InsertLines();
  InsertCellsInLines();
  InsertPartition(101, 11, 299, 19);
  table_->set_bounding_box(line_box_);
  EXPECT_FALSE(table_->VerifyLinedTableCells());
}

◆ TEST_F() [168/229]

tesseract::TEST_F	(	StructuredTableTest	,
		VerifyLinedTableVerticalFail
	)

Definition at line 299 of file tablerecog_test.cc.

                                                          {
  // Same grid as the passing case plus one extra partition (151,21)-(199,39);
  // lined-table verification is expected to fail.
  const int kFirstY = 10, kLastY = 50, kStepY = 10;
  const int kFirstX = 100, kLastX = 450, kStepX = 50;
  for (int row_y = kFirstY; row_y <= kLastY; row_y += kStepY) {
    table_->InjectCellY(row_y);
  }
  for (int col_x = kFirstX; col_x <= kLastX; col_x += kStepX) {
    table_->InjectCellX(col_x);
  }
  InsertLines();
  InsertCellsInLines();
  InsertPartition(151, 21, 199, 39);
  table_->set_bounding_box(line_box_);
  EXPECT_FALSE(table_->VerifyLinedTableCells());
}

◆ TEST_F() [169/229]

tesseract::TEST_F	(	TableFinderTest	,
		GapInXProjectionEdgeGap
	)

Definition at line 141 of file tablefind_test.cc.

                                                 {
  // Projection that is low (2) in the first and last ten bins and high (10)
  // in between: low values only at the edges must not be reported as a gap.
  const int kSize = 100;
  int data[kSize];
  for (int bin = 0; bin < kSize; ++bin) {
    const bool at_edge = bin < 10 || bin >= 90;
    data[bin] = at_edge ? 2 : 10;
  }
  EXPECT_FALSE(finder_->GapInXProjection(data, 100));
}

◆ TEST_F() [170/229]

tesseract::TEST_F	(	TableFinderTest	,
		GapInXProjectionExists
	)

Definition at line 155 of file tablefind_test.cc.

                                                {
  // Projection that is high (10) in the first and last ten bins and low (2)
  // in between: the low interior must be reported as a gap.
  const int kSize = 100;
  int data[kSize];
  for (int bin = 0; bin < kSize; ++bin) {
    const bool at_edge = bin < 10 || bin >= 90;
    data[bin] = at_edge ? 10 : 2;
  }
  EXPECT_TRUE(finder_->GapInXProjection(data, 100));
}

◆ TEST_F() [171/229]

tesseract::TEST_F	(	TableFinderTest	,
		GapInXProjectionNoGap
	)

Definition at line 133 of file tablefind_test.cc.

                                               {
  // A uniform projection (every bin is 10) must not be reported as a gap.
  const int kSize = 100;
  int data[kSize];
  for (int bin = 0; bin < kSize; ++bin) {
    data[bin] = 10;
  }
  EXPECT_FALSE(finder_->GapInXProjection(data, 100));
}

◆ TEST_F() [172/229]

tesseract::TEST_F	(	TableFinderTest	,
		HasLeaderAdjacentNoOverlap
	)

Definition at line 181 of file tablefind_test.cc.

                                                    {
  // One leader partition is inserted at (90,10)-(150,15); each MakePartition
  // call then replaces the fixture's partition_ that is tested against it.
  InsertLeaderPartition(90, 10, 150, 15);
  // Partition ending at x=85, spanning y=10..20: expected adjacent.
  MakePartition(0, 10, 85, 20);
  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
  // Partition spanning y=25..40: expected not adjacent.
  MakePartition(0, 25, 100, 40);
  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
  // Partition spanning y=0..10: expected not adjacent.
  MakePartition(0, 0, 100, 10);
  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
  // TODO(nbeato): is this a useful metric? case fails
  // MakePartition(160, 0, 200, 15);  // leader is primarily above it
  // EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
}

◆ TEST_F() [173/229]

tesseract::TEST_F	(	TableFinderTest	,
		HasLeaderAdjacentOverlapping
	)

Definition at line 169 of file tablefind_test.cc.

                                                      {
  // One leader partition is inserted at (90,0)-(150,5); each MakePartition
  // call then replaces the fixture's partition_ that is tested against it.
  InsertLeaderPartition(90, 0, 150, 5);
  // Box reaching x=100, past the leader's left edge (x=90): expected adjacent.
  MakePartition(0, 0, 100, 10);
  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
  // Box spanning y=25..40, above the leader's y-range (0..5): expected not adjacent.
  MakePartition(0, 25, 100, 40);
  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
  // Box starting at x=145, before the leader's right edge (x=150): expected adjacent.
  MakePartition(145, 0, 200, 20);
  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
  // Box (40,0)-(50,4), left of the leader on the same rows: expected adjacent.
  MakePartition(40, 0, 50, 4);
  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
}

◆ TEST_F() [174/229]

tesseract::TEST_F	(	TableFinderTest	,
		HasLeaderAdjacentPreservesColumns
	)

Definition at line 194 of file tablefind_test.cc.

                                                           {
  // Same idea as the other HasLeaderAdjacent tests, but both helpers receive
  // two extra trailing arguments. NOTE(review): the test name suggests these
  // are first/last column indices - confirm against the fixture helpers.
  InsertLeaderPartition(90, 0, 150, 5, 1, 2);
  MakePartition(0, 0, 85, 10, 0, 0);
  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
  MakePartition(0, 0, 100, 10, 0, 1);
  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
  MakePartition(0, 0, 200, 10, 0, 5);
  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
  MakePartition(155, 0, 200, 10, 5, 5);
  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
}

◆ TEST_F() [175/229]

tesseract::TEST_F	(	TableFinderTest	,
		SplitAndInsertFragmentedPartitionsBasicFail
	)

Definition at line 248 of file tablefind_test.cc.

                                                                     {
  // Builds one text partition from evenly spaced fake blobs (one every 5 px,
  // each 3 px wide) and expects the split step to keep it as a single
  // partition.
  finder_->set_global_median_blob_width(3);
  finder_->set_global_median_xheight(10);
 
  TBOX part_box(10, 5, 100, 15);
  auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  all->set_type(PT_FLOWING_TEXT);
  all->set_blob_type(BRT_TEXT);
  all->set_flow(BTFT_CHAIN);
  all->set_left_margin(10);
  all->set_right_margin(100);
  // blob_box keeps part_box's vertical extent; only left/right change per blob.
  TBOX blob_box = part_box;
  for (int i = 10; i <= 95; i += 5) {
    blob_box.set_left(i + 1);
    blob_box.set_right(i + 4);
    all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
  }
  // TODO(nbeato): Ray's newer code...
  // all->ClaimBoxes();
  all->ComputeLimits();     // This is to make sure median info is set.
  InsertTextPartition(all); // This is to delete blobs
  // The copy shares the blobs with `all` without owning them.
  ColPartition *fragment_me = all->CopyButDontOwnBlobs();
 
  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
  finder_->ExpectPartition(TBOX(11, 5, 99, 15));
  finder_->ExpectPartitionCount(1);
}

◆ TEST_F() [176/229]

tesseract::TEST_F	(	TableFinderTest	,
		SplitAndInsertFragmentedPartitionsBasicPass
	)

Definition at line 208 of file tablefind_test.cc.

                                                                     {
  // Builds one text partition from three groups of fake blobs separated by
  // wide horizontal gaps and expects the split step to produce one partition
  // per group.
  finder_->set_global_median_blob_width(3);
  finder_->set_global_median_xheight(10);
 
  TBOX part_box(10, 5, 100, 15);
  auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  all->set_type(PT_FLOWING_TEXT);
  all->set_blob_type(BRT_TEXT);
  all->set_flow(BTFT_CHAIN);
  all->set_left_margin(10);
  all->set_right_margin(100);
  // blob_box keeps part_box's vertical extent; only left/right change per blob.
  TBOX blob_box = part_box;
  // Group 1: blobs spanning x = 11..24.
  for (int i = 10; i <= 20; i += 5) {
    blob_box.set_left(i + 1);
    blob_box.set_right(i + 4);
    all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
  }
  // Group 2: blobs spanning x = 36..59.
  for (int i = 35; i <= 55; i += 5) {
    blob_box.set_left(i + 1);
    blob_box.set_right(i + 4);
    all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
  }
  // Group 3: blobs spanning x = 81..99.
  for (int i = 80; i <= 95; i += 5) {
    blob_box.set_left(i + 1);
    blob_box.set_right(i + 4);
    all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
  }
  // TODO(nbeato): Ray's newer code...
  // all->ClaimBoxes();
  all->ComputeLimits();     // This is to make sure median info is set.
  InsertTextPartition(all); // This is to delete blobs
  // The copy shares the blobs with `all` without owning them.
  ColPartition *fragment_me = all->CopyButDontOwnBlobs();
 
  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
  finder_->ExpectPartition(TBOX(11, 5, 24, 15));
  finder_->ExpectPartition(TBOX(36, 5, 59, 15));
  finder_->ExpectPartition(TBOX(81, 5, 99, 15));
  finder_->ExpectPartitionCount(3);
}

◆ TEST_F() [177/229]

tesseract::TEST_F	(	TableRecognizerTest	,
		FindLinesBoundingBoxBasic
	)

Definition at line 196 of file tablerecog_test.cc.

                                                       {
  // Starting from an initial guess box, FindLinesBoundingBox must succeed and
  // rewrite the box to match the fixture's line_box_ on all four sides.
  InsertLines();
  TBOX guess(0, 0, 200, 50);
  EXPECT_TRUE(recognizer_->FindLinesBoundingBox(&guess));
  EXPECT_EQ(line_box_.left(), guess.left());
  EXPECT_EQ(line_box_.right(), guess.right());
  EXPECT_EQ(line_box_.bottom(), guess.bottom());
  EXPECT_EQ(line_box_.top(), guess.top());
}

◆ TEST_F() [178/229]

tesseract::TEST_F	(	TableRecognizerTest	,
		HasSignificantLinesBasicFail
	)

Definition at line 184 of file tablerecog_test.cc.

                                                          {
  // The region (370,35)-(500,45) is expected not to have significant lines.
  InsertLines();
  const TBOX region(370, 35, 500, 45);
  EXPECT_FALSE(recognizer_->HasSignificantLines(region));
}

◆ TEST_F() [179/229]

tesseract::TEST_F	(	TableRecognizerTest	,
		HasSignificantLinesBasicPass
	)

Definition at line 175 of file tablerecog_test.cc.

                                                          {
  // The exact line box, a larger box, and a smaller box are all expected to
  // have significant lines.
  InsertLines();
  const TBOX larger_guess(90, 5, 490, 70);
  const TBOX smaller_guess(120, 15, 370, 45);
  EXPECT_TRUE(recognizer_->HasSignificantLines(line_box_));
  EXPECT_TRUE(recognizer_->HasSignificantLines(larger_guess));
  EXPECT_TRUE(recognizer_->HasSignificantLines(smaller_guess));
}

◆ TEST_F() [180/229]

tesseract::TEST_F	(	TableRecognizerTest	,
		HasSignificantLinesHorizontalOnlyFails
	)

Definition at line 190 of file tablerecog_test.cc.

                                                                    {
  // The region (0,100)-(200,200) is expected not to have significant lines.
  InsertLines();
  const TBOX region(0, 100, 200, 200);
  EXPECT_FALSE(recognizer_->HasSignificantLines(region));
}

◆ TEST_F() [181/229]

tesseract::TEST_F	(	TableRecognizerTest	,
		RecognizeLinedTableBasic
	)

Definition at line 207 of file tablerecog_test.cc.

                                                      {
  // Recognizes a lined table from a guess box and checks that the resulting
  // table matches the fixture's line box and has a 7 x 4 grid of cells.
  InsertLines();
  const TBOX guess(120, 15, 370, 45);
  tesseract::StructuredTable table;
  table.set_text_grid(text_grid_.get());
  table.set_line_grid(line_grid_.get());

  EXPECT_TRUE(recognizer_->RecognizeLinedTable(guess, &table));

  // Geometry: identical to line_box_.
  EXPECT_EQ(line_box_.bottom(), table.bounding_box().bottom());
  EXPECT_EQ(line_box_.top(), table.bounding_box().top());
  EXPECT_EQ(line_box_.left(), table.bounding_box().left());
  EXPECT_EQ(line_box_.right(), table.bounding_box().right());
  EXPECT_EQ(line_box_.area(), table.bounding_box().area());

  // Structure: 7 columns x 4 rows = 28 cells, flagged as lined.
  EXPECT_EQ(7, table.column_count());
  EXPECT_EQ(4, table.row_count());
  EXPECT_EQ(28, table.cell_count());
  EXPECT_TRUE(table.is_lined());
}

◆ TEST_F() [182/229]

tesseract::TEST_F	(	TableRecognizerTest	,
		RecognizeWhitespacedTableBasic
	)

Definition at line 226 of file tablerecog_test.cc.

                                                            {
  // Recognizes a whitespace-delimited table from the fixture's partitions and
  // checks the resulting bounding box and grid dimensions.
  InsertPartitions();
  const TBOX guess(0, 0, 500, 800);

  tesseract::StructuredTable table;
  table.set_text_grid(text_grid_.get());
  table.set_line_grid(line_grid_.get());
  EXPECT_TRUE(recognizer_->RecognizeWhitespacedTable(guess, &table));

  // Geometry: the box is expected to be the guess shrunk by one on every side.
  EXPECT_EQ(1, table.bounding_box().bottom());
  EXPECT_EQ(799, table.bounding_box().top());
  EXPECT_EQ(1, table.bounding_box().left());
  EXPECT_EQ(499, table.bounding_box().right());
  EXPECT_EQ(798 * 498, table.bounding_box().area());

  // Structure: expected counts written in terms of the 500 x 800 guess box.
  EXPECT_EQ(500 / 25, table.column_count());
  EXPECT_EQ(800 / 20, table.row_count());
  EXPECT_EQ(500 * 800 / 20 / 25, table.cell_count());
  EXPECT_FALSE(table.is_lined());
}

◆ TEST_F() [183/229]

tesseract::TEST_F	(	TabVectorTest	,
		SetStartEndPointsMatch
	)

Definition at line 38 of file tabvector_test.cc.

                                              {
  // Setter/getter round trip for the start and end points of a TabVector.
  vector_ = std::make_unique<TabVector>();
  const ICOORD start(51, 65);
  const ICOORD end(7568, 234);
  // Test coordinates individually to avoid adding an ostream operator
  // explicitly to the ICOORD class (Droid doesn't support it).
  vector_->set_startpt(start);
  EXPECT_EQ(start.x(), vector_->startpt().x());
  EXPECT_EQ(start.y(), vector_->startpt().y());

  vector_->set_endpt(end);
  EXPECT_EQ(end.x(), vector_->endpt().x());
  EXPECT_EQ(end.y(), vector_->endpt().y());
}

◆ TEST_F() [184/229]

tesseract::TEST_F	(	TabVectorTest	,
		VOverlapInRangeSimple
	)

Definition at line 107 of file tabvector_test.cc.

                                             {
  // Vector from (0,0) to (100,100); query ranges lie inside its y-extent.
  MakeSimpleTabVector(0, 0, 100, 100);
  EXPECT_EQ(80, vector_->VOverlap(90, 10));
  EXPECT_EQ(100, vector_->VOverlap(100, 0));
}

◆ TEST_F() [185/229]

tesseract::TEST_F	(	TabVectorTest	,
		VOverlapOutOfRange
	)

Definition at line 115 of file tabvector_test.cc.

                                          {
  // Vector spans y = 10..90; a query over 0..100 is expected to report an
  // overlap of 80.
  MakeSimpleTabVector(0, 10, 100, 90);
  EXPECT_EQ(80, vector_->VOverlap(100, 0));
}

◆ TEST_F() [186/229]

tesseract::TEST_F	(	TabVectorTest	,
		XAtY45DegreeSlopeInRangeExact
	)

Definition at line 52 of file tabvector_test.cc.

                                                     {
  // On the diagonal from (0,0) to (100,100), XAtY(y) must equal y for every
  // integer y in range.
  MakeSimpleTabVector(0, 0, 100, 100);
  for (int y = 0; y <= 100; ++y) {
    EXPECT_EQ(y, vector_->XAtY(y));
  }
}

◆ TEST_F() [187/229]

tesseract::TEST_F	(	TabVectorTest	,
		XAtYHorizontal
	)

Definition at line 69 of file tabvector_test.cc.

                                      {
  // Horizontal vector at an arbitrary height; XAtY is expected to return the
  // start x (0) both on the line and at a y that is not on it.
  const int line_y = 76; // arbitrary
  MakeSimpleTabVector(0, line_y, 100, line_y);
  EXPECT_EQ(0, vector_->XAtY(line_y));
  // TODO(nbeato): What's the failure condition?
  // Undefined! Should not pass! Allow until resolved answer.
  EXPECT_EQ(0, vector_->XAtY(10));
}

◆ TEST_F() [188/229]

tesseract::TEST_F	(	TabVectorTest	,
		XAtYHorizontalInRangeExact
	)

Definition at line 99 of file tabvector_test.cc.

                                                  {
  // Horizontal vector from x=50 to x=150; querying at its own y is expected
  // to return the start x.
  const int line_y = 120; // Arbitrary choice
  MakeSimpleTabVector(50, line_y, 150, line_y);

  EXPECT_EQ(50, vector_->XAtY(line_y));
}

◆ TEST_F() [189/229]

tesseract::TEST_F	(	TabVectorTest	,
		XAtYLargeNumbers
	)

Definition at line 90 of file tabvector_test.cc.

                                        {
  // Assume a document is 800 DPI,
  // the width of a page is 10 inches across (8000 pixels), and
  // the height of the page is 15 inches (12000 pixels).
  MakeSimpleTabVector(7804, 504, 7968, 11768); // Arbitrary for vertical line
  // y = 6136 is the midpoint of 504..11768, so x should be the midpoint of
  // 7804..7968, i.e. 7886.
  EXPECT_EQ(7886, vector_->XAtY(6136));
}

◆ TEST_F() [190/229]

tesseract::TEST_F	(	TabVectorTest	,
		XAtYRoundingSimple
	)

Definition at line 78 of file tabvector_test.cc.

                                          {
  // Steep vector from (0,0) to (2,10000): checks the integer x returned just
  // below and just above the y where the exact x crosses 0.5, and near both
  // ends of the vector.
  MakeSimpleTabVector(0, 0, 2, 10000);
  EXPECT_EQ(0, vector_->XAtY(1));
  EXPECT_EQ(0, vector_->XAtY(4999));
  EXPECT_EQ(1, vector_->XAtY(5001));
  EXPECT_EQ(1, vector_->XAtY(9999));
}

◆ TEST_F() [191/229]

tesseract::TEST_F	(	TabVectorTest	,
		XAtYVerticalInRangeExact
	)

Definition at line 60 of file tabvector_test.cc.

                                                {
  // For a vertical vector, XAtY must return the vector's x at every y in range.
  const int line_x = 120; // Arbitrary choice
  MakeSimpleTabVector(line_x, 0, line_x, 100);
  for (int y = 0; y <= 100; ++y) {
    EXPECT_EQ(line_x, vector_->XAtY(y));
  }
}

◆ TEST_F() [192/229]

tesseract::TEST_F	(	TabVectorTest	,
		XYFlip
	)

Definition at line 121 of file tabvector_test.cc.

                              {
  // XYFlip must swap the x and y coordinates of both end points:
  // (1,2)-(3,4) becomes (2,1)-(4,3).
  MakeSimpleTabVector(1, 2, 3, 4);
  vector_->XYFlip();

  // Start point.
  EXPECT_EQ(2, vector_->startpt().x());
  EXPECT_EQ(1, vector_->startpt().y());
  // End point.
  EXPECT_EQ(4, vector_->endpt().x());
  EXPECT_EQ(3, vector_->endpt().y());
}

◆ TEST_F() [193/229]

tesseract::TEST_F	(	TatweelTest	,
		DictIgnoresTatweel
	)

Definition at line 76 of file tatweel_test.cc.

                                        {
  // This test verifies that the dictionary ignores the Tatweel character:
  // every word of the Arabic word list is added to a trie and must then be
  // found again.
  tesseract::Trie trie(tesseract::DAWG_TYPE_WORD, "ara", SYSTEM_DAWG_PERM, unicharset_.size(), 0);
  const std::string wordlist_path = TestDataNameToPath("ara.wordlist");
  if (!file_exists(wordlist_path.c_str())) {
    LOG(INFO) << "Skip test because of missing " << wordlist_path;
    GTEST_SKIP(); // returns from the test body
  }
  EXPECT_TRUE(trie.read_and_add_word_list(wordlist_path.c_str(), unicharset_,
                                          tesseract::Trie::RRP_REVERSE_IF_HAS_RTL));
  EXPECT_EQ(0, trie.check_for_words(wordlist_path.c_str(), unicharset_, false));
}

◆ TEST_F() [194/229]

tesseract::TEST_F	(	TatweelTest	,
		UnicharsetIgnoresTatweel
	)

Definition at line 68 of file tatweel_test.cc.

                                              {
  // This test verifies that the unicharset ignores the Tatweel character:
  // no entry of the fixture's unicharset may contain U+0640.
  const char *const tatweel = reinterpret_cast<const char *>(u8"\u0640");
  const int num_ids = unicharset_.size();
  for (int id = 0; id < num_ids; ++id) {
    const char *utf8 = unicharset_.id_to_unichar(id);
    EXPECT_EQ(strstr(utf8, tatweel), nullptr);
  }
}

◆ TEST_F() [195/229]

tesseract::TEST_F	(	TatweelTest	,
		UnicharsetLoadKeepsTatweel
	)

Definition at line 90 of file tatweel_test.cc.

                                                {
  // This test verifies that a load of an existing unicharset keeps any
  // existing tatweel for backwards compatibility.
  const std::string unicharset_path = TestDataNameToPath("ara.unicharset");
  if (!file_exists(unicharset_path.c_str())) {
    LOG(INFO) << "Skip test because of missing " << unicharset_path;
    GTEST_SKIP(); // returns from the test body
  }
  EXPECT_TRUE(unicharset_.load_from_file(unicharset_path.c_str()));

  // Count the entries whose UTF-8 text contains U+0640.
  const char *const tatweel = reinterpret_cast<const char *>(u8"\u0640");
  int num_tatweel = 0;
  for (int id = 0; id < unicharset_.size(); ++id) {
    const char *utf8 = unicharset_.id_to_unichar(id);
    if (strstr(utf8, tatweel) != nullptr) {
      ++num_tatweel;
    }
  }
  LOG(INFO) << "Num tatweels in unicharset=" << num_tatweel;
  EXPECT_EQ(num_tatweel, 4);
}

◆ TEST_F() [196/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapBoolCorners
	)

Definition at line 37 of file rect_test.cc.

                                     {
  // Boxes that cover a corner of the centre box must overlap it, whichever
  // box the query is made on.
  const TBOX center(10, 10, 30, 30);
  const TBOX bottom_left(5, 5, 15, 15);
  const TBOX top_left(5, 25, 15, 35);
  // other corners covered by symmetry

  EXPECT_TRUE(center.overlap(bottom_left));
  EXPECT_TRUE(bottom_left.overlap(center));
  EXPECT_TRUE(center.overlap(top_left));
  EXPECT_TRUE(top_left.overlap(center));
}

◆ TEST_F() [197/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapBoolSides
	)

Definition at line 61 of file rect_test.cc.

                                   {
  // Boxes that straddle one side of the centre box must overlap it, whichever
  // box the query is made on.
  const TBOX center(10, 10, 30, 30);
  const TBOX left(5, 15, 15, 25);
  const TBOX bottom(15, 5, 25, 15);
  // other sides covered by symmetry

  EXPECT_TRUE(center.overlap(left));
  EXPECT_TRUE(left.overlap(center));
  EXPECT_TRUE(center.overlap(bottom));
  EXPECT_TRUE(bottom.overlap(center));
}

◆ TEST_F() [198/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapBoolSpan
	)

Definition at line 85 of file rect_test.cc.

                                  {
  // Boxes that cross the centre box completely in one direction must overlap
  // it, whichever box the query is made on.
  const TBOX center(10, 10, 30, 30);
  const TBOX vertical(15, 5, 25, 35);
  const TBOX horizontal(5, 15, 35, 25);
  // other sides covered by symmetry in other test cases

  EXPECT_TRUE(center.overlap(vertical));
  EXPECT_TRUE(vertical.overlap(center));
  EXPECT_TRUE(center.overlap(horizontal));
  EXPECT_TRUE(horizontal.overlap(center));
}

◆ TEST_F() [199/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapFractionCorners
	)

Definition at line 49 of file rect_test.cc.

                                         {
  // Same geometry as the boolean corner test; other corners are covered by
  // symmetry.
  TBOX center(10, 10, 30, 30);
  TBOX lower_left(5, 5, 15, 15);
  TBOX upper_left(5, 25, 15, 35);

  // Expected fraction = overlap area / area of the box the method is called on.
  const double overlap_area = 5.0 * 5.0;
  const double center_area = 20.0 * 20.0;
  const double corner_area = 10.0 * 10.0;

  EXPECT_DOUBLE_EQ(overlap_area / center_area, center.overlap_fraction(lower_left));
  EXPECT_DOUBLE_EQ(overlap_area / corner_area, lower_left.overlap_fraction(center));
  EXPECT_DOUBLE_EQ(overlap_area / center_area, center.overlap_fraction(upper_left));
  EXPECT_DOUBLE_EQ(overlap_area / corner_area, upper_left.overlap_fraction(center));
}

◆ TEST_F() [200/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapFractionSides
	)

Definition at line 73 of file rect_test.cc.

                                       {
  // Same geometry as the boolean side test; other sides are covered by
  // symmetry.
  TBOX center(10, 10, 30, 30);
  TBOX left_side(5, 15, 15, 25);
  TBOX bottom_side(15, 5, 25, 15);

  // Expected fraction = overlap area / area of the box the method is called on.
  const double overlap_area = 5.0 * 10.0;
  const double center_area = 20.0 * 20.0;
  const double side_area = 10.0 * 10.0;

  EXPECT_DOUBLE_EQ(overlap_area / center_area, center.overlap_fraction(left_side));
  EXPECT_DOUBLE_EQ(overlap_area / side_area, left_side.overlap_fraction(center));
  EXPECT_DOUBLE_EQ(overlap_area / center_area, center.overlap_fraction(bottom_side));
  EXPECT_DOUBLE_EQ(overlap_area / side_area, bottom_side.overlap_fraction(center));
}

◆ TEST_F() [201/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapFractionSpan
	)

Definition at line 97 of file rect_test.cc.

                                      {
  // Same geometry as the boolean span test; other arrangements are covered
  // by symmetry in other test cases.
  TBOX center(10, 10, 30, 30);
  TBOX tall_span(15, 5, 25, 35);
  TBOX wide_span(5, 15, 35, 25);

  // Expected fraction = overlap area / area of the box the method is called on.
  const double overlap_area = 10.0 * 20.0;
  const double center_area = 20.0 * 20.0;
  const double span_area = 10.0 * 30.0;

  EXPECT_DOUBLE_EQ(overlap_area / center_area, center.overlap_fraction(tall_span));
  EXPECT_DOUBLE_EQ(overlap_area / span_area, tall_span.overlap_fraction(center));
  EXPECT_DOUBLE_EQ(overlap_area / center_area, center.overlap_fraction(wide_span));
  EXPECT_DOUBLE_EQ(overlap_area / span_area, wide_span.overlap_fraction(center));
}

◆ TEST_F() [202/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapInside
	)

Definition at line 27 of file rect_test.cc.

                                {
  // A small box placed inside a larger one.
  TBOX outer(10, 10, 20, 20);
  TBOX inner(11, 11, 12, 12);

  // Both directions report an overlap.
  EXPECT_TRUE(outer.overlap(inner));
  EXPECT_TRUE(inner.overlap(outer));
  // The fraction is relative to the box the method is called on.
  EXPECT_DOUBLE_EQ(0.01, outer.overlap_fraction(inner));
  EXPECT_DOUBLE_EQ(1.0, inner.overlap_fraction(outer));
}

◆ TEST_F() [203/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapOutsideTests
	)

Definition at line 110 of file rect_test.cc.

                                      {
  // Two boxes for which no overlap is expected.
  TBOX center(10, 10, 30, 30);
  TBOX outside(0, 15, 5, 25);

  // Neither direction reports an overlap, and both fractions are zero.
  EXPECT_FALSE(center.overlap(outside));
  EXPECT_FALSE(outside.overlap(center));
  EXPECT_DOUBLE_EQ(0.0, center.overlap_fraction(outside));
  EXPECT_DOUBLE_EQ(0.0, outside.overlap_fraction(center));
}

◆ TEST_F() [204/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapXFraction
	)

Definition at line 120 of file rect_test.cc.

                                   {
  // One reference box and three boxes to compare it against.
  TBOX base(10, 10, 20, 20);
  TBOX shifted(12, 100, 26, 200);
  TBOX large(0, 0, 100, 100);
  TBOX tiny(0, 0, 1, 1);

  // Each pair is checked in both call directions.
  EXPECT_DOUBLE_EQ(8.0 / 10.0, base.x_overlap_fraction(shifted));
  EXPECT_DOUBLE_EQ(8.0 / 14.0, shifted.x_overlap_fraction(base));
  EXPECT_DOUBLE_EQ(1.0, base.x_overlap_fraction(large));
  EXPECT_DOUBLE_EQ(10.0 / 100.0, large.x_overlap_fraction(base));
  EXPECT_DOUBLE_EQ(0.0, base.x_overlap_fraction(tiny));
  EXPECT_DOUBLE_EQ(0.0, tiny.x_overlap_fraction(base));
}

◆ TEST_F() [205/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapXFractionZeroSize
	)

Definition at line 148 of file rect_test.cc.

                                           {
  // A box whose four coordinates are all 10, compared against a large box
  // and a small box.
  TBOX degenerate(10, 10, 10, 10);
  TBOX large(0, 0, 100, 100);
  TBOX tiny(0, 0, 1, 1);

  // Each pair is checked in both call directions.
  EXPECT_DOUBLE_EQ(1.0, degenerate.x_overlap_fraction(large));
  EXPECT_DOUBLE_EQ(0.0, large.x_overlap_fraction(degenerate));
  EXPECT_DOUBLE_EQ(0.0, degenerate.x_overlap_fraction(tiny));
  EXPECT_DOUBLE_EQ(0.0, tiny.x_overlap_fraction(degenerate));
}

◆ TEST_F() [206/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapYFraction
	)

Definition at line 134 of file rect_test.cc.

                                   {
  // One reference box and three boxes to compare it against.
  TBOX base(10, 10, 20, 20);
  TBOX shifted(100, 12, 200, 26);
  TBOX large(0, 0, 100, 100);
  TBOX tiny(0, 0, 1, 1);

  // Each pair is checked in both call directions.
  EXPECT_DOUBLE_EQ(8.0 / 10.0, base.y_overlap_fraction(shifted));
  EXPECT_DOUBLE_EQ(8.0 / 14.0, shifted.y_overlap_fraction(base));
  EXPECT_DOUBLE_EQ(1.0, base.y_overlap_fraction(large));
  EXPECT_DOUBLE_EQ(10.0 / 100.0, large.y_overlap_fraction(base));
  EXPECT_DOUBLE_EQ(0.0, base.y_overlap_fraction(tiny));
  EXPECT_DOUBLE_EQ(0.0, tiny.y_overlap_fraction(base));
}

◆ TEST_F() [207/229]

tesseract::TEST_F	(	TBOXTest	,
		OverlapYFractionZeroSize
	)

Definition at line 159 of file rect_test.cc.

                                           {
  // A box whose four coordinates are all 10, compared against a large box
  // and a small box.
  TBOX degenerate(10, 10, 10, 10);
  TBOX large(0, 0, 100, 100);
  TBOX tiny(0, 0, 1, 1);

  // Each pair is checked in both call directions.
  EXPECT_DOUBLE_EQ(1.0, degenerate.y_overlap_fraction(large));
  EXPECT_DOUBLE_EQ(0.0, large.y_overlap_fraction(degenerate));
  EXPECT_DOUBLE_EQ(0.0, degenerate.y_overlap_fraction(tiny));
  EXPECT_DOUBLE_EQ(0.0, tiny.y_overlap_fraction(degenerate));
}

◆ TEST_F() [208/229]

tesseract::TEST_F	(	TesseractTest	,
		AdaptToWordStrTest
	)

Definition at line 163 of file baseapi_test.cc.

                                          {
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because TessBaseAPI::AdaptToWordStr is missing.
  GTEST_SKIP();
#else
  // Training images and the text passed to AdaptToWordStr for each of them.
  // The two arrays are indexed in parallel and end with a nullptr sentinel.
  static const char *kTrainingPages[] = {"136.tif", "256.tif", "410.tif", "432.tif", "540.tif",
                                         "692.tif", "779.tif", "793.tif", "808.tif", "815.tif",
                                         "12.tif",  "12.tif",  nullptr};
  static const char *kTrainingText[] = {"1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0",
                                        "6 9 2", "7 7 9", "7 9 3", "8 0 8", "8 1 5",
                                        "1 2",   "1 2",   nullptr};
  // Test images and the text expected from each of them (parallel arrays,
  // nullptr-terminated).
  static const char *kTestPages[] = {"324.tif", "433.tif", "12.tif", nullptr};
  static const char *kTestText[] = {"324", "433", "12", nullptr};
  tesseract::TessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
    return;
  }
  api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
  api.SetVariable("classify_class_pruner_threshold", "220");
  // Train on the training text.
  for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
    std::string image_file = TestDataNameToPath(kTrainingPages[i]);
    Image src_pix = pixRead(image_file.c_str());
    CHECK(src_pix);
    api.SetImage(src_pix);
    EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
        << "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file;
    src_pix.destroy();
  }
  // Test the test text.
  api.SetVariable("tess_bn_matching", "1");
  api.SetPageSegMode(tesseract::PSM_SINGLE_WORD);
  for (int i = 0; kTestPages[i] != nullptr; ++i) {
    Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
    CHECK(src_pix);
    // The expected text comes straight from kTestText, so no separate truth
    // string is needed here.
    const std::string ocr_text = GetCleanedTextResult(&api, src_pix);
    EXPECT_STREQ(kTestText[i], ocr_text.c_str());
    src_pix.destroy();
  }
#endif
}

◆ TEST_F() [209/229]

tesseract::TEST_F	(	TesseractTest	,
		BasicLSTMTest
	)

Definition at line 211 of file baseapi_test.cc.

                                     {
  // Recognizes phototest_2.tif with OEM_LSTM_ONLY and compares the cleaned
  // result with the trimmed contents of phototest.gold.txt.
  tesseract::TessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
    return;
  }
  Image page = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
  CHECK(page);
  std::string recognized = GetCleanedTextResult(&api, page);
  std::string expected;
  CHECK_OK(
      file::GetContents(TestDataNameToPath("phototest.gold.txt"), &expected, file::Defaults()));
  trim(expected);
  EXPECT_STREQ(expected.c_str(), recognized.c_str());
  page.destroy();
}

◆ TEST_F() [210/229]

tesseract::TEST_F	(	TesseractTest	,
		BasicTesseractTest
	)

Definition at line 72 of file baseapi_test.cc.

                                          {
  // Recognizes phototest.tif with OEM_TESSERACT_ONLY and compares the cleaned
  // result with the trimmed contents of phototest.gold.txt.
  tesseract::TessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
  }
  Image page = pixRead(TestDataNameToPath("phototest.tif").c_str());
  CHECK(page);
  std::string recognized = GetCleanedTextResult(&api, page);
  std::string expected;
  CHECK_OK(
      file::GetContents(TestDataNameToPath("phototest.gold.txt"), &expected, file::Defaults()));
  trim(expected);
  EXPECT_STREQ(expected.c_str(), recognized.c_str());
  page.destroy();
}

◆ TEST_F() [211/229]

tesseract::TEST_F	(	TesseractTest	,
		HOCRContainsBaseline
	)

Definition at line 141 of file baseapi_test.cc.

                                            {
  // Produces hOCR for HelloGoogle.tif and checks that it contains "Hello" and
  // an ocr_line span followed by a "baseline <number> <number>" entry.
  tesseract::TessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
    return;
  }
  Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
  CHECK(src_pix);
  api.SetInputName("HelloGoogle.tif");
  api.SetImage(src_pix);
  char *result = api.GetHOCRText(0);
  EXPECT_TRUE(result != nullptr);
  EXPECT_THAT(result, HasSubstr("Hello"));
  // The null check above is non-fatal, so guard the regex search: calling
  // std::regex_search on a null C string is undefined behaviour. A null
  // result has already been reported as a failure at this point.
  if (result != nullptr) {
    EXPECT_TRUE(std::regex_search(
        result, std::regex{"<span class='ocr_line'[^>]* baseline [-.0-9]+ [-.0-9]+"}));
  }

  delete[] result;
  src_pix.destroy();
}

◆ TEST_F() [212/229]

tesseract::TEST_F	(	TesseractTest	,
		HOCRWorksWithoutSetInputName
	)

Definition at line 122 of file baseapi_test.cc.

                                                    {
  // Produces hOCR for HelloGoogle.tif without calling SetInputName first and
  // checks that the output still contains "Hello" and an ocr_page div.
  tesseract::TessBaseAPI api;
  const int init_status = api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
  if (init_status == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
    return;
  }
  Image page = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
  CHECK(page);
  api.SetImage(page);
  char *hocr = api.GetHOCRText(0);
  EXPECT_TRUE(hocr != nullptr);
  EXPECT_THAT(hocr, HasSubstr("Hello"));
  EXPECT_THAT(hocr, HasSubstr("<div class='ocr_page'"));
  // The text is released with delete[] and the image with destroy().
  delete[] hocr;
  page.destroy();
}

◆ TEST_F() [213/229]

tesseract::TEST_F	(	TesseractTest	,
		InitConfigOnlyTest
	)

Definition at line 280 of file baseapi_test.cc.

                                          {
  // Languages for testing initialization.
  const char *langs[] = {"eng", "chi_tra", "jpn", "vie"};
  std::unique_ptr<tesseract::TessBaseAPI> api;
  CycleTimer timer;

  // First pass: regular Init on a fresh API object per language, timed.
  for (const char *lang : langs) {
    api = std::make_unique<tesseract::TessBaseAPI>();
    timer.Restart();
    EXPECT_EQ(0, api->Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY));
    timer.Stop();
    LOG(INFO) << "Lang " << lang << " took " << timer.GetInMs() << "ms in regular init";
  }

  // Second pass: the same languages with tessedit_init_config_only set to 1
  // through the variable name/value vectors passed to Init.
  std::vector<std::string> vars_vec{"tessedit_init_config_only"};
  std::vector<std::string> vars_values{"1"};
  LOG(INFO) << "Switching to config only initialization:";
  for (const char *lang : langs) {
    api = std::make_unique<tesseract::TessBaseAPI>();
    timer.Restart();
    EXPECT_EQ(0, api->Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY, nullptr, 0,
                           &vars_vec, &vars_values, false));
    timer.Stop();
    LOG(INFO) << "Lang " << lang << " took " << timer.GetInMs() << "ms in config-only init";
  }
}

◆ TEST_F() [214/229]

tesseract::TEST_F	(	TesseractTest	,
		IteratesParagraphsEvenIfNotDetected
	)

Definition at line 93 of file baseapi_test.cc.

                                                           {
  // Initializes English with OEM_TESSERACT_ONLY, selects single-block page
  // segmentation and sets paragraph_debug_level to 3. The comparison of
  // paragraph and block counts is compiled out until b622.png is available.
  tesseract::TessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
    api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
    api.SetVariable("paragraph_debug_level", "3");
#if 0 // TODO: b622.png is missing
    // Declared as Image (not Pix*) so that src_pix.destroy() below compiles
    // once this section is re-enabled, matching the other tests in this file.
    Image src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
    CHECK(src_pix);
    api.SetImage(src_pix);
    Boxa* para_boxes =
        api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
    EXPECT_TRUE(para_boxes != nullptr);
    Boxa* block_boxes =
        api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
    EXPECT_TRUE(block_boxes != nullptr);
    // TODO(eger): Get paragraphs out of this page pre-text.
    EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
    boxaDestroy(&block_boxes);
    boxaDestroy(&para_boxes);
    src_pix.destroy();
#endif
  } else {
    // eng.traineddata not found.
    GTEST_SKIP();
  }
}

◆ TEST_F() [215/229]

tesseract::TEST_F	(	TesseractTest	,
		LSTMGeometryTest
	)

Definition at line 236 of file baseapi_test.cc.

                                        {
  // Recognizes deslant.tif with OEM_LSTM_ONLY and checks that every character
  // box of every word stays within a few pixels of that word's bounding box.
  FriendlyTessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
    return;
  }
  // Read the image only after the skip check so the skip path does not leak
  // it, and fail fast if it cannot be read, as the other tests here do.
  Image src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
  CHECK(src_pix);
  api.SetImage(src_pix);
  ASSERT_EQ(api.Recognize(nullptr), 0);

  const PAGE_RES *page_res = api.GetPageRes();
  PAGE_RES_IT page_res_it(const_cast<PAGE_RES *>(page_res));
  page_res_it.restart_page();
  BLOCK *block = page_res_it.block()->block;
  CHECK(block);

  // extract word and character boxes for each word
  for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
    WERD_RES *word = page_res_it.word();
    CHECK(word);
    CHECK(word->best_choice);
    CHECK_GT(word->best_choice->length(), 0);
    CHECK(word->word);
    CHECK(word->box_word);
    // tesseract's word box, rotated by the block's re_rotation()
    TBOX tess_blob_box;
    tess_blob_box = word->word->bounding_box();
    tess_blob_box.rotate(block->re_rotation());
    // verify that each of LSTM's character boxes lies close to within
    // tesseract's word box
    for (int i = 0; i < word->box_word->length(); ++i) {
      TBOX lstm_blob_box = word->box_word->BlobBox(i);
      // LSTM character box should not spill out of tesseract word box
      // by more than a few pixels in any direction
      EXPECT_LT(tess_blob_box.left() - lstm_blob_box.left(), 5);
      EXPECT_LT(lstm_blob_box.right() - tess_blob_box.right(), 5);
      EXPECT_LT(tess_blob_box.bottom() - lstm_blob_box.bottom(), 5);
      EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
    }
  }
  src_pix.destroy();
}

◆ TEST_F() [216/229]

tesseract::TEST_F	(	TesseractTest	,
		StaticTessBaseAPI
	)

Definition at line 66 of file baseapi_test.cc.

                                         {
  // Declares a function-local static TessBaseAPI and immediately calls End()
  // on it; the test contains no explicit expectations.
  static tesseract::TessBaseAPI api;
  api.End();
}

◆ TEST_F() [217/229]

tesseract::TEST_F	(	TextlineProjectionTest	,
		Rotated
	)

Definition at line 250 of file textlineprojection_test.cc.

                                        {
  // Runs VerifyBoxes on phototestrot.tif.
  // NOTE(review): the meaning of the second argument (31) is defined by
  // VerifyBoxes, which is not visible here.
  VerifyBoxes("phototestrot.tif", 31);
}

◆ TEST_F() [218/229]

tesseract::TEST_F	(	TextlineProjectionTest	,
		Unrotated
	)

Definition at line 245 of file textlineprojection_test.cc.

                                          {
  // Runs VerifyBoxes on phototest.tif.
  // NOTE(review): the meaning of the second argument (31) is defined by
  // VerifyBoxes, which is not visible here.
  VerifyBoxes("phototest.tif", 31);
}

◆ TEST_F() [219/229]

tesseract::TEST_F	(	TfileTest	,
		BigEndian
	)

Definition at line 196 of file tfile_test.cc.

                             {
  // This test verifies that Tfile can auto-reverse big-endian data.
  MathData source;
  source.Setup();

  // Write the big-endian form into an in-memory buffer.
  std::vector<char> bytes;
  TFile writer;
  writer.OpenWrite(&bytes);
  EXPECT_TRUE(source.SerializeBigEndian(&writer));

  // Read it back with byte swapping switched on.
  TFile reader;
  EXPECT_TRUE(reader.Open(bytes.data(), bytes.size()));
  reader.set_swap(true);
  MathData decoded;
  EXPECT_TRUE(decoded.DeSerializeBigEndian(&reader));

  // That serialize was destructive, so test against a fresh MathData.
  MathData reference;
  reference.Setup();
  reference.ExpectEq(decoded);
}

◆ TEST_F() [220/229]

tesseract::TEST_F	(	TfileTest	,
		FGets
	)

Definition at line 170 of file tfile_test.cc.

                         {
  // This test verifies that Tfile can interleave FGets with binary data.
  MathData source;
  std::string text_line = "This is a textline with a newline\n";
  source.Setup();

  // Layout written: serialized object, text line, serialized object.
  std::vector<char> bytes;
  TFile writer;
  writer.OpenWrite(&bytes);
  EXPECT_TRUE(source.Serialize(&writer));
  EXPECT_EQ(1, writer.FWrite(text_line.data(), text_line.size(), 1));
  EXPECT_TRUE(source.Serialize(&writer));

  // Now get back the 2 copies of the object with the line in between.
  TFile reader;
  EXPECT_TRUE(reader.Open(bytes.data(), bytes.size()));
  MathData first_copy;
  EXPECT_TRUE(first_copy.DeSerialize(&reader));
  source.ExpectEq(first_copy);

  const int kBufsize = 1024;
  char line_buffer[kBufsize + 1];
  // FGets is expected to return the buffer it was given.
  EXPECT_EQ(line_buffer, reader.FGets(line_buffer, kBufsize));
  EXPECT_STREQ(text_line.c_str(), line_buffer);

  MathData second_copy;
  EXPECT_TRUE(second_copy.DeSerialize(&reader));
  source.ExpectEq(second_copy);
}

◆ TEST_F() [221/229]

tesseract::TEST_F	(	TfileTest	,
		Serialize
	)

Definition at line 150 of file tfile_test.cc.

                             {
  // This test verifies that Tfile can serialize a class.
  MathData source;
  source.Setup();

  // Write one serialized copy into an in-memory buffer.
  std::vector<char> bytes;
  TFile writer;
  writer.OpenWrite(&bytes);
  EXPECT_TRUE(source.Serialize(&writer));

  // The first read yields an equal object.
  TFile reader;
  EXPECT_TRUE(reader.Open(bytes.data(), bytes.size()));
  MathData first_read;
  EXPECT_TRUE(first_read.DeSerialize(&reader));
  source.ExpectEq(first_read);

  // A second read without rewinding is expected to fail; after Rewind() it
  // is expected to succeed again.
  MathData second_read;
  EXPECT_FALSE(second_read.DeSerialize(&reader));
  reader.Rewind();
  EXPECT_TRUE(second_read.DeSerialize(&reader));
  source.ExpectEq(second_read);
}

◆ TEST_F() [222/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesChinese
	)

Definition at line 165 of file unicharcompress_test.cc.

                                         {
  // Runs the load-then-ExpectCorrect sequence twice: first for chi_tra, then
  // for chi_sim.
  LOG(INFO) << "Testing chi_tra";
  LoadUnicharset("chi_tra.unicharset");
  ExpectCorrect("chi_tra");
  LOG(INFO) << "Testing chi_sim";
  LoadUnicharset("chi_sim.unicharset");
  ExpectCorrect("chi_sim");
}

◆ TEST_F() [223/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesEnglish
	)

Definition at line 200 of file unicharcompress_test.cc.

                                         {
  // Logs the language, loads eng.unicharset and calls ExpectCorrect for it.
  LOG(INFO) << "Testing eng";
  LoadUnicharset("eng.unicharset");
  ExpectCorrect("eng");
}

◆ TEST_F() [224/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesJapanese
	)

Definition at line 174 of file unicharcompress_test.cc.

                                          {
  // Logs the language, loads jpn.unicharset and calls ExpectCorrect for it.
  LOG(INFO) << "Testing jpn";
  LoadUnicharset("jpn.unicharset");
  ExpectCorrect("jpn");
}

◆ TEST_F() [225/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesKannada
	)

Definition at line 186 of file unicharcompress_test.cc.

                                         {
  // Loads kan.unicharset and calls ExpectCorrect, then calls
  // SerializeAndUndo() and repeats the same ExpectCorrect call.
  LOG(INFO) << "Testing kan";
  LoadUnicharset("kan.unicharset");
  ExpectCorrect("kan");
  SerializeAndUndo();
  ExpectCorrect("kan");
}

◆ TEST_F() [226/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesKorean
	)

Definition at line 180 of file unicharcompress_test.cc.

                                        {
  // Logs the language, loads kor.unicharset and calls ExpectCorrect for it.
  LOG(INFO) << "Testing kor";
  LoadUnicharset("kor.unicharset");
  ExpectCorrect("kor");
}

◆ TEST_F() [227/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesLigaturesWithDoubles
	)

Definition at line 208 of file unicharcompress_test.cc.

                                                      {
  // Loads por.unicharset, runs ExpectCorrect, then inspects the encoding of
  // each unichar-id individually.
  LOG(INFO) << "Testing por with ligatures";
  LoadUnicharset("por.unicharset");
  ExpectCorrect("por");
  // For every unichar-id that is encoded with more than one code, check that
  // none of those codes equals encoded_null_char_.
  // NOTE(review): the loop bound is inclusive of unicharset_.size();
  // presumably the extra id is intentional -- confirm against EncodeUnichar.
  for (int u = 0; u <= unicharset_.size(); ++u) {
    RecodedCharID code;
    int len = compressed_.EncodeUnichar(u, &code);
    if (len > 1) {
      // There should not be any null char in the code.
      for (int i = 0; i < len; ++i) {
        EXPECT_NE(encoded_null_char_, code(i));
      }
    }
  }
}

◆ TEST_F() [228/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		DoesMarathi
	)

Definition at line 194 of file unicharcompress_test.cc.

                                         {
  // Logs the language, loads mar.unicharset and calls ExpectCorrect for it.
  LOG(INFO) << "Testing mar";
  LoadUnicharset("mar.unicharset");
  ExpectCorrect("mar");
}

◆ TEST_F() [229/229]

tesseract::TEST_F	(	UnicharcompressTest	,
		GetEncodingAsString
	)

Definition at line 228 of file unicharcompress_test.cc.

                                                 {
  // Loads trivial.unicharset and checks the line-by-line text form returned
  // by GetEncodingAsString.
  LoadUnicharset("trivial.unicharset");
  ExpectCorrect("trivial");
  std::string encoding = compressed_.GetEncodingAsString(unicharset_);
  std::vector<std::string> lines = split(encoding, '\n');
  // Fatal assertion: the checks below index lines[0..4], so stop here rather
  // than read past the end of the vector when the line count is wrong.
  ASSERT_EQ(5u, lines.size());
  // The first line is always space.
  EXPECT_EQ("0\t ", lines[0]);
  // Next we have i.
  EXPECT_EQ("1\ti", lines[1]);
  // Next we have f.
  EXPECT_EQ("2\tf", lines[2]);
  // Next we have the fi ligature: ﬁ. There are no nulls in it, as there are no
  // repeated letter ligatures in this unicharset, unlike por.unicharset above.
  EXPECT_EQ("2,1\tﬁ", lines[3]);
  // Finally the null character.
  EXPECT_EQ("3\t<nul>", lines[4]);
}

◆ TEST_P() [1/165]

tesseract::TEST_P	(	LoadLanguage	,
		afr
	)

Definition at line 49 of file loadlang_test.cc.

                          {
  LangLoader("afr", GetParam());
}

◆ TEST_P() [2/165]

tesseract::TEST_P	(	LoadLanguage	,
		amh
	)

Definition at line 52 of file loadlang_test.cc.

                          {
  LangLoader("amh", GetParam());
}

◆ TEST_P() [3/165]

tesseract::TEST_P	(	LoadLanguage	,
		ara
	)

Definition at line 55 of file loadlang_test.cc.

                          {
  LangLoader("ara", GetParam());
}

◆ TEST_P() [4/165]

tesseract::TEST_P	(	LoadLanguage	,
		asm
	)

Definition at line 58 of file loadlang_test.cc.

                          {
  LangLoader("asm", GetParam());
}

◆ TEST_P() [5/165]

tesseract::TEST_P	(	LoadLanguage	,
		aze
	)

Definition at line 61 of file loadlang_test.cc.

                          {
  LangLoader("aze", GetParam());
}

◆ TEST_P() [6/165]

tesseract::TEST_P	(	LoadLanguage	,
		aze_cyrl
	)

Definition at line 64 of file loadlang_test.cc.

                               {
  LangLoader("aze_cyrl", GetParam());
}

◆ TEST_P() [7/165]

tesseract::TEST_P	(	LoadLanguage	,
		bel
	)

Definition at line 67 of file loadlang_test.cc.

                          {
  LangLoader("bel", GetParam());
}

◆ TEST_P() [8/165]

tesseract::TEST_P	(	LoadLanguage	,
		ben
	)

Definition at line 70 of file loadlang_test.cc.

                          {
  LangLoader("ben", GetParam());
}

◆ TEST_P() [9/165]

tesseract::TEST_P	(	LoadLanguage	,
		bod
	)

Definition at line 73 of file loadlang_test.cc.

                          {
  LangLoader("bod", GetParam());
}

◆ TEST_P() [10/165]

tesseract::TEST_P	(	LoadLanguage	,
		bos
	)

Definition at line 76 of file loadlang_test.cc.

                          {
  LangLoader("bos", GetParam());
}

◆ TEST_P() [11/165]

tesseract::TEST_P	(	LoadLanguage	,
		bre
	)

Definition at line 79 of file loadlang_test.cc.

                          {
  LangLoader("bre", GetParam());
}

◆ TEST_P() [12/165]

tesseract::TEST_P	(	LoadLanguage	,
		bul
	)

Definition at line 82 of file loadlang_test.cc.

                          {
  LangLoader("bul", GetParam());
}

◆ TEST_P() [13/165]

tesseract::TEST_P	(	LoadLanguage	,
		cat
	)

Definition at line 85 of file loadlang_test.cc.

                          {
  LangLoader("cat", GetParam());
}

◆ TEST_P() [14/165]

tesseract::TEST_P	(	LoadLanguage	,
		ceb
	)

Definition at line 88 of file loadlang_test.cc.

                          {
  LangLoader("ceb", GetParam());
}

◆ TEST_P() [15/165]

tesseract::TEST_P	(	LoadLanguage	,
		ces
	)

Definition at line 91 of file loadlang_test.cc.

                          {
  LangLoader("ces", GetParam());
}

◆ TEST_P() [16/165]

tesseract::TEST_P	(	LoadLanguage	,
		chi_sim
	)

Definition at line 94 of file loadlang_test.cc.

                              {
  LangLoader("chi_sim", GetParam());
}

◆ TEST_P() [17/165]

tesseract::TEST_P	(	LoadLanguage	,
		chi_sim_vert
	)

Definition at line 97 of file loadlang_test.cc.

                                   {
  LangLoader("chi_sim_vert", GetParam());
}

◆ TEST_P() [18/165]

tesseract::TEST_P	(	LoadLanguage	,
		chi_tra
	)

Definition at line 100 of file loadlang_test.cc.

                              {
  LangLoader("chi_tra", GetParam());
}

◆ TEST_P() [19/165]

tesseract::TEST_P	(	LoadLanguage	,
		chi_tra_vert
	)

Definition at line 103 of file loadlang_test.cc.

                                   {
  LangLoader("chi_tra_vert", GetParam());
}

◆ TEST_P() [20/165]

tesseract::TEST_P	(	LoadLanguage	,
		chr
	)

Definition at line 106 of file loadlang_test.cc.

                          {
  LangLoader("chr", GetParam());
}

◆ TEST_P() [21/165]

tesseract::TEST_P	(	LoadLanguage	,
		cos
	)

Definition at line 109 of file loadlang_test.cc.

                          {
  LangLoader("cos", GetParam());
}

◆ TEST_P() [22/165]

tesseract::TEST_P	(	LoadLanguage	,
		cym
	)

Definition at line 112 of file loadlang_test.cc.

                          {
  LangLoader("cym", GetParam());
}

◆ TEST_P() [23/165]

tesseract::TEST_P	(	LoadLanguage	,
		dan
	)

Definition at line 115 of file loadlang_test.cc.

                          {
  LangLoader("dan", GetParam());
}

◆ TEST_P() [24/165]

tesseract::TEST_P	(	LoadLanguage	,
		deu
	)

Definition at line 118 of file loadlang_test.cc.

                          {
  LangLoader("deu", GetParam());
}

◆ TEST_P() [25/165]

tesseract::TEST_P	(	LoadLanguage	,
		div
	)

Definition at line 121 of file loadlang_test.cc.

                          {
  LangLoader("div", GetParam());
}

◆ TEST_P() [26/165]

tesseract::TEST_P	(	LoadLanguage	,
		dzo
	)

Definition at line 124 of file loadlang_test.cc.

                          {
  LangLoader("dzo", GetParam());
}

◆ TEST_P() [27/165]

tesseract::TEST_P	(	LoadLanguage	,
		ell
	)

Definition at line 127 of file loadlang_test.cc.

                          {
  LangLoader("ell", GetParam());
}

◆ TEST_P() [28/165]

tesseract::TEST_P	(	LoadLanguage	,
		eng
	)

Definition at line 130 of file loadlang_test.cc.

                          {
  LangLoader("eng", GetParam());
}

◆ TEST_P() [29/165]

tesseract::TEST_P	(	LoadLanguage	,
		enm
	)

Definition at line 133 of file loadlang_test.cc.

                          {
  LangLoader("enm", GetParam());
}

◆ TEST_P() [30/165]

tesseract::TEST_P	(	LoadLanguage	,
		epo
	)

Definition at line 136 of file loadlang_test.cc.

                          {
  LangLoader("epo", GetParam());
}

◆ TEST_P() [31/165]

tesseract::TEST_P	(	LoadLanguage	,
		est
	)

Definition at line 139 of file loadlang_test.cc.

                          {
  LangLoader("est", GetParam());
}

◆ TEST_P() [32/165]

tesseract::TEST_P	(	LoadLanguage	,
		eus
	)

Definition at line 142 of file loadlang_test.cc.

                          {
  LangLoader("eus", GetParam());
}

◆ TEST_P() [33/165]

tesseract::TEST_P	(	LoadLanguage	,
		fao
	)

Definition at line 145 of file loadlang_test.cc.

                          {
  LangLoader("fao", GetParam());
}

◆ TEST_P() [34/165]

tesseract::TEST_P	(	LoadLanguage	,
		fas
	)

Definition at line 148 of file loadlang_test.cc.

                          {
  LangLoader("fas", GetParam());
}

◆ TEST_P() [35/165]

tesseract::TEST_P	(	LoadLanguage	,
		fil
	)

Definition at line 151 of file loadlang_test.cc.

                          {
  LangLoader("fil", GetParam());
}

◆ TEST_P() [36/165]

tesseract::TEST_P	(	LoadLanguage	,
		fin
	)

Definition at line 154 of file loadlang_test.cc.

                          {
  LangLoader("fin", GetParam());
}

◆ TEST_P() [37/165]

tesseract::TEST_P	(	LoadLanguage	,
		fra
	)

Definition at line 157 of file loadlang_test.cc.

                          {
  LangLoader("fra", GetParam());
}

◆ TEST_P() [38/165]

tesseract::TEST_P	(	LoadLanguage	,
		frk
	)

Definition at line 160 of file loadlang_test.cc.

                          {
  LangLoader("frk", GetParam());
}

◆ TEST_P() [39/165]

tesseract::TEST_P	(	LoadLanguage	,
		frm
	)

Definition at line 163 of file loadlang_test.cc.

                          {
  LangLoader("frm", GetParam());
}

◆ TEST_P() [40/165]

tesseract::TEST_P	(	LoadLanguage	,
		fry
	)

Definition at line 166 of file loadlang_test.cc.

                          {
  LangLoader("fry", GetParam());
}

◆ TEST_P() [41/165]

tesseract::TEST_P	(	LoadLanguage	,
		gla
	)

Definition at line 169 of file loadlang_test.cc.

                          {
  LangLoader("gla", GetParam());
}

◆ TEST_P() [42/165]

tesseract::TEST_P	(	LoadLanguage	,
		gle
	)

Definition at line 172 of file loadlang_test.cc.

                          {
  LangLoader("gle", GetParam());
}

◆ TEST_P() [43/165]

tesseract::TEST_P	(	LoadLanguage	,
		glg
	)

Definition at line 175 of file loadlang_test.cc.

                          {
  LangLoader("glg", GetParam());
}

◆ TEST_P() [44/165]

tesseract::TEST_P	(	LoadLanguage	,
		grc
	)

Definition at line 178 of file loadlang_test.cc.

                          {
  LangLoader("grc", GetParam());
}

◆ TEST_P() [45/165]

tesseract::TEST_P	(	LoadLanguage	,
		guj
	)

Definition at line 181 of file loadlang_test.cc.

                          {
  LangLoader("guj", GetParam());
}

◆ TEST_P() [46/165]

tesseract::TEST_P	(	LoadLanguage	,
		hat
	)

Definition at line 184 of file loadlang_test.cc.

                          {
  LangLoader("hat", GetParam());
}

◆ TEST_P() [47/165]

tesseract::TEST_P	(	LoadLanguage	,
		heb
	)

Definition at line 187 of file loadlang_test.cc.

                          {
  LangLoader("heb", GetParam());
}

◆ TEST_P() [48/165]

tesseract::TEST_P	(	LoadLanguage	,
		hin
	)

Definition at line 190 of file loadlang_test.cc.

                          {
  LangLoader("hin", GetParam());
}

◆ TEST_P() [49/165]

tesseract::TEST_P	(	LoadLanguage	,
		hrv
	)

Definition at line 193 of file loadlang_test.cc.

                          {
  LangLoader("hrv", GetParam());
}

◆ TEST_P() [50/165]

tesseract::TEST_P	(	LoadLanguage	,
		hun
	)

Definition at line 196 of file loadlang_test.cc.

                          {
  LangLoader("hun", GetParam());
}

◆ TEST_P() [51/165]

tesseract::TEST_P	(	LoadLanguage	,
		hye
	)

Definition at line 199 of file loadlang_test.cc.

                          {
  LangLoader("hye", GetParam());
}

◆ TEST_P() [52/165]

tesseract::TEST_P	(	LoadLanguage	,
		iku
	)

Definition at line 202 of file loadlang_test.cc.

                          {
  LangLoader("iku", GetParam());
}

◆ TEST_P() [53/165]

tesseract::TEST_P	(	LoadLanguage	,
		ind
	)

Definition at line 205 of file loadlang_test.cc.

                          {
  LangLoader("ind", GetParam());
}

◆ TEST_P() [54/165]

tesseract::TEST_P	(	LoadLanguage	,
		isl
	)

Definition at line 208 of file loadlang_test.cc.

                          {
  LangLoader("isl", GetParam());
}

◆ TEST_P() [55/165]

tesseract::TEST_P	(	LoadLanguage	,
		ita
	)

Definition at line 211 of file loadlang_test.cc.

                          {
  LangLoader("ita", GetParam());
}

◆ TEST_P() [56/165]

tesseract::TEST_P	(	LoadLanguage	,
		ita_old
	)

Definition at line 214 of file loadlang_test.cc.

                              {
  LangLoader("ita_old", GetParam());
}

◆ TEST_P() [57/165]

tesseract::TEST_P	(	LoadLanguage	,
		jav
	)

Definition at line 217 of file loadlang_test.cc.

                          {
  LangLoader("jav", GetParam());
}

◆ TEST_P() [58/165]

tesseract::TEST_P	(	LoadLanguage	,
		jpn
	)

Definition at line 220 of file loadlang_test.cc.

                          {
  LangLoader("jpn", GetParam());
}

◆ TEST_P() [59/165]

tesseract::TEST_P	(	LoadLanguage	,
		jpn_vert
	)

Definition at line 223 of file loadlang_test.cc.

                               {
  LangLoader("jpn_vert", GetParam());
}

◆ TEST_P() [60/165]

tesseract::TEST_P	(	LoadLanguage	,
		kan
	)

Definition at line 226 of file loadlang_test.cc.

                          {
  LangLoader("kan", GetParam());
}

◆ TEST_P() [61/165]

tesseract::TEST_P	(	LoadLanguage	,
		kat
	)

Definition at line 229 of file loadlang_test.cc.

                          {
  LangLoader("kat", GetParam());
}

◆ TEST_P() [62/165]

tesseract::TEST_P	(	LoadLanguage	,
		kat_old
	)

Definition at line 232 of file loadlang_test.cc.

                              {
  LangLoader("kat_old", GetParam());
}

◆ TEST_P() [63/165]

tesseract::TEST_P	(	LoadLanguage	,
		kaz
	)

Definition at line 235 of file loadlang_test.cc.

                          {
  LangLoader("kaz", GetParam());
}

◆ TEST_P() [64/165]

tesseract::TEST_P	(	LoadLanguage	,
		khm
	)

Definition at line 238 of file loadlang_test.cc.

                          {
  LangLoader("khm", GetParam());
}

◆ TEST_P() [65/165]

tesseract::TEST_P	(	LoadLanguage	,
		kir
	)

Definition at line 241 of file loadlang_test.cc.

                          {
  LangLoader("kir", GetParam());
}

◆ TEST_P() [66/165]

tesseract::TEST_P	(	LoadLanguage	,
		kor
	)

Definition at line 245 of file loadlang_test.cc.

                          {
  LangLoader("kor", GetParam());
}

◆ TEST_P() [67/165]

tesseract::TEST_P	(	LoadLanguage	,
		kor_vert
	)

Definition at line 248 of file loadlang_test.cc.

                               {
  LangLoader("kor_vert", GetParam());
}

◆ TEST_P() [68/165]

tesseract::TEST_P	(	LoadLanguage	,
		lao
	)

Definition at line 251 of file loadlang_test.cc.

                          {
  LangLoader("lao", GetParam());
}

◆ TEST_P() [69/165]

tesseract::TEST_P	(	LoadLanguage	,
		lat
	)

Definition at line 254 of file loadlang_test.cc.

                          {
  LangLoader("lat", GetParam());
}

◆ TEST_P() [70/165]

tesseract::TEST_P	(	LoadLanguage	,
		lav
	)

Definition at line 257 of file loadlang_test.cc.

                          {
  LangLoader("lav", GetParam());
}

◆ TEST_P() [71/165]

tesseract::TEST_P	(	LoadLanguage	,
		lit
	)

Definition at line 260 of file loadlang_test.cc.

                          {
  LangLoader("lit", GetParam());
}

◆ TEST_P() [72/165]

tesseract::TEST_P	(	LoadLanguage	,
		ltz
	)

Definition at line 263 of file loadlang_test.cc.

                          {
  LangLoader("ltz", GetParam());
}

◆ TEST_P() [73/165]

tesseract::TEST_P	(	LoadLanguage	,
		mal
	)

Definition at line 266 of file loadlang_test.cc.

                          {
  LangLoader("mal", GetParam());
}

◆ TEST_P() [74/165]

tesseract::TEST_P	(	LoadLanguage	,
		mar
	)

Definition at line 269 of file loadlang_test.cc.

                          {
  LangLoader("mar", GetParam());
}

◆ TEST_P() [75/165]

tesseract::TEST_P	(	LoadLanguage	,
		mkd
	)

Definition at line 272 of file loadlang_test.cc.

                          {
  LangLoader("mkd", GetParam());
}

◆ TEST_P() [76/165]

tesseract::TEST_P	(	LoadLanguage	,
		mlt
	)

Definition at line 275 of file loadlang_test.cc.

                          {
  LangLoader("mlt", GetParam());
}

◆ TEST_P() [77/165]

tesseract::TEST_P	(	LoadLanguage	,
		mon
	)

Definition at line 278 of file loadlang_test.cc.

                          {
  LangLoader("mon", GetParam());
}

◆ TEST_P() [78/165]

tesseract::TEST_P	(	LoadLanguage	,
		mri
	)

Definition at line 281 of file loadlang_test.cc.

                          {
  LangLoader("mri", GetParam());
}

◆ TEST_P() [79/165]

tesseract::TEST_P	(	LoadLanguage	,
		msa
	)

Definition at line 284 of file loadlang_test.cc.

                          {
  LangLoader("msa", GetParam());
}

◆ TEST_P() [80/165]

tesseract::TEST_P	(	LoadLanguage	,
		mya
	)

Definition at line 287 of file loadlang_test.cc.

                          {
  LangLoader("mya", GetParam());
}

◆ TEST_P() [81/165]

tesseract::TEST_P	(	LoadLanguage	,
		nep
	)

Definition at line 290 of file loadlang_test.cc.

                          {
  LangLoader("nep", GetParam());
}

◆ TEST_P() [82/165]

tesseract::TEST_P	(	LoadLanguage	,
		nld
	)

Definition at line 293 of file loadlang_test.cc.

                          {
  LangLoader("nld", GetParam());
}

◆ TEST_P() [83/165]

tesseract::TEST_P	(	LoadLanguage	,
		nor
	)

Definition at line 296 of file loadlang_test.cc.

                          {
  LangLoader("nor", GetParam());
}

◆ TEST_P() [84/165]

tesseract::TEST_P	(	LoadLanguage	,
		oci
	)

Definition at line 299 of file loadlang_test.cc.

                          {
  LangLoader("oci", GetParam());
}

◆ TEST_P() [85/165]

tesseract::TEST_P	(	LoadLanguage	,
		ori
	)

Definition at line 302 of file loadlang_test.cc.

                          {
  LangLoader("ori", GetParam());
}

◆ TEST_P() [86/165]

tesseract::TEST_P	(	LoadLanguage	,
		osd
	)

Definition at line 305 of file loadlang_test.cc.

                          {
  LangLoader("osd", GetParam());
}

◆ TEST_P() [87/165]

tesseract::TEST_P	(	LoadLanguage	,
		pan
	)

Definition at line 308 of file loadlang_test.cc.

                          {
  LangLoader("pan", GetParam());
}

◆ TEST_P() [88/165]

tesseract::TEST_P	(	LoadLanguage	,
		pol
	)

Definition at line 311 of file loadlang_test.cc.

                          {
  LangLoader("pol", GetParam());
}

◆ TEST_P() [89/165]

tesseract::TEST_P	(	LoadLanguage	,
		por
	)

Definition at line 314 of file loadlang_test.cc.

                          {
  LangLoader("por", GetParam());
}

◆ TEST_P() [90/165]

tesseract::TEST_P	(	LoadLanguage	,
		pus
	)

Definition at line 317 of file loadlang_test.cc.

                          {
  LangLoader("pus", GetParam());
}

◆ TEST_P() [91/165]

tesseract::TEST_P	(	LoadLanguage	,
		que
	)

Definition at line 320 of file loadlang_test.cc.

                          {
  LangLoader("que", GetParam());
}

◆ TEST_P() [92/165]

tesseract::TEST_P	(	LoadLanguage	,
		ron
	)

Definition at line 323 of file loadlang_test.cc.

                          {
  LangLoader("ron", GetParam());
}

◆ TEST_P() [93/165]

tesseract::TEST_P	(	LoadLanguage	,
		rus
	)

Definition at line 326 of file loadlang_test.cc.

                          {
  LangLoader("rus", GetParam());
}

◆ TEST_P() [94/165]

tesseract::TEST_P	(	LoadLanguage	,
		san
	)

Definition at line 329 of file loadlang_test.cc.

                          {
  LangLoader("san", GetParam());
}

◆ TEST_P() [95/165]

tesseract::TEST_P	(	LoadLanguage	,
		sin
	)

Definition at line 332 of file loadlang_test.cc.

                          {
  LangLoader("sin", GetParam());
}

◆ TEST_P() [96/165]

tesseract::TEST_P	(	LoadLanguage	,
		slk
	)

Definition at line 335 of file loadlang_test.cc.

                          {
  LangLoader("slk", GetParam());
}

◆ TEST_P() [97/165]

tesseract::TEST_P	(	LoadLanguage	,
		slv
	)

Definition at line 338 of file loadlang_test.cc.

                          {
  LangLoader("slv", GetParam());
}

◆ TEST_P() [98/165]

tesseract::TEST_P	(	LoadLanguage	,
		snd
	)

Definition at line 341 of file loadlang_test.cc.

                          {
  LangLoader("snd", GetParam());
}

◆ TEST_P() [99/165]

tesseract::TEST_P	(	LoadLanguage	,
		spa
	)

Definition at line 344 of file loadlang_test.cc.

                          {
  LangLoader("spa", GetParam());
}

◆ TEST_P() [100/165]

tesseract::TEST_P	(	LoadLanguage	,
		spa_old
	)

Definition at line 347 of file loadlang_test.cc.

                              {
  LangLoader("spa_old", GetParam());
}

◆ TEST_P() [101/165]

tesseract::TEST_P	(	LoadLanguage	,
		sqi
	)

Definition at line 350 of file loadlang_test.cc.

                          {
  LangLoader("sqi", GetParam());
}

◆ TEST_P() [102/165]

tesseract::TEST_P	(	LoadLanguage	,
		srp
	)

Definition at line 353 of file loadlang_test.cc.

                          {
  LangLoader("srp", GetParam());
}

◆ TEST_P() [103/165]

tesseract::TEST_P	(	LoadLanguage	,
		srp_latn
	)

Definition at line 356 of file loadlang_test.cc.

                               {
  LangLoader("srp_latn", GetParam());
}

◆ TEST_P() [104/165]

tesseract::TEST_P	(	LoadLanguage	,
		sun
	)

Definition at line 359 of file loadlang_test.cc.

                          {
  LangLoader("sun", GetParam());
}

◆ TEST_P() [105/165]

tesseract::TEST_P	(	LoadLanguage	,
		swa
	)

Definition at line 362 of file loadlang_test.cc.

                          {
  LangLoader("swa", GetParam());
}

◆ TEST_P() [106/165]

tesseract::TEST_P	(	LoadLanguage	,
		swe
	)

Definition at line 365 of file loadlang_test.cc.

                          {
  LangLoader("swe", GetParam());
}

◆ TEST_P() [107/165]

tesseract::TEST_P	(	LoadLanguage	,
		syr
	)

Definition at line 368 of file loadlang_test.cc.

                          {
  LangLoader("syr", GetParam());
}

◆ TEST_P() [108/165]

tesseract::TEST_P	(	LoadLanguage	,
		tam
	)

Definition at line 371 of file loadlang_test.cc.

                          {
  LangLoader("tam", GetParam());
}

◆ TEST_P() [109/165]

tesseract::TEST_P	(	LoadLanguage	,
		tat
	)

Definition at line 374 of file loadlang_test.cc.

                          {
  LangLoader("tat", GetParam());
}

◆ TEST_P() [110/165]

tesseract::TEST_P	(	LoadLanguage	,
		tel
	)

Definition at line 377 of file loadlang_test.cc.

                          {
  LangLoader("tel", GetParam());
}

◆ TEST_P() [111/165]

tesseract::TEST_P	(	LoadLanguage	,
		tgk
	)

Definition at line 380 of file loadlang_test.cc.

                          {
  LangLoader("tgk", GetParam());
}

◆ TEST_P() [112/165]

tesseract::TEST_P	(	LoadLanguage	,
		tha
	)

Definition at line 383 of file loadlang_test.cc.

                          {
  LangLoader("tha", GetParam());
}

◆ TEST_P() [113/165]

tesseract::TEST_P	(	LoadLanguage	,
		tir
	)

Definition at line 386 of file loadlang_test.cc.

                          {
  LangLoader("tir", GetParam());
}

◆ TEST_P() [114/165]

tesseract::TEST_P	(	LoadLanguage	,
		ton
	)

Definition at line 389 of file loadlang_test.cc.

                          {
  LangLoader("ton", GetParam());
}

◆ TEST_P() [115/165]

tesseract::TEST_P	(	LoadLanguage	,
		tur
	)

Definition at line 392 of file loadlang_test.cc.

                          {
  LangLoader("tur", GetParam());
}

◆ TEST_P() [116/165]

tesseract::TEST_P	(	LoadLanguage	,
		uig
	)

Definition at line 395 of file loadlang_test.cc.

                          {
  LangLoader("uig", GetParam());
}

◆ TEST_P() [117/165]

tesseract::TEST_P	(	LoadLanguage	,
		ukr
	)

Definition at line 398 of file loadlang_test.cc.

                          {
  LangLoader("ukr", GetParam());
}

◆ TEST_P() [118/165]

tesseract::TEST_P	(	LoadLanguage	,
		urd
	)

Definition at line 401 of file loadlang_test.cc.

                          {
  LangLoader("urd", GetParam());
}

◆ TEST_P() [119/165]

tesseract::TEST_P	(	LoadLanguage	,
		uzb
	)

Definition at line 404 of file loadlang_test.cc.

                          {
  LangLoader("uzb", GetParam());
}

◆ TEST_P() [120/165]

tesseract::TEST_P	(	LoadLanguage	,
		uzb_cyrl
	)

Definition at line 407 of file loadlang_test.cc.

                               {
  LangLoader("uzb_cyrl", GetParam());
}

◆ TEST_P() [121/165]

tesseract::TEST_P	(	LoadLanguage	,
		vie
	)

Definition at line 410 of file loadlang_test.cc.

                          {
  LangLoader("vie", GetParam());
}

◆ TEST_P() [122/165]

tesseract::TEST_P	(	LoadLanguage	,
		yid
	)

Definition at line 413 of file loadlang_test.cc.

                          {
  LangLoader("yid", GetParam());
}

◆ TEST_P() [123/165]

tesseract::TEST_P	(	LoadLanguage	,
		yor
	)

Definition at line 416 of file loadlang_test.cc.

                          {
  LangLoader("yor", GetParam());
}

◆ TEST_P() [124/165]

tesseract::TEST_P	(	LoadScript	,
		Arabic
	)

Definition at line 430 of file loadlang_test.cc.

                           {
  LangLoader("script/Arabic", GetParam());
}

◆ TEST_P() [125/165]

tesseract::TEST_P	(	LoadScript	,
		Armenian
	)

Definition at line 433 of file loadlang_test.cc.

                             {
  LangLoader("script/Armenian", GetParam());
}

◆ TEST_P() [126/165]

tesseract::TEST_P	(	LoadScript	,
		Bengali
	)

Definition at line 436 of file loadlang_test.cc.

                            {
  LangLoader("script/Bengali", GetParam());
}

◆ TEST_P() [127/165]

tesseract::TEST_P	(	LoadScript	,
		Canadian_Aboriginal
	)

Definition at line 439 of file loadlang_test.cc.

                                        {
  LangLoader("script/Canadian_Aboriginal", GetParam());
}

◆ TEST_P() [128/165]

tesseract::TEST_P	(	LoadScript	,
		Cherokee
	)

Definition at line 442 of file loadlang_test.cc.

                             {
  LangLoader("script/Cherokee", GetParam());
}

◆ TEST_P() [129/165]

tesseract::TEST_P	(	LoadScript	,
		Cyrillic
	)

Definition at line 445 of file loadlang_test.cc.

                             {
  LangLoader("script/Cyrillic", GetParam());
}

◆ TEST_P() [130/165]

tesseract::TEST_P	(	LoadScript	,
		Devanagari
	)

Definition at line 448 of file loadlang_test.cc.

                               {
  LangLoader("script/Devanagari", GetParam());
}

◆ TEST_P() [131/165]

tesseract::TEST_P	(	LoadScript	,
		Ethiopic
	)

Definition at line 451 of file loadlang_test.cc.

                             {
  LangLoader("script/Ethiopic", GetParam());
}

◆ TEST_P() [132/165]

tesseract::TEST_P	(	LoadScript	,
		Fraktur
	)

Definition at line 454 of file loadlang_test.cc.

                            {
  LangLoader("script/Fraktur", GetParam());
}

◆ TEST_P() [133/165]

tesseract::TEST_P	(	LoadScript	,
		Georgian
	)

Definition at line 457 of file loadlang_test.cc.

                             {
  LangLoader("script/Georgian", GetParam());
}

◆ TEST_P() [134/165]

tesseract::TEST_P	(	LoadScript	,
		Greek
	)

Definition at line 460 of file loadlang_test.cc.

                          {
  LangLoader("script/Greek", GetParam());
}

◆ TEST_P() [135/165]

tesseract::TEST_P	(	LoadScript	,
		Gujarati
	)

Definition at line 463 of file loadlang_test.cc.

                             {
  LangLoader("script/Gujarati", GetParam());
}

◆ TEST_P() [136/165]

tesseract::TEST_P	(	LoadScript	,
		Gurmukhi
	)

Definition at line 466 of file loadlang_test.cc.

                             {
  LangLoader("script/Gurmukhi", GetParam());
}

◆ TEST_P() [137/165]

tesseract::TEST_P	(	LoadScript	,
		Hangul
	)

Definition at line 481 of file loadlang_test.cc.

                           {
  LangLoader("script/Hangul", GetParam());
}

◆ TEST_P() [138/165]

tesseract::TEST_P	(	LoadScript	,
		Hangul_vert
	)

Definition at line 484 of file loadlang_test.cc.

                                {
  LangLoader("script/Hangul_vert", GetParam());
}

◆ TEST_P() [139/165]

tesseract::TEST_P	(	LoadScript	,
		HanS
	)

Definition at line 469 of file loadlang_test.cc.

                         {
  LangLoader("script/HanS", GetParam());
}

◆ TEST_P() [140/165]

tesseract::TEST_P	(	LoadScript	,
		HanS_vert
	)

Definition at line 472 of file loadlang_test.cc.

                              {
  LangLoader("script/HanS_vert", GetParam());
}

◆ TEST_P() [141/165]

tesseract::TEST_P	(	LoadScript	,
		HanT
	)

Definition at line 475 of file loadlang_test.cc.

                         {
  LangLoader("script/HanT", GetParam());
}

◆ TEST_P() [142/165]

tesseract::TEST_P	(	LoadScript	,
		HanT_vert
	)

Definition at line 478 of file loadlang_test.cc.

                              {
  LangLoader("script/HanT_vert", GetParam());
}

◆ TEST_P() [143/165]

tesseract::TEST_P	(	LoadScript	,
		Hebrew
	)

Definition at line 487 of file loadlang_test.cc.

                           {
  LangLoader("script/Hebrew", GetParam());
}

◆ TEST_P() [144/165]

tesseract::TEST_P	(	LoadScript	,
		Japanese
	)

Definition at line 490 of file loadlang_test.cc.

                             {
  LangLoader("script/Japanese", GetParam());
}

◆ TEST_P() [145/165]

tesseract::TEST_P	(	LoadScript	,
		Japanese_vert
	)

Definition at line 493 of file loadlang_test.cc.

                                  {
  LangLoader("script/Japanese_vert", GetParam());
}

◆ TEST_P() [146/165]

tesseract::TEST_P	(	LoadScript	,
		Kannada
	)

Definition at line 496 of file loadlang_test.cc.

                            {
  LangLoader("script/Kannada", GetParam());
}

◆ TEST_P() [147/165]

tesseract::TEST_P	(	LoadScript	,
		Khmer
	)

Definition at line 499 of file loadlang_test.cc.

                          {
  LangLoader("script/Khmer", GetParam());
}

◆ TEST_P() [148/165]

tesseract::TEST_P	(	LoadScript	,
		Lao
	)

Definition at line 502 of file loadlang_test.cc.

                        {
  LangLoader("script/Lao", GetParam());
}

◆ TEST_P() [149/165]

tesseract::TEST_P	(	LoadScript	,
		Latin
	)

Definition at line 505 of file loadlang_test.cc.

                          {
  LangLoader("script/Latin", GetParam());
}

◆ TEST_P() [150/165]

tesseract::TEST_P	(	LoadScript	,
		Malayalam
	)

Definition at line 508 of file loadlang_test.cc.

                              {
  LangLoader("script/Malayalam", GetParam());
}

◆ TEST_P() [151/165]

tesseract::TEST_P	(	LoadScript	,
		Myanmar
	)

Definition at line 511 of file loadlang_test.cc.

                            {
  LangLoader("script/Myanmar", GetParam());
}

◆ TEST_P() [152/165]

tesseract::TEST_P	(	LoadScript	,
		Oriya
	)

Definition at line 514 of file loadlang_test.cc.

                          {
  LangLoader("script/Oriya", GetParam());
}

◆ TEST_P() [153/165]

tesseract::TEST_P	(	LoadScript	,
		Sinhala
	)

Definition at line 517 of file loadlang_test.cc.

                            {
  LangLoader("script/Sinhala", GetParam());
}

◆ TEST_P() [154/165]

tesseract::TEST_P	(	LoadScript	,
		Syriac
	)

Definition at line 520 of file loadlang_test.cc.

                           {
  LangLoader("script/Syriac", GetParam());
}

◆ TEST_P() [155/165]

tesseract::TEST_P	(	LoadScript	,
		Tamil
	)

Definition at line 523 of file loadlang_test.cc.

                          {
  LangLoader("script/Tamil", GetParam());
}

◆ TEST_P() [156/165]

tesseract::TEST_P	(	LoadScript	,
		Telugu
	)

Definition at line 526 of file loadlang_test.cc.

                           {
  LangLoader("script/Telugu", GetParam());
}

◆ TEST_P() [157/165]

tesseract::TEST_P	(	LoadScript	,
		Thaana
	)

Definition at line 529 of file loadlang_test.cc.

                           {
  LangLoader("script/Thaana", GetParam());
}

◆ TEST_P() [158/165]

tesseract::TEST_P	(	LoadScript	,
		Thai
	)

Definition at line 532 of file loadlang_test.cc.

                         {
  LangLoader("script/Thai", GetParam());
}

◆ TEST_P() [159/165]

tesseract::TEST_P	(	LoadScript	,
		Tibetan
	)

Definition at line 535 of file loadlang_test.cc.

                            {
  LangLoader("script/Tibetan", GetParam());
}

◆ TEST_P() [160/165]

tesseract::TEST_P	(	LoadScript	,
		Vietnamese
	)

Definition at line 538 of file loadlang_test.cc.

                               {
  LangLoader("script/Vietnamese", GetParam());
}

◆ TEST_P() [161/165]

tesseract::TEST_P	(	MatchGroundTruth	,
		BestPhototestOCR
	)

Definition at line 89 of file apiexample_test.cc.

                                           {
  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_best",
            GetParam());
}

◆ TEST_P() [162/165]

tesseract::TEST_P	(	MatchGroundTruth	,
		FastPhototestOCR
	)

Definition at line 84 of file apiexample_test.cc.

                                           {
  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_fast",
            GetParam());
}

◆ TEST_P() [163/165]

tesseract::TEST_P	(	MatchGroundTruth	,
		TessPhototestOCR
	)

Definition at line 94 of file apiexample_test.cc.

                                           {
  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR, GetParam());
}

◆ TEST_P() [164/165]

tesseract::TEST_P	(	OSDTest	,
		MatchOrientationDegrees
	)

Definition at line 65 of file osd_test.cc.

                                         {
#ifdef DISABLED_LEGACY_ENGINE
  // Skip test because TessBaseAPI::DetectOrientationScript is missing.
  GTEST_SKIP();
#else
  OSDTester(std::get<0>(GetParam()), std::get<1>(GetParam()), std::get<2>(GetParam()));
#endif
}

◆ TEST_P() [165/165]

tesseract::TEST_P	(	QRSequenceGeneratorTest	,
		GeneratesValidSequence
	)

Definition at line 47 of file qrsequence_test.cc.

                                                        {
  const int kRangeSize = GetParam();
  TestableQRSequenceGenerator generator(kRangeSize);
  std::vector<int> vals(kRangeSize);
  CycleTimer timer;
  timer.Restart();
  for (int i = 0; i < kRangeSize; ++i) {
    vals[i] = generator.GetVal();
  }
  LOG(INFO) << kRangeSize << "-length sequence took " << timer.GetInMs() << "ms";
  // Sort the numbers to verify that we've covered the range without repetition.
  std::sort(vals.begin(), vals.end());
  for (int i = 0; i < kRangeSize; ++i) {
    EXPECT_EQ(i, vals[i]);
    if (i != vals[i]) {
      LOG(INFO) << "Aborting remaining comparisons";
      break;
    }
  }
}

◆ test_underline()

bool tesseract::test_underline	(	bool	testing_on,
		C_BLOB *	blob,
		int16_t	baseline,
		int16_t	xheight
	)

test_underline

Check whether the blob is an underline or an overline. Returns true if it is either, false otherwise.

Parameters

testing_on	true to print debug output while testing the blob
blob	blob to test
baseline	vertical (y) position of the baseline
xheight	x-height of the line, measured upward from the baseline

Definition at line 47 of file blkocc.cpp.

  {
  TDimension occ;
  STATS projection;
 
  auto blob_box = blob->bounding_box();
  auto blob_width = blob->bounding_box().width();
  projection.set_range(blob_box.bottom(), blob_box.top());
  if (testing_on) {
    //              blob->plot(to_win,GOLDENROD,GOLDENROD);
    //              line_color_index(to_win,GOLDENROD);
    //              move2d(to_win,blob_box.left(),baseline);
    //              draw2d(to_win,blob_box.right(),baseline);
    //              move2d(to_win,blob_box.left(),baseline+xheight);
    //              draw2d(to_win,blob_box.right(),baseline+xheight);
    tprintf("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:",
            blob->bounding_box().left(), blob->bounding_box().bottom(),
            blob->bounding_box().right(), blob->bounding_box().top(), baseline);
  }
  horizontal_cblob_projection(blob, &projection);
  int32_t desc_occ = 0;
  for (occ = blob_box.bottom(); occ < baseline; occ++) {
    if (occ <= blob_box.top() && projection.pile_count(occ) > desc_occ) {
      // max in region
      desc_occ = projection.pile_count(occ);
    }
  }
  int32_t x_occ = 0;
  for (occ = baseline; occ <= baseline + xheight; occ++) {
    if (occ >= blob_box.bottom() && occ <= blob_box.top() && projection.pile_count(occ) > x_occ) {
      // max in region
      x_occ = projection.pile_count(occ);
    }
  }
  int32_t asc_occ = 0;
  for (occ = baseline + xheight + 1; occ <= blob_box.top(); occ++) {
    if (occ >= blob_box.bottom() && projection.pile_count(occ) > asc_occ) {
      asc_occ = projection.pile_count(occ);
    }
  }
  if (testing_on) {
    tprintf("%d %d %d\n", desc_occ, x_occ, asc_occ);
  }
  if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) {
    tprintf("Bottom=%d, top=%d, base=%d, x=%d\n", blob_box.bottom(), blob_box.top(), baseline,
            xheight);
    projection.print();
  }
  if (desc_occ > x_occ + x_occ && desc_occ > blob_width * textord_underline_threshold) {
    return true; // real underline
  }
  return asc_occ > x_occ + x_occ && asc_occ > blob_width * textord_underline_threshold; // overline
                                                                                        // neither
}

◆ TestDataNameToPath()

std::string tesseract::TestDataNameToPath ( const std::string & name )

Definition at line 24 of file lang_model_test.cc.

                                                    {
  return file::JoinPath(TESTING_DIR, name);
}

◆ TestParagraphDetection()

void tesseract::TestParagraphDetection	(	const TextAndModel *	correct,
		int	num_rows
	)

Definition at line 191 of file paragraphs_test.cc.

                                                                       {
  std::vector<RowInfo> row_infos;
  std::vector<PARA *> row_owners;
  PARA_LIST paragraphs;
  std::vector<ParagraphModel *> models;
 
  MakeAsciiRowInfos(correct, num_rows, &row_infos);
  int debug_level(3);
  tesseract::DetectParagraphs(debug_level, &row_infos, &row_owners, &paragraphs, &models);
  EvaluateParagraphDetection(correct, num_rows, row_owners);
  for (auto *model : models) {
    delete model;
  }
}

◆ tprintf()

TESS_API void tesseract::tprintf	(	const char *	format,
			...
	)

Definition at line 41 of file tprintf.cpp.

                                      {
  const char *debug_file_name = debug_file.c_str();
  static FILE *debugfp = nullptr; // debug file
 
  if (debug_file_name == nullptr) {
    // This should not happen.
    return;
  }
 
#ifdef _WIN32
  // Replace /dev/null by nul for Windows.
  if (strcmp(debug_file_name, "/dev/null") == 0) {
    debug_file_name = "nul";
    debug_file.set_value(debug_file_name);
  }
#endif
 
  if (debugfp == nullptr && debug_file_name[0] != '\0') {
    debugfp = fopen(debug_file_name, "wb");
  } else if (debugfp != nullptr && debug_file_name[0] == '\0') {
    fclose(debugfp);
    debugfp = nullptr;
  }
 
  va_list args;           // variable args
  va_start(args, format); // variable list
  if (debugfp != nullptr) {
    vfprintf(debugfp, format, args);
  } else {
    vfprintf(stderr, format, args);
  }
  va_end(args);
}

◆ TraceBlockOnReducedPix()

Image tesseract::TraceBlockOnReducedPix	(	BLOCK *	block,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

Definition at line 250 of file bbgrid.cpp.

                                                                                               {
  const TBOX &box = block->pdblk.bounding_box();
  Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
  int wpl = pixGetWpl(pix);
  l_uint32 *data = pixGetData(pix);
  ICOORDELT_IT it(block->pdblk.poly_block()->points());
  for (it.mark_cycle_pt(); !it.cycled_list();) {
    ICOORD pos = *it.data();
    it.forward();
    ICOORD next_pos = *it.data();
    ICOORD line_vector = next_pos - pos;
    int major, minor;
    ICOORD major_step, minor_step;
    line_vector.setup_render(&major_step, &minor_step, &major, &minor);
    int accumulator = major / 2;
    while (pos != next_pos) {
      int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
      int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
      SET_DATA_BIT(data + grid_y * wpl, grid_x);
      pos += major_step;
      accumulator += minor;
      if (accumulator >= major) {
        accumulator -= major;
        pos += minor_step;
      }
    }
  }
  return pix;
}

◆ TraceOutlineOnReducedPix()

Image tesseract::TraceOutlineOnReducedPix	(	C_OUTLINE *	outline,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

Definition at line 224 of file bbgrid.cpp.

                                           {
  const TBOX &box = outline->bounding_box();
  Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
  int wpl = pixGetWpl(pix);
  l_uint32 *data = pixGetData(pix);
  int length = outline->pathlength();
  ICOORD pos = outline->start_pos();
  for (int i = 0; i < length; ++i) {
    int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
    int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
    SET_DATA_BIT(data + grid_y * wpl, grid_x);
    pos += outline->step(i);
  }
  return pix;
}

◆ transform_to_next_perm()

void tesseract::transform_to_next_perm ( WERD_RES_LIST & words )

Definition at line 391 of file fixspace.cpp.

                                                  {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT prev_word_it(&words);
  WERD_RES *word;
  WERD_RES *prev_word;
  WERD_RES *combo;
  WERD *copy_word;
  int16_t prev_right = -INT16_MAX;
  TBOX box;
  int16_t gap;
  int16_t min_gap = INT16_MAX;
 
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data();
    if (!word->part_of_combo) {
      box = word->word->bounding_box();
      if (prev_right > -INT16_MAX) {
        gap = box.left() - prev_right;
        if (gap < min_gap) {
          min_gap = gap;
        }
      }
      prev_right = box.right();
    }
  }
  if (min_gap < INT16_MAX) {
    prev_right = -INT16_MAX; // back to start
    word_it.set_to_list(&words);
    // Note: we can't use cycle_pt due to inserted combos at start of list.
    for (; (prev_right == -INT16_MAX) || !word_it.at_first(); word_it.forward()) {
      word = word_it.data();
      if (!word->part_of_combo) {
        box = word->word->bounding_box();
        if (prev_right > -INT16_MAX) {
          gap = box.left() - prev_right;
          if (gap <= min_gap) {
            prev_word = prev_word_it.data();
            if (prev_word->combination) {
              combo = prev_word;
            } else {
              /* Make a new combination and insert before
               * the first word being joined. */
              copy_word = new WERD;
              *copy_word = *(prev_word->word);
              // deep copy
              combo = new WERD_RES(copy_word);
              combo->combination = true;
              combo->x_height = prev_word->x_height;
              prev_word->part_of_combo = true;
              prev_word_it.add_before_then_move(combo);
            }
            combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
            if (word->combination) {
              combo->word->join_on(word->word);
              // Move blobs to combo
              // old combo no longer needed
              delete word_it.extract();
            } else {
              // Copy current wd to combo
              combo->copy_on(word);
              word->part_of_combo = true;
            }
            combo->done = false;
            combo->ClearResults();
          } else {
            prev_word_it = word_it; // catch up
          }
        }
        prev_right = box.right();
      }
    }
  } else {
    words.clear(); // signal termination
  }
}

◆ try_block_fixed()

bool tesseract::try_block_fixed	(	TO_BLOCK *	block,
		int32_t	block_index
	)

Definition at line 502 of file topitch.cpp.

  {
  return false;
}

◆ try_doc_fixed()

bool tesseract::try_doc_fixed	(	ICOORD	page_tr,
		TO_BLOCK_LIST *	port_blocks,
		float	gradient
	)

Definition at line 371 of file topitch.cpp.

  {
  int16_t master_x; // uniform shifts
  int16_t pitch;    // median pitch.
  int x;            // profile coord
  int prop_blocks;  // correct counts
  int fixed_blocks;
  int total_row_count; // total in page
                       // iterator
  TO_BLOCK_IT block_it = port_blocks;
  TO_BLOCK *block;         // current block;
  TO_ROW *row;             // current row
  int16_t projection_left; // edges
  int16_t projection_right;
  int16_t row_left; // edges of row
  int16_t row_right;
  float master_y;     // uniform shifts
  float shift_factor; // page skew correction
  float final_pitch;  // output pitch
  float row_y;        // baseline
  STATS projection;   // entire page
  STATS pitches(0, MAX_ALLOWED_PITCH - 1);
  // for median
  float sp_sd;      // space sd
  int16_t mid_cuts; // no of cheap cuts
  float pitch_sd;   // sync rating
 
  if (!textord_blockndoc_fixed ||
      block_it.empty() || block_it.data()->get_rows()->empty()) {
    return false;
  }
  shift_factor = gradient / (gradient * gradient + 1);
  // row iterator
  TO_ROW_IT row_it(block_it.data()->get_rows());
  master_x = row_it.data()->projection_left;
  master_y = row_it.data()->baseline.y(master_x);
  projection_left = INT16_MAX;
  projection_right = -INT16_MAX;
  prop_blocks = 0;
  fixed_blocks = 0;
  total_row_count = 0;
 
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    block = block_it.data();
    row_it.set_to_list(block->get_rows());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      row = row_it.data();
      total_row_count++;
      if (row->fixed_pitch > 0) {
        pitches.add(static_cast<int32_t>(row->fixed_pitch), 1);
      }
      // find median
      row_y = row->baseline.y(master_x);
      row_left = static_cast<int16_t>(row->projection_left - shift_factor * (master_y - row_y));
      row_right = static_cast<int16_t>(row->projection_right - shift_factor * (master_y - row_y));
      if (row_left < projection_left) {
        projection_left = row_left;
      }
      if (row_right > projection_right) {
        projection_right = row_right;
      }
    }
  }
  if (pitches.get_total() == 0) {
    return false;
  }
  projection.set_range(projection_left, projection_right - 1);
 
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    block = block_it.data();
    row_it.set_to_list(block->get_rows());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      row = row_it.data();
      row_y = row->baseline.y(master_x);
      row_left = static_cast<int16_t>(row->projection_left - shift_factor * (master_y - row_y));
      for (x = row->projection_left; x < row->projection_right; x++, row_left++) {
        projection.add(row_left, row->projection.pile_count(x));
      }
    }
  }
 
  row_it.set_to_list(block_it.data()->get_rows());
  row = row_it.data();
#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != nullptr) {
    projection.plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
  }
#endif
  final_pitch = pitches.ile(0.5);
  pitch = static_cast<int16_t>(final_pitch);
  pitch_sd = tune_row_pitch(row, &projection, projection_left, projection_right, pitch * 0.75,
                            final_pitch, sp_sd, mid_cuts, &row->char_cells, false);
 
  if (textord_debug_pitch_metric) {
    tprintf(
        "try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%"
        "g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
        prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, pitch_sd / total_row_count,
        pitch_sd / pitch, pitch_sd / total_row_count / pitch);
  }
 
#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != nullptr) {
    float row_shift;              // shift for row
    ICOORDELT_LIST *master_cells; // cells for page
    master_cells = &row->char_cells;
    for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
      block = block_it.data();
      row_it.set_to_list(block->get_rows());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        row = row_it.data();
        row_y = row->baseline.y(master_x);
        row_shift = shift_factor * (master_y - row_y);
        plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
      }
    }
  }
#endif
  row->char_cells.clear();
  return false;
}

◆ try_rows_fixed()

bool tesseract::try_rows_fixed	(	TO_BLOCK *	block,
		int32_t	block_index,
		bool	testing_on
	)

Definition at line 515 of file topitch.cpp.

  {
  // Runs the fixed-pitch test on every row of the block that already carries
  // a positive fixed_pitch, tallies the per-row votes with
  // count_block_votes(), and stores the resulting verdict in
  // block->pitch_decision. Always returns false.
  int32_t def_fixed = 0;   // vote tallies, filled in by count_block_votes()
  int32_t def_prop = 0;
  int32_t maybe_fixed = 0;
  int32_t maybe_prop = 0;
  int32_t dunno = 0;
  int32_t corr_fixed = 0;
  int32_t corr_prop = 0;
  TO_ROW_IT row_it(block->get_rows());

  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    TO_ROW *cur_row = row_it.data();
    ASSERT_HOST(cur_row->xheight > 0);
    // fixed_pitch is re-read after fixed_pitch_row() has run: the space and
    // kern sizes are only replaced when it is zero at that point.
    if (cur_row->fixed_pitch > 0 && fixed_pitch_row(cur_row, block->block, block_index) &&
        cur_row->fixed_pitch == 0) {
      const float kern_threshold = cur_row->pr_nonsp;
      const float space_threshold = cur_row->pr_space;
      cur_row->space_size = space_threshold;
      cur_row->kern_size = kern_threshold;
    }
  }

  count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
                    dunno);

  const bool show_counts =
      testing_on &&
      (textord_debug_pitch_test || textord_blocksall_prop || textord_blocksall_fixed);
  if (show_counts) {
    tprintf("Initially:");
    print_block_counts(block, block_index);
  }

  // Definite votes are considered first; the "maybe" votes only matter when
  // there are no definite votes at all.
  if (def_fixed > def_prop * textord_words_veto_power) {
    block->pitch_decision = PITCH_DEF_FIXED;
  } else if (def_prop > def_fixed * textord_words_veto_power) {
    block->pitch_decision = PITCH_DEF_PROP;
  } else if (def_fixed > 0 || def_prop > 0) {
    block->pitch_decision = PITCH_DUNNO;
  } else if (maybe_fixed > maybe_prop * textord_words_veto_power) {
    block->pitch_decision = PITCH_MAYBE_FIXED;
  } else if (maybe_prop > maybe_fixed * textord_words_veto_power) {
    block->pitch_decision = PITCH_MAYBE_PROP;
  } else {
    block->pitch_decision = PITCH_DUNNO;
  }
  return false;
}

◆ tune_row_pitch()

float tesseract::tune_row_pitch	(	TO_ROW *	row,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	space_size,
		float &	initial_pitch,
		float &	best_sp_sd,
		int16_t &	best_mid_cuts,
		ICOORDELT_LIST *	best_cells,
		bool	testing_on
	)

Definition at line 1097 of file topitch.cpp.

  {
  // Refines the pitch estimate for a row. Candidate pitches
  // initial_pitch +/- 1 .. textord_pitch_range are scored with
  // compute_pitch_sd() and the one with the smallest score wins.
  //
  // In/out: initial_pitch is overwritten with the winning pitch.
  // Out:    best_sp_sd, best_mid_cuts and *best_cells describe the winner.
  // Returns the smallest score found, except in the two shortcut paths below.
  int pitch_delta;           // offset pitch
  int16_t mid_cuts;          // cheap cuts
  float pitch_sd;            // current sd
  float best_sd;             // best result
  float best_pitch;          // pitch for best result
  float initial_sd;          // starting error
  float sp_sd;               // space sd
  ICOORDELT_LIST test_cells; // row cells
  ICOORDELT_IT best_it;      // start of best list
 
  // Shortcut 1: hand the whole job to the alternative implementation.
  if (textord_fast_pitch_test) {
    return tune_row_pitch2(row, projection, projection_left, projection_right, space_size,
                           initial_pitch, best_sp_sd,
                           // space sd
                           best_mid_cuts, best_cells, testing_on);
  }
  // Shortcut 2: testing disabled. The pitch itself is reported both as the
  // space sd and as the return value; best_mid_cuts and best_cells are left
  // untouched.
  if (textord_disable_pitch_test) {
    best_sp_sd = initial_pitch;
    return initial_pitch;
  }
  // Score the starting pitch; its results go straight into the output
  // parameters so they are valid even if no candidate beats it.
  initial_sd = compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
                                initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on);
  best_sd = initial_sd;
  best_pitch = initial_pitch;
  if (testing_on) {
    tprintf("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
  }
  // Search upwards from the starting pitch.
  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
    pitch_sd =
        compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
                         initial_pitch + pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
    if (testing_on) {
      tprintf("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, pitch_sd);
    }
    if (pitch_sd < best_sd) {
      // New winner: replace the caller's cell list with the candidate's.
      best_sd = pitch_sd;
      best_mid_cuts = mid_cuts;
      best_sp_sd = sp_sd;
      best_pitch = initial_pitch + pitch_delta;
      best_cells->clear();
      best_it.set_to_list(best_cells);
      best_it.add_list_after(&test_cells);
    } else {
      // Candidate rejected: discard its cells before the next attempt.
      test_cells.clear();
    }
    // The stop test is against the starting score, not the best so far.
    if (pitch_sd > initial_sd) {
      break; // getting worse
    }
  }
  // Search downwards from the starting pitch, with the same rules.
  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
    pitch_sd =
        compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
                         initial_pitch - pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
    if (testing_on) {
      tprintf("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, pitch_sd);
    }
    if (pitch_sd < best_sd) {
      best_sd = pitch_sd;
      best_mid_cuts = mid_cuts;
      best_sp_sd = sp_sd;
      best_pitch = initial_pitch - pitch_delta;
      best_cells->clear();
      best_it.set_to_list(best_cells);
      best_it.add_list_after(&test_cells);
    } else {
      test_cells.clear();
    }
    if (pitch_sd > initial_sd) {
      break;
    }
  }
  // Report the winning pitch back through the in/out parameter.
  initial_pitch = best_pitch;
 
  if (textord_debug_pitch_metric) {
    print_pitch_sd(row, projection, projection_left, projection_right, space_size, best_pitch);
  }
 
  return best_sd;
}

◆ tune_row_pitch2()

float tesseract::tune_row_pitch2	(	TO_ROW *	row,
		STATS *	projection,
		int16_t	projection_left,
		int16_t	projection_right,
		float	space_size,
		float &	initial_pitch,
		float &	best_sp_sd,
		int16_t &	best_mid_cuts,
		ICOORDELT_LIST *	best_cells,
		bool	testing_on
	)

Definition at line 1196 of file topitch.cpp.

  {
  // Alternative pitch tuner. For every candidate pitch in
  // [initial_pitch - textord_pitch_range, initial_pitch + textord_pitch_range]
  // the projection is folded modulo that pitch into a histogram; the
  // candidate/offset pair with the smallest folded count is taken as the best
  // pitch. compute_pitch_sd() is then called once for that pitch, with a
  // start/end range derived from the folded histogram.
  //
  // In/out: initial_pitch is overwritten with the chosen (integer) pitch.
  // Out:    best_sp_sd, best_mid_cuts and *best_cells come from the final
  //         compute_pitch_sd() call.
  // Returns the value of that call, or initial_pitch on the early exit.
  int pitch_delta;    // offset pitch
  int16_t pixel;      // pixel coord
  int16_t best_pixel; // pixel coord
  int16_t best_delta; // best pitch
  int16_t best_pitch; // best pitch
  int16_t start;      // of good range
  int16_t end;        // of good range
  int32_t best_count; // lowest sum
  float best_sd;      // best result
 
  // Default output, used as-is on the early exit below.
  best_sp_sd = initial_pitch;
 
  // The pitch is truncated to an integer for the rest of this function.
  best_pitch = static_cast<int>(initial_pitch);
  // Bail out when testing is disabled or when the smallest candidate
  // (best_pitch - textord_pitch_range) would not be positive.
  if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
    return initial_pitch;
  }
  // One histogram per candidate pitch; index textord_pitch_range is delta 0.
  std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); // summed projection
 
  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
    sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta);
  }
  // Fold the projection: each pixel is added at its offset modulo the
  // candidate pitch.
  for (pixel = projection_left; pixel <= projection_right; pixel++) {
    for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
      sum_proj[textord_pitch_range + pitch_delta].add(
          (pixel - projection_left) % (best_pitch + pitch_delta), projection->pile_count(pixel));
    }
  }
  // Find the candidate and offset with the smallest folded count, starting
  // from delta 0 / offset 0. Only a strictly smaller count replaces it.
  best_count = sum_proj[textord_pitch_range].pile_count(0);
  best_delta = 0;
  best_pixel = 0;
  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
    for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
      if (sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel) < best_count) {
        best_count = sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel);
        best_delta = pitch_delta;
        best_pixel = pixel;
      }
    }
  }
  if (testing_on) {
    tprintf("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", initial_pitch, best_delta,
            best_count);
  }
  best_pitch += best_delta;
  initial_pitch = best_pitch;
  // Turn the minimum into a threshold of 2 * (minimum + 1).
  best_count++;
  best_count += best_count;
  // Walk left and right from the best offset (starting two pixels away, at
  // most one pitch in each direction) while the folded count stays at or
  // below the threshold.
  // NOTE(review): start can go negative, making start % best_pitch negative;
  // this relies on how STATS::pile_count treats out-of-range values - confirm.
  for (start = best_pixel - 2;
       start > best_pixel - best_pitch &&
       sum_proj[textord_pitch_range + best_delta].pile_count(start % best_pitch) <= best_count;
       start--) {
    ;
  }
  for (end = best_pixel + 2;
       end < best_pixel + best_pitch &&
       sum_proj[textord_pitch_range + best_delta].pile_count(end % best_pitch) <= best_count;
       end++) {
    ;
  }
 
  // Score the chosen pitch, passing the range found above.
  best_sd = compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
                             initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on,
                             start, end);
  if (testing_on) {
    tprintf("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, best_sd);
  }
 
  if (textord_debug_pitch_metric) {
    print_pitch_sd(row, projection, projection_left, projection_right, space_size, initial_pitch);
  }
 
  return best_sd;
}

◆ tweak_row_baseline()

void tesseract::tweak_row_baseline	(	ROW *	row,
		double	blshift_maxshift,
		double	blshift_xfraction
	)

Definition at line 864 of file tordmain.cpp.

                                                                                     {
  // Rebuilds row->baseline so that it passes through the bottom of every
  // blob that sits close enough to the current baseline and is tall enough.
  // Such a blob gets its own flat segment at its bottom edge; elsewhere the
  // existing spline segments are copied across.
  //
  // blshift_maxshift:  a blob qualifies only if its distance from the
  //                    baseline, as a fraction of the x-height, is below this.
  // blshift_xfraction: a blob qualifies only if its height, as a fraction of
  //                    the x-height, is above this.
  TBOX blob_box;      // bounding box
  C_BLOB *blob;       // current blob
  WERD *word;         // current word
  int32_t blob_count; // no of blobs
  int32_t src_index;  // source segment
  int32_t dest_index; // destination segment
  float ydiff;        // baseline error
  float x_centre;     // centre of blob
                      // words of row
  WERD_IT word_it = row->word_list();
  C_BLOB_IT blob_it; // blob iterator
 
  // Count the blobs in the row; this sizes the output arrays.
  blob_count = 0;
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data(); // current word
                           // get total blobs
    blob_count += word->cblob_list()->length();
  }
  // Nothing to fit to: leave the baseline unchanged.
  if (blob_count == 0) {
    return;
  }
  // Room for one segment per blob plus every existing segment; xstarts needs
  // one extra entry for the end of the last segment.
  // spline segments
  std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1);
  // Three coefficients (a, b, c) per segment.
  // spline coeffs
  std::vector<double> coeffs((blob_count + row->baseline.segments) * 3);
 
  src_index = 0;
  dest_index = 0;
  // The new spline starts where the old one did.
  xstarts[0] = row->baseline.xcoords[0];
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data(); // current word
                           // blobs in word
    blob_it.set_to_list(word->cblob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      blob = blob_it.data();
      blob_box = blob->bounding_box();
      x_centre = (blob_box.left() + blob_box.right()) / 2.0;
      // Absolute distance of the blob bottom from the baseline, normalised
      // by the x-height.
      ydiff = blob_box.bottom() - row->base_line(x_centre);
      if (ydiff < 0) {
        ydiff = -ydiff / row->x_height();
      } else {
        ydiff = ydiff / row->x_height();
      }
      if (ydiff < blshift_maxshift && blob_box.height() / row->x_height() > blshift_xfraction) {
        // Qualifying blob: emit a flat segment (a = b = 0) at its bottom.
        // If the pending segment start is at or beyond the blob centre, pull
        // it back to the blob's left edge.
        if (xstarts[dest_index] >= x_centre) {
          xstarts[dest_index] = blob_box.left();
        }
        coeffs[dest_index * 3] = 0;
        coeffs[dest_index * 3 + 1] = 0;
        coeffs[dest_index * 3 + 2] = blob_box.bottom();
        // shift it
        dest_index++;
        // The next segment starts just past this blob.
        xstarts[dest_index] = blob_box.right() + 1;
      } else {
        // Non-qualifying blob: copy the original segments covering the span
        // from the pending start up to the blob centre.
        if (xstarts[dest_index] <= x_centre) {
          while (row->baseline.xcoords[src_index + 1] <= x_centre &&
                 src_index < row->baseline.segments - 1) {
            // Only copy source segments that end after the pending start;
            // earlier ones are skipped.
            if (row->baseline.xcoords[src_index + 1] > xstarts[dest_index]) {
              coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
              coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
              coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
              dest_index++;
              xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
            }
            src_index++;
          }
          // Copy the source segment that contains the blob centre.
          coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
          coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
          coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
          dest_index++;
          xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
        }
      }
    }
  }
  // Skip source segments that end at or before the pending start.
  while (src_index < row->baseline.segments &&
         row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) {
    src_index++;
  }
  // Append whatever is left of the original spline.
  while (src_index < row->baseline.segments) {
    coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
    coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
    coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
    dest_index++;
    src_index++;
    xstarts[dest_index] = row->baseline.xcoords[src_index];
  }
  // dest_index is now the number of segments written.
  // turn to spline
  row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]);
}

◆ UpdateMatchDisplay()

void tesseract::UpdateMatchDisplay ( )

This routine updates the integer-match display window (IntMatchWindow), if one has been created.

Globals:

FeatureShapes display list for features
ProtoShapes display list for protos

Definition at line 413 of file intproto.cpp.

                          {
  // Refresh the match window; does nothing when no window has been created.
  if (IntMatchWindow != nullptr) {
    IntMatchWindow->Update();
  }
} /* UpdateMatchDisplay */

◆ UpdateRange() [1/2]

template<typename T1 , typename T2 >

void tesseract::UpdateRange	(	const T1 &	x,
		T2 *	lower_bound,
		T2 *	upper_bound
	)

inline

Definition at line 117 of file helpers.h.

                                                                       {
  // Widen [*lower_bound, *upper_bound] just enough to include x.
  T2 &lo = *lower_bound;
  T2 &hi = *upper_bound;
  if (x < lo) {
    lo = x;
  }
  if (x > hi) {
    hi = x;
  }
}

◆ UpdateRange() [2/2]

template<typename T1 , typename T2 >

void tesseract::UpdateRange	(	const T1 &	x_lo,
		const T1 &	x_hi,
		T2 *	lower_bound,
		T2 *	upper_bound
	)

inline

Definition at line 128 of file helpers.h.

                                                                                          {
  // Widen [*lower_bound, *upper_bound] just enough to include [x_lo, x_hi].
  // Each end is tested independently: x_lo can only lower the lower bound
  // and x_hi can only raise the upper bound.
  T2 &lo = *lower_bound;
  T2 &hi = *upper_bound;
  if (x_lo < lo) {
    lo = x_lo;
  }
  if (x_hi > hi) {
    hi = x_hi;
  }
}

◆ ValidBodyLine()

bool tesseract::ValidBodyLine	(	const std::vector< RowScratchRegisters > *	rows,
		int	row,
		const ParagraphModel *	model
	)

Definition at line 1340 of file paragraphs.cpp.

                                                {
  // Only strong models can be asked about body lines; anything else is
  // reported and rejected.
  if (!StrongModel(model)) {
    tprintf("ValidBodyLine() should only be called with strong models!\n");
    return false;
  }
  const RowScratchRegisters &regs = (*rows)[row];
  return model->ValidBodyLine(regs.lmargin_, regs.lindent_, regs.rindent_, regs.rmargin_);
}

◆ ValidCharDescription()

bool tesseract::ValidCharDescription	(	const FEATURE_DEFS_STRUCT &	FeatureDefs,
		CHAR_DESC_STRUCT *	CharDesc
	)

Definition at line 131 of file featdefs.cpp.

                                                                                              {
  // A description is valid when every feature set is present, at least one
  // parameter exists, and no parameter is NaN or infinite.
  bool saw_finite_param = false;
  bool all_params_finite = true;
  for (size_t set_idx = 0; set_idx < CharDesc->NumFeatureSets; set_idx++) {
    const auto &feature_set = CharDesc->FeatureSets[set_idx];
    if (!feature_set) {
      // A missing feature set invalidates the whole description.
      return false;
    }
    for (int feat_idx = 0; feat_idx < feature_set->NumFeatures; feat_idx++) {
      FEATURE feature = feature_set->Features[feat_idx];
      for (int param_idx = 0; param_idx < feature->Type->NumParams; param_idx++) {
        const auto &value = feature->Params[param_idx];
        if (std::isnan(value) || std::isinf(value)) {
          all_params_finite = false;
        } else {
          saw_finite_param = true;
        }
      }
    }
  }
  return saw_finite_param && all_params_finite;
} /* ValidCharDescription */

◆ ValidFirstLine()

bool tesseract::ValidFirstLine	(	const std::vector< RowScratchRegisters > *	rows,
		int	row,
		const ParagraphModel *	model
	)

Definition at line 1331 of file paragraphs.cpp.

                                                 {
  // Only strong models can be asked about first lines; anything else is
  // reported and rejected.
  if (!StrongModel(model)) {
    tprintf("ValidFirstLine() should only be called with strong models!\n");
    return false;
  }
  const RowScratchRegisters &regs = (*rows)[row];
  return model->ValidFirstLine(regs.lmargin_, regs.lindent_, regs.rindent_, regs.rmargin_);
}

◆ vertical_cblob_projection()

void tesseract::vertical_cblob_projection	(	C_BLOB *	blob,
		STATS *	stats
	)

Definition at line 871 of file blobbox.cpp.

  {
  // Add the projection of each outline in the blob's outline list to stats.
  C_OUTLINE_IT outline_it(blob->out_list());
  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    vertical_coutline_projection(outline_it.data(), stats);
    outline_it.forward();
  }
}

◆ vertical_coutline_projection()

void tesseract::vertical_coutline_projection	(	C_OUTLINE *	outline,
		STATS *	stats
	)

Definition at line 890 of file blobbox.cpp.

  {
  // Walks the outline step by step. Each rightward step adds -y at the
  // current x, each leftward step adds +y at x - 1; vertical steps add
  // nothing. Child outlines are then processed recursively.
  ICOORD pos = outline->start_pos();           // current point
  const int32_t length = outline->pathlength(); // number of steps

  for (int16_t idx = 0; idx < length; idx++) {
    ICOORD step = outline->step(idx);
    if (step.x() > 0) {
      stats->add(pos.x(), -pos.y());
    } else if (step.x() < 0) {
      stats->add(pos.x() - 1, pos.y());
    }
    pos += step;
  }

  C_OUTLINE_IT child_it(outline->child());
  child_it.mark_cycle_pt();
  while (!child_it.cycled_list()) {
    vertical_coutline_projection(child_it.data(), stats);
    child_it.forward();
  }
}

◆ vertical_cunderline_projection()

void tesseract::vertical_cunderline_projection	(	C_OUTLINE *	outline,
		QSPLINE *	baseline,
		float	xheight,
		float	baseline_offset,
		STATS *	lower_proj,
		STATS *	middle_proj,
		STATS *	upper_proj
	)

Definition at line 202 of file underlin.cpp.

  {
  // Walks the outline and splits its vertical projection into three bands
  // relative to the baseline: below lower_y (lower_proj), between lower_y
  // and upper_y (middle_proj) and above upper_y (upper_proj), where
  //   lower_y = round(baseline(x) + baseline_offset)
  //   upper_y = round(baseline(x) + baseline_offset + xheight).
  // Rightward steps add negative amounts at the current x; leftward steps
  // add the mirrored positive amounts at x - 1. Child outlines are processed
  // recursively into the same three STATS.
  ICOORD pos;               // current point
  ICOORD step;              // edge step
  int16_t lower_y, upper_y; // region limits
  int32_t length;           // of outline
  int16_t stepindex;        // current step
  C_OUTLINE_IT out_it = outline->child();
 
  pos = outline->start_pos();
  length = outline->pathlength();
  for (stepindex = 0; stepindex < length; stepindex++) {
    step = outline->step(stepindex);
    if (step.x() > 0) {
      // Rightward step: band limits are taken at the current x.
      lower_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + 0.5));
      upper_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + xheight + 0.5));
      if (pos.y() >= lower_y) {
        // The lower band is filled up to lower_y.
        lower_proj->add(pos.x(), -lower_y);
        if (pos.y() >= upper_y) {
          // The middle band is full; the excess goes to the upper band.
          middle_proj->add(pos.x(), lower_y - upper_y);
          upper_proj->add(pos.x(), upper_y - pos.y());
        } else {
          // The point lies inside the middle band.
          middle_proj->add(pos.x(), lower_y - pos.y());
        }
      } else {
        // The point lies inside the lower band.
        lower_proj->add(pos.x(), -pos.y());
      }
    } else if (step.x() < 0) {
      // Leftward step: same split, opposite signs, at the pixel to the left.
      lower_y = static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + 0.5));
      upper_y =
          static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + xheight + 0.5));
      if (pos.y() >= lower_y) {
        lower_proj->add(pos.x() - 1, lower_y);
        if (pos.y() >= upper_y) {
          middle_proj->add(pos.x() - 1, upper_y - lower_y);
          upper_proj->add(pos.x() - 1, pos.y() - upper_y);
        } else {
          middle_proj->add(pos.x() - 1, pos.y() - lower_y);
        }
      } else {
        lower_proj->add(pos.x() - 1, pos.y());
      }
    }
    // Vertical steps contribute nothing; always advance along the outline.
    pos += step;
  }
 
  // Recurse into the child outlines.
  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
    vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, lower_proj,
                                   middle_proj, upper_proj);
  }
}

◆ vertical_torow_projection()

int16_t tesseract::vertical_torow_projection	(	TO_ROW *	row,
		STATS *	projection
	)

◆ vigorous_noise_removal()

void tesseract::vigorous_noise_removal ( TO_BLOCK * block )

Definition at line 508 of file makerow.cpp.

                                             {
  // For every row in the block: estimate the x-height as the median height
  // of the blobs that are at least kMinSize tall, then delete every blob
  // shorter than kNoiseSize times that estimate unless dot_of_i() accepts it
  // together with a neighbouring blob.
  TO_ROW_IT row_it = block->get_rows();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    TO_ROW *row = row_it.data();
    BLOBNBOX_IT b_it = row->blob_list();
    // Estimate the xheight on the row.
    // First pass: the tallest blob sets the upper limit of the histogram.
    int max_height = 0;
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      BLOBNBOX *blob = b_it.data();
      if (blob->bounding_box().height() > max_height) {
        max_height = blob->bounding_box().height();
      }
    }
    // Second pass: histogram of heights, ignoring blobs below kMinSize.
    STATS hstats(0, max_height);
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      BLOBNBOX *blob = b_it.data();
      int height = blob->bounding_box().height();
      if (height >= kMinSize) {
        hstats.add(blob->bounding_box().height(), 1);
      }
    }
    float xheight = hstats.median();
    // Delete small objects.
    // prev is the most recent blob that was NOT small; small blobs that are
    // kept as i-dots do not update it.
    BLOBNBOX *prev = nullptr;
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      BLOBNBOX *blob = b_it.data();
      const TBOX &box = blob->bounding_box();
      if (box.height() < kNoiseSize * xheight) {
        // Small so delete unless it looks like an i dot.
        if (prev != nullptr) {
          if (dot_of_i(blob, prev, row)) {
            continue; // Looks OK.
          }
        }
        // Also try pairing it with the following blob, if there is one.
        if (!b_it.at_last()) {
          BLOBNBOX *next = b_it.data_relative(1);
          if (dot_of_i(blob, next, row)) {
            continue; // Looks OK.
          }
        }
        // It might be noise so get rid of it.
        // Free the underlying C_BLOB first, then unlink and free the BLOBNBOX.
        delete blob->remove_cblob();
        delete b_it.extract();
      } else {
        prev = blob;
      }
    }
  }
}

◆ Walk()

void tesseract::Walk	(	KDTREE *	tree,
		kdwalk_proc	action,
		ClusteringContext *	context,
		KDNODE *	sub_tree,
		int32_t	level
	)

Walk a tree, calling action once on each node.

Operation: This routine walks through the specified sub_tree and invokes action at each node as follows: action(context, data, level), where data is the data contents of the node being visited and level is the level of the node in the tree, with the root being level 0.

Parameters

tree	root of the tree being walked.
action	action to be performed at every node
context	action's context
sub_tree	ptr to root of subtree to be walked
level	current level in the tree for this node

Definition at line 466 of file kdtree.cpp.

                                                                                                         {
  // Pre-order traversal: visit this node, then the left subtree, then the
  // right subtree. Children are visited one level further down, as given by
  // NextLevel().
  action(context, sub_tree->Data, level);
  if (KDNODE *left_child = sub_tree->Left) {
    Walk(tree, action, context, left_child, NextLevel(tree, level));
  }
  if (KDNODE *right_child = sub_tree->Right) {
    Walk(tree, action, context, right_child, NextLevel(tree, level));
  }
}

◆ within_error_margin()

bool tesseract::within_error_margin	(	float	test,
		float	num,
		float	margin
	)

inline

Definition at line 102 of file makerow.h.

                                                                     {
  // True when test lies in the closed interval
  // [num * (1 - margin), num * (1 + margin)].
  const float lower_limit = num * (1 - margin);
  const float upper_limit = num * (1 + margin);
  return test >= lower_limit && test <= upper_limit;
}

◆ word_blob_quality()

int16_t tesseract::word_blob_quality ( WERD_RES * word )

◆ word_comparator()

int tesseract::word_comparator	(	const void *	word1p,
		const void *	word2p
	)

word_comparator()

word comparator used to sort a word list so that words are in increasing order of left edge.

Definition at line 377 of file werd.cpp.

                                                            {
  // qsort-style comparator: each argument points at a WERD pointer. The
  // result is negative, zero or positive according to the difference of the
  // words' left edges.
  const WERD *lhs = *static_cast<const WERD *const *>(word1p);
  const WERD *rhs = *static_cast<const WERD *const *>(word2p);
  const int lhs_left = lhs->bounding_box().left();
  const int rhs_left = rhs->bounding_box().left();
  return lhs_left - rhs_left;
}

◆ word_contains_non_1_digit()

bool tesseract::word_contains_non_1_digit	(	const char *	word,
		const char *	word_lengths
	)

◆ write_info()

bool tesseract::write_info	(	FILE *	f,
		const FontInfo &	fi
	)

Definition at line 157 of file fontinfo.cpp.

                                             {
  // Written in order: the name length as int32_t, the name bytes (without a
  // terminator), then the properties. Stops at the first failed write.
  int32_t name_len = strlen(fi.name);
  if (!tesseract::Serialize(f, &name_len)) {
    return false;
  }
  if (!tesseract::Serialize(f, &fi.name[0], name_len)) {
    return false;
  }
  return tesseract::Serialize(f, &fi.properties);
}

◆ write_set()

bool tesseract::write_set	(	FILE *	f,
		const FontSet &	fs
	)

Definition at line 222 of file fontinfo.cpp.

                                           {
  // Written in order: the element count, then the elements themselves.
  // An empty set consists of the count alone.
  int count = fs.size();
  if (!tesseract::Serialize(f, &count)) {
    return false;
  }
  if (count <= 0) {
    return true;
  }
  return tesseract::Serialize(f, &fs[0], count);
}

◆ write_spacing_info()

bool tesseract::write_spacing_info	(	FILE *	f,
		const FontInfo &	fi
	)

Definition at line 194 of file fontinfo.cpp.

                                                     {
  // Writes the spacing vector of fi to f. Returns false as soon as any write
  // fails, true otherwise.
  //
  // Layout: the number of entries, then for each entry x_gap_before,
  // x_gap_after and the kerning count, followed (only when that count is
  // positive) by the kerned unichar ids and the kerned x-gaps.

  // A missing spacing vector is written as an empty one.
  int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
  if (!tesseract::Serialize(f, &vec_size)) {
    return false;
  }
  // Placeholder written in place of x_gap_before and x_gap_after for a null
  // entry. It must really hold two values: two elements are serialized from
  // this address, and a single int16_t would be read out of bounds.
  int16_t x_gap_invalid[2] = {-1, -1};
  for (int i = 0; i < vec_size; ++i) {
    FontSpacingInfo *fs = fi.spacing_vec->at(i);
    // A kerning count of -1 marks a null entry.
    int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
    if (fs == nullptr) {
      // Writing two invalid x-gaps.
      if (!tesseract::Serialize(f, &x_gap_invalid[0], 2) ||
          !tesseract::Serialize(f, &kern_size)) {
        return false;
      }
    } else {
      if (!tesseract::Serialize(f, &fs->x_gap_before) ||
          !tesseract::Serialize(f, &fs->x_gap_after) || !tesseract::Serialize(f, &kern_size)) {
        return false;
      }
    }
    // kern_size > 0 implies fs is non-null, so dereferencing it is safe here.
    if (kern_size > 0 &&
        (!Serialize(f, fs->kerned_unichar_ids) || !Serialize(f, fs->kerned_x_gaps))) {
      return false;
    }
  }
  return true;
}

◆ WriteAdaptedClass()

void tesseract::WriteAdaptedClass	(	FILE *	File,
		ADAPT_CLASS_STRUCT *	Class,
		int	NumConfigs
	)

This routine writes a binary representation of Class to File.

Parameters

File	open file to write Class to
Class	adapted class to write to File
NumConfigs	number of configs in Class

Note: Globals: none

Definition at line 307 of file adaptive.cpp.

                                                                              {
  // Output order: the raw ADAPT_CLASS_STRUCT, the permanent proto and config
  // bit vectors, the temporary proto count and protos, the config count and
  // then each config. The return values of fwrite are not checked.

  /* first write high level adapted class structure */
  fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
 
  /* then write out the definitions of the permanent protos and configs */
  fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
  fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
 
  /* then write out the list of temporary protos */
  uint32_t NumTempProtos = Class->TempProtos->size();
  fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
  // Walk a local copy of the list head, so Class->TempProtos itself is not
  // reassigned here.
  // NOTE(review): iterate() is a macro defined elsewhere; presumably it
  // advances TempProtos through the list until the end - confirm.
  auto TempProtos = Class->TempProtos;
  iterate(TempProtos) {
    void *proto = TempProtos->node;
    fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
  }
 
  /* then write out the adapted configs */
  fwrite(&NumConfigs, sizeof(int), 1, File);
  for (int i = 0; i < NumConfigs; i++) {
    // The PermConfigs bit for config i selects which member of Config[i] is
    // written.
    if (test_bit(Class->PermConfigs, i)) {
      WritePermConfig(File, Class->Config[i].Perm);
    } else {
      WriteTempConfig(File, Class->Config[i].Temp);
    }
  }
 
} /* WriteAdaptedClass */

◆ WriteCharDescription()

void tesseract::WriteCharDescription	(	const FEATURE_DEFS_STRUCT &	FeatureDefs,
		CHAR_DESC_STRUCT *	CharDesc,
		std::string &	str
	)

Appends a textual representation of CharDesc to str. The format used is to write out the number of feature sets which will be written followed by a representation of each feature set.

Each set starts with the short name for that feature followed by a description of the feature set. Feature sets which are not present are not written.

Parameters

FeatureDefs	definitions of feature types/extractors
str	string to append CharDesc to
CharDesc	character description to append to str

Definition at line 109 of file featdefs.cpp.

                                                                                                              {
  int NumSetsToWrite = 0;
 
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
      NumSetsToWrite++;
    }
  }
 
  str += " " + std::to_string(NumSetsToWrite);
  str += "\n";
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
      str += FeatureDefs.FeatureDesc[Type]->ShortName;
      str += " ";
      WriteFeatureSet(CharDesc->FeatureSets[Type], str);
    }
  }
} /* WriteCharDescription */

◆ WriteFeatureSet()

void tesseract::WriteFeatureSet	(	FEATURE_SET	FeatureSet,
		std::string &	str
	)

Append a textual representation of FeatureSet to str. This representation is an integer specifying the number of features in the set, followed by a newline, followed by text representations for each feature in the set.

Parameters

FeatureSet	feature set to append to str
str	string to append the feature set to

Definition at line 129 of file ocrfeatures.cpp.

                                                             {
  if (FeatureSet) {
    str += "" + std::to_string(FeatureSet->NumFeatures);
    str += "\n";
    for (int i = 0; i < FeatureSet->NumFeatures; i++) {
      WriteFeature(FeatureSet->Features[i], str);
    }
  }
} /* WriteFeatureSet */

◆ WriteFile()

TESS_UNICHARSET_TRAINING_API bool tesseract::WriteFile	(	const std::string &	output_dir,
		const std::string &	lang,
		const std::string &	suffix,
		const std::vector< char > &	data,
		FileWriter	writer
	)

Definition at line 40 of file lang_model_helpers.cpp.

                                                               {
  if (lang.empty()) {
    return true;
  }
  std::string dirname = output_dir + "/" + lang;
  // Attempt to make the directory, but ignore errors, as it may not be a
  // standard filesystem, and the writer will complain if not successful.
#if defined(_WIN32)
  _mkdir(dirname.c_str());
#else
  mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
#endif
  std::string filename = dirname + "/" + lang + suffix;
  if (writer == nullptr) {
    return SaveDataToFile(data, filename.c_str());
  } else {
    return (*writer)(data, filename.c_str());
  }
}

◆ WriteParamDesc()

TESS_API void tesseract::WriteParamDesc	(	FILE *	File,
		uint16_t	N,
		const PARAM_DESC	ParamDesc[]
	)

This routine writes an array of dimension descriptors to the specified text file.

Parameters

File	open text file to write param descriptors to
N	number of param descriptors to write
ParamDesc	array of param descriptors to write

Definition at line 244 of file clusttool.cpp.

                                                                          {
  // One line per descriptor: a fixed-width circular/linear tag, a
  // fixed-width essential/non-essential tag, then the Min and Max values.
  for (int idx = 0; idx < N; idx++) {
    const PARAM_DESC &desc = ParamDesc[idx];
    fputs(desc.Circular ? "circular " : "linear   ", File);
    fputs(desc.NonEssential ? "non-essential " : "essential     ", File);
    fprintf(File, "%10.6f %10.6f\n", desc.Min, desc.Max);
  }
}

◆ WritePermConfig()

void tesseract::WritePermConfig	(	FILE *	File,
		PERM_CONFIG_STRUCT *	Config
	)

This routine writes a binary representation of a permanent configuration to File.

Parameters

File	open file to write Config to
Config	permanent config to write to File

Note: Globals: none

Definition at line 368 of file adaptive.cpp.

                                                             {
  assert(Config != nullptr);

  // Count the leading entries of Ambigs that are greater than zero; the
  // first non-positive entry ends the list.
  uint8_t ambig_count = 0;
  for (; Config->Ambigs[ambig_count] > 0; ++ambig_count) {
  }

  // Record layout: one count byte, then that many ids, then the font info id.
  fwrite(&ambig_count, sizeof(ambig_count), 1, File);
  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), ambig_count, File);
  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
} /* WritePermConfig */

◆ WritePrototype()

TESS_API void tesseract::WritePrototype	(	FILE *	File,
		uint16_t	N,
		PROTOTYPE *	Proto
	)

This routine writes a textual description of a prototype to the specified text file.

Parameters

File	open text file to write prototype to
N	number of dimensions in feature space
Proto	prototype to write out

Definition at line 271 of file clusttool.cpp.

                                                              {
  // Writes Proto as text: a significance keyword, the style (via
  // WriteProtoStyle), the sample count, N mean values, and finally the
  // variance data whose layout depends on Proto->Style.
  int i;
 
  // Both keywords are padded to the same width.
  if (Proto->Significant) {
    fprintf(File, "significant   ");
  } else {
    fprintf(File, "insignificant ");
  }
  WriteProtoStyle(File, static_cast<PROTOSTYLE>(Proto->Style));
  // The sample count ends the first line; the mean values start a new,
  // tab-indented line.
  fprintf(File, "%6d\n\t", Proto->NumSamples);
  WriteNFloats(File, N, &Proto->Mean[0]);
  fprintf(File, "\t");
 
  switch (Proto->Style) {
    case spherical:
      // A single variance value.
      WriteNFloats(File, 1, &(Proto->Variance.Spherical));
      break;
    case elliptical:
      // One variance value per dimension.
      WriteNFloats(File, N, Proto->Variance.Elliptical);
      break;
    case mixed:
      // One distribution name per dimension, each right-aligned in a
      // 9-character field, followed by N values from Variance.Elliptical.
      for (i = 0; i < N; i++) {
        switch (Proto->Distrib[i]) {
          case normal:
            fprintf(File, " %9s", "normal");
            break;
          case uniform:
            fprintf(File, " %9s", "uniform");
            break;
          case D_random:
            fprintf(File, " %9s", "random");
            break;
          case DISTRIBUTION_COUNT:
            // NOTE(review): presumably a count sentinel rather than a real
            // distribution; the asserted condition is always false.
            ASSERT_HOST(!"Distribution count not allowed!");
        }
      }
      fprintf(File, "\n\t");
      WriteNFloats(File, N, Proto->Variance.Elliptical);
  }
}

◆ WriteRecoder()

bool tesseract::WriteRecoder	(	const UNICHARSET &	unicharset,
		bool	pass_through,
		const std::string &	output_dir,
		const std::string &	lang,
		FileWriter	writer,
		std::string *	radical_table_data,
		TessdataManager *	traineddata
	)

Definition at line 97 of file lang_model_helpers.cpp.

                                                {
  UnicharCompress recoder;
  // Where the unicharset is carefully setup already to contain a good
  // compact encoding, use a pass-through recoder that does nothing.
  // For scripts that have a large number of unicodes (Han, Hangul) we want
  // to use the recoder to compress the symbol space by re-encoding each
  // unicode as multiple codes from a smaller 'alphabet' that are related to the
  // shapes in the character. Hangul Jamo is a perfect example of this.
  // See the Hangul Syllables section, sub-section "Equivalence" in:
  // http://www.unicode.org/versions/Unicode10.0.0/ch18.pdf
  if (pass_through) {
    recoder.SetupPassThrough(unicharset);
  } else {
    int null_char = unicharset.has_special_codes() ? UNICHAR_BROKEN : unicharset.size();
    tprintf("Null char=%d\n", null_char);
    if (!recoder.ComputeEncoding(unicharset, null_char, radical_table_data)) {
      tprintf("Creation of encoded unicharset failed!!\n");
      return false;
    }
  }
  // fp is opened for writing on recoder_data, so Serialize() is expected to
  // fill that vector with the recoder's serialized form.
  TFile fp;
  std::vector<char> recoder_data;
  fp.OpenWrite(&recoder_data);
  if (!recoder.Serialize(&fp)) {
    return false;
  }
  // data() is well-defined on an empty vector, whereas &recoder_data[0] is not.
  traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, recoder_data.data(), recoder_data.size());
  // Reuse the buffer for the text form of the encoding. assign() copies the
  // bytes without indexing into a possibly empty vector or string.
  const std::string encoding = recoder.GetEncodingAsString(unicharset);
  recoder_data.assign(encoding.begin(), encoding.end());
  // The text file is named <lang>.charset_size=<code range>.txt.
  const std::string suffix = ".charset_size=" + std::to_string(recoder.code_range()) + ".txt";
  return WriteFile(output_dir, lang, suffix, recoder_data, writer);
}

◆ WriteShapeTable()

TESS_COMMON_TRAINING_API void tesseract::WriteShapeTable	(	const std::string &	file_prefix,
		const ShapeTable &	shape_table
	)

Definition at line 170 of file commontraining.cpp.

                                                                                  {
  std::string shape_table_file = file_prefix;
  shape_table_file += kShapeTableFileSuffix;
  FILE *fp = fopen(shape_table_file.c_str(), "wb");
  if (fp != nullptr) {
    if (!shape_table.Serialize(fp)) {
      fprintf(stderr, "Error writing shape table: %s\n", shape_table_file.c_str());
    }
    fclose(fp);
  } else {
    fprintf(stderr, "Error creating shape table: %s\n", shape_table_file.c_str());
  }
}

◆ WriteTempConfig()

void tesseract::WriteTempConfig	(	FILE *	File,
		TEMP_CONFIG_STRUCT *	Config
	)

This routine writes a binary representation of a temporary configuration to File.

Parameters

File	open file to write Config to
Config	temporary config to write to File

Note: Globals: none

Definition at line 391 of file adaptive.cpp.

                                                             {
  assert(Config != nullptr);

  // First the raw bytes of the struct itself, then ProtoVectorSize 32-bit
  // words read from the Protos pointer.
  const size_t proto_words = Config->ProtoVectorSize;
  fwrite(Config, sizeof(*Config), 1, File);
  fwrite(Config->Protos, sizeof(uint32_t), proto_words, File);
} /* WriteTempConfig */

◆ WriteTrainingSamples()

void tesseract::WriteTrainingSamples	(	const tesseract::FEATURE_DEFS_STRUCT &	FeatureDefs,
		char *	Directory,
		tesseract::LIST	CharList,
		const char *	program_feature_type
	)

◆ WriteUnicharset()

bool tesseract::WriteUnicharset	(	const UNICHARSET &	unicharset,
		const std::string &	output_dir,
		const std::string &	lang,
		FileWriter	writer,
		TessdataManager *	traineddata
	)

Definition at line 82 of file lang_model_helpers.cpp.

                                                                                             {
  std::vector<char> unicharset_data;
  TFile fp;
  fp.OpenWrite(&unicharset_data);
  if (!unicharset.save_to_file(&fp)) {
    return false;
  }
  traineddata->OverwriteEntry(TESSDATA_LSTM_UNICHARSET, &unicharset_data[0],
                              unicharset_data.size());
  return WriteFile(output_dir, lang, ".unicharset", unicharset_data, writer);
}

◆ ZeroVector()

template<typename T >

void tesseract::ZeroVector	(	unsigned	n,
		T *	vec
	)

inline

Definition at line 245 of file functions.h.

                                           {
  // Clear all n elements of vec by zeroing their bytes.
  const size_t num_bytes = n * sizeof(*vec);
  memset(vec, 0, num_bytes);
}

Variable Documentation

◆ _TFNetworkModel_default_instance_

TFNetworkModelDefaultTypeInternal tesseract::_TFNetworkModel_default_instance_

Definition at line 52 of file tfnetwork.pb.h.

◆ blob_window

ScrollView * tesseract::blob_window = nullptr

Definition at line 36 of file render.cpp.

◆ case_state_table

const int tesseract::case_state_table[6][4]

Initial value:

= {
    {
     
     
     0, 1, 5, 4},
    {
     0, 3, 2, 4},
    {
     0, -1, 2, -1},
    {
     0, 3, -1, 4},
    {
     0, -1, -1, 4},
    {
     5, -1, 2, -1},
}

Definition at line 28 of file context.cpp.

◆ CharNormDesc

const FEATURE_DESC_STRUCT tesseract::CharNormDesc

extern

◆ classify_max_slope

double tesseract::classify_max_slope = 2.414213562

"Slope above which lines are called vertical"

Definition at line 31 of file mfx.cpp.

◆ classify_min_slope

double tesseract::classify_min_slope = 0.414213562

"Slope below which lines are called horizontal"

Definition at line 30 of file mfx.cpp.

◆ classify_norm_adj_curl

double tesseract::classify_norm_adj_curl = 2.0

"Norm adjust curl ..."

Definition at line 74 of file normmatch.cpp.

◆ classify_norm_adj_midpoint

double tesseract::classify_norm_adj_midpoint = 32.0

control knobs used to control the normalization adjustment process "Norm adjust midpoint ..."

Definition at line 73 of file normmatch.cpp.

◆ classify_pico_feature_length

double tesseract::classify_pico_feature_length = 0.05

"Pico Feature Length"

Definition at line 37 of file picofeat.cpp.

◆ color_list

ScrollView::Color tesseract::color_list[]

Initial value:

= {ScrollView::RED, ScrollView::CYAN, ScrollView::YELLOW,

ScrollView::BLUE, ScrollView::GREEN, ScrollView::WHITE}

Definition at line 38 of file render.cpp.

◆ Config

TESS_COMMON_TRAINING_API CLUSTERCONFIG tesseract::Config = {elliptical, 0.625, 0.05, 1.0, 1e-6, 0}

Definition at line 89 of file commontraining.cpp.

◆ current_block_list

BLOCK_LIST* tesseract::current_block_list

extern

◆ devanagari_split_debugimage

bool tesseract::devanagari_split_debugimage = 0

"Whether to create a debug image for split shiro-rekha process."

Definition at line 39 of file devanagari_processing.cpp.

◆ devanagari_split_debuglevel

int tesseract::devanagari_split_debuglevel = 0

"Debug level for split shiro-rekha process."

Definition at line 36 of file devanagari_processing.cpp.

◆ DotProduct

DotProductFunction tesseract::DotProduct

Definition at line 80 of file simddetect.cpp.

◆ edge_window

ScrollView * tesseract::edge_window = nullptr

Definition at line 37 of file plotedges.cpp.

◆ editor_image_blob_bb_color

int tesseract::editor_image_blob_bb_color = ScrollView::YELLOW

"Blob bounding box colour"

Definition at line 127 of file pgedit.cpp.

◆ editor_image_win_name

char* tesseract::editor_image_win_name = "EditorImage"

"Editor image window name"

Definition at line 122 of file pgedit.cpp.

◆ editor_image_word_bb_color

int tesseract::editor_image_word_bb_color = ScrollView::BLUE

"Word bounding box colour"

Definition at line 126 of file pgedit.cpp.

◆ editor_image_xpos

int tesseract::editor_image_xpos = 590

"Editor image X Pos"

Definition at line 123 of file pgedit.cpp.

◆ editor_image_ypos

int tesseract::editor_image_ypos = 10

"Editor image Y Pos"

Definition at line 124 of file pgedit.cpp.

◆ editor_word_height

int tesseract::editor_word_height = 240

"Word window height"

Definition at line 132 of file pgedit.cpp.

◆ editor_word_name

char* tesseract::editor_word_name = "BlnWords"

"BL normalized word window"

Definition at line 129 of file pgedit.cpp.

◆ editor_word_width

int tesseract::editor_word_width = 655

"Word window width"

Definition at line 133 of file pgedit.cpp.

◆ editor_word_xpos

int tesseract::editor_word_xpos = 60

"Word window X Pos"

Definition at line 130 of file pgedit.cpp.

◆ editor_word_ypos

int tesseract::editor_word_ypos = 510

"Word window Y Pos"

Definition at line 131 of file pgedit.cpp.

◆ feature_defs

TESS_COMMON_TRAINING_API FEATURE_DEFS_STRUCT tesseract::feature_defs

Definition at line 90 of file commontraining.cpp.

◆ FTable

const double tesseract::FTable[FTABLE_Y][FTABLE_X]

Definition at line 41 of file cluster.cpp.

◆ fx_win

ScrollView * tesseract::fx_win = nullptr

Definition at line 42 of file drawfx.cpp.

◆ gapmap_big_gaps

double tesseract::gapmap_big_gaps = 1.75

"xht multiplier"

Definition at line 20 of file gap_map.cpp.

◆ gapmap_debug

bool tesseract::gapmap_debug = false

"Say which blocks have tables"

Definition at line 17 of file gap_map.cpp.

◆ gapmap_no_isolated_quanta

bool tesseract::gapmap_no_isolated_quanta = false

"Ensure gaps not less than 2quanta wide"

Definition at line 19 of file gap_map.cpp.

◆ gapmap_use_ends

bool tesseract::gapmap_use_ends = false

"Use large space at start and end of rows"

Definition at line 18 of file gap_map.cpp.

◆ GeoFeatDesc

const FEATURE_DESC_STRUCT tesseract::GeoFeatDesc

extern

◆ IntFeatDesc

const FEATURE_DESC_STRUCT tesseract::IntFeatDesc

extern

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

Definition at line 35 of file weightmatrix.cpp.

◆ kAdamEpsilon

const TFloat tesseract::kAdamEpsilon = 1e-8

Definition at line 37 of file weightmatrix.cpp.

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

Definition at line 231 of file weightmatrix.cpp.

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

Definition at line 117 of file tablefind.cpp.

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

Definition at line 46 of file alignedblob.cpp.

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

Definition at line 50 of file alignedblob.cpp.

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

Definition at line 58 of file tablefind.cpp.

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

Definition at line 56 of file tablefind.cpp.

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

Definition at line 57 of file tablefind.cpp.

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

Definition at line 51 of file tablefind.cpp.

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

Definition at line 49 of file tablefind.cpp.

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

Definition at line 50 of file tablefind.cpp.

◆ kArabicText

const char tesseract::kArabicText[] = "والفكر والصراع 1234,\nوالفكر والصراع"

Definition at line 40 of file pango_font_info_test.cc.

◆ kBadlyFormedHinWords

const char * tesseract::kBadlyFormedHinWords[] = {"उपयोक्ताो", "नहीें", "प्रंात", "कहीअे", "पत्रिाका", "छह्णाीस"}

Definition at line 85 of file normstrngs_test.cc.

◆ kBadlyFormedThaiWords

const char* tesseract::kBadlyFormedThaiWords[] = {"ฤิ", "กา้ํ", "กิำ", "นำ้", "เเก"}

Definition at line 87 of file normstrngs_test.cc.

◆ kBadPriority

const double tesseract::kBadPriority = 999.0

Definition at line 39 of file split.cpp.

◆ kBatchIterations

const int tesseract::kBatchIterations = 100

Definition at line 36 of file lstm_test.h.

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

Definition at line 70 of file lstmtrainer.cpp.

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

Definition at line 47 of file colpartitiongrid.cpp.

◆ kBlameAdaption

const char tesseract::kBlameAdaption[] = "adapt"

Definition at line 46 of file blamer.cpp.

◆ kBlameChopper

const char tesseract::kBlameChopper[] = "chop"

Definition at line 40 of file blamer.cpp.

◆ kBlameClassifier

const char tesseract::kBlameClassifier[] = "cl"

Definition at line 39 of file blamer.cpp.

◆ kBlameClassLMTradeoff

const char tesseract::kBlameClassLMTradeoff[] = "cl/LM"

Definition at line 41 of file blamer.cpp.

◆ kBlameClassOldLMTradeoff

const char tesseract::kBlameClassOldLMTradeoff[] = "cl/old_LM"

Definition at line 45 of file blamer.cpp.

◆ kBlameCorrect

const char tesseract::kBlameCorrect[] = "corr"

Definition at line 38 of file blamer.cpp.

◆ kBlameNoTruth

const char tesseract::kBlameNoTruth[] = "no_tr"

Definition at line 48 of file blamer.cpp.

◆ kBlameNoTruthSplit

const char tesseract::kBlameNoTruthSplit[] = "no_tr_spl"

Definition at line 47 of file blamer.cpp.

◆ kBlamePageLayout

const char tesseract::kBlamePageLayout[] = "pglt"

Definition at line 42 of file blamer.cpp.

◆ kBlameSegsearchHeur

const char tesseract::kBlameSegsearchHeur[] = "ss_heur"

Definition at line 43 of file blamer.cpp.

◆ kBlameSegsearchPP

const char tesseract::kBlameSegsearchPP[] = "ss_pp"

Definition at line 44 of file blamer.cpp.

◆ kBlameUnknown

const char tesseract::kBlameUnknown[] = "unkn"

Definition at line 49 of file blamer.cpp.

◆ kBlnBaselineOffset

const int tesseract::kBlnBaselineOffset = 64

Definition at line 34 of file normalis.h.

◆ kBlnCellHeight

const int tesseract::kBlnCellHeight = 256

Definition at line 32 of file normalis.h.

◆ kBlnXHeight

const int tesseract::kBlnXHeight = 128

Definition at line 33 of file normalis.h.

◆ kBlocks8087_054

const PolyBlockType tesseract::kBlocks8087_054[]

Initial value:

= {PT_HEADING_TEXT, PT_FLOWING_TEXT, PT_PULLOUT_IMAGE,

PT_CAPTION_TEXT, PT_FLOWING_TEXT}

Definition at line 51 of file layout_test.cc.

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

Definition at line 31 of file boxword.cpp.

◆ kBoxReadBufSize

const int tesseract::kBoxReadBufSize = 1024

Definition at line 33 of file boxread.h.

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

Definition at line 72 of file strokewidth.cpp.

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

Definition at line 1537 of file baseapi.cpp.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected text length: assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

Definition at line 1535 of file baseapi.cpp.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

Definition at line 1529 of file baseapi.cpp.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

Definition at line 41 of file tablerecog.cpp.

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

Definition at line 40 of file tablerecog.cpp.

◆ kCenter

const ParagraphJustification tesseract::kCenter = JUSTIFICATION_CENTER

Definition at line 27 of file paragraphs_test.cc.

◆ kCenterGradeCap

const int tesseract::kCenterGradeCap = 25

Definition at line 37 of file split.cpp.

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

Definition at line 33 of file linerec.cpp.

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

Definition at line 48 of file lstmrecognizer.cpp.

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

Definition at line 66 of file strokewidth.cpp.

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

Definition at line 68 of file strokewidth.cpp.

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

Definition at line 62 of file strokewidth.cpp.

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

Definition at line 64 of file strokewidth.cpp.

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

Definition at line 60 of file strokewidth.cpp.

◆ kCNFeatureType

TESS_API const char *const tesseract::kCNFeatureType = "cn"

Definition at line 34 of file featdefs.cpp.

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

Definition at line 41 of file tabfind.h.

◆ kComplexPage1

const TextAndModel tesseract::kComplexPage1[]

Definition at line 368 of file paragraphs_test.cc.

◆ kComplexPage2

const TextAndModel tesseract::kComplexPage2[]

Definition at line 413 of file paragraphs_test.cc.

◆ kComplexShapePerimeterRatio

const double tesseract::kComplexShapePerimeterRatio = 1.5

Definition at line 48 of file blobbox.cpp.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

Definition at line 60 of file tabfind.cpp.

◆ kCosSmallAngle

const double tesseract::kCosSmallAngle = 0.866

Definition at line 44 of file blobbox.cpp.

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

Definition at line 43 of file linefind.cpp.

◆ kCrownedParagraph

const TextAndModel tesseract::kCrownedParagraph[]

Initial value:

= {
    {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
    {"often not indented as the rest  ", PCONT, PModel(), false, false},
    {"of the paragraphs are.  Nonethe-", PCONT, PModel(), false, false},
    {"less it should be counted as the", PCONT, PModel(), false, false},
    {"same type of paragraph.         ", PCONT, PModel(), false, false},
    {"  The second and third para-    ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"graphs are both indented two    ", PCONT, PModel(), false, false},
    {"spaces.                         ", PCONT, PModel(), false, false},
    {"  The first paragraph has what  ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"fmt refers to as a 'crown.'     ", PCONT, PModel(), false, false},
}

Definition at line 262 of file paragraphs_test.cc.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft

Initial value:

=

reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))

Definition at line 56 of file paragraphs.cpp.

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight

Initial value:

=

reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))

Definition at line 58 of file paragraphs.cpp.

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

Definition at line 58 of file pango_font_info.cpp.

◆ kDefiniteAspectRatio

const double tesseract::kDefiniteAspectRatio = 2.0

Definition at line 46 of file blobbox.cpp.

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

Definition at line 75 of file strokewidth.cpp.

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

Definition at line 78 of file strokewidth.cpp.

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

Definition at line 46 of file lstmrecognizer.cpp.

◆ kDivisibleVerticalItalic

const TPOINT tesseract::kDivisibleVerticalItalic(1, 5)	(	1	,
		5
	)

◆ kDivisibleVerticalUpright

const TPOINT tesseract::kDivisibleVerticalUpright(0, 1)	(	0	,
		1
	)

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

Definition at line 32 of file trie.cpp.

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

Definition at line 235 of file weightmatrix.cpp.

◆ kEngLigatureText

const char tesseract::kEngLigatureText[] = "ﬁdelity"

Definition at line 39 of file stringrenderer_test.cc.

◆ kEngNonLigatureText

const char tesseract::kEngNonLigatureText[] = "fidelity"

Definition at line 37 of file stringrenderer_test.cc.

◆ kEngText

const char tesseract::kEngText[] = "the quick brown fox jumps over the lazy dog"

Definition at line 81 of file normstrngs_test.cc.

◆ kErrClip

const TFloat tesseract::kErrClip = 1.0f

Definition at line 73 of file lstm.cpp.

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

Definition at line 58 of file lstmtrainer.cpp.

◆ kExpectedFontNames

const char* tesseract::kExpectedFontNames[]

Initial value:

= {"Arab",
                                    "Arial Bold Italic",
                                    "DejaVu Sans Ultra-Light",
                                    "Lohit Hindi",
 
                                    "Times New Roman",
 
 
 
                                    "UnBatang",
                                    "Verdana"}

Definition at line 27 of file pango_font_info_test.cc.

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

Definition at line 56 of file degradeimage.cpp.

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

Definition at line 37 of file imagedata.h.

◆ kFewCluesWithCrown

const TextAndModel tesseract::kFewCluesWithCrown[]

Initial value:

= {
    {"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
    {"of the page and takes two lines.", PCONT, PModel(), false, false},
    {"  Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"which indicates that the first  ", PCONT, PModel(), false, false},
    {"paragraph is a continuation from", PCONT, PModel(), false, false},
    {"a previous page, as it is       ", PCONT, PModel(), false, false},
    {"indented just like this second  ", PCONT, PModel(), false, false},
    {"paragraph.                      ", PCONT, PModel(), false, false},
}

Definition at line 247 of file paragraphs_test.cc.

◆ kFinalPixelTolerance

const float tesseract::kFinalPixelTolerance = 0.125f

Definition at line 37 of file normalis.cpp.

◆ kFlushLeftParagraphs

const TextAndModel tesseract::kFlushLeftParagraphs[]

Initial value:

= {
    {"It  is sometimes  the case  that", PSTART, PModel(kLeft, 0, 0, 0, 0), false, false},
    {"flush  left   paragraphs  (those", PCONT, PModel(), false, false},
    {"with  no  body  indent)  are not", PCONT, PModel(), false, false},
    {"actually crowns.                ", PCONT, PModel(), false, false},
    {"Instead,  further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0), false, false},
    {"also flush left aligned.  Usual-", PCONT, PModel(), false, false},
    {"ly,  these  paragraphs  are  set", PCONT, PModel(), false, false},
    {"apart vertically  by some white-", PCONT, PModel(), false, false},
    {"space,  but you can also  detect", PCONT, PModel(), false, false},
    {"them by observing  the big empty", PCONT, PModel(), false, false},
    {"space at the  ends  of the para-", PCONT, PModel(), false, false},
    {"graphs.                         ", PCONT, PModel(), false, false},
}

Definition at line 279 of file paragraphs_test.cc.

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

Definition at line 50 of file mastertrainer.cpp.

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

Definition at line 34 of file trie.cpp.

◆ kGeoFeatureType

TESS_API const char *const tesseract::kGeoFeatureType = "tb"

Definition at line 36 of file featdefs.cpp.

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

Definition at line 57 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = {2, 2, 2, 2, 2, 3, 3}

Definition at line 55 of file tablerecog.cpp.

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

Definition at line 36 of file tabvector.cpp.

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

Definition at line 38 of file tabvector.cpp.

◆ kGWR2nds

const char* tesseract::kGWR2nds[]

Initial value:

= {"C", "c", "t", "", "S", "", "W", "O", "t", "h",

"S", " ", "t", "I", "9", "b", "f", ",", nullptr}

Definition at line 43 of file recodebeam_test.cc.

◆ kGWR2ndScores

const float tesseract::kGWR2ndScores[]

Initial value:

= {0.01, 0.10, 0.12, 0.42, 0.01, 0.25, 0.10, 0.01, 0.01,

0.01, 0.01, 0.05, 0.01, 0.09, 0.09, 0.09, 0.05, 0.25}

Definition at line 45 of file recodebeam_test.cc.

◆ kGWRTops

const char* tesseract::kGWRTops[]

Initial value:

= {"G", "e", "f", " ", "s", " ", "w", "o", "r", "d",

"s", "", "r", "i", "g", "h", "t", ".", nullptr}

Definition at line 39 of file recodebeam_test.cc.

◆ kGWRTopScores

const float tesseract::kGWRTopScores[]

Initial value:

= {0.99, 0.85, 0.87, 0.55, 0.99, 0.65, 0.89, 0.99, 0.99,

0.99, 0.99, 0.95, 0.99, 0.90, 0.90, 0.90, 0.95, 0.75}

Definition at line 41 of file recodebeam_test.cc.

◆ kHanRatioInJapanese

const float tesseract::kHanRatioInJapanese = 0.3

Definition at line 47 of file osdetect.cpp.

◆ kHanRatioInKorean

const float tesseract::kHanRatioInKorean = 0.7

Definition at line 46 of file osdetect.cpp.

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

Definition at line 66 of file lstmtrainer.cpp.

◆ kHinText

const char tesseract::kHinText[] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा"

Definition at line 82 of file normstrngs_test.cc.

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

Definition at line 517 of file weightmatrix.cpp.

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

Definition at line 30 of file otsuthr.h.

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

Definition at line 51 of file colfind.cpp.

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

Definition at line 34 of file tablerecog.cpp.

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

Definition at line 77 of file colpartition.cpp.

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

Definition at line 73 of file colpartition.cpp.

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

Definition at line 75 of file colpartition.cpp.

◆ kImagePadding

const int tesseract::kImagePadding = 4

Definition at line 39 of file imagedata.h.

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

Definition at line 68 of file lstmtrainer.cpp.

◆ kIncorrectResultReasonNames

const char* const tesseract::kIncorrectResultReasonNames[]

Initial value:

= {
    kBlameCorrect,    kBlameClassifier,    kBlameChopper,     kBlameClassLMTradeoff,
    kBlamePageLayout, kBlameSegsearchHeur, kBlameSegsearchPP, kBlameClassOldLMTradeoff,
    kBlameAdaption,   kBlameNoTruthSplit,  kBlameNoTruth,     kBlameUnknown}

Definition at line 51 of file blamer.cpp.

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

Definition at line 966 of file mastertrainer.cpp.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

Definition at line 229 of file weightmatrix.cpp.

◆ kIntFeatureType

TESS_API const char *const tesseract::kIntFeatureType = "if"

Definition at line 35 of file featdefs.cpp.

◆ kKorText

const char tesseract::kKorText[] = "이는 것으로"

Definition at line 83 of file normstrngs_test.cc.

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

Definition at line 107 of file tablefind.cpp.

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

Definition at line 109 of file tablefind.cpp.

◆ kLatinChs

const int tesseract::kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0}

Latin chars corresponding to the unicode chars above.

Definition at line 1594 of file baseapi.cpp.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = M_SQRT1_2

Definition at line 54 of file lstmtrainer.cpp.

◆ kLeft

const ParagraphJustification tesseract::kLeft = JUSTIFICATION_LEFT

Definition at line 26 of file paragraphs_test.cc.

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

Definition at line 83 of file equationdetect.cpp.

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

Definition at line 49 of file tabvector.cpp.

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

Definition at line 44 of file tablerecog.cpp.

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

Definition at line 43 of file tablerecog.cpp.

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

Definition at line 45 of file linefind.cpp.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

Definition at line 54 of file tabfind.cpp.

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

Definition at line 99 of file strokewidth.cpp.

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

Definition at line 101 of file strokewidth.cpp.

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

Definition at line 103 of file strokewidth.cpp.

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

Definition at line 92 of file strokewidth.cpp.

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

Definition at line 94 of file strokewidth.cpp.

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

Definition at line 49 of file tablerecog.cpp.

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

Definition at line 45 of file colpartitiongrid.cpp.

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

Definition at line 78 of file equationdetect.cpp.

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

Definition at line 79 of file equationdetect.cpp.

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

Definition at line 80 of file equationdetect.cpp.

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

Definition at line 40 of file ambigs.cpp.

◆ kMaxBaselineDrift

const double tesseract::kMaxBaselineDrift = 0.0625

Definition at line 51 of file ratngs.cpp.

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

Definition at line 80 of file colpartition.cpp.

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

Definition at line 77 of file tablefind.cpp.

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

Definition at line 40 of file tablefind.cpp.

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

Definition at line 32 of file recogtraining.cpp.

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

Definition at line 66 of file tablefind.cpp.

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + UNICHAR_LEN

A maximal single box could occupy kNumbersPerBlob numbers of up to kBytesPer64BitNumber digits each (in case someone sneaks in a 64-bit value), each followed by a space, plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

Definition at line 1544 of file baseapi.cpp.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

Definition at line 39 of file colpartitiongrid.cpp.

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

Definition at line 69 of file fixxht.cpp.

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

Definition at line 54 of file pagesegmain.cpp.

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

Definition at line 70 of file strokewidth.cpp.

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

Definition at line 87 of file colpartition.cpp.

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

Definition at line 85 of file tablefind.cpp.

◆ kMaxCredibleResolution

constexpr int tesseract::kMaxCredibleResolution = 2400

constexpr

Maximum believable resolution.

Definition at line 38 of file publictypes.h.

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

Definition at line 84 of file strokewidth.cpp.

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

Definition at line 87 of file strokewidth.cpp.

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

Definition at line 56 of file colfind.cpp.

◆ kMaxDropCapBottom

const int tesseract::kMaxDropCapBottom = -128

Definition at line 43 of file ratngs.cpp.

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

Definition at line 45 of file tabvector.cpp.

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

Definition at line 69 of file tablefind.cpp.

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

Definition at line 49 of file tabfind.cpp.

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

Definition at line 48 of file colfind.cpp.

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

Definition at line 28 of file input.cpp.

◆ kMaxIntPairSize

const int tesseract::kMaxIntPairSize = 45

Definition at line 47 of file scrollview.cpp.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

Definition at line 44 of file ccnontextdetect.cpp.

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

Definition at line 35 of file ccnontextdetect.cpp.

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

Definition at line 63 of file colpartition.cpp.

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

Definition at line 65 of file colpartition.cpp.

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

Definition at line 45 of file ligature_table.cpp.

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

Definition at line 322 of file boxchar.cpp.

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

Definition at line 51 of file linefind.cpp.

◆ kMaxLineSizeRatio

const double tesseract::kMaxLineSizeRatio = 1.25

Definition at line 60 of file pageres.cpp.

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

Definition at line 40 of file ccnontextdetect.cpp.

◆ kMaxMediumSizeRatio

const double tesseract::kMaxMediumSizeRatio = 4.0

Definition at line 52 of file blobbox.cpp.

◆ kMaxMsgSize

const int tesseract::kMaxMsgSize = 4096

Definition at line 46 of file scrollview.cpp.

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

Definition at line 37 of file colpartitiongrid.cpp.

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

Definition at line 56 of file linefind.cpp.

◆ kMaxNumberOfScripts

const int tesseract::kMaxNumberOfScripts = 116 + 1 + 2 + 1

Definition at line 36 of file osdetect.h.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

Definition at line 31 of file intfeaturemap.cpp.

◆ kMaxOverlapDenominator

const double tesseract::kMaxOverlapDenominator = 0.125

Definition at line 45 of file ratngs.cpp.

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

Definition at line 34 of file colpartitiongrid.cpp.

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

Definition at line 126 of file tablefind.cpp.

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

Definition at line 62 of file colpartitiongrid.cpp.

◆ kMaxPerimeterWidthRatio

const double tesseract::kMaxPerimeterWidthRatio = 8.0

Definition at line 36 of file stepblob.cpp.

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

Definition at line 39 of file tabfind.cpp.

◆ kMaxReadAhead

const int tesseract::kMaxReadAhead = 8

Definition at line 42 of file imagedata.cpp.

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

Definition at line 39 of file detlinefit.cpp.

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

Definition at line 43 of file imagefind.cpp.

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

Definition at line 46 of file imagefind.cpp.

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

Definition at line 84 of file colpartition.cpp.

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

Definition at line 52 of file tablerecog.cpp.

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

Definition at line 59 of file colpartition.cpp.

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

Definition at line 61 of file colpartition.cpp.

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

Definition at line 72 of file alignedblob.cpp.

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

Definition at line 32 of file ccnontextdetect.cpp.

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

Definition at line 53 of file colpartition.cpp.

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

Definition at line 58 of file linefind.cpp.

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

Definition at line 81 of file tablefind.cpp.

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

Definition at line 56 of file colpartition.cpp.

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

Definition at line 48 of file mastertrainer.cpp.

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

Definition at line 38 of file tabfind.cpp.

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

Definition at line 38 of file tablefind.cpp.

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

Definition at line 52 of file network.cpp.

◆ kMaxWordGapRatio

const double tesseract::kMaxWordGapRatio = 2.0

Definition at line 62 of file pageres.cpp.

◆ kMaxWordSizeRatio

const double tesseract::kMaxWordSizeRatio = 1.25

Definition at line 58 of file pageres.cpp.

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

Definition at line 136 of file tablefind.cpp.

◆ kMicroFeatureType

TESS_API const char *const tesseract::kMicroFeatureType = "mf"

Definition at line 33 of file featdefs.cpp.

◆ kMinAcceptableBlobHeight

const int tesseract::kMinAcceptableBlobHeight = 10

Definition at line 42 of file osdetect.cpp.

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

Definition at line 51 of file tabvector.cpp.

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

Definition at line 62 of file alignedblob.cpp.

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

Definition at line 82 of file colpartition.cpp.

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

Definition at line 63 of file tablefind.cpp.

◆ kMinCapHeightFraction

const double tesseract::kMinCapHeightFraction = 0.05

Definition at line 58 of file unicharset.cpp.

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

Definition at line 43 of file colpartitiongrid.cpp.

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

Definition at line 41 of file colpartitiongrid.cpp.

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

Definition at line 30 of file networkio.cpp.

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

Definition at line 71 of file colpartition.cpp.

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

Definition at line 46 of file mastertrainer.cpp.

◆ kMinColumnWidth

const int tesseract::kMinColumnWidth = 2.0 / 3

Definition at line 31 of file colpartitionset.cpp.

◆ kMinCredibleResolution

constexpr int tesseract::kMinCredibleResolution = 70

constexpr

Minimum believable resolution. Used as a default if there is no other information, as it is safer to under-estimate than over-estimate.

Definition at line 36 of file publictypes.h.

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

Definition at line 81 of file strokewidth.cpp.

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

Definition at line 47 of file lstmtrainer.cpp.

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

Definition at line 56 of file tabfind.cpp.

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

Definition at line 60 of file tablerecog.cpp.

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

Definition at line 45 of file tabfind.cpp.

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

Definition at line 60 of file ccnontextdetect.cpp.

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

Definition at line 47 of file tabvector.cpp.

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

Definition at line 53 of file colfind.cpp.

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

Definition at line 48 of file imagefind.cpp.

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

Definition at line 67 of file colpartition.cpp.

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

Definition at line 44 of file ligature_table.cpp.

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Denominator of resolution: the resolution divided by this value gives the minimum length in pixels that a line is required to have.

Definition at line 41 of file linefind.cpp.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

Definition at line 41 of file tabfind.cpp.

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

Definition at line 73 of file tablefind.cpp.

◆ kMinMediumSizeRatio

const double tesseract::kMinMediumSizeRatio = 0.25

Definition at line 50 of file blobbox.cpp.

◆ kMinModeFactor

const int tesseract::kMinModeFactor = 12

Definition at line 1506 of file oldbasel.cpp.

◆ kMinModeFactorOcropus

const int tesseract::kMinModeFactorOcropus = 32

Definition at line 1505 of file oldbasel.cpp.

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

Definition at line 60 of file linefind.cpp.

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

Definition at line 97 of file tablefind.cpp.

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

Definition at line 132 of file tablefind.cpp.

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

Definition at line 36 of file detlinefit.cpp.

◆ kMinProb

const float tesseract::kMinProb = std::exp(kMinCertainty)

Definition at line 32 of file networkio.cpp.

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

Definition at line 53 of file tabvector.cpp.

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

Definition at line 60 of file alignedblob.cpp.

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

Definition at line 60 of file degradeimage.cpp.

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

Definition at line 41 of file imagefind.cpp.

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running Tesseract.

Definition at line 107 of file baseapi.cpp.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

Definition at line 112 of file tablefind.cpp.

◆ kMinSize

const int tesseract::kMinSize = 8

Definition at line 408 of file makerow.cpp.

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

Definition at line 49 of file lstmtrainer.cpp.

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

Definition at line 62 of file lstmtrainer.cpp.

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

Definition at line 69 of file colpartition.cpp.

◆ kMinSubscriptOffset

const int tesseract::kMinSubscriptOffset = 20

Definition at line 39 of file ratngs.cpp.

◆ kMinSuperscriptOffset

const int tesseract::kMinSuperscriptOffset = 20

Definition at line 41 of file ratngs.cpp.

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

Definition at line 68 of file alignedblob.cpp.

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

Definition at line 47 of file linefind.cpp.

◆ kMinVariance

const long double tesseract::kMinVariance = 1.0L / 1024

Definition at line 29 of file quadlsq.cpp.

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

Definition at line 37 of file tabfind.cpp.

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

Definition at line 51 of file network.cpp.

◆ kMinXHeightFraction

const double tesseract::kMinXHeightFraction = 0.25

Definition at line 57 of file unicharset.cpp.

◆ kMinXHeightMatch

const double tesseract::kMinXHeightMatch = 0.5

Definition at line 48 of file ratngs.cpp.

◆ kMixedText

const char tesseract::kMixedText[] = "والفكر 123 والصراع abc"

Definition at line 35 of file stringrenderer_test.cc.

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

Definition at line 97 of file strokewidth.cpp.

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

Definition at line 107 of file strokewidth.cpp.

◆ kNewZealandIndex

const TextAndModel tesseract::kNewZealandIndex[]

Definition at line 691 of file paragraphs_test.cc.

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

Definition at line 112 of file strokewidth.cpp.

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

Definition at line 109 of file strokewidth.cpp.

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

Definition at line 51 of file ccnontextdetect.cpp.

◆ kNoiseSize

const double tesseract::kNoiseSize = 0.5

Definition at line 407 of file makerow.cpp.

◆ kNonAmbiguousMargin

const float tesseract::kNonAmbiguousMargin = 1.0

Definition at line 49 of file osdetect.cpp.

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

Definition at line 56 of file lstmtrainer.cpp.

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers that are output for each box (the usual 4 and a page number).

Definition at line 1524 of file baseapi.cpp.

◆ kNumChars

const int tesseract::kNumChars = 100

Definition at line 28 of file recodebeam_test.cc.

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

Definition at line 30 of file detlinefit.cpp.

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

Definition at line 60 of file lstmtrainer.cpp.

◆ kOldManAndSea

const TextAndModel tesseract::kOldManAndSea[]

Definition at line 607 of file paragraphs_test.cc.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

Definition at line 47 of file ccnontextdetect.cpp.

◆ kPadding

const int tesseract::kPadding = 64

Definition at line 30 of file recodebeam_test.cc.

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

Definition at line 122 of file tablefind.cpp.

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

Definition at line 54 of file ccnontextdetect.cpp.

◆ kPointsPerInch

constexpr int tesseract::kPointsPerInch = 72

constexpr

Number of printers' points in an inch. This is the unit of the returned pointsize.

Definition at line 31 of file publictypes.h.

◆ kPrime1

const int tesseract::kPrime1 = 17

Definition at line 41 of file trainingsampleset.cpp.

◆ kPrime2

const int tesseract::kPrime2 = 13

Definition at line 42 of file trainingsampleset.cpp.

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

Definition at line 31 of file unicharcompress.cpp.

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

Definition at line 48 of file alignedblob.cpp.

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

Definition at line 52 of file alignedblob.cpp.

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

Definition at line 51 of file tabfind.cpp.

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

Definition at line 36 of file trainingsample.cpp.

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

Definition at line 36 of file errorcounter.cpp.

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

Definition at line 47 of file tablerecog.cpp.

◆ kResolutionEstimationFactor

constexpr int tesseract::kResolutionEstimationFactor = 10

constexpr

Ratio between median blob size and likely resolution. Used to estimate resolution when none is provided. This is basically 1/usual text size in inches.

Definition at line 43 of file publictypes.h.

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

Definition at line 33 of file trie.cpp.

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

Definition at line 36 of file colpartition.h.

◆ kRight

const ParagraphJustification tesseract::kRight = JUSTIFICATION_RIGHT

Definition at line 28 of file paragraphs_test.cc.

◆ kRightAligned

const TextAndModel tesseract::kRightAligned[]

Initial value:

= {
    {"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0), false, false},
    {"   uncommon in Left-to-Right", PCONT, PModel(), false, false},
    {"      languages, but they do", PCONT, PModel(), false, false},
    {"                      exist.", PCONT, PModel(), false, false},
    {"    Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0), false, false},
    {" horribly tiny paragraphs in", PCONT, PModel(), false, false},
    {"  tables on which we have no", PCONT, PModel(), false, false},
    {"             chance anyways.", PCONT, PModel(), false, false},
}

Definition at line 331 of file paragraphs_test.cc.

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

Definition at line 54 of file degradeimage.cpp.

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

Definition at line 58 of file degradeimage.cpp.

◆ kScaleFactor

constexpr TFloat tesseract::kScaleFactor = 256.0

constexpr

Definition at line 37 of file functions.h.

◆ kScriptAcceptRatio

const float tesseract::kScriptAcceptRatio = 1.3

Definition at line 44 of file osdetect.cpp.

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

Definition at line 82 of file equationdetect.cpp.

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

Definition at line 102 of file tablefind.cpp.

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

Definition at line 43 of file tabvector.cpp.

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

Definition at line 40 of file tabvector.cpp.

◆ kSingleFullPageContinuation

const TextAndModel tesseract::kSingleFullPageContinuation[]

Initial value:

= {
    {"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
    {"continuation.  It flows  from", PCONT, PModel(), false, false},
    {"line to  line, using the full", PCONT, PModel(), false, false},
    {"column  width  with  no clear", PCONT, PModel(), false, false},
    {"paragraph  break,  because it", PCONT, PModel(), false, false},
    {"actually doesn't have one. It", PCONT, PModel(), false, false},
    {"is the  middle of one monster", PCONT, PModel(), false, false},
    {"paragraph continued  from the", PCONT, PModel(), false, false},
    {"previous page and  continuing", PCONT, PModel(), false, false},
    {"onto the  next  page.  There-", PCONT, PModel(), false, false},
    {"fore,  it  ends  up   getting", PCONT, PModel(), false, false},
    {"marked  as a  crown  and then", PCONT, PModel(), false, false},
    {"getting re-marked as any  ex-", PCONT, PModel(), false, false},
    {"isting model.  Not great, but", PCONT, PModel(), false, false},
}

Definition at line 298 of file paragraphs_test.cc.

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

inline

Definition at line 19040 of file universalambigs.h.

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

Definition at line 41 of file osdetect.cpp.

◆ kSloppyTolerance

const int tesseract::kSloppyTolerance = 4

Definition at line 35 of file normalis.cpp.

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

Definition at line 106 of file tablefind.cpp.

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

Definition at line 65 of file colpartitiongrid.cpp.

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

Definition at line 44 of file tablefind.cpp.

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

Definition at line 39 of file trainingsampleset.cpp.

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

Definition at line 64 of file lstmtrainer.cpp.

◆ kStandardFeatureLength

const double tesseract::kStandardFeatureLength = 64.0 / 5

Definition at line 44 of file intfx.h.

◆ kStateClip

const TFloat tesseract::kStateClip = 100.0

Definition at line 71 of file lstm.cpp.

◆ kStrings8087_054

const char* tesseract::kStrings8087_054[] = {"dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", nullptr}

Definition at line 50 of file layout_test.cc.

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

Definition at line 57 of file strokewidth.cpp.

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

Definition at line 141 of file tablefind.cpp.

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

Definition at line 140 of file tablefind.cpp.

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

Definition at line 56 of file strokewidth.cpp.

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

Definition at line 49 of file strokewidth.cpp.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

Definition at line 54 of file strokewidth.cpp.

◆ kSubtleCrown

const TextAndModel tesseract::kSubtleCrown[]

Initial value:

= {
    {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
    {"often not indented as the rest  ", PCONT, PModel(), false, false},
    {"of the paragraphs are.  Nonethe-", PCONT, PModel(), false, false},
    {"less it should be counted as the", PCONT, PModel(), false, false},
    {"same type of paragraph.         ", PCONT, PModel(), false, false},
    {"  Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"should suffice.                 ", PCONT, PModel(), false, false},
    {"             1235               ", PNONE, PModel(), false, false},
}

Definition at line 455 of file paragraphs_test.cc.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

Definition at line 52 of file lstmtrainer.cpp.

◆ kSvPort

const int tesseract::kSvPort = 8461

Definition at line 45 of file scrollview.cpp.

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

Definition at line 89 of file tablefind.cpp.

◆ kTableOfContents

const TextAndModel tesseract::kTableOfContents[]

Initial value:

= {
    {"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"   Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"    Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"     Proverbs . . . . . .  2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"        Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"     Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"        Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"     Appearance . . . . .  3", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"   Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"    Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"    Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"    Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
    {"        Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
}

Definition at line 552 of file paragraphs_test.cc.

◆ kTableSize

constexpr int tesseract::kTableSize = 4096

constexpr

Definition at line 35 of file functions.h.

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

Definition at line 35 of file tabfind.cpp.

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

Definition at line 73 of file lstmtrainer.cpp.

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

Definition at line 74 of file lstmtrainer.cpp.

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract couldn't recognize the input as anything.

Definition at line 109 of file baseapi.cpp.

◆ kTestChar

const int tesseract::kTestChar = -1

Definition at line 37 of file trainingsampleset.cpp.

◆ kTestData

const int tesseract::kTestData[] = {2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1}

Definition at line 19 of file stats_test.cc.

◆ kTextWithSourceCode

const TextAndModel tesseract::kTextWithSourceCode[]

Initial value:

= {
    {"  A typical page of a programming book may contain", PSTART, PModel(kLeft, 0, 20, 0, 0),
     false, false},
    {"examples of source code to exemplify an algorithm ", PCONT, PModel(), false, false},
    {"being described in prose.  Such examples should be", PCONT, PModel(), false, false},
    {"rendered as lineated text, meaning text with      ", PCONT, PModel(), false, false},
    {"explicit line breaks but without extra inter-line ", PCONT, PModel(), false, false},
    {"spacing.  Accidentally finding stray paragraphs in", PCONT, PModel(), false, false},
    {"source code would lead to a bad reading experience", PCONT, PModel(), false, false},
    {"when the text is re-flowed.                       ", PCONT, PModel(), false, false},
    {"  Let's show this by describing the function fact-", PSTART, PModel(kLeft, 0, 20, 0, 0),
     false, false},
    {"orial.  Factorial is a simple recursive function  ", PCONT, PModel(), false, false},
    {"which grows very quickly.  So quickly, in fact,   ", PCONT, PModel(), false, false},
    {"that the typical C implementation will only work  ", PCONT, PModel(), false, false},
    {"for values less than about 12:                    ", PCONT, PModel(), false, false},
    {"                                                  ", PNONE, PModel(), false, false},
    {"  # Naive implementation in C                     ", PCONT, PModel(), false, false},
    {"  int factorial(int n) {                          ", PCONT, PModel(), false, false},
    {"    if (n < 2)                                    ", PCONT, PModel(), false, false},
    {"      return 1;                                   ", PCONT, PModel(), false, false},
    {"    return  n * factorial(n - 1);                 ", PCONT, PModel(), false, false},
    {"  }                                               ", PCONT, PModel(), false, false},
    {"                                                  ", PCONT, PModel(), false, false},
    {"  The C programming language does not have built- ", PSTART, PModel(kLeft, 0, 20, 0, 0),
     false, false},
    {"in support for detecting integer overflow, so this", PCONT, PModel(), false, false},
    {"naive implementation simply returns random values ", PCONT, PModel(), false, false},
    {"if even a moderate sized n is provided.           ", PCONT, PModel(), false, false},
}

Definition at line 572 of file paragraphs_test.cc.

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

Definition at line 54 of file linefind.cpp.

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Denominator of resolution: the resolution divided by this value gives the maximum width in pixels allowed for a thin line.

Definition at line 39 of file linefind.cpp.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

Definition at line 49 of file colpartitiongrid.cpp.

◆ kTinyParagraphs

const TextAndModel tesseract::kTinyParagraphs[]

Initial value:

= {
    {"  Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"obvious paragraph text, you might", PCONT, PModel(), false, false},
    {"find short exchanges of dialogue ", PCONT, PModel(), false, false},
    {"between characters.              ", PCONT, PModel(), false, false},
    {"  'Oh?'                          ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"  'Don't be confused!'           ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"  'Not me!'                      ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"  One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"mark a new paragraph whenever one", PCONT, PModel(), false, false},
    {"of the statistics (left, right or", PCONT, PModel(), false, false},
    {"center)  changes  from  one text-", PCONT, PModel(), false, false},
    {"line  to  the  next.    Such   an", PCONT, PModel(), false, false},
    {"approach  would  misclassify  the", PCONT, PModel(), false, false},
    {"tiny paragraphs above as a single", PCONT, PModel(), false, false},
    {"paragraph.                       ", PCONT, PModel(), false, false},
}

Definition at line 346 of file paragraphs_test.cc.

◆ kTrainerIterations

const int tesseract::kTrainerIterations = 600

Definition at line 34 of file lstm_test.h.

◆ kTruthTextLine

const char* tesseract::kTruthTextLine = "Tosimpleburnrunningofgoodslately.\n"

Definition at line 24 of file applybox_test.cc.

◆ kTruthTextWords

const char* tesseract::kTruthTextWords = "To simple burn running of goods lately.\n"

Definition at line 23 of file applybox_test.cc.

◆ kTwoSimpleParagraphs

const TextAndModel tesseract::kTwoSimpleParagraphs[]

Initial value:

= {
    {"  Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"This paragraph starts at the top", PCONT, PModel(), false, false},
    {"of the page and takes 3 lines.  ", PCONT, PModel(), false, false},
    {"  Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
    {"which indicates that the first  ", PCONT, PModel(), false, false},
    {"paragraph is not a continuation ", PCONT, PModel(), false, false},
    {"from a previous page, as it is  ", PCONT, PModel(), false, false},
    {"indented just like this second  ", PCONT, PModel(), false, false},
    {"paragraph.                      ", PCONT, PModel(), false, false},
}

Definition at line 231 of file paragraphs_test.cc.

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

Definition at line 81 of file equationdetect.cpp.

◆ kUniChs

const int tesseract::kUniChs[] = {0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0}

Conversion table for non-Latin characters. Maps characters outside the Latin set into the Latin set. TODO(rays): incorporate this translation into unicharset.

Definition at line 1592 of file baseapi.cpp.

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile[]

inline

Definition at line 27 of file universalambigs.h.

◆ kUnknown

const ParagraphJustification tesseract::kUnknown = JUSTIFICATION_UNKNOWN

Definition at line 29 of file paragraphs_test.cc.

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

Definition at line 111 of file baseapi.cpp.

◆ kUnlvRep3AO

const TextAndModel tesseract::kUnlvRep3AO[]

Definition at line 474 of file paragraphs_test.cc.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

Definition at line 113 of file baseapi.cpp.

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

Definition at line 37 of file tablerecog.cpp.

◆ kVi2nds

const char* tesseract::kVi2nds[] = {"V", "a", "v", "", "l", "o", "", nullptr}

Definition at line 55 of file recodebeam_test.cc.

◆ kVi2ndScores

const float tesseract::kVi2ndScores[] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}

Definition at line 56 of file recodebeam_test.cc.

◆ kViTops

const char* tesseract::kViTops[] = {"v", "ậ", "y", " ", "t", "ộ", "i", nullptr}

Definition at line 53 of file recodebeam_test.cc.

◆ kViTopScores

const float tesseract::kViTopScores[] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.97}

Definition at line 54 of file recodebeam_test.cc.

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

Definition at line 54 of file alignedblob.cpp.

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

Definition at line 56 of file alignedblob.cpp.

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 300

Definition at line 64 of file alignedblob.cpp.

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

Definition at line 58 of file alignedblob.cpp.

◆ kWidthErrorWeighting

const double tesseract::kWidthErrorWeighting = 0.125

Weight of width variance against height and vertical position.

Definition at line 76 of file normmatch.cpp.

◆ kWordrecMaxNumJoinChunks

const int tesseract::kWordrecMaxNumJoinChunks = 4

Definition at line 55 of file pageres.cpp.

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

Definition at line 35 of file linerec.cpp.

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

Definition at line 54 of file network.cpp.

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

Definition at line 55 of file network.cpp.

◆ kZH2nds

const char* tesseract::kZH2nds[] = {"学", "储", "投", "生", "学", "生", "实", nullptr}

Definition at line 50 of file recodebeam_test.cc.

◆ kZH2ndScores

const float tesseract::kZH2ndScores[] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}

Definition at line 51 of file recodebeam_test.cc.

◆ kZHTops

const char* tesseract::kZHTops[] = {"实", "学", "储", "啬", "投", "学", "生", nullptr}

Definition at line 48 of file recodebeam_test.cc.

◆ kZHTopScores

const float tesseract::kZHTopScores[] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98}

Definition at line 49 of file recodebeam_test.cc.

◆ log_level

int tesseract::log_level = INT_MAX

"Logging level"

Definition at line 36 of file tprintf.cpp.

◆ LogisticTable

const TFloat tesseract::LogisticTable

Definition at line 4102 of file functions.cpp.

◆ MF_SCALE_FACTOR

const float tesseract::MF_SCALE_FACTOR = 0.5f / kBlnXHeight

Definition at line 61 of file mfoutline.h.

◆ MicroFeatureDesc

const FEATURE_DESC_STRUCT tesseract::MicroFeatureDesc

extern

Global Data Definitions and Declarations

◆ of

EndParamDesc tesseract::of

Definition at line 78 of file featdefs.cpp.

◆ OutlineFeatDesc

const FEATURE_DESC_STRUCT tesseract::OutlineFeatDesc

extern

◆ par1

const int tesseract::par1 = 4500 / (approx_dist * approx_dist)

Definition at line 43 of file polyaprx.cpp.

◆ par2

const int tesseract::par2 = 6750 / (approx_dist * approx_dist)

Definition at line 44 of file polyaprx.cpp.

◆ PicoFeatDesc

TESS_API const FEATURE_DESC_STRUCT tesseract::PicoFeatDesc

extern

◆ PicoFeatureLength

TESS_API float tesseract::PicoFeatureLength

extern

Global Data Definitions and Declarations

◆ pitsync_joined_edge

double tesseract::pitsync_joined_edge = 0.75

"Dist inside big blob for chopping"

Definition at line 27 of file pitsync1.cpp.

◆ pitsync_linear_version

int tesseract::pitsync_linear_version = 6

"Use new fast algorithm"

Definition at line 26 of file pitsync1.cpp.

◆ pitsync_offset_freecut_fraction

double tesseract::pitsync_offset_freecut_fraction = 0.25

"Fraction of cut for free cuts"

Definition at line 28 of file pitsync1.cpp.

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[] = {kDoNotReverse, kReverseIfHasRTL, kForceReverse}

Definition at line 36 of file trie.cpp.

◆ TanhTable

const TFloat tesseract::TanhTable

Definition at line 4 of file functions.cpp.

◆ test_data

int tesseract::test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}

Definition at line 23 of file heap_test.cc.

◆ textord_ascheight_mode_fraction

double tesseract::textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 87 of file makerow.cpp.

◆ textord_ascx_ratio_max

double tesseract::textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 90 of file makerow.cpp.

◆ textord_ascx_ratio_min

double tesseract::textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 89 of file makerow.cpp.

◆ textord_balance_factor

double tesseract::textord_balance_factor = 1.0

"Ding rate for unbalanced char cells"

Definition at line 50 of file topitch.cpp.

◆ textord_blockndoc_fixed

bool tesseract::textord_blockndoc_fixed = false

"Attempt whole doc/block fixed pitch"

Definition at line 48 of file topitch.cpp.

◆ textord_blocksall_fixed

bool tesseract::textord_blocksall_fixed = false

"Moan about prop blocks"

Definition at line 26 of file tovars.cpp.

◆ textord_blocksall_prop

bool tesseract::textord_blocksall_prop = false

"Moan about fixed pitch blocks"

Definition at line 27 of file tovars.cpp.

◆ textord_chop_width

double tesseract::textord_chop_width = 1.5

"Max width before chopping"

Definition at line 76 of file makerow.cpp.

◆ textord_chopper_test

bool tesseract::textord_chopper_test = false

"Chopper is being tested."

Definition at line 42 of file wordseg.cpp.

◆ textord_debug_blob

bool tesseract::textord_debug_blob = false

"Print test blob information"

Definition at line 96 of file makerow.cpp.

◆ textord_debug_block

int tesseract::textord_debug_block = 0

"Block to do debug on"

Definition at line 29 of file tovars.cpp.

◆ textord_debug_bugs

int tesseract::textord_debug_bugs = 0

"Turn on output related to bugs in tab finding"

Definition at line 30 of file alignedblob.cpp.

◆ textord_debug_pitch_metric

bool tesseract::textord_debug_pitch_metric = false

"Write full metric stuff"

Definition at line 45 of file topitch.cpp.

◆ textord_debug_pitch_test

bool tesseract::textord_debug_pitch_test = false

"Debug on fixed pitch test"

Definition at line 42 of file topitch.cpp.

◆ textord_debug_printable

bool tesseract::textord_debug_printable = false

"Make debug windows printable"

Definition at line 43 of file alignedblob.cpp.

◆ textord_debug_tabfind

int tesseract::textord_debug_tabfind = 0

"Debug tab finding"

Definition at line 29 of file alignedblob.cpp.

◆ textord_debug_xheights

bool tesseract::textord_debug_xheights = false

"Test xheight algorithms"

Definition at line 59 of file makerow.cpp.

◆ textord_descx_ratio_max

double tesseract::textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 92 of file makerow.cpp.

◆ textord_descx_ratio_min

double tesseract::textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 91 of file makerow.cpp.

◆ textord_dotmatrix_gap

int tesseract::textord_dotmatrix_gap = 3

"Max pixel gap for broken fixed pitch"

Definition at line 28 of file tovars.cpp.

◆ textord_excess_blobsize

double tesseract::textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 81 of file makerow.cpp.

◆ textord_fast_pitch_test

bool tesseract::textord_fast_pitch_test = false

"Do even faster pitch algorithm"

Definition at line 44 of file topitch.cpp.

◆ textord_fix_makerow_bug

bool tesseract::textord_fix_makerow_bug = true

"Prevent multiple baselines"

Definition at line 58 of file makerow.cpp.

◆ textord_fix_xheight_bug

bool tesseract::textord_fix_xheight_bug = true

"Use spline baseline"

Definition at line 57 of file makerow.cpp.

◆ textord_force_make_prop_words

bool tesseract::textord_force_make_prop_words = false

"Force proportional word segmentation on all rows"

Definition at line 41 of file wordseg.cpp.

◆ textord_fp_chop_error

int tesseract::textord_fp_chop_error = 2

"Max allowed bending of chop cells"

Definition at line 34 of file fpchop.cpp.

◆ textord_fpiqr_ratio

double tesseract::textord_fpiqr_ratio = 1.5

"Pitch IQR/Gap IQR threshold"

Definition at line 53 of file tovars.cpp.

◆ textord_heavy_nr

bool tesseract::textord_heavy_nr = false

"Vigorously remove noise"

Definition at line 46 of file makerow.cpp.

◆ textord_linespace_iqrlimit

double tesseract::textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 74 of file makerow.cpp.

◆ textord_lms_line_trials

int tesseract::textord_lms_line_trials = 12

"Number of line fits to do"

Definition at line 94 of file makerow.cpp.

◆ textord_max_pitch_iqr

double tesseract::textord_max_pitch_iqr = 0.20

"Xh fraction noise in pitch"

Definition at line 54 of file tovars.cpp.

◆ textord_min_blob_height_fraction

double tesseract::textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 85 of file makerow.cpp.

◆ textord_min_blobs_in_row

int tesseract::textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 66 of file makerow.cpp.

◆ textord_min_linesize

double tesseract::textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 80 of file makerow.cpp.

◆ textord_min_xheight

int tesseract::textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 70 of file makerow.cpp.

◆ textord_minxh

double tesseract::textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 79 of file makerow.cpp.

◆ textord_new_initial_xheight

bool tesseract::textord_new_initial_xheight = true

"Use test xheight mechanism"

Definition at line 95 of file makerow.cpp.

◆ textord_occupancy_threshold

double tesseract::textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 82 of file makerow.cpp.

◆ textord_old_baselines

bool tesseract::textord_old_baselines = true

"Use old baseline algorithm"

Definition at line 55 of file makerow.cpp.

◆ textord_old_xheight

bool tesseract::textord_old_xheight = false

"Use old xheight algorithm"

Definition at line 56 of file makerow.cpp.

◆ textord_oldbl_debug

bool tesseract::textord_oldbl_debug = false

"Debug old baseline generation"

Definition at line 43 of file oldbasel.cpp.

◆ textord_parallel_baselines

bool tesseract::textord_parallel_baselines = true

"Force parallel baselines"

Definition at line 53 of file makerow.cpp.

◆ textord_pitch_range

int tesseract::textord_pitch_range = 2

"Max range test on pitch"

Definition at line 30 of file tovars.cpp.

◆ textord_pitch_rowsimilarity

double tesseract::textord_pitch_rowsimilarity = 0.08

"Fraction of xheight for sameness"

Definition at line 44 of file tovars.cpp.

◆ textord_pitch_scalebigwords

bool tesseract::textord_pitch_scalebigwords = false

"Scale scores on big words"

Definition at line 45 of file tovars.cpp.

◆ textord_projection_scale

double tesseract::textord_projection_scale = 0.200

"Ding rate for mid-cuts"

Definition at line 49 of file topitch.cpp.

◆ textord_restore_underlines

bool tesseract::textord_restore_underlines = true

"Chop underlines & put back"

Definition at line 24 of file underlin.cpp.

◆ textord_show_expanded_rows

bool tesseract::textord_show_expanded_rows = false

"Display rows after expanding"

Definition at line 49 of file makerow.cpp.

◆ textord_show_final_blobs

bool tesseract::textord_show_final_blobs = false

"Display blob bounds after pre-ass"

Definition at line 51 of file makerow.cpp.

◆ textord_show_final_rows

bool tesseract::textord_show_final_rows = false

"Display rows after final fitting"

Definition at line 50 of file makerow.cpp.

◆ textord_show_fixed_cuts

bool tesseract::textord_show_fixed_cuts = false

"Draw fixed pitch cell boundaries"

Definition at line 35 of file drawtord.cpp.

◆ textord_show_initial_rows

bool tesseract::textord_show_initial_rows = false

"Display row accumulation"

Definition at line 47 of file makerow.cpp.

◆ textord_show_initial_words

bool tesseract::textord_show_initial_words = false

"Display separate words"

Definition at line 25 of file tovars.cpp.

◆ textord_show_page_cuts

bool tesseract::textord_show_page_cuts = false

"Draw page-level cuts"

Definition at line 47 of file topitch.cpp.

◆ textord_show_parallel_rows

bool tesseract::textord_show_parallel_rows = false

"Display page correlated rows"

Definition at line 48 of file makerow.cpp.

◆ textord_show_row_cuts

bool tesseract::textord_show_row_cuts = false

"Draw row-level cuts"

Definition at line 46 of file topitch.cpp.

◆ textord_skew_ile

double tesseract::textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 72 of file makerow.cpp.

◆ textord_skew_lag

double tesseract::textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 73 of file makerow.cpp.

◆ textord_spacesize_ratioprop

double tesseract::textord_spacesize_ratioprop = 2.0

"Min ratio space/nonspace"

Definition at line 52 of file tovars.cpp.

◆ textord_spline_medianwin

int tesseract::textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 68 of file makerow.cpp.

◆ textord_spline_minblobs

int tesseract::textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 67 of file makerow.cpp.

◆ textord_spline_shift_fraction

double tesseract::textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 71 of file makerow.cpp.

◆ textord_straight_baselines

bool tesseract::textord_straight_baselines = false

"Force straight baselines"

Definition at line 54 of file makerow.cpp.

◆ textord_tabvector_vertical_box_ratio

double tesseract::textord_tabvector_vertical_box_ratio = 0.5

"Fraction of box matches required to declare a line vertical"

Definition at line 60 of file tabvector.cpp.

◆ textord_tabvector_vertical_gap_fraction

double tesseract::textord_tabvector_vertical_gap_fraction = 0.5

"max fraction of mean blob width allowed for vertical gaps in vertical text"

Definition at line 57 of file tabvector.cpp.

◆ textord_test_landscape

bool tesseract::textord_test_landscape = false

"Tests refer to land/port"

Definition at line 52 of file makerow.cpp.

◆ textord_test_x

int tesseract::textord_test_x = -INT32_MAX

"coord of test pt"

Definition at line 64 of file makerow.cpp.

◆ textord_test_y

int tesseract::textord_test_y = -INT32_MAX

"coord of test pt"

Definition at line 65 of file makerow.cpp.

◆ textord_underline_offset

double tesseract::textord_underline_offset = 0.1

"Fraction of x to ignore"

Definition at line 23 of file underlin.cpp.

◆ textord_underline_threshold

double tesseract::textord_underline_threshold = 0.5

"Fraction of width occupied"

Definition at line 32 of file blkocc.cpp.

◆ textord_underline_width

double tesseract::textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 83 of file makerow.cpp.

◆ textord_width_limit

double tesseract::textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 75 of file makerow.cpp.

◆ textord_words_def_fixed

double tesseract::textord_words_def_fixed = 0.016

"Threshold for definite fixed"

Definition at line 41 of file tovars.cpp.

◆ textord_words_def_prop

double tesseract::textord_words_def_prop = 0.090

"Threshold for definite prop"

Definition at line 42 of file tovars.cpp.

◆ textord_words_default_maxspace

double tesseract::textord_words_default_maxspace = 3.5

"Max believable third space"

Definition at line 33 of file tovars.cpp.

◆ textord_words_default_minspace

double tesseract::textord_words_default_minspace = 0.6

"Fraction of xheight"

Definition at line 34 of file tovars.cpp.

◆ textord_words_default_nonspace

double tesseract::textord_words_default_nonspace = 0.2

"Fraction of xheight"

Definition at line 36 of file tovars.cpp.

◆ textord_words_definite_spread

double tesseract::textord_words_definite_spread = 0.30

"Non-fuzzy spacing region"

Definition at line 51 of file tovars.cpp.

◆ textord_words_initial_lower

double tesseract::textord_words_initial_lower = 0.25

"Max initial cluster size"

Definition at line 37 of file tovars.cpp.

◆ textord_words_initial_upper

double tesseract::textord_words_initial_upper = 0.15

"Min initial cluster spacing"

Definition at line 38 of file tovars.cpp.

◆ textord_words_maxspace

double tesseract::textord_words_maxspace = 4.0

"Multiple of xheight"

Definition at line 32 of file tovars.cpp.

◆ textord_words_min_minspace

double tesseract::textord_words_min_minspace = 0.3

"Fraction of xheight"

Definition at line 35 of file tovars.cpp.

◆ textord_words_minlarge

double tesseract::textord_words_minlarge = 0.75

"Fraction of valid gaps needed"

Definition at line 39 of file tovars.cpp.

◆ textord_words_pitchsd_threshold

double tesseract::textord_words_pitchsd_threshold = 0.040

"Pitch sync threshold"

Definition at line 40 of file tovars.cpp.

◆ textord_words_veto_power

int tesseract::textord_words_veto_power = 5

"Rows required to outvote a veto"

Definition at line 43 of file tovars.cpp.

◆ textord_wordstats_smooth_factor

double tesseract::textord_wordstats_smooth_factor = 0.05

"Smoothing gap stats"

Definition at line 31 of file tovars.cpp.

◆ textord_xheight_error_margin

double tesseract::textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 93 of file makerow.cpp.

◆ textord_xheight_mode_fraction

double tesseract::textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 86 of file makerow.cpp.

◆ to_debug

FILE* tesseract::to_debug

extern

◆ to_win

ScrollView * tesseract::to_win = nullptr

Definition at line 37 of file drawtord.cpp.

◆ wordrec_blob_pause

bool tesseract::wordrec_blob_pause = 0

"Blob pause"

Definition at line 43 of file render.cpp.

◆ wordrec_display_all_blobs

bool tesseract::wordrec_display_all_blobs = 0

"Display Blobs"

Definition at line 41 of file render.cpp.

◆ wordrec_display_splits

bool tesseract::wordrec_display_splits = 0

"Display splits"

Definition at line 41 of file split.cpp.

◆ words_default_fixed_limit

double tesseract::words_default_fixed_limit = 0.6

"Allowed size variance"

Definition at line 50 of file tovars.cpp.

◆ words_default_fixed_space

double tesseract::words_default_fixed_space = 0.75

"Fraction of xheight"

Definition at line 49 of file tovars.cpp.

◆ words_default_prop_nonspace

double tesseract::words_default_prop_nonspace = 0.25

"Fraction of xheight"

Definition at line 48 of file tovars.cpp.

◆ words_initial_lower

double tesseract::words_initial_lower = 0.5

"Max initial cluster size"

Definition at line 46 of file tovars.cpp.

◆ words_initial_upper

double tesseract::words_initial_upper = 0.15

"Min initial cluster spacing"

Definition at line 47 of file tovars.cpp.

Proto	floating-pt proto to add to class pruner
ClassId	class id corresponding to Proto
Templates	set of templates containing class pruner

Start	starting point of pico-feature
End	ending point of pico-feature
FeatureSet	set to add pico-feature to

Feature	pico-feature to be displayed
Evidence	best evidence for this feature (0-1)

Class	class to take proto from
ProtoId	id of proto in Class to be displayed
Evidence	total evidence for proto (0-1)

argc	number of command line arguments to parse
argv	command line arguments

Classes

Typedefs

Enumerations

Functions

Variables

NormEvidenceOf

compute_page_skew

Detailed Description

Include Files and Type Defines

Typedef Documentation

◆ BLOB_CHOICE_LIST_VECTOR

◆ BLOB_WIDTH

◆ BlobGridSearch

◆ CANCEL_FUNC

◆ char32

◆ CHAR_FEATURES

◆ CLASS_ID

◆ CLASS_TYPE

◆ CLASSES

◆ ClusterHeap

◆ ClusterPair

◆ ColPartitionGridSearch

◆ ColSegmentGrid

◆ ColSegmentGridSearch

◆ CONFIG_PRUNER

◆ DANGERR

◆ DawgVector

◆ DENSITYFUNC

◆ DictFunc

◆ DotProductFunction

◆ EDGE_ARRAY

◆ EDGE_INDEX

◆ EDGE_RECORD

◆ EDGE_REF

◆ EDGE_VECTOR

◆ FEATURE

◆ FEATURE_DEFS

◆ FEATURE_DESC

◆ FEATURE_ID

◆ FEATURE_SET

◆ FileReader

◆ FileWriter

◆ FontSet

◆ int_compare

◆ INT_FEATURE_ARRAY

◆ IntKDPair

◆ kdwalk_proc

◆ LABELEDLIST

◆ LanguageModelFlagsType

◆ LigHash

◆ LIST

◆ MatrixCoordPair

◆ MERGE_CLASS

◆ MFOUTLINE

◆ MicroFeature

◆ MICROFEATURES

◆ NODE_MAP

◆ NODE_REF

◆ NodeChildVector

◆ PainPointHeap

◆ ParamsTrainingHypothesisList

◆ PartSetVector

◆ PModel

◆ PointHeap

◆ PointPair

◆ PRIORITY

◆ ProbabilityInContextFunc

◆ PROGRESS_FUNC

◆ PROGRESS_FUNC2

◆ PROTO_ID

◆ PROTO_PRUNER

◆ RecodeHeap

◆ RecodePair

◆ RSCounts

◆ RSMap

◆ SAMPLE

◆ SeamDecPair

◆ SeamPair

◆ SeamPile

◆ SeamQueue