tesseract v5.3.3.20231005
tesseract Namespace Reference

Classes

struct  ADAPT_CLASS_STRUCT
 
struct  ADAPT_RESULTS
 
class  ADAPT_TEMPLATES_STRUCT
 
union  ADAPTED_CONFIG
 
class  AlignedBlob
 
struct  AlignedBlobParams
 
class  AmbigSpec
 
class  ApplyBoxTest
 
struct  AssociateStats
 
class  AssociateUtils
 
class  BAND
 
class  BandTriMatrix
 
class  BaseapiThreadTest
 
class  BaselineBlock
 
class  BaselineDetect
 
class  BaselineRow
 
class  BBGrid
 
struct  BestChoiceBundle
 Bundle together all the things pertaining to the best choice/state. More...
 
class  BitVector
 
class  BitVectorTest
 
struct  BlamerBundle
 
class  BlnEventHandler
 
class  BLOB_CHOICE
 
struct  BlobData
 
class  BlobGrid
 
class  BLOBNBOX
 
class  BLOCK
 
class  BLOCK_LINE_IT
 rectangle iterator More...
 
class  BLOCK_RECT_IT
 
class  BLOCK_RES
 
struct  BlockGroup
 
class  BoolParam
 
class  BoxChar
 
struct  BoxCharPtrSort
 
class  BoxWord
 
struct  BUCKETS
 
class  C_BLOB
 
class  C_OUTLINE
 
class  C_OUTLINE_FRAG
 
class  CCNonTextDetect
 
class  CCStruct
 
class  CCUtil
 
struct  CHAR_DESC_STRUCT
 
class  CHAR_FRAGMENT
 
struct  CHAR_FRAGMENT_INFO
 
struct  CHISTRUCT
 
class  ChoiceIterator
 
struct  CLASS_PRUNER_STRUCT
 
struct  CLASS_STRUCT
 
class  ClassicMockProgressSink
 
class  Classify
 
class  ClassPruner
 
struct  ClipFFunc
 
struct  ClipFPrime
 
struct  ClipGFunc
 
struct  ClipGPrime
 
class  CLIST
 
class  CLIST_ITERATOR
 
class  CLIST_LINK
 
class  Clst
 
struct  CLUSTER
 
struct  Cluster
 
struct  CLUSTERCONFIG
 
struct  CLUSTERER
 
struct  ClusteringContext
 
class  ColPartition
 
class  ColPartitionGrid
 
class  ColPartitionSet
 
class  ColPartitionTest
 
class  ColSegment
 
class  ColumnFinder
 
class  CommandlineflagsTest
 
class  Convolve
 
struct  CP_RESULT_STRUCT
 
class  CRACKEDGE
 
struct  CrackPos
 
class  CTC
 
struct  DANGERR_INFO
 
class  Dawg
 
struct  DawgArgs
 
class  DawgCache
 
struct  DawgLoader
 
struct  DawgPosition
 
class  DawgPositionVector
 
class  DawgTest
 
class  DebugPixa
 
class  DENORM
 
class  DENORMTest
 
class  DetLineFit
 
class  Dict
 
class  DIR128
 
class  DocumentCache
 
class  DocumentData
 
class  DoubleParam
 
class  DoublePtr
 
class  DPPoint
 
struct  EANYCODE_CHAR
 
struct  EdgeOffset
 
struct  EDGEPT
 
class  ELIST
 
class  ELIST2
 
class  ELIST2_ITERATOR
 
class  ELIST2_LINK
 
class  ELIST_ITERATOR
 
class  ELIST_LINK
 
class  Elst
 
class  Elst2
 
class  EquationDetect
 
class  EquationDetectBase
 
class  EquationFinderTest
 
class  ERRCODE
 
class  ErrorCounter
 
class  ETEXT_DESC
 
class  EuroText
 
class  FCOORD
 
struct  FEATURE_DEFS_STRUCT
 
struct  FEATURE_DESC_STRUCT
 
struct  FEATURE_SET_STRUCT
 
struct  FEATURE_STRUCT
 
struct  FFunc
 
class  File
 
struct  FILL_SPEC
 
struct  FILL_SWITCH
 
union  FLOATUNION
 
struct  FontInfo
 
class  FontInfoTable
 
struct  FontSpacingInfo
 
class  FontUtils
 
class  FontUtilsTest
 
class  FPAnalyzer
 
class  FPChar
 
class  FPCUTPT
 
struct  FPrime
 
class  FPRow
 
class  FPSEGPT
 
class  FRAGMENT
 
class  FriendlyTessBaseAPI
 
class  FullyConnected
 
class  GAPMAP
 
class  GENERIC_2D_ARRAY
 
class  GenericHeap
 
class  GenericVector
 
struct  GeometricClassifierState
 
struct  GFunc
 
struct  GPrime
 
struct  greater_than
 
class  GridBase
 
class  GridSearch
 
class  HeapTest
 
struct  HFunc
 
struct  HPrime
 
class  ICOORD
 integer coordinate More...
 
class  ICOORDELT
 
class  IcuErrorCode
 
struct  IdentityFunc
 
class  Image
 
class  ImageData
 
class  ImagedataTest
 
class  ImageFind
 
class  ImageThresholder
 
class  IndexMap
 
class  IndexMapBiDi
 
class  IndexMapBiDiTest
 
class  Input
 
class  InputBuffer
 
struct  INT_CLASS_STRUCT
 
struct  INT_FEATURE_STRUCT
 
struct  INT_FX_RESULT_STRUCT
 
struct  INT_PROTO_STRUCT
 
struct  INT_TEMPLATES_STRUCT
 
class  IntegerMatcher
 
struct  Interval
 
class  IntFeatureDist
 
class  IntFeatureMap
 
class  IntFeatureMapTest
 
class  IntFeatureSpace
 
class  IntGrid
 
class  IntParam
 
struct  IntSimdMatrix
 
class  IntSimdMatrixTest
 
struct  KDNODE
 
struct  KDPair
 
struct  KDPairDec
 
struct  KDPairInc
 
class  KDPtrPair
 
struct  KDPtrPairDec
 
struct  KDPtrPairInc
 
struct  KDTREE
 
class  KDTreeSearch
 
class  KDVector
 
struct  LABELEDLISTNODE
 
class  LanguageModel
 
struct  LanguageModelDawgInfo
 
struct  LanguageModelNgramInfo
 
struct  LanguageModelState
 Struct to store information maintained by various language model components. More...
 
class  LayoutTest
 
class  LigatureTable
 
class  LigatureTableTest
 
class  LineFinder
 
struct  LineHypothesis
 
struct  list_rec
 
class  ListTest
 
class  LLSQ
 
class  LLSQTest
 
struct  LMConsistencyInfo
 
class  LMPainPoints
 
class  LoadLang
 
class  LoadLanguage
 
class  LoadScript
 
class  LocalCorrelation
 
class  LocalFilePointer
 
class  LSTM
 
class  LSTMRecognizer
 
class  LSTMTester
 
class  LSTMTrainer
 
class  LSTMTrainerTest
 
class  LTRResultIterator
 
class  MasterTrainer
 
class  MatchGroundTruth
 
class  MATRIX
 
struct  MATRIX_COORD
 
class  MatrixTest
 
class  Maxpool
 
struct  MERGE_CLASS_NODE
 
struct  MFEDGEPT
 
class  MinK
 
class  MutableIterator
 
class  Network
 
class  NetworkBuilder
 
class  NetworkIO
 
class  NetworkioTest
 
class  NetworkScratch
 
class  NewMockProgressSink
 
struct  NodeChild
 
struct  NORM_PROTOS
 
class  NthItemTest
 
class  ObjectCache
 
class  OL_BUCKETS
 
class  OrientationDetector
 
struct  OSBestResult
 
class  OSDTest
 
struct  OSResults
 
class  OutputBuffer
 
class  PAGE_RES
 
class  PAGE_RES_IT
 
class  PageIterator
 
class  PageSegModeTest
 
class  PangoFontInfo
 
class  PangoFontInfoTest
 
struct  PARA
 
class  ParagraphModel
 
class  ParagraphModelSmearer
 
class  ParagraphTheory
 
class  Parallel
 
class  Param
 
struct  PARAM_DESC
 
class  ParamContent
 
class  ParamsEditor
 
class  ParamsModel
 
class  ParamsModelTest
 
class  ParamsTrainingBundle
 
struct  ParamsTrainingHypothesis
 
struct  ParamsVectors
 
class  ParamUtils
 
class  PB_LINE_IT
 
class  PDBLK
 page block More...
 
struct  PERM_CONFIG_STRUCT
 
class  PGEventHandler
 
class  PixelHistogram
 
class  Plumbing
 
class  PointerVector
 
class  POLY_BLOCK
 
struct  PROTO_KEY
 
struct  PROTO_SET_STRUCT
 
struct  PROTO_STRUCT
 
struct  PROTOTYPE
 
class  QLSQ
 
class  QRSequenceGeneratorTest
 
class  QSPLINE
 
class  QUAD_COEFFS
 
class  QuickTest
 
class  RecodeBeamSearch
 
class  RecodeBeamTest
 
class  RecodedCharID
 
struct  RecodeNode
 
class  Reconfig
 
class  REGION_OCC
 
class  REJ
 
class  REJMAP
 
struct  Relu
 
struct  ReluPrime
 
class  ResultIterator
 
class  ResultIteratorTest
 
class  Reversed
 
class  ROW
 
class  ROW_RES
 
class  RowInfo
 
class  RowScratchRegisters
 
class  SampleIterator
 
struct  SAMPLELIST
 
class  ScanutilsTest
 
struct  ScoredFont
 
struct  ScratchEvidence
 
class  ScriptDetector
 
class  ScrollView
 
class  SEAM
 
class  SegSearchPending
 
class  Series
 
class  Shape
 
class  ShapeClassifier
 
struct  ShapeDist
 
struct  ShapeQueueEntry
 
struct  ShapeRating
 
class  ShapeTable
 
class  ShapeTableTest
 
class  ShapeTest
 
class  SharedTest
 
class  ShiroRekhaSplitter
 
class  SIMDDetect
 
class  SimpleClusterer
 
class  SimpleStats
 
class  SORTED_FLOAT
 
class  SORTED_FLOATS
 
class  SortHelper
 
struct  SpacingProperties
 
struct  SPLIT
 
class  SquishedDawg
 
class  StaticShape
 
struct  STATISTICS
 
class  STATS
 
class  STATSTest
 
class  StrideMap
 
class  StridemapTest
 
class  StringParam
 
class  StringRenderer
 
class  StringRendererTest
 
class  StrokeWidth
 
class  StructuredTable
 
class  StructuredTableTest
 
struct  SVEvent
 
class  SVEventHandler
 
class  SVMenuNode
 
class  SVNetwork
 
class  SVPaint
 
struct  SVPolyLineBuffer
 
class  SVSemaphore
 
class  SVSync
 The SVSync class provides functionality for Thread & Process Creation. More...
 
class  TabConstraint
 
class  TabEventHandler
 
class  TabFind
 
struct  TABLE_FILLER
 
class  TableFinder
 
class  TableFinderTest
 
class  TableRecognizer
 
class  TableRecognizerTest
 
class  TabVector
 
class  TabVectorTest
 
class  TatweelTest
 
struct  TBLOB
 
class  TBOX
 
class  TBOXTest
 
struct  TEMP_CONFIG_STRUCT
 
struct  TEMP_PROTO_STRUCT
 
struct  TEMPCLUSTER
 
class  TessAltoRenderer
 
class  TessBaseAPI
 
class  TessBoxTextRenderer
 
class  TessClassifier
 
class  TessdataManager
 
class  Tesseract
 
struct  TesseractStats
 
class  TesseractTest
 
class  TessHOcrRenderer
 
struct  TESSLINE
 
class  TessLSTMBoxRenderer
 
class  TessOsdRenderer
 
class  TessPDFRenderer
 
class  TessResultRenderer
 
class  TessTextRenderer
 
class  TessTsvRenderer
 
class  TessUnlvRenderer
 
class  TessWordStrBoxRenderer
 
class  TestableColPartition
 
class  TestableEquationDetect
 
class  TestableQRSequenceGenerator
 
class  TestableStructuredTable
 
class  TestableTableFinder
 
class  TestableTableRecognizer
 
class  TestableValidator
 
class  TestClass
 
struct  TextAndModel
 
class  TextlineProjection
 
class  TextlineProjectionTest
 
class  Textord
 
class  TFile
 
class  TfileTest
 
class  TFNetworkModel
 
class  TFNetworkModelDefaultTypeInternal
 
class  TO_BLOCK
 
class  TO_ROW
 
struct  TPOINT
 
class  TrainingSample
 
class  TrainingSampleSet
 
class  TRand
 
class  TransposedArray
 
class  Trie
 
struct  TRIE_NODE_RECORD
 
struct  TWERD
 
class  UNICHAR
 
class  UnicharAmbigs
 
struct  UnicharAndFonts
 
class  UnicharCompress
 
class  UnicharcompressTest
 
class  UnicharIdArrayUtils
 
class  UNICHARMAP
 
struct  UnicharRating
 
class  UNICHARSET
 
class  UnicharsetTest
 
class  UnicityTable
 
class  UnicodeSpanSkipper
 
struct  UnityFunc
 
class  ValidateGrapheme
 
class  ValidateIndic
 
class  ValidateJavanese
 
class  ValidateKhmer
 
class  ValidateMyanmar
 
class  Validator
 
struct  ViterbiStateEntry
 
class  WeightMatrix
 
class  WERD
 
class  WERD_CHOICE
 
class  WERD_RES
 
struct  WordData
 
class  Wordrec
 
class  WordWithBox
 
class  WorkingPartSet
 
class  X_CLIST
 
class  X_ITER
 
class  X_LIST
 

Typedefs

using FileReader = bool(*)(const char *filename, std::vector< char > *data)
 
using DictFunc = int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const
 
using ProbabilityInContextFunc = double(Dict::*)(const char *, const char *, int, const char *, int)
 
using CANCEL_FUNC = bool(*)(void *, int)
 
using PROGRESS_FUNC = bool(*)(int, int, int, int, int)
 
using PROGRESS_FUNC2 = bool(*)(ETEXT_DESC *, int, int, int, int)
 
using UNICHAR_ID = int
 
using char32 = signed int
 
using DotProductFunction = TFloat(*)(const TFloat *, const TFloat *, int)
 
using SetOfModels = std::vector< const ParagraphModel * >
 
using WordRecognizer = void(Tesseract::*)(const WordData &, WERD_RES **, PointerVector< WERD_RES > *)
 
using VECTOR = TPOINT
 
using FontSet = std::vector< int >
 
using MatrixCoordPair = KDPairInc< float, MATRIX_COORD >
 
using ParamsTrainingHypothesisList = std::vector< ParamsTrainingHypothesis >
 
using BLOB_CHOICE_LIST_VECTOR = std::vector< BLOB_CHOICE_LIST * >
 
using PRIORITY = float
 
using UnicharIdVector = std::vector< UNICHAR_ID >
 
using UnicharAmbigsVector = std::vector< AmbigSpec_LIST * >
 
using IntKDPair = KDPairInc< int, int >
 
using FileWriter = bool(*)(const std::vector< char > &data, const char *filename)
 
using TDimension = int16_t
 
using TFloat = double
 
using RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > > >
 
using RSCounts = std::unordered_map< int, int >
 
using ClusterPair = tesseract::KDPairInc< float, TEMPCLUSTER * >
 
using ClusterHeap = tesseract::GenericHeap< ClusterPair >
 
using DENSITYFUNC = double(*)(int32_t)
 
using SOLVEFUNC = double(*)(CHISTRUCT *, double)
 
using SAMPLE = CLUSTER
 
using FEATURE_DEFS = FEATURE_DEFS_STRUCT *
 
typedef uint32_t PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]
 
typedef uint32_t CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]
 
typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
 
using kdwalk_proc = void(*)(ClusteringContext *context, CLUSTER *Cluster, int32_t Level)
 
using MicroFeature = std::array< float,(int) MicroFeatureParameter::MFCount >
 
using MICROFEATURES = std::forward_list< MicroFeature >
 
using MFOUTLINE = LIST
 
using FEATURE_DESC = FEATURE_DESC_STRUCT *
 
using FEATURE = FEATURE_STRUCT *
 
using FEATURE_SET = FEATURE_SET_STRUCT *
 
using CHAR_FEATURES = char *
 
using CLASS_TYPE = CLASS_STRUCT *
 
using CLASSES = CLASS_STRUCT *
 
using ShapeQueue = GenericHeap< ShapeQueueEntry >
 
using int_compare = int(*)(void *, void *)
 
using void_dest = void(*)(void *)
 
using LIST = list_rec *
 
using EDGE_RECORD = uint64_t
 
using EDGE_ARRAY = EDGE_RECORD *
 
using EDGE_REF = int64_t
 
using NODE_REF = int64_t
 
using NODE_MAP = EDGE_REF *
 
using NodeChildVector = std::vector< NodeChild >
 
using SuccessorList = std::vector< int >
 
using SuccessorListsVector = std::vector< SuccessorList * >
 
using DawgVector = std::vector< Dawg * >
 
using CLASS_ID = UNICHAR_ID
 
using PROTO_ID = int16_t
 
using FEATURE_ID = uint8_t
 
using BLOB_WIDTH = uint8_t
 
using DANGERR = std::vector< DANGERR_INFO >
 
using EDGE_INDEX = int64_t
 
using EDGE_VECTOR = std::vector< EDGE_RECORD >
 
using TRIE_NODES = std::vector< TRIE_NODE_RECORD * >
 
using RecodePair = KDPairInc< double, RecodeNode >
 
using RecodeHeap = GenericHeap< RecodePair >
 
using BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 
using ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 
using PartSetVector = std::vector< ColPartitionSet * >
 
using WidthCallback = std::function< bool(int)>
 
using ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using LABELEDLIST = LABELEDLISTNODE *
 
using MERGE_CLASS = MERGE_CLASS_NODE *
 
using LigHash = std::unordered_map< std::string, std::string >
 
using TestCallback = std::function< std::string(int, const double *, const TessdataManager &, int)>
 
using PointPair = KDPairInc< float, EDGEPT * >
 
using PointHeap = GenericHeap< PointPair >
 
using SeamPair = KDPtrPairInc< float, SEAM >
 
using SeamQueue = GenericHeap< SeamPair >
 
using SeamDecPair = KDPtrPairDec< float, SEAM >
 
using SeamPile = GenericHeap< SeamDecPair >
 
using PainPointHeap = GenericHeap< MatrixCoordPair >
 
using LanguageModelFlagsType = unsigned char
 Used for expressing various language model flags. More...
 
typedef ParagraphModel PModel
 

Enumerations

enum  PolyBlockType {
  PT_UNKNOWN , PT_FLOWING_TEXT , PT_HEADING_TEXT , PT_PULLOUT_TEXT ,
  PT_EQUATION , PT_INLINE_EQUATION , PT_TABLE , PT_VERTICAL_TEXT ,
  PT_CAPTION_TEXT , PT_FLOWING_IMAGE , PT_HEADING_IMAGE , PT_PULLOUT_IMAGE ,
  PT_HORZ_LINE , PT_VERT_LINE , PT_NOISE , PT_COUNT
}
 
enum  Orientation { ORIENTATION_PAGE_UP = 0 , ORIENTATION_PAGE_RIGHT = 1 , ORIENTATION_PAGE_DOWN = 2 , ORIENTATION_PAGE_LEFT = 3 }
 
enum  WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0 , WRITING_DIRECTION_RIGHT_TO_LEFT = 1 , WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }
 
enum  TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0 , TEXTLINE_ORDER_RIGHT_TO_LEFT = 1 , TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }
 
enum  PageSegMode {
  PSM_OSD_ONLY = 0 , PSM_AUTO_OSD = 1 , PSM_AUTO_ONLY = 2 , PSM_AUTO = 3 ,
  PSM_SINGLE_COLUMN = 4 , PSM_SINGLE_BLOCK_VERT_TEXT = 5 , PSM_SINGLE_BLOCK = 6 , PSM_SINGLE_LINE = 7 ,
  PSM_SINGLE_WORD = 8 , PSM_CIRCLE_WORD = 9 , PSM_SINGLE_CHAR = 10 , PSM_SPARSE_TEXT = 11 ,
  PSM_SPARSE_TEXT_OSD = 12 , PSM_RAW_LINE = 13 , PSM_COUNT
}
 
enum  PageIteratorLevel {
  RIL_BLOCK , RIL_PARA , RIL_TEXTLINE , RIL_WORD ,
  RIL_SYMBOL
}
 
enum  ParagraphJustification { JUSTIFICATION_UNKNOWN , JUSTIFICATION_LEFT , JUSTIFICATION_CENTER , JUSTIFICATION_RIGHT }
 
enum  OcrEngineMode {
  OEM_TESSERACT_ONLY , OEM_LSTM_ONLY , OEM_TESSERACT_LSTM_COMBINED , OEM_DEFAULT ,
  OEM_COUNT
}
 
enum  StrongScriptDirection { DIR_NEUTRAL = 0 , DIR_LEFT_TO_RIGHT = 1 , DIR_RIGHT_TO_LEFT = 2 , DIR_MIX = 3 }
 
enum  GARBAGE_LEVEL { G_NEVER_CRUNCH , G_OK , G_DODGY , G_TERRIBLE }
 
enum  LineType { LT_START = 'S' , LT_BODY = 'C' , LT_UNKNOWN = 'U' , LT_MULTIPLE = 'M' }
 
enum  ParamType { VT_INTEGER , VT_BOOLEAN , VT_STRING , VT_DOUBLE }
 
enum  CMD_EVENTS {
  NULL_CMD_EVENT , CHANGE_DISP_CMD_EVENT , DUMP_WERD_CMD_EVENT , SHOW_POINT_CMD_EVENT ,
  SHOW_BLN_WERD_CMD_EVENT , DEBUG_WERD_CMD_EVENT , BLAMER_CMD_EVENT , BOUNDING_BOX_CMD_EVENT ,
  CORRECT_TEXT_CMD_EVENT , POLYGONAL_CMD_EVENT , BL_NORM_CMD_EVENT , BITMAP_CMD_EVENT ,
  IMAGE_CMD_EVENT , BLOCKS_CMD_EVENT , BASELINES_CMD_EVENT , UNIFORM_DISP_CMD_EVENT ,
  REFRESH_CMD_EVENT , QUIT_CMD_EVENT , RECOG_WERDS , RECOG_PSEUDO ,
  SHOW_BLOB_FEATURES , SHOW_SUBSCRIPT_CMD_EVENT , SHOW_SUPERSCRIPT_CMD_EVENT , SHOW_ITALIC_CMD_EVENT ,
  SHOW_BOLD_CMD_EVENT , SHOW_UNDERLINE_CMD_EVENT , SHOW_FIXEDPITCH_CMD_EVENT , SHOW_SERIF_CMD_EVENT ,
  SHOW_SMALLCAPS_CMD_EVENT , SHOW_DROPCAPS_CMD_EVENT , ACTION_1_CMD_EVENT , RECOG_WERDS ,
  RECOG_PSEUDO , ACTION_2_CMD_EVENT
}
 
enum  ColorationMode {
  CM_RAINBOW , CM_SUBSCRIPT , CM_SUPERSCRIPT , CM_ITALIC ,
  CM_BOLD , CM_UNDERLINE , CM_FIXEDPITCH , CM_SERIF ,
  CM_SMALLCAPS , CM_DROPCAPS
}
 
enum  CMD_EVENTS {
  NULL_CMD_EVENT , CHANGE_DISP_CMD_EVENT , DUMP_WERD_CMD_EVENT , SHOW_POINT_CMD_EVENT ,
  SHOW_BLN_WERD_CMD_EVENT , DEBUG_WERD_CMD_EVENT , BLAMER_CMD_EVENT , BOUNDING_BOX_CMD_EVENT ,
  CORRECT_TEXT_CMD_EVENT , POLYGONAL_CMD_EVENT , BL_NORM_CMD_EVENT , BITMAP_CMD_EVENT ,
  IMAGE_CMD_EVENT , BLOCKS_CMD_EVENT , BASELINES_CMD_EVENT , UNIFORM_DISP_CMD_EVENT ,
  REFRESH_CMD_EVENT , QUIT_CMD_EVENT , RECOG_WERDS , RECOG_PSEUDO ,
  SHOW_BLOB_FEATURES , SHOW_SUBSCRIPT_CMD_EVENT , SHOW_SUPERSCRIPT_CMD_EVENT , SHOW_ITALIC_CMD_EVENT ,
  SHOW_BOLD_CMD_EVENT , SHOW_UNDERLINE_CMD_EVENT , SHOW_FIXEDPITCH_CMD_EVENT , SHOW_SERIF_CMD_EVENT ,
  SHOW_SMALLCAPS_CMD_EVENT , SHOW_DROPCAPS_CMD_EVENT , ACTION_1_CMD_EVENT , RECOG_WERDS ,
  RECOG_PSEUDO , ACTION_2_CMD_EVENT
}
 
enum class  ThresholdMethod { Otsu , LeptonicaOtsu , Sauvola , Max }
 
enum  IncorrectResultReason {
  IRR_CORRECT , IRR_CLASSIFIER , IRR_CHOPPER , IRR_CLASS_LM_TRADEOFF ,
  IRR_PAGE_LAYOUT , IRR_SEGSEARCH_HEUR , IRR_SEGSEARCH_PP , IRR_CLASS_OLD_LM_TRADEOFF ,
  IRR_ADAPTION , IRR_NO_TRUTH_SPLIT , IRR_NO_TRUTH , IRR_UNKNOWN ,
  IRR_NUM_REASONS
}
 
enum  PITCH_TYPE {
  PITCH_DUNNO , PITCH_DEF_FIXED , PITCH_MAYBE_FIXED , PITCH_DEF_PROP ,
  PITCH_MAYBE_PROP , PITCH_CORR_FIXED , PITCH_CORR_PROP
}
 
enum  TabType {
  TT_NONE , TT_DELETED , TT_MAYBE_RAGGED , TT_MAYBE_ALIGNED ,
  TT_CONFIRMED , TT_VLINE
}
 
enum  BlobRegionType {
  BRT_NOISE , BRT_HLINE , BRT_VLINE , BRT_RECTIMAGE ,
  BRT_POLYIMAGE , BRT_UNKNOWN , BRT_VERT_TEXT , BRT_TEXT ,
  BRT_COUNT
}
 
enum  BlobNeighbourDir {
  BND_LEFT , BND_BELOW , BND_RIGHT , BND_ABOVE ,
  BND_COUNT
}
 
enum  BlobSpecialTextType {
  BSTT_NONE , BSTT_ITALIC , BSTT_DIGIT , BSTT_MATH ,
  BSTT_UNCLEAR , BSTT_SKIP , BSTT_COUNT
}
 
enum  BlobTextFlowType {
  BTFT_NONE , BTFT_NONTEXT , BTFT_NEIGHBOURS , BTFT_CHAIN ,
  BTFT_STRONG_CHAIN , BTFT_TEXT_ON_IMAGE , BTFT_LEADER , BTFT_COUNT
}
 
enum  C_OUTLINE_FLAGS { COUT_INVERSE }
 
enum  CachingStrategy { CS_SEQUENTIAL , CS_ROUND_ROBIN }
 
enum  NormalizationMode { NM_BASELINE = -3 , NM_CHAR_ISOTROPIC = -2 , NM_CHAR_ANISOTROPIC = -1 }
 
enum  CRUNCH_MODE { CR_NONE , CR_KEEP_SPACE , CR_LOOSE_SPACE , CR_DELETE }
 
enum  kParamsTrainingFeatureType {
  PTRAIN_DIGITS_SHORT , PTRAIN_DIGITS_MED , PTRAIN_DIGITS_LONG , PTRAIN_NUM_SHORT ,
  PTRAIN_NUM_MED , PTRAIN_NUM_LONG , PTRAIN_DOC_SHORT , PTRAIN_DOC_MED ,
  PTRAIN_DOC_LONG , PTRAIN_DICT_SHORT , PTRAIN_DICT_MED , PTRAIN_DICT_LONG ,
  PTRAIN_FREQ_SHORT , PTRAIN_FREQ_MED , PTRAIN_FREQ_LONG , PTRAIN_SHAPE_COST_PER_CHAR ,
  PTRAIN_NGRAM_COST_PER_CHAR , PTRAIN_NUM_BAD_PUNC , PTRAIN_NUM_BAD_CASE , PTRAIN_XHEIGHT_CONSISTENCY ,
  PTRAIN_NUM_BAD_CHAR_TYPE , PTRAIN_NUM_BAD_SPACING , PTRAIN_NUM_BAD_FONT , PTRAIN_RATING_PER_CHAR ,
  PTRAIN_NUM_FEATURE_TYPES
}
 
enum  BlobChoiceClassifier {
  BCC_STATIC_CLASSIFIER , BCC_ADAPTED_CLASSIFIER , BCC_SPECKLE_CLASSIFIER , BCC_AMBIG ,
  BCC_FAKE
}
 
enum  PermuterType {
  NO_PERM , PUNC_PERM , TOP_CHOICE_PERM , LOWER_CASE_PERM ,
  UPPER_CASE_PERM , NGRAM_PERM , NUMBER_PERM , USER_PATTERN_PERM ,
  SYSTEM_DAWG_PERM , DOC_DAWG_PERM , USER_DAWG_PERM , FREQ_DAWG_PERM ,
  COMPOUND_PERM , NUM_PERMUTER_TYPES
}
 
enum  ScriptPos { SP_NORMAL , SP_SUBSCRIPT , SP_SUPERSCRIPT , SP_DROPCAP }
 
enum  REJ_FLAGS {
  R_TESS_FAILURE , R_SMALL_XHT , R_EDGE_CHAR , R_1IL_CONFLICT ,
  R_POSTNN_1IL , R_REJ_CBLOB , R_MM_REJECT , R_BAD_REPETITION ,
  R_POOR_MATCH , R_NOT_TESS_ACCEPTED , R_CONTAINS_BLANKS , R_BAD_PERMUTER ,
  R_HYPHEN , R_DUBIOUS , R_NO_ALPHANUMS , R_MOSTLY_REJ ,
  R_XHT_FIXUP , R_BAD_QUALITY , R_DOC_REJ , R_BLOCK_REJ ,
  R_ROW_REJ , R_UNLV_REJ , R_NN_ACCEPT , R_HYPHEN_ACCEPT ,
  R_MM_ACCEPT , R_QUALITY_ACCEPT , R_MINIMAL_REJ_ACCEPT
}
 
enum  WERD_FLAGS {
  W_SEGMENTED , W_ITALIC , W_BOLD , W_BOL ,
  W_EOL , W_NORMALIZED , W_SCRIPT_HAS_XHEIGHT , W_SCRIPT_IS_LATIN ,
  W_DONT_CHOP , W_REP_CHAR , W_FUZZY_SP , W_FUZZY_NON ,
  W_INVERSE
}
 
enum  DISPLAY_FLAGS {
  DF_BOX , DF_TEXT , DF_POLYGONAL , DF_EDGE_STEP ,
  DF_BN_POLYGONAL , DF_BLAMER
}
 
enum  AmbigType {
  NOT_AMBIG , REPLACE_AMBIG , DEFINITE_AMBIG , SIMILAR_AMBIG ,
  CASE_AMBIG , AMBIG_TYPE_COUNT
}
 
enum  TessErrorLogCode { DBG = -1 , TESSLOG = 0 , TESSEXIT = 1 , ABORT = 2 }
 
enum  SetParamConstraint { SET_PARAM_CONSTRAINT_NONE , SET_PARAM_CONSTRAINT_DEBUG_ONLY , SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY , SET_PARAM_CONSTRAINT_NON_INIT_ONLY }
 
enum  TessdataType {
  TESSDATA_LANG_CONFIG , TESSDATA_UNICHARSET , TESSDATA_AMBIGS , TESSDATA_INTTEMP ,
  TESSDATA_PFFMTABLE , TESSDATA_NORMPROTO , TESSDATA_PUNC_DAWG , TESSDATA_SYSTEM_DAWG ,
  TESSDATA_NUMBER_DAWG , TESSDATA_FREQ_DAWG , TESSDATA_FIXED_LENGTH_DAWGS , TESSDATA_CUBE_UNICHARSET ,
  TESSDATA_CUBE_SYSTEM_DAWG , TESSDATA_SHAPE_TABLE , TESSDATA_BIGRAM_DAWG , TESSDATA_UNAMBIG_DAWG ,
  TESSDATA_PARAMS_MODEL , TESSDATA_LSTM , TESSDATA_LSTM_PUNC_DAWG , TESSDATA_LSTM_SYSTEM_DAWG ,
  TESSDATA_LSTM_NUMBER_DAWG , TESSDATA_LSTM_UNICHARSET , TESSDATA_LSTM_RECODER , TESSDATA_VERSION ,
  TESSDATA_NUM_ENTRIES
}
 
enum  SpecialUnicharCodes { UNICHAR_SPACE , UNICHAR_JOINED , UNICHAR_BROKEN , SPECIAL_UNICHAR_CODES_COUNT }
 
enum class  OldUncleanUnichars { kFalse , kTrue }
 
enum  CharSegmentationType { CST_FRAGMENT , CST_WHOLE , CST_IMPROPER , CST_NGRAM }
 
enum  PROTOSTYLE { spherical , elliptical , mixed , automatic }
 
enum  DISTRIBUTION { normal , uniform , D_random , DISTRIBUTION_COUNT }
 
enum  SWITCH_TYPE { StartSwitch , EndSwitch , LastSwitch }
 
enum  IntmatcherDebugAction { IDA_ADAPTIVE , IDA_STATIC , IDA_SHAPE_INDEX , IDA_BOTH }
 
enum class  MicroFeatureParameter {
  MFXPosition , MFYPosition , MFLength , MFDirection ,
  MFBulge1 , MFBulge2 , MFCount
}
 
enum  DIRECTION : uint8_t {
  north , south , east , west ,
  northeast , northwest , southeast , southwest
}
 
enum  OUTLINETYPE { outer , hole }
 
enum  NORM_METHOD { baseline , character }
 
enum  NORM_PARAM_NAME { CharNormY , CharNormLength , CharNormRx , CharNormRy }
 
enum  OUTLINE_FEAT_PARAM_NAME { OutlineFeatX , OutlineFeatY , OutlineFeatLength , OutlineFeatDir }
 
enum  IntParams { IntX , IntY , IntDir }
 
enum  GeoParams { GeoBottom , GeoTop , GeoWidth , GeoCount }
 
enum  PICO_FEAT_PARAM_NAME { PicoFeatY , PicoFeatDir , PicoFeatX }
 
enum  DawgType {
  DAWG_TYPE_PUNCTUATION , DAWG_TYPE_WORD , DAWG_TYPE_NUMBER , DAWG_TYPE_PATTERN ,
  DAWG_TYPE_COUNT
}
 
enum  XHeightConsistencyEnum { XH_GOOD , XH_SUBNORMAL , XH_INCONSISTENT }
 
enum  TrainingFlags { TF_INT_MODE = 1 , TF_COMPRESS_UNICHARSET = 64 }
 
enum  NetworkType {
  NT_NONE , NT_INPUT , NT_CONVOLVE , NT_MAXPOOL ,
  NT_PARALLEL , NT_REPLICATED , NT_PAR_RL_LSTM , NT_PAR_UD_LSTM ,
  NT_PAR_2D_LSTM , NT_SERIES , NT_RECONFIG , NT_XREVERSED ,
  NT_YREVERSED , NT_XYTRANSPOSE , NT_LSTM , NT_LSTM_SUMMARY ,
  NT_LOGISTIC , NT_POSCLIP , NT_SYMCLIP , NT_TANH ,
  NT_RELU , NT_LINEAR , NT_SOFTMAX , NT_SOFTMAX_NO_CTC ,
  NT_LSTM_SOFTMAX , NT_LSTM_SOFTMAX_ENCODED , NT_TENSORFLOW , NT_COUNT
}
 
enum  NetworkFlags { NF_LAYER_SPECIFIC_LR = 64 , NF_ADAM = 128 }
 
enum  TrainingState { TS_DISABLED , TS_ENABLED , TS_TEMP_DISABLE , TS_RE_ENABLE }
 
enum  NodeContinuation { NC_ANYTHING , NC_ONLY_DUP , NC_NO_DUP , NC_COUNT }
 
enum  TopNState { TN_TOP2 , TN_TOPN , TN_ALSO_RAN , TN_COUNT }
 
enum  LossType { LT_NONE , LT_CTC , LT_SOFTMAX , LT_LOGISTIC }
 
enum  FlexDimensions { FD_BATCH , FD_HEIGHT , FD_WIDTH , FD_DIMSIZE }
 
enum  SpacingNeighbourhood {
  PN_ABOVE2 , PN_ABOVE1 , PN_UPPER , PN_LOWER ,
  PN_BELOW1 , PN_BELOW2 , PN_COUNT
}
 
enum  ColumnSpanningType {
  CST_NOISE , CST_FLOWING , CST_HEADING , CST_PULLOUT ,
  CST_COUNT
}
 
enum  NeighbourPartitionType {
  NPT_HTEXT , NPT_VTEXT , NPT_WEAK_HTEXT , NPT_WEAK_VTEXT ,
  NPT_IMAGE , NPT_COUNT
}
 
enum  OVERLAP_STATE { ASSIGN , REJECT , NEW_ROW }
 
enum  ROW_CATEGORY { ROW_ASCENDERS_FOUND , ROW_DESCENDERS_FOUND , ROW_UNKNOWN , ROW_INVALID }
 
enum  LeftOrRight { LR_LEFT , LR_RIGHT }
 
enum  PartitionFindResult { PFR_OK , PFR_SKEW , PFR_NOISE }
 
enum  ColSegType {
  COL_UNKNOWN , COL_TEXT , COL_TABLE , COL_MIXED ,
  COL_COUNT
}
 
enum  TabAlignment {
  TA_LEFT_ALIGNED , TA_LEFT_RAGGED , TA_CENTER_JUSTIFIED , TA_RIGHT_ALIGNED ,
  TA_RIGHT_RAGGED , TA_SEPARATOR , TA_COUNT
}
 
enum  CountTypes {
  CT_UNICHAR_TOP_OK , CT_UNICHAR_TOP1_ERR , CT_UNICHAR_TOP2_ERR , CT_UNICHAR_TOPN_ERR ,
  CT_UNICHAR_TOPTOP_ERR , CT_OK_MULTI_UNICHAR , CT_OK_JOINED , CT_OK_BROKEN ,
  CT_REJECT , CT_FONT_ATTR_ERR , CT_OK_MULTI_FONT , CT_NUM_RESULTS ,
  CT_RANK , CT_REJECTED_JUNK , CT_ACCEPTED_JUNK , CT_SIZE
}
 
enum  FactorNames {
  FN_INCOLOR , FN_Y0 , FN_Y1 , FN_Y2 ,
  FN_Y3 , FN_X0 , FN_X1 , FN_SHEAR ,
  FN_NUM_FACTORS
}
 
enum  ErrorTypes {
  ET_RMS , ET_DELTA , ET_WORD_RECERR , ET_CHAR_ERROR ,
  ET_SKIP_RATIO , ET_COUNT
}
 
enum  Trainability {
  TRAINABLE , PERFECT , UNENCODABLE , HI_PRECISION_ERR ,
  NOT_BOXED
}
 
enum  SerializeAmount { LIGHT , NO_BEST_TRAINER , FULL }
 
enum  SubTrainerResult { STR_NONE , STR_UPDATED , STR_REPLACED }
 
enum class  UnicodeNormMode { kNFD , kNFC , kNFKD , kNFKC }
 
enum class  OCRNorm { kNone , kNormalize }
 
enum class  GraphemeNorm { kNone , kNormalize }
 
enum class  GraphemeNormMode { kSingleString , kCombined , kGlyphSplit , kIndividualUnicodes }
 
enum class  ViramaScript : char32 {
  kNonVirama = 0 , kDevanagari = 0x900 , kBengali = 0x980 , kGurmukhi = 0xa00 ,
  kGujarati = 0xa80 , kOriya = 0xb00 , kTamil = 0xb80 , kTelugu = 0xc00 ,
  kKannada = 0xc80 , kMalayalam = 0xd00 , kSinhala = 0xd80 , kMyanmar = 0x1000 ,
  kKhmer = 0x1780 , kJavanese = 0xa980
}
 
enum  SVEventType {
  SVET_DESTROY , SVET_EXIT , SVET_CLICK , SVET_SELECTION ,
  SVET_INPUT , SVET_MOUSE , SVET_MOTION , SVET_HOVER ,
  SVET_POPUP , SVET_MENU , SVET_ANY , SVET_COUNT
}
 
enum  LMPainPointsType {
  LM_PPTYPE_BLAMER , LM_PPTYPE_AMBIG , LM_PPTYPE_PATH , LM_PPTYPE_SHAPE ,
  LM_PPTYPE_NUM
}
 
enum  TextModelInputType { PCONT = 0 , PSTART = 1 , PNONE = 2 }
 

Functions

std::string HOcrEscape (const char *text)
 
int orientation_and_script_detection (const char *filename, OSResults *, tesseract::Tesseract *)
 
int os_detect (TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess)
 
int os_detect_blobs (const std::vector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
 
bool os_detect_blob (BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *, tesseract::Tesseract *tess)
 
TESS_API int OrientationIdToValue (const int &id)
 
bool PTIsLineType (PolyBlockType type)
 
bool PTIsImageType (PolyBlockType type)
 
bool PTIsTextType (PolyBlockType type)
 
bool PTIsPulloutType (PolyBlockType type)
 
bool PSM_OSD_ENABLED (int pageseg_mode)
 
bool PSM_ORIENTATION_ENABLED (int pageseg_mode)
 
bool PSM_COL_FIND_ENABLED (int pageseg_mode)
 
bool PSM_SPARSE (int pageseg_mode)
 
bool PSM_BLOCK_FIND_ENABLED (int pageseg_mode)
 
bool PSM_LINE_FIND_ENABLED (int pageseg_mode)
 
bool PSM_WORD_FIND_ENABLED (int pageseg_mode)
 
TFloat DotProductNative (const TFloat *u, const TFloat *v, int n)
 
TFloat DotProductAVX (const TFloat *u, const TFloat *v, int n)
 
TFloat DotProductAVX512F (const TFloat *u, const TFloat *v, int n)
 
TFloat DotProductFMA (const TFloat *u, const TFloat *v, int n)
 
TFloat DotProductSSE (const TFloat *u, const TFloat *v, int n)
 
TFloat DotProductNEON (const TFloat *u, const TFloat *v, int n)
 
void reject_whole_page (PAGE_RES_IT &page_res_it)
 
int16_t word_blob_quality (WERD_RES *word)
 
bool IsTextOrEquationType (PolyBlockType type)
 
bool IsLeftIndented (const EquationDetect::IndentType type)
 
bool IsRightIndented (const EquationDetect::IndentType type)
 
char determine_newline_type (WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)
 
bool AsciiLikelyListItem (const std::string &word)
 
template<class T >
void push_back_new (std::vector< T > &vector, const T &data)
 
void LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
void RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
bool ValidFirstLine (const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool ValidBodyLine (const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool CrownCompatible (const std::vector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)
 
void RecomputeMarginsAndClearHypotheses (std::vector< RowScratchRegisters > *rows, int start, int end, int percentile)
 
int InterwordSpace (const std::vector< RowScratchRegisters > &rows, int row_start, int row_end)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)
 
bool RowsFitModel (const std::vector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)
 
void CanonicalizeDetectionResults (std::vector< PARA * > *row_owners, PARA_LIST *paragraphs)
 
void DetectParagraphs (int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
 
void DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, std::vector< ParagraphModel * > *models)
 
bool StrongModel (const ParagraphModel *model)
 
 STRING_VAR_H (editor_image_win_name)
 
 INT_VAR_H (editor_image_xpos)
 
 INT_VAR_H (editor_image_ypos)
 
 INT_VAR_H (editor_image_word_bb_color)
 
 INT_VAR_H (editor_image_blob_bb_color)
 
 STRING_VAR_H (editor_word_name)
 
 INT_VAR_H (editor_word_xpos)
 
 INT_VAR_H (editor_word_ypos)
 
 INT_VAR_H (editor_word_height)
 
 INT_VAR_H (editor_word_width)
 
void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word)
 
float compute_reject_threshold (WERD_CHOICE *word)
 
bool word_contains_non_1_digit (const char *word, const char *word_lengths)
 
void dont_allow_1Il (WERD_RES *word)
 
void flip_hyphens (WERD_RES *word)
 
void flip_0O (WERD_RES *word)
 
bool non_0_digit (const char *str, int length)
 
PAGE_RES_IT * make_pseudo_word (PAGE_RES *page_res, const TBOX &selection_box)
 
void find_cblob_limits (C_BLOB *blob, float leftx, float rightx, FCOORD rotation, float &ymin, float &ymax)
 
void find_cblob_vlimits (C_BLOB *blob, float leftx, float rightx, float &ymin, float &ymax)
 
void find_cblob_hlimits (C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)
 
C_BLOB * crotate_cblob (C_BLOB *blob, FCOORD rotation)
 
TBOX box_next (BLOBNBOX_IT *it)
 
TBOX box_next_pre_chopped (BLOBNBOX_IT *it)
 
void vertical_cblob_projection (C_BLOB *blob, STATS *stats)
 
void vertical_coutline_projection (C_OUTLINE *outline, STATS *stats)
 
void plot_blob_list (ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
 
BlobNeighbourDir DirOtherWay (BlobNeighbourDir dir)
 
bool DominatesInMerge (BlobTextFlowType type1, BlobTextFlowType type2)
 
bool divisible_blob (TBLOB *blob, bool italic_blob, TPOINT *location)
 
void divide_blobs (TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
 
bool read_unlv_file (std::string &name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)
 
void FullPageBlock (int width, int height, BLOCK_LIST *blocks)
 
FILE * OpenBoxFile (const char *fname)
 
bool ReadAllBoxes (int target_page, bool skip_blanks, const char *filename, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)
 
bool ReadMemBoxes (int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)
 
bool ReadNextBox (int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box)
 
bool ReadNextBox (int target_page, int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box)
 
bool ParseBoxFileStr (const char *boxfile_str, int *page_number, std::string &utf8_str, TBOX *bounding_box)
 
void MakeBoxFileStr (const char *unichar_str, const TBOX &box, int page_num, std::string &box_str)
 
void FontInfoDeleteCallback (FontInfo f)
 
bool read_info (TFile *f, FontInfo *fi)
 
bool write_info (FILE *f, const FontInfo &fi)
 
bool read_spacing_info (TFile *f, FontInfo *fi)
 
bool write_spacing_info (FILE *f, const FontInfo &fi)
 
bool write_set (FILE *f, const FontSet &fs)
 
constexpr ERRCODE EMPTY_LLSQ ("Can't delete from an empty LLSQ")
 
template<typename T >
T MedianOfCircularValues (T modulus, std::vector< T > &v)
 
void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 
int OtsuThreshold (Image src_pix, int left, int top, int width, int height, std::vector< int > &thresholds, std::vector< int > &hi_values)
 
void HistogramRect (Image src_pix, int channel, int left, int top, int width, int height, int *histogram)
 
int OtsuStats (const int *histogram, int *H_out, int *omega0_out)
 
int ParamsTrainingFeatureByName (const char *name)
 
constexpr ERRCODE BADBLOCKLINE ("Y coordinate in block out of bounds")
 
constexpr ERRCODE LOSTBLOCKLINE ("Can't find rectangle for line")
 
ICOORD operator! (const ICOORD &src)
 
ICOORD operator- (const ICOORD &src)
 
ICOORD operator+ (const ICOORD &op1, const ICOORD &op2)
 
ICOORD & operator+= (ICOORD &op1, const ICOORD &op2)
 
ICOORD operator- (const ICOORD &op1, const ICOORD &op2)
 
ICOORD & operator-= (ICOORD &op1, const ICOORD &op2)
 
int32_t operator% (const ICOORD &op1, const ICOORD &op2)
 
int32_t operator* (const ICOORD &op1, const ICOORD &op2)
 
ICOORD operator* (const ICOORD &op1, TDimension scale)
 
ICOORD operator* (TDimension scale, const ICOORD &op1)
 
ICOORD & operator*= (ICOORD &op1, TDimension scale)
 
ICOORD operator/ (const ICOORD &op1, TDimension scale)
 
ICOORD & operator/= (ICOORD &op1, TDimension scale)
 
FCOORD operator! (const FCOORD &src)
 
FCOORD operator- (const FCOORD &src)
 
FCOORD operator+ (const FCOORD &op1, const FCOORD &op2)
 
FCOORD & operator+= (FCOORD &op1, const FCOORD &op2)
 
FCOORD operator- (const FCOORD &op1, const FCOORD &op2)
 
FCOORD & operator-= (FCOORD &op1, const FCOORD &op2)
 
float operator% (const FCOORD &op1, const FCOORD &op2)
 
float operator* (const FCOORD &op1, const FCOORD &op2)
 
FCOORD operator* (const FCOORD &op1, float scale)
 
FCOORD operator* (float scale, const FCOORD &op1)
 
FCOORD & operator*= (FCOORD &op1, float scale)
 
FCOORD operator/ (const FCOORD &op1, float scale)
 
FCOORD & operator/= (FCOORD &op1, float scale)
 
TESSLINE * ApproximateOutline (bool allow_detailed_fx, C_OUTLINE *c_outline)
 
BLOB_CHOICE * FindMatchingChoice (UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
 
const char * ScriptPosToString (enum ScriptPos script_pos)
 
bool EqualIgnoringCaseAndTerminalPunct (const WERD_CHOICE &word1, const WERD_CHOICE &word2)
 
void print_ratings_list (const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
 
TBOX & operator+= (TBOX &op1, const TBOX &op2)
 
TBOX & operator&= (TBOX &op1, const TBOX &op2)
 
EDGEPT * make_edgept (TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev)
 
void remove_edgept (EDGEPT *point)
 
 BOOL_VAR_H (wordrec_display_splits)
 
int word_comparator (const void *word1p, const void *word2p)
 
constexpr ERRCODE BADERRACTION ("Illegal error action")
 
constexpr ERRCODE ASSERT_FAILED ("Assert failed")
 
constexpr ERRCODE CANTOPENFILE ("Can't open file")
 
bool LoadDataFromFile (const char *filename, GenericVector< char > *data)
 
bool SaveDataToFile (const GenericVector< char > &data, const char *filename)
 
template<typename T >
int sort_cmp (const void *t1, const void *t2)
 
template<typename T >
int sort_ptr_cmp (const void *t1, const void *t2)
 
template<class T >
bool contains (const std::vector< T > &data, const T &value)
 
const std::vector< std::string > split (const std::string &s, char c)
 
void chomp_string (char *str)
 
int RoundUp (int n, int block_size)
 
template<typename T >
T ClipToRange (const T &x, const T &lower_bound, const T &upper_bound)
 
template<typename T1 , typename T2 >
void UpdateRange (const T1 &x, T2 *lower_bound, T2 *upper_bound)
 
template<typename T1 , typename T2 >
void UpdateRange (const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound)
 
template<typename T >
void IntersectRange (const T &lower1, const T &upper1, T *lower2, T *upper2)
 
int Modulo (int a, int b)
 
int DivRounded (int a, int b)
 
int IntCastRounded (double x)
 
int IntCastRounded (float x)
 
void ReverseN (void *ptr, int num_bytes)
 
void Reverse32 (void *ptr)
 
template<typename T >
bool DeSerialize (bool swap, FILE *fp, std::vector< T > &data)
 
template<typename T >
bool Serialize (FILE *fp, const std::vector< T > &data)
 
template<class T >
bool NearlyEqual (T x, T y, T tolerance)
 
constexpr ERRCODE NO_LIST ("Iterator not set to a list")
 
constexpr ERRCODE NULL_DATA ("List would have returned a nullptr data pointer")
 
constexpr ERRCODE NULL_CURRENT ("List current position is nullptr")
 
constexpr ERRCODE NULL_NEXT ("Next element on the list is nullptr")
 
constexpr ERRCODE NULL_PREV ("Previous element on the list is nullptr")
 
constexpr ERRCODE EMPTY_LIST ("List is empty")
 
constexpr ERRCODE BAD_PARAMETER ("List parameter error")
 
constexpr ERRCODE STILL_LINKED ("Attempting to add an element with non nullptr links, to a list")
 
tesseract::ParamsVectors * GlobalParams ()
 
bool LoadDataFromFile (const char *filename, std::vector< char > *data)
 
bool SaveDataToFile (const std::vector< char > &data, const char *filename)
 
template<typename T , size_t N>
constexpr size_t countof (T const (&)[N]) noexcept
 
template<typename T >
bool DeSerialize (FILE *fp, T *data, size_t n=1)
 
template<typename T >
bool Serialize (FILE *fp, const T *data, size_t n=1)
 
void tprintf (const char *format,...)
 
TESS_API INT_VAR_H (log_level)
 
void AddAdaptedClass (ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId)
 
ADAPT_CLASS_STRUCT * ReadAdaptedClass (TFile *fp)
 
PERM_CONFIG_STRUCT * ReadPermConfig (TFile *fp)
 
TEMP_CONFIG_STRUCT * ReadTempConfig (TFile *fp)
 
void WriteAdaptedClass (FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs)
 
void WritePermConfig (FILE *File, PERM_CONFIG_STRUCT *Config)
 
void WriteTempConfig (FILE *File, TEMP_CONFIG_STRUCT *Config)
 
bool MarginalMatch (float confidence, float matcher_great_threshold)
 
void InitMatcherRatings (float *Rating)
 
int MakeTempProtoPerm (void *item1, void *item2)
 
void SetAdaptiveThreshold (float Threshold)
 
CLUSTERER * MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[])
 
SAMPLE * MakeSample (CLUSTERER *Clusterer, const float *Feature, uint32_t CharID)
 
LIST ClusterSamples (CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
 
void FreeClusterer (CLUSTERER *Clusterer)
 
void FreeProtoList (LIST *ProtoList)
 
void FreePrototype (void *arg)
 
CLUSTER * NextSample (LIST *SearchState)
 
float Mean (PROTOTYPE *Proto, uint16_t Dimension)
 
float StandardDeviation (PROTOTYPE *Proto, uint16_t Dimension)
 
int32_t MergeClusters (int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])
 
uint16_t ReadSampleSize (TFile *fp)
 
PARAM_DESC * ReadParamDesc (TFile *fp, uint16_t N)
 
PROTOTYPE * ReadPrototype (TFile *fp, uint16_t N)
 
void WriteParamDesc (FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uint16_t N, PROTOTYPE *Proto)
 
 StartParamDesc (MicroFeatureParams) DefineParam(0
 
 DefineParam (0, 0, -0.25, 0.75) DefineParam(0
 
 DefineParam (1, 0, 0.0, 1.0) DefineParam(0
 
 DefineParam (0, 1, -0.5, 0.5) EndParamDesc DefineFeature(MicroFeatureDesc
 
MicroFeatureParams StartParamDesc (CharNormParams) DefineParam(0
 
MicroFeatureParams DefineParam (0, 1, 0.0, 1.0) DefineParam(0
 
MicroFeatureParams DefineParam (0, 0, 0.0, 1.0) EndParamDesc DefineFeature(CharNormDesc
 
MicroFeatureParams CharNormParams StartParamDesc (IntFeatParams) DefineParam(0
 
MicroFeatureParams CharNormParams DefineParam (0, 0, 0.0, 255.0) DefineParam(1
 
MicroFeatureParams CharNormParams EndParamDesc DefineFeature (IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) StartParamDesc(GeoFeatParams) DefineParam(0
 
MicroFeatureParams CharNormParams EndParamDesc EndParamDesc DefineFeature (GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) float PicoFeatureLength
 
 StartParamDesc (PicoFeatParams) DefineParam(0
 
EndParamDesc DefineFeature (PicoFeatDesc, 2, 1, "pf", PicoFeatParams) StartParamDesc(OutlineFeatParams) DefineParam(0
 
void InitFeatureDefs (FEATURE_DEFS_STRUCT *featuredefs)
 
void WriteCharDescription (const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str)
 
bool ValidCharDescription (const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc)
 
CHAR_DESC_STRUCT * ReadCharDescription (const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
 
uint32_t ShortNameToFeatureType (const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
 
void InitIntegerFX ()
 
FCOORD FeatureDirection (uint8_t theta)
 
TrainingSample * BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, std::vector< INT_FEATURE_STRUCT > *bl_features)
 
 BOOL_VAR_H (disable_character_fragments)
 
 INT_VAR_H (classify_integer_matcher_multiplier)
 
float BucketStart (int Bucket, float Offset, int NumBuckets)
 
float BucketEnd (int Bucket, float Offset, int NumBuckets)
 
void DoFill (FILL_SPEC *FillSpec, CLASS_PRUNER_STRUCT *Pruner, uint32_t ClassMask, uint32_t ClassCount, uint32_t WordIndex)
 
bool FillerDone (TABLE_FILLER *Filler)
 
void FillPPCircularBits (uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, float Center, float Spread, bool debug)
 
void FillPPLinearBits (uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, float Center, float Spread, bool debug)
 
void GetCPPadsForLevel (int Level, float *EndPad, float *SidePad, float *AnglePad)
 
ScrollView::Color GetMatchColorFor (float Evidence)
 
void GetNextFill (TABLE_FILLER *Filler, FILL_SPEC *Fill)
 
void InitTableFiller (float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *Proto, TABLE_FILLER *Filler)
 
void RenderIntFeature (ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
 
void RenderIntProto (ScrollView *window, INT_CLASS_STRUCT *Class, PROTO_ID ProtoId, ScrollView::Color color)
 
void AddIntClass (INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_STRUCT *Class)
 
int AddIntConfig (INT_CLASS_STRUCT *Class)
 
int AddIntProto (INT_CLASS_STRUCT *Class)
 
void AddProtoToClassPruner (PROTO_STRUCT *Proto, CLASS_ID ClassId, INT_TEMPLATES_STRUCT *Templates)
 
void AddProtoToProtoPruner (PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class, bool debug)
 
uint8_t Bucket8For (float param, float offset, int num_buckets)
 
uint16_t Bucket16For (float param, float offset, int num_buckets)
 
uint8_t CircBucketFor (float param, float offset, int num_buckets)
 
void UpdateMatchDisplay ()
 
void ConvertConfig (BIT_VECTOR Config, int ConfigId, INT_CLASS_STRUCT *Class)
 
void DisplayIntFeature (const INT_FEATURE_STRUCT *Feature, float Evidence)
 
void DisplayIntProto (INT_CLASS_STRUCT *Class, PROTO_ID ProtoId, float Evidence)
 
void ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)
 
void InitIntMatchWindowIfReqd ()
 
void InitProtoDisplayWindowIfReqd ()
 
void InitFeatureDisplayWindowIfReqd ()
 
ScrollView * CreateFeatureSpaceWindow (const char *name, int xpos, int ypos)
 
void ShowMatchDisplay ()
 
KDTREE * MakeKDTree (int16_t KeySize, const PARAM_DESC KeyDesc[])
 
void KDStore (KDTREE *Tree, float *Key, CLUSTER *Data)
 
void KDDelete (KDTREE *Tree, float Key[], void *Data)
 
void KDNearestNeighborSearch (KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, int *NumberOfResults, void **NBuffer, float DBuffer[])
 
void KDWalk (KDTREE *Tree, kdwalk_proc action, ClusteringContext *context)
 
float DistanceSquared (int k, PARAM_DESC *dim, float p1[], float p2[])
 
float ComputeDistance (int k, PARAM_DESC *dim, float p1[], float p2[])
 
void Walk (KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level)
 
void InsertNodes (KDTREE *tree, KDNODE *nodes)
 
int QueryInSearch (KDTREE *tree)
 
FEATURE_SET ExtractMicros (TBLOB *Blob, const DENORM &cn_denorm)
 
LIST ConvertBlob (TBLOB *blob)
 
MFOUTLINE ConvertOutline (TESSLINE *outline)
 
LIST ConvertOutlines (TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type)
 
void FindDirectionChanges (MFOUTLINE Outline, float MinSlope, float MaxSlope)
 
void FreeMFOutline (void *arg)
 
void FreeOutlines (LIST Outlines)
 
void MarkDirectionChanges (MFOUTLINE Outline)
 
MFOUTLINE NextExtremity (MFOUTLINE EdgePoint)
 
void NormalizeOutline (MFOUTLINE Outline, float XOrigin)
 
void ChangeDirection (MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction)
 
void CharNormalizeOutline (MFOUTLINE Outline, const DENORM &cn_denorm)
 
void ComputeDirection (MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope)
 
MFOUTLINE NextDirectionChange (MFOUTLINE EdgePoint)
 
void ComputeBlobCenter (TBLOB *Blob, TPOINT *BlobCenter)
 
void FilterEdgeNoise (MFOUTLINE Outline, float NoiseSegmentLength)
 
MICROFEATURES ConvertToMicroFeatures (MFOUTLINE Outline, MICROFEATURES MicroFeatures)
 
MicroFeature ExtractMicroFeature (MFOUTLINE Start, MFOUTLINE End)
 
MICROFEATURES BlobMicroFeatures (TBLOB *Blob, const DENORM &cn_denorm)
 
 double_VAR_H (classify_min_slope)
 
 double_VAR_H (classify_max_slope)
 
float ActualOutlineLength (FEATURE Feature)
 
FEATURE_SET ExtractCharNormFeatures (const INT_FX_RESULT_STRUCT &fx_info)
 
 double_VAR_H (classify_norm_adj_midpoint)
 
 double_VAR_H (classify_norm_adj_curl)
 
bool AddFeature (FEATURE_SET FeatureSet, FEATURE Feature)
 
FEATURE_SET ReadFeatureSet (FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc)
 
void WriteFeatureSet (FEATURE_SET FeatureSet, std::string &str)
 
void AddOutlineFeatureToSet (FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
 
void ConvertToOutlineFeatures (MFOUTLINE Outline, FEATURE_SET FeatureSet)
 
void NormalizeOutlineX (FEATURE_SET FeatureSet)
 
void ConvertSegmentToPicoFeat (FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
 
void ConvertToPicoFeatures2 (MFOUTLINE Outline, FEATURE_SET FeatureSet)
 
void NormalizePicoX (FEATURE_SET FeatureSet)
 
 double_VAR_H (classify_pico_feature_length)
 
void InitPrototypes ()
 
LIST delete_d (LIST list, void *key, int_compare is_equal)
 
LIST destroy (LIST list)
 
void destroy_nodes (LIST list, void_dest destructor)
 
LIST last (LIST var_list)
 
LIST pop (LIST list)
 
LIST push (LIST list, void *element)
 
LIST push_last (LIST list, void *item)
 
LIST search (LIST list, void *key, int_compare is_equal)
 
TFloat Tanh (TFloat x)
 
TFloat Logistic (TFloat x)
 
template<class Func >
void FuncInplace (int n, TFloat *inout)
 
template<class Func >
void FuncMultiply (const TFloat *u, const TFloat *v, int n, TFloat *out)
 
template<typename T >
void SoftmaxInPlace (int n, T *inout)
 
void CopyVector (unsigned n, const TFloat *src, TFloat *dest)
 
void AccumulateVector (int n, const TFloat *src, TFloat *dest)
 
void MultiplyVectorsInPlace (int n, const TFloat *src, TFloat *inout)
 
void MultiplyAccumulate (int n, const TFloat *u, const TFloat *v, TFloat *out)
 
void SumVectors (int n, const TFloat *v1, const TFloat *v2, const TFloat *v3, const TFloat *v4, const TFloat *v5, TFloat *sum)
 
template<typename T >
void ZeroVector (unsigned n, T *vec)
 
template<typename T >
void ClipVector (int n, T lower, T upper, T *vec)
 
void CodeInBinary (int n, int nf, TFloat *vec)
 
 INT_VAR_H (textord_debug_bugs)
 
 INT_VAR_H (textord_debug_tabfind)
 
 BOOL_VAR_H (textord_debug_printable)
 
Image TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)
 
Image TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)
 
template<class BBC >
int SortByBoxLeft (const void *void1, const void *void2)
 
template<class BBC >
bool StdSortByBoxLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortRightToLeft (const void *void1, const void *void2)
 
template<class BBC >
bool StdSortRightToLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortByBoxBottom (const void *void1, const void *void2)
 
bool test_underline (bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)
 
 double_VAR_H (textord_underline_threshold)
 
void compute_fixed_pitch_cjk (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
 INT_VAR_H (devanagari_split_debuglevel)
 
 BOOL_VAR_H (devanagari_split_debugimage)
 
ScrollView * create_to_win (ICOORD page_tr)
 
void close_to_win ()
 
void plot_box_list (ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
 
void plot_to_row (TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
 
void plot_parallel_row (TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
 
void draw_occupation (int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])
 
void draw_meanlines (TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
 
void plot_word_decisions (ScrollView *win, int16_t pitch, TO_ROW *row)
 
void plot_fp_cells (ScrollView *win, ScrollView::Color colour, BLOBNBOX_IT *blob_it, int16_t pitch, int16_t blob_count, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale)
 
void plot_fp_cells2 (ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
 
void plot_row_cells (ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
 
 BOOL_VAR_H (textord_show_fixed_cuts)
 
void create_todebug_win ()
 
void complete_edge (CRACKEDGE *start, C_OUTLINE_IT *outline_it)
 
ScrollView::Color check_path_legal (CRACKEDGE *start)
 
int16_t loop_bounding_box (CRACKEDGE *&start, ICOORD &botleft, ICOORD &topright)
 
ROW * fixed_pitch_words (TO_ROW *row, FCOORD rotation)
 
void split_to_blob (BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
 
 INT_VAR_H (textord_fp_chop_error)
 
 BOOL_VAR_H (gapmap_debug)
 
 BOOL_VAR_H (gapmap_use_ends)
 
 BOOL_VAR_H (gapmap_no_isolated_quanta)
 
 double_VAR_H (gapmap_big_gaps)
 
 BOOL_VAR_H (textord_heavy_nr)
 
 BOOL_VAR_H (textord_show_initial_rows)
 
 BOOL_VAR_H (textord_show_parallel_rows)
 
 BOOL_VAR_H (textord_show_expanded_rows)
 
 BOOL_VAR_H (textord_show_final_rows)
 
 BOOL_VAR_H (textord_show_final_blobs)
 
 BOOL_VAR_H (textord_test_landscape)
 
 BOOL_VAR_H (textord_parallel_baselines)
 
 BOOL_VAR_H (textord_straight_baselines)
 
 BOOL_VAR_H (textord_old_baselines)
 
 BOOL_VAR_H (textord_old_xheight)
 
 BOOL_VAR_H (textord_fix_xheight_bug)
 
 BOOL_VAR_H (textord_fix_makerow_bug)
 
 BOOL_VAR_H (textord_debug_xheights)
 
 INT_VAR_H (textord_test_x)
 
 INT_VAR_H (textord_test_y)
 
 INT_VAR_H (textord_min_blobs_in_row)
 
 INT_VAR_H (textord_spline_minblobs)
 
 INT_VAR_H (textord_spline_medianwin)
 
 INT_VAR_H (textord_min_xheight)
 
 double_VAR_H (textord_spline_shift_fraction)
 
 double_VAR_H (textord_skew_ile)
 
 double_VAR_H (textord_skew_lag)
 
 double_VAR_H (textord_linespace_iqrlimit)
 
 double_VAR_H (textord_width_limit)
 
 double_VAR_H (textord_chop_width)
 
 double_VAR_H (textord_minxh)
 
 double_VAR_H (textord_min_linesize)
 
 double_VAR_H (textord_excess_blobsize)
 
 double_VAR_H (textord_occupancy_threshold)
 
 double_VAR_H (textord_underline_width)
 
 double_VAR_H (textord_min_blob_height_fraction)
 
 double_VAR_H (textord_xheight_mode_fraction)
 
 double_VAR_H (textord_ascheight_mode_fraction)
 
 double_VAR_H (textord_ascx_ratio_min)
 
 double_VAR_H (textord_ascx_ratio_max)
 
 double_VAR_H (textord_descx_ratio_min)
 
 double_VAR_H (textord_descx_ratio_max)
 
 double_VAR_H (textord_xheight_error_margin)
 
 INT_VAR_H (textord_lms_line_trials)
 
 BOOL_VAR_H (textord_new_initial_xheight)
 
 BOOL_VAR_H (textord_debug_blob)
 
void get_min_max_xheight (int block_linesize, int *min_height, int *max_height)
 
ROW_CATEGORY get_row_category (const TO_ROW *row)
 
bool within_error_margin (float test, float num, float margin)
 
float median_block_xheight (TO_BLOCK *block, float gradient)
 
int get_blob_coords (TO_ROW *row, int32_t lineheight, TBOX *blobcoords, bool &holed_line, int &outcount)
 
void make_first_baseline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
 
void make_holed_baseline (TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
 
int partition_line (TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
 
void merge_oldbl_parts (TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
 
int get_ydiffs (TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
 
int choose_partition (float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
 
int partition_coords (TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])
 
int segment_spline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
 
bool split_stepped_spline (QSPLINE *baseline, float jumplimit, int *xcoords, int *xstarts, int &segments)
 
void insert_spline_point (int xstarts[], int segment, int coord1, int coord2, int &segments)
 
void find_lesser_parts (TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
 
void old_first_xheight (TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
 
void make_first_xheight (TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
 
void find_top_modes (STATS *stats, int statnum, int modelist[], int modenum)
 
void pick_x_height (TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)
 
 BOOL_VAR_H (textord_oldbl_debug)
 
int * make_height_array (TBOX blobcoords[], int blobcount, QSPLINE *baseline)
 
double check_pitch_sync2 (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 
double check_pitch_sync3 (int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 
double check_pitch_sync (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
 
void make_illegal_segment (FPSEGPT_LIST *prev_list, TBOX blob_box, BLOBNBOX_IT blob_it, int16_t region_index, int16_t pitch, int16_t pitch_error, FPSEGPT_LIST *seg_list)
 
 INT_VAR_H (pitsync_linear_version)
 
 double_VAR_H (pitsync_joined_edge)
 
 double_VAR_H (pitsync_offset_freecut_fraction)
 
int16_t vertical_torow_projection (TO_ROW *row, STATS *projection)
 
void block_edges (Image t_pix, PDBLK *block, C_OUTLINE_IT *outline_it)
 
template<typename T >
void DeleteObject (T *object)
 
 double_VAR_H (textord_tabvector_vertical_gap_fraction)
 
 double_VAR_H (textord_tabvector_vertical_box_ratio)
 
void compute_fixed_pitch (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
 
void fix_row_pitch (TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
 
void compute_block_pitch (TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
 
bool compute_rows_pitch (TO_BLOCK *block, int32_t block_index, bool testing_on)
 
bool try_doc_fixed (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
 
bool try_block_fixed (TO_BLOCK *block, int32_t block_index)
 
bool try_rows_fixed (TO_BLOCK *block, int32_t block_index, bool testing_on)
 
void print_block_counts (TO_BLOCK *block, int32_t block_index)
 
void count_block_votes (TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
 
bool row_pitch_stats (TO_ROW *row, int32_t maxwidth, bool testing_on)
 
bool find_row_pitch (TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
 
bool fixed_pitch_row (TO_ROW *row, BLOCK *block, int32_t block_index)
 
bool count_pitch_stats (TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
 
float tune_row_pitch (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
 
float tune_row_pitch2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
 
float compute_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
 
float compute_pitch_sd2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
 
void print_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
 
void find_repeated_chars (TO_BLOCK *block, bool testing_on)
 
void plot_fp_word (TO_BLOCK *block, float pitch, float nonspace)
 
 BOOL_VAR_H (textord_debug_pitch_test)
 
 BOOL_VAR_H (textord_debug_pitch_metric)
 
 BOOL_VAR_H (textord_show_row_cuts)
 
 BOOL_VAR_H (textord_show_page_cuts)
 
 BOOL_VAR_H (textord_blockndoc_fixed)
 
 BOOL_VAR_H (textord_fast_pitch_test)
 
 double_VAR_H (textord_projection_scale)
 
 double_VAR_H (textord_balance_factor)
 
void SetBlobStrokeWidth (Image pix, BLOBNBOX *blob)
 
void assign_blobs_to_blocks2 (Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
void tweak_row_baseline (ROW *row, double blshift_maxshift, double blshift_xfraction)
 
 BOOL_VAR_H (textord_show_initial_words)
 
 BOOL_VAR_H (textord_blocksall_fixed)
 
 BOOL_VAR_H (textord_blocksall_prop)
 
 INT_VAR_H (textord_dotmatrix_gap)
 
 INT_VAR_H (textord_debug_block)
 
 INT_VAR_H (textord_pitch_range)
 
 double_VAR_H (textord_wordstats_smooth_factor)
 
 double_VAR_H (textord_words_maxspace)
 
 double_VAR_H (textord_words_default_maxspace)
 
 double_VAR_H (textord_words_default_minspace)
 
 double_VAR_H (textord_words_min_minspace)
 
 double_VAR_H (textord_words_default_nonspace)
 
 double_VAR_H (textord_words_initial_lower)
 
 double_VAR_H (textord_words_initial_upper)
 
 double_VAR_H (textord_words_minlarge)
 
 double_VAR_H (textord_words_pitchsd_threshold)
 
 double_VAR_H (textord_words_def_fixed)
 
 double_VAR_H (textord_words_def_prop)
 
 INT_VAR_H (textord_words_veto_power)
 
 double_VAR_H (textord_pitch_rowsimilarity)
 
 BOOL_VAR_H (textord_pitch_scalebigwords)
 
 double_VAR_H (words_initial_lower)
 
 double_VAR_H (words_initial_upper)
 
 double_VAR_H (words_default_prop_nonspace)
 
 double_VAR_H (words_default_fixed_space)
 
 double_VAR_H (words_default_fixed_limit)
 
 double_VAR_H (textord_words_definite_spread)
 
 double_VAR_H (textord_spacesize_ratioprop)
 
 double_VAR_H (textord_fpiqr_ratio)
 
 double_VAR_H (textord_max_pitch_iqr)
 
void restore_underlined_blobs (TO_BLOCK *block)
 
TO_ROW * most_overlapping_row (TO_ROW_LIST *rows, BLOBNBOX *blob)
 
void find_underlined_blobs (BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
 
void vertical_cunderline_projection (C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
 
 double_VAR_H (textord_underline_offset)
 
 BOOL_VAR_H (textord_restore_underlines)
 
 BOOL_VAR_H (textord_force_make_prop_words)
 
 BOOL_VAR_H (textord_chopper_test)
 
void ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)
 
TESS_COMMON_TRAINING_API DECLARE_INT_PARAM_FLAG (debug_level)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (D)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (F)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (O)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (U)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (X)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (fonts_dir)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (fontconfig_tmpdir)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (output_trainer)
 
TESS_COMMON_TRAINING_API DECLARE_STRING_PARAM_FLAG (test_ch)
 
 INT_PARAM_FLAG (debug_level, 0, "Level of Trainer debugging")
 
 STRING_PARAM_FLAG (D, "", "Directory to write output files to")
 
 STRING_PARAM_FLAG (F, "font_properties", "File listing font properties")
 
 STRING_PARAM_FLAG (X, "", "File listing font xheights")
 
 STRING_PARAM_FLAG (U, "unicharset", "File to load unicharset from")
 
 STRING_PARAM_FLAG (O, "", "File to write unicharset to")
 
 STRING_PARAM_FLAG (output_trainer, "", "File to write trainer to")
 
 STRING_PARAM_FLAG (test_ch, "", "UTF8 test character string")
 
 STRING_PARAM_FLAG (fonts_dir, "", "")
 
 STRING_PARAM_FLAG (fontconfig_tmpdir, "", "")
 
void ParseArguments (int *argc, char ***argv)
 
ShapeTable * LoadShapeTable (const std::string &file_prefix)
 
void WriteShapeTable (const std::string &file_prefix, const ShapeTable &shape_table)
 
std::unique_ptr< MasterTrainer > LoadTrainingData (const char *const *filelist, bool replication, ShapeTable **shape_table, std::string &file_prefix)
 
LABELEDLIST FindList (LIST List, const std::string &Label)
 
void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_definitions, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
 
void FreeTrainingSamples (LIST CharList)
 
void FreeLabeledList (LABELEDLIST LabeledList)
 
CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST char_sample, const char *program_feature_type)
 
void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *clusterconfig)
 
void CleanUpUnusedData (LIST ProtoList)
 
LIST RemoveInsignificantProtos (LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N)
 
MERGE_CLASS FindClass (LIST List, const std::string &Label)
 
void FreeLabeledClassList (LIST ClassList)
 
CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)
 
void Normalize (float *Values)
 
void FreeNormProtoList (LIST CharList)
 
void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, const std::string &CharName)
 
int NumberOfProtos (LIST ProtoList, bool CountSigProtos, bool CountInsigProtos)
 
void WriteTrainingSamples (const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, tesseract::LIST CharList, const char *program_feature_type)
 
void allocNormProtos ()
 
Image DegradeImage (Image input, int exposure, TRand *randomizer, float *rotation)
 
Image PrepareDistortedPix (const Image pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, std::vector< TBOX > *boxes)
 
void GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Image *pix, std::vector< TBOX > *boxes)
 
int ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
 
bool LoadFileLinesToStrings (const char *filename, std::vector< std::string > *lines)
 
bool WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const std::vector< char > &data, FileWriter writer)
 
std::string ReadFile (const std::string &filename, FileReader reader)
 
bool WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
 
bool WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, std::string *radical_table_data, TessdataManager *traineddata)
 
int CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const std::vector< std::string > &words, const std::vector< std::string > &puncs, const std::vector< std::string > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
 
bool NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 OCRNormalize (char32 ch)
 
bool IsOCREquivalent (char32 ch1, char32 ch2)
 
bool IsValidCodepoint (const char32 ch)
 
bool IsWhitespace (const char32 ch)
 
bool IsUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8NotWhitespace (const char *text)
 
bool IsInterchangeValid (const char32 ch)
 
bool IsInterchangeValid7BitAscii (const char32 ch)
 
char32 FullwidthToHalfwidth (const char32 ch)
 
void SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)
 
void SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)
 
std::string GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)
 
void SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)
 
void SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)
 
void create_fx_win ()
 
void clear_fx_win ()
 
void create_fxdebug_win ()
 
template<class BLOB_CHOICE >
int SortByUnicharID (const void *void1, const void *void2)
 
template<class BLOB_CHOICE >
int SortByRating (const void *void1, const void *void2)
 
void display_edgepts (LIST outlines)
 
void draw_blob_edges (TBLOB *blob)
 
void mark_outline (EDGEPT *edgept)
 
void display_blob (TBLOB *blob, ScrollView::Color color)
 
void render_blob (ScrollView *window, TBLOB *blob, ScrollView::Color color)
 
void render_edgepts (ScrollView *window, EDGEPT *edgept, ScrollView::Color color)
 
void render_outline (ScrollView *window, TESSLINE *outline, ScrollView::Color color)
 
 BOOL_VAR_H (wordrec_display_all_blobs)
 
 BOOL_VAR_H (wordrec_blob_pause)
 
void OCRTester (const char *imgname, const char *groundtruth, const char *tessdatadir, const char *lang)
 
 TEST_P (MatchGroundTruth, FastPhototestOCR)
 
 TEST_P (MatchGroundTruth, BestPhototestOCR)
 
 TEST_P (MatchGroundTruth, TessPhototestOCR)
 
 INSTANTIATE_TEST_SUITE_P (Eng, MatchGroundTruth, ::testing::Values("eng"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Latin, MatchGroundTruth, ::testing::Values("script/Latin"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Deva, MatchGroundTruth, ::testing::Values("script/Devanagari"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Arabic, MatchGroundTruth, ::testing::Values("script/Arabic"))
 
 TEST_F (EuroText, FastLatinOCR)
 
 TEST_F (ApplyBoxTest, TimesCharLevel)
 
 TEST_F (ApplyBoxTest, ItalicCharLevel)
 
 TEST_F (ApplyBoxTest, TimesLineLevel)
 
 TEST_F (ApplyBoxTest, ItalLineLevel)
 
std::string GetCleanedTextResult (tesseract::TessBaseAPI *tess, Image pix)
 
 TEST_F (TesseractTest, StaticTessBaseAPI)
 
 TEST_F (TesseractTest, BasicTesseractTest)
 
 TEST_F (TesseractTest, IteratesParagraphsEvenIfNotDetected)
 
 TEST_F (TesseractTest, HOCRWorksWithoutSetInputName)
 
 TEST_F (TesseractTest, HOCRContainsBaseline)
 
 TEST_F (TesseractTest, AdaptToWordStrTest)
 
 TEST_F (TesseractTest, BasicLSTMTest)
 
 TEST_F (TesseractTest, LSTMGeometryTest)
 
 TEST_F (TesseractTest, InitConfigOnlyTest)
 
 TEST (TesseractInstanceTest, TestMultipleTessInstances)
 
 TEST (TesseractInstanceTest, TestMultipleTessInstanceVariables)
 
 TEST_F (BaseapiThreadTest, TestBasicSanity)
 
 TEST_F (BaseapiThreadTest, TestInit)
 
 TEST_F (BaseapiThreadTest, TestRecognition)
 
 TEST_F (BaseapiThreadTest, TestAll)
 
 TEST_F (BitVectorTest, Primes)
 
 TEST_F (BitVectorTest, SetAll)
 
 TEST_F (BitVectorTest, TestNextSetBit)
 
 TEST_F (BitVectorTest, TestNumSetBits)
 
 TEST (CleanNamespaceTess, DummyTest)
 
 TEST_F (ColPartitionTest, IsInSameColumnAsReflexive)
 
 TEST_F (ColPartitionTest, IsInSameColumnAsBorders)
 
 TEST_F (ColPartitionTest, IsInSameColumnAsSuperset)
 
 TEST_F (ColPartitionTest, IsInSameColumnAsPartialOverlap)
 
 TEST_F (CommandlineflagsTest, RemoveFlags)
 
 TEST_F (CommandlineflagsTest, ExitsWithErrorOnInvalidFlag)
 
 TEST_F (CommandlineflagsTest, ParseIntegerFlags)
 
 TEST_F (CommandlineflagsTest, ParseDoubleFlags)
 
 TEST_F (CommandlineflagsTest, ParseStringFlags)
 
 TEST_F (CommandlineflagsTest, ParseBoolFlags)
 
 TEST_F (CommandlineflagsTest, ParseOldFlags)
 
 TEST_F (DawgTest, TestDawgConversion)
 
 TEST_F (DawgTest, TestMatching)
 
 TEST_F (DENORMTest, NoRotations)
 
 TEST_F (DENORMTest, WithRotations)
 
 TEST_F (DENORMTest, Multiple)
 
 TEST_F (EquationFinderTest, IdentifySpecialText)
 
 TEST_F (EquationFinderTest, EstimateTypeForUnichar)
 
 TEST_F (EquationFinderTest, IsIndented)
 
 TEST_F (EquationFinderTest, IsNearSmallNeighbor)
 
 TEST_F (EquationFinderTest, CheckSeedBlobsCount)
 
 TEST_F (EquationFinderTest, ComputeForegroundDensity)
 
 TEST_F (EquationFinderTest, CountAlignment)
 
 TEST_F (EquationFinderTest, ComputeCPsSuperBBox)
 
 TEST_F (EquationFinderTest, SplitCPHorLite)
 
 TEST_F (EquationFinderTest, SplitCPHor)
 
 TEST (FileTest, JoinPath)
 
 TEST (OutputBufferTest, WriteString)
 
 TEST (InputBufferTest, Read)
 
 TEST_F (HeapTest, SortTest)
 
 TEST_F (HeapTest, MixedTest)
 
 TEST_F (HeapTest, PopWorstTest)
 
 TEST_F (HeapTest, RevalueTest)
 
 TEST_F (HeapTest, DoublePtrTest)
 
 TEST_F (ImagedataTest, CachesProperly)
 
 TEST_F (ImagedataTest, CachesMultiDocs)
 
 TEST_F (IndexMapBiDiTest, Primes)
 
 TEST_F (IndexMapBiDiTest, ManyToOne)
 
 TEST_F (IntFeatureMapTest, Exhaustive)
 
 TEST_F (IntSimdMatrixTest, C)
 
 TEST_F (IntSimdMatrixTest, SSE)
 
 TEST_F (IntSimdMatrixTest, AVX2)
 
std::string TestDataNameToPath (const std::string &name)
 
 TEST (LangModelTest, AddACharacter)
 
 TEST (LangModelTest, AddACharacterHindi)
 
 TEST_F (LayoutTest, ArraySizeTest)
 
 TEST_F (LayoutTest, UNLV8087_054)
 
 TEST_F (LayoutTest, HebrewOrderingAndSkew)
 
 TEST_F (LigatureTableTest, DoesFillLigatureTables)
 
 TEST_F (LigatureTableTest, TestCustomLigatures)
 
 TEST_F (LLSQTest, BasicLines)
 
 TEST_F (LLSQTest, Vectors)
 
 TEST_F (LLSQTest, RmsOrthWorksAsIntended)
 
 TEST_F (ListTest, TestCLIST)
 
 TEST_F (ListTest, TestELIST)
 
 TEST_F (ListTest, TestELIST2)
 
void LangLoader (const char *lang, const char *tessdatadir)
 
 TEST_P (LoadLanguage, afr)
 
 TEST_P (LoadLanguage, amh)
 
 TEST_P (LoadLanguage, ara)
 
 TEST_P (LoadLanguage, asm)
 
 TEST_P (LoadLanguage, aze)
 
 TEST_P (LoadLanguage, aze_cyrl)
 
 TEST_P (LoadLanguage, bel)
 
 TEST_P (LoadLanguage, ben)
 
 TEST_P (LoadLanguage, bod)
 
 TEST_P (LoadLanguage, bos)
 
 TEST_P (LoadLanguage, bre)
 
 TEST_P (LoadLanguage, bul)
 
 TEST_P (LoadLanguage, cat)
 
 TEST_P (LoadLanguage, ceb)
 
 TEST_P (LoadLanguage, ces)
 
 TEST_P (LoadLanguage, chi_sim)
 
 TEST_P (LoadLanguage, chi_sim_vert)
 
 TEST_P (LoadLanguage, chi_tra)
 
 TEST_P (LoadLanguage, chi_tra_vert)
 
 TEST_P (LoadLanguage, chr)
 
 TEST_P (LoadLanguage, cos)
 
 TEST_P (LoadLanguage, cym)
 
 TEST_P (LoadLanguage, dan)
 
 TEST_P (LoadLanguage, deu)
 
 TEST_P (LoadLanguage, div)
 
 TEST_P (LoadLanguage, dzo)
 
 TEST_P (LoadLanguage, ell)
 
 TEST_P (LoadLanguage, eng)
 
 TEST_P (LoadLanguage, enm)
 
 TEST_P (LoadLanguage, epo)
 
 TEST_P (LoadLanguage, est)
 
 TEST_P (LoadLanguage, eus)
 
 TEST_P (LoadLanguage, fao)
 
 TEST_P (LoadLanguage, fas)
 
 TEST_P (LoadLanguage, fil)
 
 TEST_P (LoadLanguage, fin)
 
 TEST_P (LoadLanguage, fra)
 
 TEST_P (LoadLanguage, frk)
 
 TEST_P (LoadLanguage, frm)
 
 TEST_P (LoadLanguage, fry)
 
 TEST_P (LoadLanguage, gla)
 
 TEST_P (LoadLanguage, gle)
 
 TEST_P (LoadLanguage, glg)
 
 TEST_P (LoadLanguage, grc)
 
 TEST_P (LoadLanguage, guj)
 
 TEST_P (LoadLanguage, hat)
 
 TEST_P (LoadLanguage, heb)
 
 TEST_P (LoadLanguage, hin)
 
 TEST_P (LoadLanguage, hrv)
 
 TEST_P (LoadLanguage, hun)
 
 TEST_P (LoadLanguage, hye)
 
 TEST_P (LoadLanguage, iku)
 
 TEST_P (LoadLanguage, ind)
 
 TEST_P (LoadLanguage, isl)
 
 TEST_P (LoadLanguage, ita)
 
 TEST_P (LoadLanguage, ita_old)
 
 TEST_P (LoadLanguage, jav)
 
 TEST_P (LoadLanguage, jpn)
 
 TEST_P (LoadLanguage, jpn_vert)
 
 TEST_P (LoadLanguage, kan)
 
 TEST_P (LoadLanguage, kat)
 
 TEST_P (LoadLanguage, kat_old)
 
 TEST_P (LoadLanguage, kaz)
 
 TEST_P (LoadLanguage, khm)
 
 TEST_P (LoadLanguage, kir)
 
 TEST_P (LoadLanguage, kor)
 
 TEST_P (LoadLanguage, kor_vert)
 
 TEST_P (LoadLanguage, lao)
 
 TEST_P (LoadLanguage, lat)
 
 TEST_P (LoadLanguage, lav)
 
 TEST_P (LoadLanguage, lit)
 
 TEST_P (LoadLanguage, ltz)
 
 TEST_P (LoadLanguage, mal)
 
 TEST_P (LoadLanguage, mar)
 
 TEST_P (LoadLanguage, mkd)
 
 TEST_P (LoadLanguage, mlt)
 
 TEST_P (LoadLanguage, mon)
 
 TEST_P (LoadLanguage, mri)
 
 TEST_P (LoadLanguage, msa)
 
 TEST_P (LoadLanguage, mya)
 
 TEST_P (LoadLanguage, nep)
 
 TEST_P (LoadLanguage, nld)
 
 TEST_P (LoadLanguage, nor)
 
 TEST_P (LoadLanguage, oci)
 
 TEST_P (LoadLanguage, ori)
 
 TEST_P (LoadLanguage, osd)
 
 TEST_P (LoadLanguage, pan)
 
 TEST_P (LoadLanguage, pol)
 
 TEST_P (LoadLanguage, por)
 
 TEST_P (LoadLanguage, pus)
 
 TEST_P (LoadLanguage, que)
 
 TEST_P (LoadLanguage, ron)
 
 TEST_P (LoadLanguage, rus)
 
 TEST_P (LoadLanguage, san)
 
 TEST_P (LoadLanguage, sin)
 
 TEST_P (LoadLanguage, slk)
 
 TEST_P (LoadLanguage, slv)
 
 TEST_P (LoadLanguage, snd)
 
 TEST_P (LoadLanguage, spa)
 
 TEST_P (LoadLanguage, spa_old)
 
 TEST_P (LoadLanguage, sqi)
 
 TEST_P (LoadLanguage, srp)
 
 TEST_P (LoadLanguage, srp_latn)
 
 TEST_P (LoadLanguage, sun)
 
 TEST_P (LoadLanguage, swa)
 
 TEST_P (LoadLanguage, swe)
 
 TEST_P (LoadLanguage, syr)
 
 TEST_P (LoadLanguage, tam)
 
 TEST_P (LoadLanguage, tat)
 
 TEST_P (LoadLanguage, tel)
 
 TEST_P (LoadLanguage, tgk)
 
 TEST_P (LoadLanguage, tha)
 
 TEST_P (LoadLanguage, tir)
 
 TEST_P (LoadLanguage, ton)
 
 TEST_P (LoadLanguage, tur)
 
 TEST_P (LoadLanguage, uig)
 
 TEST_P (LoadLanguage, ukr)
 
 TEST_P (LoadLanguage, urd)
 
 TEST_P (LoadLanguage, uzb)
 
 TEST_P (LoadLanguage, uzb_cyrl)
 
 TEST_P (LoadLanguage, vie)
 
 TEST_P (LoadLanguage, yid)
 
 TEST_P (LoadLanguage, yor)
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_fast, LoadLanguage, ::testing::Values(TESSDATA_DIR "_fast"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_best, LoadLanguage, ::testing::Values(TESSDATA_DIR "_best"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata, LoadLanguage, ::testing::Values(TESSDATA_DIR))
 
 TEST_P (LoadScript, Arabic)
 
 TEST_P (LoadScript, Armenian)
 
 TEST_P (LoadScript, Bengali)
 
 TEST_P (LoadScript, Canadian_Aboriginal)
 
 TEST_P (LoadScript, Cherokee)
 
 TEST_P (LoadScript, Cyrillic)
 
 TEST_P (LoadScript, Devanagari)
 
 TEST_P (LoadScript, Ethiopic)
 
 TEST_P (LoadScript, Fraktur)
 
 TEST_P (LoadScript, Georgian)
 
 TEST_P (LoadScript, Greek)
 
 TEST_P (LoadScript, Gujarati)
 
 TEST_P (LoadScript, Gurmukhi)
 
 TEST_P (LoadScript, HanS)
 
 TEST_P (LoadScript, HanS_vert)
 
 TEST_P (LoadScript, HanT)
 
 TEST_P (LoadScript, HanT_vert)
 
 TEST_P (LoadScript, Hangul)
 
 TEST_P (LoadScript, Hangul_vert)
 
 TEST_P (LoadScript, Hebrew)
 
 TEST_P (LoadScript, Japanese)
 
 TEST_P (LoadScript, Japanese_vert)
 
 TEST_P (LoadScript, Kannada)
 
 TEST_P (LoadScript, Khmer)
 
 TEST_P (LoadScript, Lao)
 
 TEST_P (LoadScript, Latin)
 
 TEST_P (LoadScript, Malayalam)
 
 TEST_P (LoadScript, Myanmar)
 
 TEST_P (LoadScript, Oriya)
 
 TEST_P (LoadScript, Sinhala)
 
 TEST_P (LoadScript, Syriac)
 
 TEST_P (LoadScript, Tamil)
 
 TEST_P (LoadScript, Telugu)
 
 TEST_P (LoadScript, Thaana)
 
 TEST_P (LoadScript, Thai)
 
 TEST_P (LoadScript, Tibetan)
 
 TEST_P (LoadScript, Vietnamese)
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_fast, LoadScript, ::testing::Values(TESSDATA_DIR "_fast"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata_best, LoadScript, ::testing::Values(TESSDATA_DIR "_best"))
 
 INSTANTIATE_TEST_SUITE_P (DISABLED_Tessdata, LoadScript, ::testing::Values(TESSDATA_DIR))
 
 TEST_F (LoadLang, engFast)
 
 TEST_F (LoadLang, engBest)
 
 TEST_F (LoadLang, engBestInt)
 
 TEST_F (LoadLang, kmrFast)
 
 TEST_F (LoadLang, kmrBest)
 
 TEST_F (LSTMTrainerTest, RecodeTestKorBase)
 
 TEST_F (LSTMTrainerTest, RecodeTestKor)
 
 TEST_F (LSTMTrainerTest, EncodeDecodeBothTestKor)
 
 TEST_F (LSTMTrainerTest, TestSquashed)
 
 TEST_F (LSTMTrainerTest, BasicTest)
 
 TEST_F (LSTMTrainerTest, ColorTest)
 
 TEST_F (LSTMTrainerTest, BidiTest)
 
 TEST_F (LSTMTrainerTest, Test2D)
 
 TEST_F (LSTMTrainerTest, TestAdam)
 
 TEST_F (LSTMTrainerTest, SpeedTest)
 
 TEST_F (LSTMTrainerTest, DeterminismTest)
 
 TEST_F (LSTMTrainerTest, SoftmaxBaselineTest)
 
 TEST_F (LSTMTrainerTest, SoftmaxTest)
 
 TEST_F (LSTMTrainerTest, EncodedSoftmaxTest)
 
 TEST_F (LSTMTrainerTest, TestLayerAccess)
 
 TEST_F (LSTMTrainerTest, EncodesEng)
 
 TEST_F (LSTMTrainerTest, EncodesKan)
 
 TEST_F (LSTMTrainerTest, EncodesKor)
 
 TEST_F (LSTMTrainerTest, MapCoder)
 
 TEST_F (LSTMTrainerTest, ConvertModel)
 
 TEST_F (MatrixTest, RotatingTranspose_3_1)
 
 TEST_F (MatrixTest, RotatingTranspose_2_0)
 
 TEST_F (MatrixTest, RotatingTranspose_1_3)
 
 TEST_F (MatrixTest, RotatingTranspose_0_2)
 
 TEST_F (NetworkioTest, InitWithZeroFill)
 
 TEST_F (NetworkioTest, CopyWithYReversal)
 
 TEST_F (NetworkioTest, CopyWithXReversal)
 
 TEST_F (NetworkioTest, CopyWithXYTranspose)
 
 TEST (NormstrngsTest, BasicText)
 
 TEST (NormstrngsTest, LigatureText)
 
 TEST (NormstrngsTest, OcrSpecificNormalization)
 
 TEST (NormstrngsTest, DetectsCorrectText)
 
 TEST (NormstrngsTest, DetectsIncorrectText)
 
 TEST (NormstrngsTest, NonIndicTextDoesntBreakIndicRules)
 
 TEST (NormstrngsTest, NoLonelyJoiners)
 
 TEST (NormstrngsTest, NoLonelyJoinersPlus)
 
 TEST (NormstrngsTest, NoLonelyJoinersNonAlpha)
 
 TEST (NormstrngsTest, JoinersStayInArabic)
 
 TEST (NormstrngsTest, DigitOK)
 
 TEST (NormstrngsTest, DandaOK)
 
 TEST (NormstrngsTest, AllScriptsRegtest)
 
 TEST (NormstrngsTest, IsWhitespace)
 
 TEST (NormstrngsTest, SpanUTF8Whitespace)
 
 TEST (NormstrngsTest, SpanUTF8NotWhitespace)
 
 TEST (NormstrngsTest, IsInterchangeValid)
 
 TEST (NormstrngsTest, IsInterchangeValid7BitAscii)
 
 TEST (NormstrngsTest, FullwidthToHalfwidth)
 
std::string CodepointList (const std::vector< char32 > &str32)
 
std::string PrintString32WithUnicodes (const std::string &str)
 
std::string PrintStringVectorWithUnicodes (const std::vector< std::string > &glyphs)
 
void ExpectGraphemeModeResults (const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
 
 TEST_F (NthItemTest, GeneralTest)
 
 TEST_F (NthItemTest, BoringTest)
 
 TEST_F (NthItemTest, UniqueTest)
 
 TEST_F (NthItemTest, EqualTest)
 
 TEST_P (OSDTest, MatchOrientationDegrees)
 
 INSTANTIATE_TEST_SUITE_P (TessdataEngEuroHebrew, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR)))
 
 INSTANTIATE_TEST_SUITE_P (TessdataBestEngEuroHebrew, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_best")))
 
 INSTANTIATE_TEST_SUITE_P (TessdataFastEngEuroHebrew, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_fast")))
 
 INSTANTIATE_TEST_SUITE_P (TessdataFastRotated90, OSDTest, ::testing::Combine(::testing::Values(90), ::testing::Values(TESTING_DIR "/phototest-rotated-R.png"), ::testing::Values(TESSDATA_DIR "_fast")))
 
 INSTANTIATE_TEST_SUITE_P (TessdataFastRotated180, OSDTest, ::testing::Combine(::testing::Values(180), ::testing::Values(TESTING_DIR "/phototest-rotated-180.png"), ::testing::Values(TESSDATA_DIR "_fast")))
 
 INSTANTIATE_TEST_SUITE_P (TessdataFastRotated270, OSDTest, ::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/phototest-rotated-L.png"), ::testing::Values(TESSDATA_DIR "_fast")))
 
 INSTANTIATE_TEST_SUITE_P (TessdataFastDevaRotated270, OSDTest, ::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/devatest-rotated-270.png"), ::testing::Values(TESSDATA_DIR "_fast")))
 
 INSTANTIATE_TEST_SUITE_P (TessdataFastDeva, OSDTest, ::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/devatest.png"), ::testing::Values(TESSDATA_DIR "_fast")))
 
 TEST_F (PageSegModeTest, WordTest)
 
 TEST_F (PangoFontInfoTest, TestNonDefaultConstructor)
 
 TEST_F (PangoFontInfoTest, DoesParseFontDescriptionName)
 
 TEST_F (PangoFontInfoTest, DoesParseMissingFonts)
 
 TEST_F (PangoFontInfoTest, DoesGetSpacingProperties)
 
 TEST_F (PangoFontInfoTest, CanRenderString)
 
 TEST_F (PangoFontInfoTest, CanRenderLigature)
 
 TEST_F (PangoFontInfoTest, CannotRenderUncoveredString)
 
 TEST_F (PangoFontInfoTest, CannotRenderInvalidString)
 
 TEST_F (PangoFontInfoTest, CanDropUncoveredChars)
 
 TEST_F (FontUtilsTest, DoesFindAvailableFonts)
 
 TEST_F (FontUtilsTest, DoesDetectMissingFonts)
 
 TEST_F (FontUtilsTest, DoesListAvailableFonts)
 
 TEST_F (FontUtilsTest, DoesSelectFont)
 
 TEST_F (FontUtilsTest, DoesFailToSelectFont)
 
void AsciiToRowInfo (const char *text, int row_number, RowInfo *info)
 
void MakeAsciiRowInfos (const TextAndModel *row_infos, int n, std::vector< RowInfo > *output)
 
void EvaluateParagraphDetection (const TextAndModel *correct, int n, const std::vector< PARA * > &detector_output)
 
void TestParagraphDetection (const TextAndModel *correct, int num_rows)
 
 TEST (ParagraphsTest, ListItemsIdentified)
 
 TEST (ParagraphsTest, TestSimpleParagraphDetection)
 
 TEST (ParagraphsTest, TestFewCluesWithCrown)
 
 TEST (ParagraphsTest, TestCrownParagraphDetection)
 
 TEST (ParagraphsText, TestRealFlushLeftParagraphs)
 
 TEST (ParagraphsTest, TestSingleFullPageContinuation)
 
 TEST (ParagraphsTest, TestRightAlignedParagraph)
 
 TEST (ParagraphsTest, TestTinyParagraphs)
 
 TEST (ParagraphsTest, TestComplexPage1)
 
 TEST (ParagraphsTest, TestComplexPage2)
 
 TEST (ParagraphsTest, TestSubtleCrown)
 
 TEST (ParagraphsTest, TestStrayLineInBlock)
 
 TEST (ParagraphsTest, TestUnlvInsurance)
 
 TEST (ParagraphsTest, TestSplitsOutLeaderLines)
 
 TEST (ParagraphsTest, NotDistractedBySourceCode)
 
 TEST (ParagraphsTest, NotOverlyAggressiveWithBlockQuotes)
 
 TEST (ParagraphsTest, IndexPageTest)
 
 TEST_F (ParamsModelTest, TestEngParamsModelIO)
 
void ClassicProgressTester (const char *imgname, const char *tessdatadir, const char *lang)
 
void NewProgressTester (const char *imgname, const char *tessdatadir, const char *lang)
 
 TEST (QuickTest, ClassicProgressReporting)
 
 TEST (QuickTest, NewProgressReporting)
 
 TEST (QRSequenceGenerator, GetBinaryReversedInteger)
 
 TEST_P (QRSequenceGeneratorTest, GeneratesValidSequence)
 
 INSTANTIATE_TEST_SUITE_P (RangeTest, QRSequenceGeneratorTest, ::testing::Values(2, 7, 8, 9, 16, 1e2, 1e4, 1e6))
 
 TEST_F (RecodeBeamTest, DoesChinese)
 
 TEST_F (RecodeBeamTest, DoesJapanese)
 
 TEST_F (RecodeBeamTest, DoesKorean)
 
 TEST_F (RecodeBeamTest, DoesKannada)
 
 TEST_F (RecodeBeamTest, DoesMarathi)
 
 TEST_F (RecodeBeamTest, DoesEnglish)
 
 TEST_F (RecodeBeamTest, DISABLED_EngDictionary)
 
 TEST_F (RecodeBeamTest, DISABLED_ChiDictionary)
 
 TEST_F (RecodeBeamTest, DISABLED_MultiCodeSequences)
 
 TEST_F (TBOXTest, OverlapInside)
 
 TEST_F (TBOXTest, OverlapBoolCorners)
 
 TEST_F (TBOXTest, OverlapFractionCorners)
 
 TEST_F (TBOXTest, OverlapBoolSides)
 
 TEST_F (TBOXTest, OverlapFractionSides)
 
 TEST_F (TBOXTest, OverlapBoolSpan)
 
 TEST_F (TBOXTest, OverlapFractionSpan)
 
 TEST_F (TBOXTest, OverlapOutsideTests)
 
 TEST_F (TBOXTest, OverlapXFraction)
 
 TEST_F (TBOXTest, OverlapYFraction)
 
 TEST_F (TBOXTest, OverlapXFractionZeroSize)
 
 TEST_F (TBOXTest, OverlapYFractionZeroSize)
 
 TEST_F (ResultIteratorTest, EasyTest)
 
 TEST_F (ResultIteratorTest, ComplexTest)
 
 TEST_F (ResultIteratorTest, GreyTest)
 
 TEST_F (ResultIteratorTest, SmallCapDropCapTest)
 
 TEST_F (ResultIteratorTest, DualStartTextlineOrderTest)
 
 TEST_F (ResultIteratorTest, LeftwardTextlineOrderTest)
 
 TEST_F (ResultIteratorTest, RightwardTextlineOrderTest)
 
 TEST_F (ResultIteratorTest, TextlineOrderSanityCheck)
 
 TEST_F (ResultIteratorTest, DISABLED_NonNullChoicesTest)
 
 TEST_F (ResultIteratorTest, NonNullConfidencesTest)
 
 TEST_F (ScanutilsTest, DoesScanf)
 
 TEST_F (ShapeTest, BasicTest)
 
 TEST_F (ShapeTest, AddShapeTest)
 
 TEST_F (ShapeTableTest, FullTest)
 
 TEST_F (STATSTest, BasicStats)
 
 TEST_F (STATSTest, InitStats)
 
 TEST_F (STATSTest, TopNModes)
 
 TEST_F (StridemapTest, Indexing)
 
 TEST_F (StridemapTest, Scaling)
 
 TEST_F (StringRendererTest, DoesRenderToImage)
 
 TEST_F (StringRendererTest, DoesRenderToImageWithUnderline)
 
 TEST_F (StringRendererTest, DoesHandleNewlineCharacters)
 
 TEST_F (StringRendererTest, DoesRenderLigatures)
 
 TEST_F (StringRendererTest, ArabicBoxcharsInLTROrder)
 
 TEST_F (StringRendererTest, DoesOutputBoxcharsInReadingOrder)
 
 TEST_F (StringRendererTest, DoesRenderVerticalText)
 
 TEST_F (StringRendererTest, DoesKeepAllImageBoxes)
 
 TEST_F (StringRendererTest, DoesClearBoxes)
 
 TEST_F (StringRendererTest, DoesLigatureTextForRendering)
 
 TEST_F (StringRendererTest, DoesRetainInputLigatureForRendering)
 
 TEST_F (StringRendererTest, DoesStripUnrenderableWords)
 
 TEST_F (StringRendererTest, DoesRenderWordBoxes)
 
 TEST_F (StringRendererTest, DoesRenderWordBoxesFromMultiLineText)
 
 TEST_F (StringRendererTest, DoesRenderAllFontsToImage)
 
 TEST_F (StringRendererTest, DoesNotRenderWordJoiner)
 
 TEST_F (StringRendererTest, DISABLED_DoesDropUncoveredChars)
 
 TEST (ConvertBasicLatinToFullwidthLatinTest, DoesConvertBasicLatin)
 
 TEST (ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertFullwidthLatin)
 
 TEST (ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertNonLatin)
 
 TEST (ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertSpace)
 
 TEST (ConvertFullwidthLatinToBasicLatinTest, DoesConvertFullwidthLatin)
 
 TEST (ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertBasicLatin)
 
 TEST (ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertNonLatin)
 
 TEST (ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertSpace)
 
 TEST_F (TableFinderTest, GapInXProjectionNoGap)
 
 TEST_F (TableFinderTest, GapInXProjectionEdgeGap)
 
 TEST_F (TableFinderTest, GapInXProjectionExists)
 
 TEST_F (TableFinderTest, HasLeaderAdjacentOverlapping)
 
 TEST_F (TableFinderTest, HasLeaderAdjacentNoOverlap)
 
 TEST_F (TableFinderTest, HasLeaderAdjacentPreservesColumns)
 
 TEST_F (TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass)
 
 TEST_F (TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail)
 
 TEST_F (TableRecognizerTest, HasSignificantLinesBasicPass)
 
 TEST_F (TableRecognizerTest, HasSignificantLinesBasicFail)
 
 TEST_F (TableRecognizerTest, HasSignificantLinesHorizontalOnlyFails)
 
 TEST_F (TableRecognizerTest, FindLinesBoundingBoxBasic)
 
 TEST_F (TableRecognizerTest, RecognizeLinedTableBasic)
 
 TEST_F (TableRecognizerTest, RecognizeWhitespacedTableBasic)
 
 TEST_F (StructuredTableTest, CountVerticalIntersectionsAll)
 
 TEST_F (StructuredTableTest, CountHorizontalIntersectionsAll)
 
 TEST_F (StructuredTableTest, VerifyLinedTableBasicPass)
 
 TEST_F (StructuredTableTest, VerifyLinedTableHorizontalFail)
 
 TEST_F (StructuredTableTest, VerifyLinedTableVerticalFail)
 
 TEST_F (StructuredTableTest, FindWhitespacedColumnsBasic)
 
 TEST_F (StructuredTableTest, FindWhitespacedColumnsSorted)
 
 TEST_F (TabVectorTest, SetStartEndPointsMatch)
 
 TEST_F (TabVectorTest, XAtY45DegreeSlopeInRangeExact)
 
 TEST_F (TabVectorTest, XAtYVerticalInRangeExact)
 
 TEST_F (TabVectorTest, XAtYHorizontal)
 
 TEST_F (TabVectorTest, XAtYRoundingSimple)
 
 TEST_F (TabVectorTest, XAtYLargeNumbers)
 
 TEST_F (TabVectorTest, XAtYHorizontalInRangeExact)
 
 TEST_F (TabVectorTest, VOverlapInRangeSimple)
 
 TEST_F (TabVectorTest, VOverlapOutOfRange)
 
 TEST_F (TabVectorTest, XYFlip)
 
 TEST_F (TatweelTest, UnicharsetIgnoresTatweel)
 
 TEST_F (TatweelTest, DictIgnoresTatweel)
 
 TEST_F (TatweelTest, UnicharsetLoadKeepsTatweel)
 
 TEST_F (TextlineProjectionTest, Unrotated)
 
 TEST_F (TextlineProjectionTest, Rotated)
 
 TEST_F (TfileTest, Serialize)
 
 TEST_F (TfileTest, FGets)
 
 TEST_F (TfileTest, BigEndian)
 
 TEST (UnicharTest, Conversion)
 
 TEST (UnicharTest, InvalidText)
 
 TEST_F (UnicharcompressTest, DoesChinese)
 
 TEST_F (UnicharcompressTest, DoesJapanese)
 
 TEST_F (UnicharcompressTest, DoesKorean)
 
 TEST_F (UnicharcompressTest, DoesKannada)
 
 TEST_F (UnicharcompressTest, DoesMarathi)
 
 TEST_F (UnicharcompressTest, DoesEnglish)
 
 TEST_F (UnicharcompressTest, DoesLigaturesWithDoubles)
 
 TEST_F (UnicharcompressTest, GetEncodingAsString)
 
 TEST (UnicharsetTest, Basics)
 
 TEST (UnicharsetTest, Multibyte)
 
 TEST (UnicharsetTest, MultibyteBigrams)
 
 TEST (UnicharsetTest, OldStyle)
 
 TEST (ValidateGraphemeTest, MultipleSyllablesAreNotASingleGrapheme)
 
 TEST (ValidateGraphemeTest, SingleConsonantOK)
 
 TEST (ValidateGraphemeTest, SimpleCV)
 
 TEST (ValidateGraphemeTest, SubscriptConjunct)
 
 TEST (ValidateGraphemeTest, HalfFormJoiner)
 
 TEST (ValidateGraphemeTest, TraditionalConjunctJoiner)
 
 TEST (ValidateGraphemeTest, OpenConjunctNonJoiner)
 
 TEST (ValidateGraphemeTest, ExplicitViramaNonJoiner)
 
 TEST (ValidateGraphemeTest, ThaiGraphemes)
 
 TEST (ValidateGraphemeTest, NoLonelyJoinersQuote)
 
 TEST (ValidateIndicTest, AddsJoinerToTerminalVirama)
 
 TEST (ValidateIndicTest, OnlyOneDependentVowel)
 
 TEST (ValidateIndicTest, OnlyOneVowelModifier)
 
 TEST (ValidateIndicTest, VowelModifierMustBeLast)
 
 TEST (ValidateIndicTest, MatrasFollowConsonantsNotVowels)
 
 TEST (ValidateIndicTest, SubGraphemes)
 
 TEST (ValidateIndicTest, Nukta)
 
 TEST (ValidateIndicTest, SinhalaRakaransaya)
 
 TEST (ValidateIndicTest, SinhalaYansaya)
 
 TEST (ValidateIndicTest, SinhalaRepaya)
 
 TEST (ValidateIndicTest, SinhalaSpecials)
 
 TEST (ValidateKhmerTest, GoodKhmerWords)
 
 TEST (ValidateKhmerTest, BadKhmerWords)
 
 TEST (ValidateMyanmarTest, GoodMyanmarWords)
 
 TEST (ValidateMyanmarTest, BadMyanmarWords)
 
 TEST (ValidatorTest, MostFrequentViramaScript)
 
 TEST (ValidatorTest, Idempotency)
 
fix_fuzzy_spaces()

Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces then replace the sublist in the ROW_RES.

Parameters
monitor: progress monitor
word_count: count of words in doc
[out] page_res
void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list closing any gaps of this size by either inserting new combination words, or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)
 
fix_sp_fp_word()

Test the current word to see if it can be split by deleting noise blobs. If so, do the business. Return with the iterator pointing to the same place if the word is unchanged, or the last of the replacement words.

void fixspace_dbg (WERD_RES *word)
 
C_OUTLINE::move

Move C_OUTLINE by vector

Parameters
vec: vector to reposition OUTLINE by
POLY_BLOCK::reflect_in_y_axis

Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)

int lessthan (const void *first, const void *second)
 
start_seam_list

Initialize a list of seams that match the original number of blobs present in the starting segmentation. Each of the seams created by this routine have location information only.

void start_seam_list (TWERD *word, std::vector< SEAM * > *seam_array)
 
AddConfigToClass

Add a new config to this class. Malloc new space and copy the old configs if necessary. Return the config id for the new config.

Parameters
Class: The class to add to
int AddConfigToClass (CLASS_TYPE Class)
 
AddProtoToClass

Add a new proto to this class. Malloc new space and copy the old protos if necessary. Return the proto id for the new proto.

Parameters
Class: The class to add to
int AddProtoToClass (CLASS_TYPE Class)
 
void FillABC (PROTO_STRUCT *Proto)
 
void FreeClass (CLASS_TYPE Class)
 
void FreeClassFields (CLASS_TYPE Class)
 
CLASS_TYPE NewClass (int NumProtos, int NumConfigs)
 
extract_edges
void extract_edges (Image pix, BLOCK *block)
 
fill_buckets
capture_children

Find all neighbouring outlines that are children of this outline and either move them to the output list or declare this outline illegal and return false.

empty_buckets

Run the edge detector over the block and return a list of blobs.

outlines_to_blobs

Gather together outlines into blobs using the usual bucket sort.

void outlines_to_blobs (BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
 
row_y_order

Sort function to sort rows in y from page top.

row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

make_single_row

Arrange the blobs into a single row... well actually, if there is only a single blob, it makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. true is returned if the row should be deleted.

bool find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
 
void compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count)
 
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, bool testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

int32_t compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

int32_t compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return true if enough blobs were far enough away to need a quadratic.

bool segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
true if enough blobs were far enough away to need a quadratic.
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 
make_single_word

For each row, arrange the blobs into one word. There is no fixed pitch detection.

void make_single_word (bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
 
void make_words (tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
set_row_spaces

Set the min_space and max_nonspace members of the row so that the blobs can be arranged into words.

void set_row_spaces (TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
row_words

Compute the max nonspace and min space for the row.

int32_t row_words (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
 
row_words2

Compute the max nonspace and min space for the row.

int32_t row_words2 (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
 
make_real_words

Convert a TO_BLOCK to a BLOCK.

void make_real_words (tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
 
make_rep_words

Fabricate a real row from only the repeated blob words. Get the xheight from the block as it may be more meaningful.

ROW * make_rep_words (TO_ROW *row, TO_BLOCK *block)
 
make_real_word

Construct a WERD from a given number of adjacent entries in a list of BLOBNBOXs.

WERD * make_real_word (BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
 
check_blob
Returns
true if blob has a non whole outline.
any_shared_split_points

Return true if any of the splits share a point with this one.

preserve_outline_tree

Copy the list of outlines.

restore_outline_tree

Copy the list of outlines.

Variables

const int kMaxNumberOfScripts = 116 + 1 + 2 + 1
 
constexpr int kPointsPerInch = 72
 
constexpr int kMinCredibleResolution = 70
 
constexpr int kMaxCredibleResolution = 2400
 
constexpr int kResolutionEstimationFactor = 10
 
const int kMinRectSize = 10
 
const char kTesseractReject = '~'
 
const char kUNLVReject = '~'
 
const char kUNLVSuspect = '^'
 
const int kNumbersPerBlob = 5
 
const int kBytesPerNumber = 5
 
const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1
 
const int kBytesPer64BitNumber = 20
 
const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + UNICHAR_LEN
 
const int kUniChs [] = {0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0}
 
const int kLatinChs [] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0}
 
DotProductFunction DotProduct
 
const float kMathDigitDensityTh1 = 0.25
 
const float kMathDigitDensityTh2 = 0.1
 
const float kMathItalicDensityTh = 0.5
 
const float kUnclearDensityTh = 0.25
 
const int kSeedBlobsCountTh = 10
 
const int kLeftIndentAlignmentCountTh = 1
 
const int kMaxCharTopRange = 48
 
const float kCertaintyScale = 7.0f
 
const float kWorstDictCertainty = -25.0f
 
const float kSizeRatioToReject = 2.0
 
const int kMinAcceptableBlobHeight = 10
 
const float kScriptAcceptRatio = 1.3
 
const float kHanRatioInKorean = 0.7
 
const float kHanRatioInJapanese = 0.3
 
const float kNonAmbiguousMargin = 1.0
 
const int kMaxCircleErosions = 8
 
const ParagraphModel * kCrownLeft
 
const ParagraphModel * kCrownRight
 
char * editor_image_win_name = "EditorImage"
 
int editor_image_xpos = 590
 
int editor_image_ypos = 10
 
int editor_image_word_bb_color = ScrollView::BLUE
 
int editor_image_blob_bb_color = ScrollView::YELLOW
 
char * editor_word_name = "BlnWords"
 
int editor_word_xpos = 60
 
int editor_word_ypos = 510
 
int editor_word_height = 240
 
int editor_word_width = 655
 
BLOCK_LIST * current_block_list
 
const int16_t kMaxBoxEdgeDiff = 2
 
const char kBlameCorrect [] = "corr"
 
const char kBlameClassifier [] = "cl"
 
const char kBlameChopper [] = "chop"
 
const char kBlameClassLMTradeoff [] = "cl/LM"
 
const char kBlamePageLayout [] = "pglt"
 
const char kBlameSegsearchHeur [] = "ss_heur"
 
const char kBlameSegsearchPP [] = "ss_pp"
 
const char kBlameClassOldLMTradeoff [] = "cl/old_LM"
 
const char kBlameAdaption [] = "adapt"
 
const char kBlameNoTruthSplit [] = "no_tr_spl"
 
const char kBlameNoTruth [] = "no_tr"
 
const char kBlameUnknown [] = "unkn"
 
const char *const kIncorrectResultReasonNames []
 
const double kCosSmallAngle = 0.866
 
const double kDefiniteAspectRatio = 2.0
 
const double kComplexShapePerimeterRatio = 1.5
 
const double kMinMediumSizeRatio = 0.25
 
const double kMaxMediumSizeRatio = 4.0
 
const TPOINT kDivisibleVerticalUpright (0, 1)
 
const TPOINT kDivisibleVerticalItalic (1, 5)
 
const int kBoxReadBufSize = 1024
 
const int kBoxClipTolerance = 2
 
const int kNumEndPoints = 3
 
const int kMinPointsForErrorCount = 16
 
const int kMaxRealDistance = 2.0
 
const int kMaxReadAhead = 8
 
const int kFeaturePadding = 2
 
const int kImagePadding = 4
 
const int kSloppyTolerance = 4
 
const float kFinalPixelTolerance = 0.125f
 
const int kBlnCellHeight = 256
 
const int kBlnXHeight = 128
 
const int kBlnBaselineOffset = 64
 
const int kHistogramSize = 256
 
const int kWordrecMaxNumJoinChunks = 4
 
const double kMaxWordSizeRatio = 1.25
 
const double kMaxLineSizeRatio = 1.25
 
const double kMaxWordGapRatio = 2.0
 
const int par1 = 4500 / (approx_dist * approx_dist)
 
const int par2 = 6750 / (approx_dist * approx_dist)
 
const long double kMinVariance = 1.0L / 1024
 
const int kMinSubscriptOffset = 20
 
const int kMinSuperscriptOffset = 20
 
const int kMaxDropCapBottom = -128
 
const double kMaxOverlapDenominator = 0.125
 
const double kMinXHeightMatch = 0.5
 
const double kMaxBaselineDrift = 0.0625
 
const int kCenterGradeCap = 25
 
const double kBadPriority = 999.0
 
bool wordrec_display_splits = 0
 
const double kMaxPerimeterWidthRatio = 8.0
 
const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)
 
int log_level = INT_MAX
 
const int kRadicalRadix = 29
 
const double kMinXHeightFraction = 0.25
 
const double kMinCapHeightFraction = 0.05
 
const char kUniversalAmbigsFile []
 
const int ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)
 
const double FTable [FTABLE_Y][FTABLE_X]
 
const char *const kMicroFeatureType = "mf"
 
const char *const kCNFeatureType = "cn"
 
const char *const kIntFeatureType = "if"
 
const char *const kGeoFeatureType = "tb"
 
EndParamDesc of
 
const FEATURE_DESC_STRUCT MicroFeatureDesc
 
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
 
const FEATURE_DESC_STRUCT CharNormDesc
 
const FEATURE_DESC_STRUCT OutlineFeatDesc
 
const FEATURE_DESC_STRUCT IntFeatDesc
 
const FEATURE_DESC_STRUCT GeoFeatDesc
 
const double kStandardFeatureLength = 64.0 / 5
 
const float MF_SCALE_FACTOR = 0.5f / kBlnXHeight
 
double classify_min_slope = 0.414213562
 
double classify_max_slope = 2.414213562
 
double classify_pico_feature_length = 0.05
 
TESS_API float PicoFeatureLength
 
const int kRandomizingCenter = 128
 
const int case_state_table [6][4]
 
const char kDoNotReverse [] = "RRP_DO_NO_REVERSE"
 
const char kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"
 
const char kForceReverse [] = "RRP_FORCE_REVERSE"
 
const char *const RTLReversePolicyNames [] = {kDoNotReverse, kReverseIfHasRTL, kForceReverse}
 
const TFloat TanhTable []
 
const TFloat LogisticTable []
 
constexpr int kTableSize = 4096
 
constexpr TFloat kScaleFactor = 256.0
 
const int kMaxInputHeight = 48
 
const TFloat kStateClip = 100.0
 
const TFloat kErrClip = 1.0f
 
const double kDictRatio = 2.25
 
const double kCertOffset = -0.085
 
const int kMinWinSize = 500
 
const int kMaxWinSize = 2000
 
const int kXWinFrameSize = 30
 
const int kYWinFrameSize = 80
 
const float kMinCertainty = -20.0f
 
const float kMinProb = std::exp(kMinCertainty)
 
class tesseract::TFNetworkModelDefaultTypeInternal _TFNetworkModel_default_instance_
 
const int kAdamCorrectionIterations = 200000
 
const TFloat kAdamEpsilon = 1e-8
 
const int kInt8Flag = 1
 
const int kAdamFlag = 4
 
const int kDoubleFlag = 128
 
const int kHistogramBuckets = 16
 
int textord_debug_tabfind = 0
 
int textord_debug_bugs = 0
 
bool textord_debug_printable = false
 
const double kAlignedFraction = 0.03125
 
const double kRaggedFraction = 2.5
 
const double kAlignedGapFraction = 0.75
 
const double kRaggedGapFraction = 1.0
 
const int kVLineAlignment = 3
 
const int kVLineGutter = 1
 
const int kVLineSearchSize = 150
 
const int kMinRaggedTabs = 5
 
const int kMinAlignedTabs = 4
 
const int kVLineMinLength = 300
 
const double kMinTabGradient = 4.0
 
const int kMaxSkewFactor = 15
 
double textord_underline_threshold = 0.5
 
const double kMaxSmallNeighboursPerPix = 1.0 / 32
 
const int kMaxLargeOverlapsWithSmall = 3
 
const int kMaxMediumOverlapsWithSmall = 12
 
const int kMaxLargeOverlapsWithMedium = 12
 
const int kOriginalNoiseMultiple = 8
 
const int kNoisePadding = 4
 
const double kPhotoOffsetFraction = 0.375
 
const double kMinGoodTextPARatio = 1.5
 
const int kMaxIncompatibleColumnCount = 2
 
const double kHorizontalGapMergeFraction = 0.5
 
const double kMinGutterWidthGrid = 0.5
 
const double kMaxDistToPartSizeRatio = 1.5
 
const double kMaxSpacingDrift = 1.0 / 72
 
const double kMaxTopSpacingFraction = 0.25
 
const double kMaxSameBlockLineSpacing = 3
 
const double kMaxSizeRatio = 1.5
 
const double kMaxLeaderGapFractionOfMax = 0.25
 
const double kMaxLeaderGapFractionOfMin = 0.5
 
const int kMinLeaderCount = 5
 
const int kMinStrongTextValue = 6
 
const int kMinChainTextValue = 3
 
const int kHorzStrongTextlineCount = 8
 
const int kHorzStrongTextlineHeight = 10
 
const int kHorzStrongTextlineAspect = 5
 
const double kMaxBaselineError = 0.4375
 
const double kMinBaselineCoverage = 0.5
 
const int kMaxRMSColorNoise = 128
 
const int kMaxColorDistance = 900
 
const int kRGBRMSColors = 4
 
const int kMaxPadFactor = 6
 
const int kMaxNeighbourDistFactor = 4
 
const int kMaxCaptionLines = 7
 
const double kMinCaptionGapRatio = 2.0
 
const double kMinCaptionGapHeightRatio = 0.5
 
const double kMarginOverlapFraction = 0.25
 
const double kBigPartSizeRatio = 1.75
 
const double kTinyEnoughTextlineOverlapFraction = 0.25
 
const double kMaxPartitionSpacing = 1.75
 
const int kSmoothDecisionMargin = 4
 
const double kMinColumnWidth = 2.0 / 3
 
int devanagari_split_debuglevel = 0
 
bool devanagari_split_debugimage = 0
 
bool textord_show_fixed_cuts = false
 
ScrollView * to_win = nullptr
 
FILE * to_debug
 
int textord_fp_chop_error = 2
 
bool gapmap_debug = false
 
bool gapmap_use_ends = false
 
bool gapmap_no_isolated_quanta = false
 
double gapmap_big_gaps = 1.75
 
const double kMinRectangularFraction = 0.125
 
const double kMaxRectangularFraction = 0.75
 
const double kMaxRectangularGradient = 0.1
 
const int kMinImageFindSize = 100
 
const int kThinLineFraction = 20
 Denominator of resolution makes max pixel width to allow thin lines. More...
 
const int kMinLineLengthFraction = 4
 Denominator of resolution makes min pixels to demand line lengths to be. More...
 
const int kCrackSpacing = 100
 Spacing of cracks across the page to break up tall vertical lines. More...
 
const int kLineFindGridSize = 50
 Grid size used by line finder. Not very critical. More...
 
const int kMinThickLineWidth = 12
 
const int kMaxLineResidue = 6
 
const double kThickLengthMultiple = 0.75
 
const double kMaxNonLineDensity = 0.25
 
const double kMaxStaveHeight = 1.0
 
const double kMinMusicPixelFraction = 0.75
 
bool textord_heavy_nr = false
 
bool textord_show_initial_rows = false
 
bool textord_show_parallel_rows = false
 
bool textord_show_expanded_rows = false
 
bool textord_show_final_rows = false
 
bool textord_show_final_blobs = false
 
bool textord_test_landscape = false
 
bool textord_parallel_baselines = true
 
bool textord_straight_baselines = false
 
bool textord_old_baselines = true
 
bool textord_old_xheight = false
 
bool textord_fix_xheight_bug = true
 
bool textord_fix_makerow_bug = true
 
bool textord_debug_xheights = false
 
int textord_test_x = -INT32_MAX
 
int textord_test_y = -INT32_MAX
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.02
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = true
 
bool textord_debug_blob = false
 
bool textord_oldbl_debug = false
 
const int kMinModeFactorOcropus = 32
 
const int kMinModeFactor = 12
 
int pitsync_linear_version = 6
 
double pitsync_joined_edge = 0.75
 
double pitsync_offset_freecut_fraction = 0.25
 
const double kStrokeWidthFractionTolerance = 0.125
 
const double kStrokeWidthTolerance = 1.5
 
const double kStrokeWidthFractionCJK = 0.25
 
const double kStrokeWidthCJK = 2.0
 
const int kCJKRadius = 2
 
const double kCJKBrokenDistanceFraction = 0.25
 
const int kCJKMaxComponents = 8
 
const double kCJKAspectRatio = 1.25
 
const double kCJKAspectRatioIncrease = 1.0625
 
const int kMaxCJKSizeRatio = 5
 
const double kBrokenCJKIterationFraction = 0.125
 
const double kDiacriticXPadRatio = 7.0
 
const double kDiacriticYPadRatio = 1.75
 
const double kMinDiacriticSizeRatio = 1.0625
 
const double kMaxDiacriticDistanceRatio = 1.25
 
const double kMaxDiacriticGapToBaseCharHeight = 1.0
 
const int kLineTrapLongest = 4
 
const int kLineTrapShortest = 2
 
const int kMostlyOneDirRatio = 3
 
const double kLineResidueAspectRatio = 8.0
 
const int kLineResiduePadRatio = 3
 
const double kLineResidueSizeRatio = 1.75
 
const double kNeighbourSearchFactor = 2.5
 
const double kNoiseOverlapGrowthFactor = 4.0
 
const double kNoiseOverlapAreaFactor = 1.0 / 512
 
const int kTabRadiusFactor = 5
 
const int kMinVerticalSearch = 3
 
const int kMaxVerticalSearch = 12
 
const int kMaxRaggedSearch = 25
 
const int kMinLinesInColumn = 10
 
const double kMinFractionalLinesInColumn = 0.125
 
const double kMaxGutterWidthAbsolute = 2.00
 
const int kRaggedGutterMultiple = 5
 
const double kLineFragmentAspectRatio = 10.0
 
const int kMinEvaluatedTabs = 3
 
const double kCosMaxSkewAngle = 0.866025
 
const int kColumnWidthFactor = 20
 
const int kMaxVerticalSpacing = 500
 
const int kMaxBlobWidth = 500
 
const double kSplitPartitionSize = 2.0
 
const double kAllowTextHeight = 0.5
 
const double kAllowTextWidth = 0.6
 
const double kAllowTextArea = 0.8
 
const double kAllowBlobHeight = 0.3
 
const double kAllowBlobWidth = 0.4
 
const double kAllowBlobArea = 0.05
 
const int kMinBoxesInTextPartition = 10
 
const int kMaxBoxesInDataPartition = 20
 
const double kMaxGapInTextPartition = 4.0
 
const double kMinMaxGapInTextPartition = 0.5
 
const double kMaxBlobOverlapFactor = 4.0
 
const double kMaxTableCellXheight = 2.0
 
const int kMaxColumnHeaderDistance = 4
 
const double kTableColumnThreshold = 3.0
 
const double kMinOverlapWithTable = 0.6
 
const int kSideSpaceMargin = 10
 
const double kSmallTableProjectionThreshold = 0.35
 
const double kLargeTableProjectionThreshold = 0.45
 
const int kLargeTableRowCount = 6
 
const int kMinRowsInTable = 3
 
const int kAdjacentLeaderSearchPadding = 2
 
const double kParagraphEndingPreviousLineRatio = 1.3
 
const double kMaxParagraphEndingLeftSpaceMultiple = 3.0
 
const double kMinParagraphEndingTextToWhitespaceRatio = 3.0
 
const double kMaxXProjectionGapFactor = 2.0
 
const double kStrokeWidthFractionalTolerance = 0.25
 
const double kStrokeWidthConstantTolerance = 2.0
 
const double kHorizontalSpacing = 0.30
 
const double kVerticalSpacing = -0.2
 
const int kCellSplitRowThreshold = 0
 
const int kCellSplitColumnThreshold = 0
 
const int kLinedTableMinVerticalLines = 3
 
const int kLinedTableMinHorizontalLines = 3
 
const double kRequiredColumns = 0.7
 
const double kMarginFactor = 1.1
 
const double kMaxRowSize = 2.5
 
const double kGoodRowNumberOfColumnsSmall [] = {2, 2, 2, 2, 2, 3, 3}
 
const double kGoodRowNumberOfColumnsLarge = 0.7
 
const double kMinFilledArea = 0.35
 
const int kGutterMultiple = 4
 
const int kGutterToNeighbourRatio = 3
 
const int kSimilarVectorDist = 10
 
const int kSimilarRaggedDist = 50
 
const int kMaxFillinMultiple = 11
 
const double kMinGutterFraction = 0.5
 
const double kLineCountReciprocal = 4.0
 
const double kMinAlignedGutter = 0.25
 
const double kMinRaggedGutter = 1.5
 
double textord_tabvector_vertical_gap_fraction = 0.5
 
double textord_tabvector_vertical_box_ratio = 0.5
 
bool textord_debug_pitch_test = false
 
bool textord_fast_pitch_test = false
 
bool textord_debug_pitch_metric = false
 
bool textord_show_row_cuts = false
 
bool textord_show_page_cuts = false
 
bool textord_blockndoc_fixed = false
 
double textord_projection_scale = 0.200
 
double textord_balance_factor = 1.0
 
bool textord_show_initial_words = false
 
bool textord_blocksall_fixed = false
 
bool textord_blocksall_prop = false
 
int textord_dotmatrix_gap = 3
 
int textord_debug_block = 0
 
int textord_pitch_range = 2
 
double textord_wordstats_smooth_factor = 0.05
 
double textord_words_maxspace = 4.0
 
double textord_words_default_maxspace = 3.5
 
double textord_words_default_minspace = 0.6
 
double textord_words_min_minspace = 0.3
 
double textord_words_default_nonspace = 0.2
 
double textord_words_initial_lower = 0.25
 
double textord_words_initial_upper = 0.15
 
double textord_words_minlarge = 0.75
 
double textord_words_pitchsd_threshold = 0.040
 
double textord_words_def_fixed = 0.016
 
double textord_words_def_prop = 0.090
 
int textord_words_veto_power = 5
 
double textord_pitch_rowsimilarity = 0.08
 
bool textord_pitch_scalebigwords = false
 
double words_initial_lower = 0.5
 
double words_initial_upper = 0.15
 
double words_default_prop_nonspace = 0.25
 
double words_default_fixed_space = 0.75
 
double words_default_fixed_limit = 0.6
 
double textord_words_definite_spread = 0.30
 
double textord_spacesize_ratioprop = 2.0
 
double textord_fpiqr_ratio = 1.5
 
double textord_max_pitch_iqr = 0.20
 
double textord_underline_offset = 0.1
 
bool textord_restore_underlines = true
 
bool textord_force_make_prop_words = false
 
bool textord_chopper_test = false
 
CLUSTERCONFIG Config = {elliptical, 0.625, 0.05, 1.0, 1e-6, 0}
 
FEATURE_DEFS_STRUCT feature_defs
 
const double kRatingEpsilon = 1.0 / 32
 
const int kMaxOffsetDist = 32
 
const int kMinClusteredShapes = 1
 
const int kMaxUnicharsPerCluster = 2000
 
const float kFontMergeDistance = 0.025
 
const float kInfiniteDist = 999.0f
 
const int kTestChar = -1
 
const int kSquareLimit = 25
 
const int kPrime1 = 17
 
const int kPrime2 = 13
 
const float kRotationRange = 0.02f
 
const int kExposureFactor = 16
 
const int kSaltnPepper = 5
 
const int kMinRampSize = 1000
 
const int kMaxLineLength = 1024
 
const int kMinLigature = 0xfb00
 
const int kMaxLigature = 0xfb17
 
const int kDefaultResolution = 300
 
const double kMinDivergenceRate = 50.0
 
const int kMinStallIterations = 10000
 
const double kSubTrainerMarginFraction = 3.0 / 128
 
const double kLearningRateDecay = M_SQRT1_2
 
const int kNumAdjustmentIterations = 100
 
const int kErrorGraphInterval = 1000
 
const int kNumPagesPerBatch = 100
 
const int kMinStartedErrorRate = 75
 
const double kStageTransitionThreshold = 10.0
 
const double kHighConfidence = 0.9375
 
const double kImprovementFraction = 15.0 / 16.0
 
const double kBestCheckpointFraction = 31.0 / 32.0
 
const int kTargetXScale = 5
 
const int kTargetYScale = 100
 
const int kSvPort = 8461
 
const int kMaxMsgSize = 4096
 
const int kMaxIntPairSize = 45
 
ScrollView * fx_win = nullptr
 
ScrollView * edge_window = nullptr
 
ScrollView * blob_window = nullptr
 
ScrollView::Color color_list []
 
bool wordrec_display_all_blobs = 0
 
bool wordrec_blob_pause = 0
 
const char * kTruthTextWords = "To simple burn running of goods lately.\n"
 
const char * kTruthTextLine = "Tosimpleburnrunningofgoodslately.\n"
 
int test_data [] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}
 
const char * kStrings8087_054 [] = {"dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", nullptr}
 
const PolyBlockType kBlocks8087_054 []
 
const int kTrainerIterations = 600
 
const int kBatchIterations = 100
 
const char kEngText [] = "the quick brown fox jumps over the lazy dog"
 
const char kHinText [] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा"
 
const char kKorText [] = "이는 것으로"
 
const char * kBadlyFormedHinWords [] = {"उपयोक्ताो", "नहीें", "प्रंात", "कहीअे", "पत्रिाका", "छह्णाीस"}
 
const char * kBadlyFormedThaiWords [] = {"ฤิ", "กา้ํ", "กิำ", "นำ้", "เเก"}
 
const char * kExpectedFontNames []
 
const char kArabicText [] = "والفكر والصراع 1234,\nوالفكر والصراع"
 
const ParagraphJustification kLeft = JUSTIFICATION_LEFT
 
const ParagraphJustification kCenter = JUSTIFICATION_CENTER
 
const ParagraphJustification kRight = JUSTIFICATION_RIGHT
 
const ParagraphJustification kUnknown = JUSTIFICATION_UNKNOWN
 
const TextAndModel kTwoSimpleParagraphs []
 
const TextAndModel kFewCluesWithCrown []
 
const TextAndModel kCrownedParagraph []
 
const TextAndModel kFlushLeftParagraphs []
 
const TextAndModel kSingleFullPageContinuation []
 
const TextAndModel kRightAligned []
 
const TextAndModel kTinyParagraphs []
 
const TextAndModel kComplexPage1 []
 
const TextAndModel kComplexPage2 []
 
const TextAndModel kSubtleCrown []
 
const TextAndModel kUnlvRep3AO []
 
const TextAndModel kTableOfContents []
 
const TextAndModel kTextWithSourceCode []
 
const TextAndModel kOldManAndSea []
 
const TextAndModel kNewZealandIndex []
 
const int kNumChars = 100
 
const int kPadding = 64
 
const char * kGWRTops []
 
const float kGWRTopScores []
 
const char * kGWR2nds []
 
const float kGWR2ndScores []
 
const char * kZHTops [] = {"实", "学", "储", "啬", "投", "学", "生", nullptr}
 
const float kZHTopScores [] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98}
 
const char * kZH2nds [] = {"学", "储", "投", "生", "学", "生", "实", nullptr}
 
const float kZH2ndScores [] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}
 
const char * kViTops [] = {"v", "ậ", "y", " ", "t", "ộ", "i", nullptr}
 
const float kViTopScores [] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.97}
 
const char * kVi2nds [] = {"V", "a", "v", "", "l", "o", "", nullptr}
 
const float kVi2ndScores [] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}
 
const int kTestData [] = {2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1}
 
const char kMixedText [] = "والفكر 123 والصراع abc"
 
const char kEngNonLigatureText [] = "fidelity"
 
const char kEngLigatureText [] = "ﬁdelity"
 

NormEvidenceOf

Return the new type of evidence number corresponding to this normalization adjustment. The equation that represents the transform is: 1 / (1 + (NormAdj / midpoint) ^ curl)

double classify_norm_adj_midpoint = 32.0
 
double classify_norm_adj_curl = 2.0
 
const double kWidthErrorWeighting = 0.125
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void vigorous_noise_removal (TO_BLOCK *block)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

    <UTF8 str> <left> <bottom> <right> <top> <page id>

and for word/line-level boxes:

    WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. E.g.:

    word -> #w o r d
    multi word line -> #m u l t i w o r d l i n e

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, e.g. Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters
page_res: recognise blobs
selection_box: within this box

Include Files and Type Defines

Typedef Documentation

◆ BLOB_CHOICE_LIST_VECTOR

using tesseract::BLOB_CHOICE_LIST_VECTOR = typedef std::vector<BLOB_CHOICE_LIST *>

Definition at line 627 of file ratngs.h.

◆ BLOB_WIDTH

using tesseract::BLOB_WIDTH = typedef uint8_t

Definition at line 29 of file stopper.h.

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

Definition at line 30 of file blobgrid.h.

◆ CANCEL_FUNC

using tesseract::CANCEL_FUNC = typedef bool (*)(void *, int)

Definition at line 98 of file ocrclass.h.

◆ char32

using tesseract::char32 = typedef signed int

Definition at line 49 of file unichar.h.

◆ CHAR_FEATURES

using tesseract::CHAR_FEATURES = typedef char *

Definition at line 93 of file ocrfeatures.h.

◆ CLASS_ID

A CLASS_ID is the ASCII character to be associated with a class.

Definition at line 34 of file matchdefs.h.

◆ CLASS_TYPE

Definition at line 49 of file protos.h.

◆ CLASSES

using tesseract::CLASSES = typedef CLASS_STRUCT *

Definition at line 50 of file protos.h.

◆ ClusterHeap

Definition at line 1263 of file cluster.cpp.

◆ ClusterPair

Definition at line 1262 of file cluster.cpp.

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

Definition at line 918 of file colpartition.h.

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 109 of file tablefind.h.

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 110 of file tablefind.h.

◆ CONFIG_PRUNER

typedef uint32_t tesseract::CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]

Definition at line 91 of file intproto.h.

◆ DANGERR

using tesseract::DANGERR = typedef std::vector<DANGERR_INFO>

Definition at line 47 of file stopper.h.

◆ DawgVector

using tesseract::DawgVector = typedef std::vector<Dawg *>

Definition at line 57 of file dict.h.

◆ DENSITYFUNC

using tesseract::DENSITYFUNC = typedef double (*)(int32_t)

Definition at line 1311 of file cluster.cpp.

◆ DictFunc

using tesseract::DictFunc = typedef int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const

Definition at line 63 of file baseapi.h.

◆ DotProductFunction

using tesseract::DotProductFunction = typedef TFloat (*)(const TFloat *, const TFloat *, int)

Definition at line 26 of file simddetect.h.

◆ EDGE_ARRAY

Definition at line 48 of file dawg.h.

◆ EDGE_INDEX

using tesseract::EDGE_INDEX = typedef int64_t

Definition at line 38 of file trie.h.

◆ EDGE_RECORD

using tesseract::EDGE_RECORD = typedef uint64_t

Definition at line 47 of file dawg.h.

◆ EDGE_REF

using tesseract::EDGE_REF = typedef int64_t

Definition at line 49 of file dawg.h.

◆ EDGE_VECTOR

using tesseract::EDGE_VECTOR = typedef std::vector<EDGE_RECORD>

Definition at line 39 of file trie.h.

◆ FEATURE

Definition at line 68 of file ocrfeatures.h.

◆ FEATURE_DEFS

Definition at line 45 of file featdefs.h.

◆ FEATURE_DESC

Definition at line 56 of file ocrfeatures.h.

◆ FEATURE_ID

using tesseract::FEATURE_ID = typedef uint8_t

FEATURE_ID is the index of a feature within a character description. The feature id ranges from 0 to N-1 where N is the number of features in a character description.

Definition at line 46 of file matchdefs.h.

◆ FEATURE_SET

Definition at line 87 of file ocrfeatures.h.

◆ FileReader

using tesseract::FileReader = typedef bool (*)(const char *filename, std::vector<char> *data)

Definition at line 61 of file baseapi.h.

◆ FileWriter

using tesseract::FileWriter = typedef bool (*)(const std::vector<char> &data, const char *filename)

Definition at line 40 of file serialis.h.

◆ FontSet

using tesseract::FontSet = typedef std::vector<int>

Definition at line 154 of file fontinfo.h.

◆ int_compare

using tesseract::int_compare = typedef int (*)(void *, void *)

Definition at line 77 of file oldlist.h.

◆ INT_FEATURE_ARRAY

typedef INT_FEATURE_STRUCT tesseract::INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]

Definition at line 137 of file intproto.h.

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

Definition at line 191 of file kdpair.h.

◆ kdwalk_proc

using tesseract::kdwalk_proc = typedef void (*)(ClusteringContext *context, CLUSTER *Cluster, int32_t Level)

Definition at line 39 of file kdtree.h.

◆ LABELEDLIST

Definition at line 89 of file commontraining.h.

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

Definition at line 35 of file lm_state.h.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string>

Definition at line 36 of file ligature_table.h.

◆ LIST

using tesseract::LIST = typedef list_rec *

Definition at line 125 of file oldlist.h.

◆ MatrixCoordPair

Definition at line 724 of file matrix.h.

◆ MERGE_CLASS

Definition at line 98 of file commontraining.h.

◆ MFOUTLINE

using tesseract::MFOUTLINE = typedef LIST

Definition at line 28 of file mfoutline.h.

◆ MicroFeature

using tesseract::MicroFeature = typedef std::array<float, (int)MicroFeatureParameter::MFCount>

Definition at line 36 of file mfdefs.h.

◆ MICROFEATURES

using tesseract::MICROFEATURES = typedef std::forward_list<MicroFeature>

Definition at line 37 of file mfdefs.h.

◆ NODE_MAP

using tesseract::NODE_MAP = typedef EDGE_REF *

Definition at line 51 of file dawg.h.

◆ NODE_REF

using tesseract::NODE_REF = typedef int64_t

Definition at line 50 of file dawg.h.

◆ NodeChildVector

using tesseract::NodeChildVector = typedef std::vector<NodeChild>

Definition at line 60 of file dawg.h.

◆ PainPointHeap

Definition at line 36 of file lm_pain_points.h.

◆ ParamsTrainingHypothesisList

Definition at line 126 of file params_training_featdef.h.

◆ PartSetVector

using tesseract::PartSetVector = typedef std::vector<ColPartitionSet *>

Definition at line 32 of file colpartitionset.h.

◆ PModel

Definition at line 229 of file paragraphs_test.cc.

◆ PointHeap

Definition at line 32 of file chop.h.

◆ PointPair

using tesseract::PointPair = typedef KDPairInc<float, EDGEPT *>

Definition at line 31 of file chop.h.

◆ PRIORITY

using tesseract::PRIORITY = typedef float

Definition at line 31 of file seam.h.

◆ ProbabilityInContextFunc

using tesseract::ProbabilityInContextFunc = typedef double (Dict::*)(const char *, const char *, int, const char *, int)

Definition at line 65 of file baseapi.h.

◆ PROGRESS_FUNC

using tesseract::PROGRESS_FUNC = typedef bool (*)(int, int, int, int, int)

Definition at line 99 of file ocrclass.h.

◆ PROGRESS_FUNC2

using tesseract::PROGRESS_FUNC2 = typedef bool (*)(ETEXT_DESC *, int, int, int, int)

Definition at line 100 of file ocrclass.h.

◆ PROTO_ID

using tesseract::PROTO_ID = typedef int16_t

A PROTO_ID is the index of a prototype within its class. Valid proto ids are 0 to N-1, where N is the number of prototypes that make up the class.

Definition at line 40 of file matchdefs.h.

◆ PROTO_PRUNER

typedef uint32_t tesseract::PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]

Definition at line 84 of file intproto.h.

◆ RecodeHeap

Definition at line 178 of file recodebeam.h.

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

Definition at line 177 of file recodebeam.h.

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

Definition at line 48 of file unicharcompress.cpp.

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

Definition at line 46 of file unicharcompress.cpp.

◆ SAMPLE

using tesseract::SAMPLE = typedef CLUSTER

Definition at line 51 of file cluster.h.

◆ SeamDecPair

using tesseract::SeamDecPair = typedef KDPtrPairDec<float, SEAM>

Definition at line 33 of file findseam.h.

◆ SeamPair

using tesseract::SeamPair = typedef KDPtrPairInc<float, SEAM>

Definition at line 30 of file findseam.h.

◆ SeamPile

Definition at line 34 of file findseam.h.

◆ SeamQueue

Definition at line 31 of file findseam.h.

◆ SetOfModels

using tesseract::SetOfModels = typedef std::vector<const ParagraphModel *>

Definition at line 91 of file paragraphs_internal.h.

◆ ShapeQueue

Definition at line 125 of file shapetable.h.

◆ SOLVEFUNC

using tesseract::SOLVEFUNC = typedef double (*)(CHISTRUCT *, double)

Definition at line 1312 of file cluster.cpp.

◆ SuccessorList

using tesseract::SuccessorList = typedef std::vector<int>

Definition at line 61 of file dawg.h.

◆ SuccessorListsVector

using tesseract::SuccessorListsVector = typedef std::vector<SuccessorList *>

Definition at line 62 of file dawg.h.

◆ TDimension

using tesseract::TDimension = typedef int16_t

Definition at line 32 of file tesstypes.h.

◆ TestCallback

using tesseract::TestCallback = typedef std::function<std::string(int, const double *, const TessdataManager &, int)>

Definition at line 77 of file lstmtrainer.h.

◆ TFloat

using tesseract::TFloat = typedef double

Definition at line 39 of file tesstypes.h.

◆ TRIE_NODES

using tesseract::TRIE_NODES = typedef std::vector<TRIE_NODE_RECORD *>

Definition at line 45 of file trie.h.

◆ UNICHAR_ID

using tesseract::UNICHAR_ID = typedef int

Definition at line 34 of file unichar.h.

◆ UnicharAmbigsVector

using tesseract::UnicharAmbigsVector = typedef std::vector<AmbigSpec_LIST *>

Definition at line 140 of file ambigs.h.

◆ UnicharIdVector

using tesseract::UnicharIdVector = typedef std::vector<UNICHAR_ID>

Definition at line 38 of file ambigs.h.

◆ VECTOR

using tesseract::VECTOR = typedef TPOINT

Definition at line 93 of file blobs.h.

◆ void_dest

using tesseract::void_dest = typedef void (*)(void *)

Definition at line 78 of file oldlist.h.

◆ WidthCallback

using tesseract::WidthCallback = typedef std::function<bool(int)>

Definition at line 35 of file tabfind.h.

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 73 of file textord.h.

◆ WordRecognizer

using tesseract::WordRecognizer = typedef void (Tesseract::*)(const WordData &, WERD_RES **, PointerVector<WERD_RES> *)

Definition at line 175 of file tesseractclass.h.

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 74 of file textord.h.

Enumeration Type Documentation

◆ AmbigType

Enumerator
NOT_AMBIG 
REPLACE_AMBIG 
DEFINITE_AMBIG 
SIMILAR_AMBIG 
CASE_AMBIG 
AMBIG_TYPE_COUNT 

Definition at line 40 of file ambigs.h.

40 {
41 NOT_AMBIG, // the ngram pair is not ambiguous
42 REPLACE_AMBIG, // ocred ngram should always be substituted with correct
43 DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1)
44 SIMILAR_AMBIG, // use pairwise classifier for ocred/correct pair (1-1)
45 CASE_AMBIG, // this is a case ambiguity (1-1)
46
47 AMBIG_TYPE_COUNT // number of enum entries
48};
@ CASE_AMBIG
Definition: ambigs.h:45
@ DEFINITE_AMBIG
Definition: ambigs.h:43
@ REPLACE_AMBIG
Definition: ambigs.h:42
@ AMBIG_TYPE_COUNT
Definition: ambigs.h:47
@ SIMILAR_AMBIG
Definition: ambigs.h:44
@ NOT_AMBIG
Definition: ambigs.h:41

◆ BlobChoiceClassifier

Enumerator
BCC_STATIC_CLASSIFIER 
BCC_ADAPTED_CLASSIFIER 
BCC_SPECKLE_CLASSIFIER 
BCC_AMBIG 
BCC_FAKE 

Definition at line 48 of file ratngs.h.

48 {
49 BCC_STATIC_CLASSIFIER, // From the char_norm classifier.
50 BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
51 BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
52 BCC_AMBIG, // Generated by ambiguity detection.
53 BCC_FAKE, // From some other process.
54};
@ BCC_AMBIG
Definition: ratngs.h:52
@ BCC_FAKE
Definition: ratngs.h:53
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:51
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:49
@ BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:50

◆ BlobNeighbourDir

Enumerator
BND_LEFT 
BND_BELOW 
BND_RIGHT 
BND_ABOVE 
BND_COUNT 

Definition at line 89 of file blobbox.h.

@ BND_LEFT
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:89
@ BND_BELOW
Definition: blobbox.h:89
@ BND_ABOVE
Definition: blobbox.h:89
@ BND_COUNT
Definition: blobbox.h:89

◆ BlobRegionType

Enumerator
BRT_NOISE 
BRT_HLINE 
BRT_VLINE 
BRT_RECTIMAGE 
BRT_POLYIMAGE 
BRT_UNKNOWN 
BRT_VERT_TEXT 
BRT_TEXT 
BRT_COUNT 

Definition at line 74 of file blobbox.h.

74 {
75 BRT_NOISE, // Neither text nor image.
76 BRT_HLINE, // Horizontal separator line.
77 BRT_VLINE, // Vertical separator line.
78 BRT_RECTIMAGE, // Rectangular image.
79 BRT_POLYIMAGE, // Non-rectangular image.
80 BRT_UNKNOWN, // Not determined yet.
81 BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
82 BRT_TEXT, // Convincing text.
83
84 BRT_COUNT // Number of possibilities.
85};
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_COUNT
Definition: blobbox.h:84
@ BRT_HLINE
Definition: blobbox.h:76
@ BRT_NOISE
Definition: blobbox.h:75
@ BRT_VLINE
Definition: blobbox.h:77
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_VERT_TEXT
Definition: blobbox.h:81
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BRT_RECTIMAGE
Definition: blobbox.h:78

◆ BlobSpecialTextType

Enumerator
BSTT_NONE 
BSTT_ITALIC 
BSTT_DIGIT 
BSTT_MATH 
BSTT_UNCLEAR 
BSTT_SKIP 
BSTT_COUNT 

Definition at line 92 of file blobbox.h.

92 {
93 BSTT_NONE, // No special.
94 BSTT_ITALIC, // Italic style.
95 BSTT_DIGIT, // Digit symbols.
96 BSTT_MATH, // Mathematical symbols (not including digit).
97 BSTT_UNCLEAR, // Characters with low recognition rate.
98 BSTT_SKIP, // Characters that we skip labeling (usually too small).
100};
@ BSTT_SKIP
Definition: blobbox.h:98
@ BSTT_MATH
Definition: blobbox.h:96
@ BSTT_UNCLEAR
Definition: blobbox.h:97
@ BSTT_DIGIT
Definition: blobbox.h:95
@ BSTT_ITALIC
Definition: blobbox.h:94
@ BSTT_NONE
Definition: blobbox.h:93
@ BSTT_COUNT
Definition: blobbox.h:99

◆ BlobTextFlowType

Enumerator
BTFT_NONE 
BTFT_NONTEXT 
BTFT_NEIGHBOURS 
BTFT_CHAIN 
BTFT_STRONG_CHAIN 
BTFT_TEXT_ON_IMAGE 
BTFT_LEADER 
BTFT_COUNT 

Definition at line 110 of file blobbox.h.

110 {
111 BTFT_NONE, // No text flow set yet.
112 BTFT_NONTEXT, // Flow too poor to be likely text.
113 BTFT_NEIGHBOURS, // Neighbours support flow in this direction.
114 BTFT_CHAIN, // There is a weak chain of text in this direction.
115 BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction.
116 BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
117 BTFT_LEADER, // Leader dots/dashes etc.
119};
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:115
@ BTFT_NONE
Definition: blobbox.h:111
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_LEADER
Definition: blobbox.h:117
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116
@ BTFT_COUNT
Definition: blobbox.h:118
@ BTFT_NEIGHBOURS
Definition: blobbox.h:113
@ BTFT_NONTEXT
Definition: blobbox.h:112

◆ C_OUTLINE_FLAGS

Enumerator
COUT_INVERSE 

Definition at line 45 of file coutln.h.

45 {
46 COUT_INVERSE // White on black blob
47};
@ COUT_INVERSE
Definition: coutln.h:46

◆ CachingStrategy

Enumerator
CS_SEQUENTIAL 
CS_ROUND_ROBIN 

Definition at line 42 of file imagedata.h.

42 {
43 // Reads all of one file before moving on to the next. Requires samples to be
44 // shuffled across files. Uses the count of samples in the first file as
45 // the count in all the files to achieve high-speed random access. As a
46 // consequence, if subsequent files are smaller, they get entries used more
47 // than once, and if subsequent files are larger, some entries are not used.
48 // Best for larger data sets that don't fit in memory.
50 // Reads one sample from each file in rotation. Does not require shuffled
51 // samples, but is extremely disk-intensive. Samples in smaller files also
52 // get used more often than samples in larger files.
53 // Best for smaller data sets that mostly fit in memory.
55};
@ CS_SEQUENTIAL
Definition: imagedata.h:49
@ CS_ROUND_ROBIN
Definition: imagedata.h:54

◆ CharSegmentationType

Enumerator
CST_FRAGMENT 
CST_WHOLE 
CST_IMPROPER 
CST_NGRAM 

Definition at line 87 of file classify.h.

87 {
88 CST_FRAGMENT, // A partial character.
89 CST_WHOLE, // A correctly segmented character.
90 CST_IMPROPER, // More than one but less than 2 characters.
91 CST_NGRAM // Multiple characters.
92};
@ CST_IMPROPER
Definition: classify.h:90
@ CST_NGRAM
Definition: classify.h:91
@ CST_WHOLE
Definition: classify.h:89
@ CST_FRAGMENT
Definition: classify.h:88

◆ CMD_EVENTS [1/2]

Enumerator
NULL_CMD_EVENT 
CHANGE_DISP_CMD_EVENT 
DUMP_WERD_CMD_EVENT 
SHOW_POINT_CMD_EVENT 
SHOW_BLN_WERD_CMD_EVENT 
DEBUG_WERD_CMD_EVENT 
BLAMER_CMD_EVENT 
BOUNDING_BOX_CMD_EVENT 
CORRECT_TEXT_CMD_EVENT 
POLYGONAL_CMD_EVENT 
BL_NORM_CMD_EVENT 
BITMAP_CMD_EVENT 
IMAGE_CMD_EVENT 
BLOCKS_CMD_EVENT 
BASELINES_CMD_EVENT 
UNIFORM_DISP_CMD_EVENT 
REFRESH_CMD_EVENT 
QUIT_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
SHOW_BLOB_FEATURES 
SHOW_SUBSCRIPT_CMD_EVENT 
SHOW_SUPERSCRIPT_CMD_EVENT 
SHOW_ITALIC_CMD_EVENT 
SHOW_BOLD_CMD_EVENT 
SHOW_UNDERLINE_CMD_EVENT 
SHOW_FIXEDPITCH_CMD_EVENT 
SHOW_SERIF_CMD_EVENT 
SHOW_SMALLCAPS_CMD_EVENT 
SHOW_DROPCAPS_CMD_EVENT 
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 

Definition at line 50 of file pgedit.cpp.

50 {
81};
@ SHOW_SUBSCRIPT_CMD_EVENT
Definition: pgedit.cpp:72
@ DEBUG_WERD_CMD_EVENT
Definition: pgedit.cpp:56
@ SHOW_UNDERLINE_CMD_EVENT
Definition: pgedit.cpp:76
@ SHOW_SERIF_CMD_EVENT
Definition: pgedit.cpp:78
@ BASELINES_CMD_EVENT
Definition: pgedit.cpp:65
@ SHOW_BOLD_CMD_EVENT
Definition: pgedit.cpp:75
@ BLAMER_CMD_EVENT
Definition: pgedit.cpp:57
@ SHOW_BLN_WERD_CMD_EVENT
Definition: pgedit.cpp:55
@ RECOG_PSEUDO
Definition: pgedit.cpp:70
@ SHOW_SUPERSCRIPT_CMD_EVENT
Definition: pgedit.cpp:73
@ BL_NORM_CMD_EVENT
Definition: pgedit.cpp:61
@ REFRESH_CMD_EVENT
Definition: pgedit.cpp:67
@ BITMAP_CMD_EVENT
Definition: pgedit.cpp:62
@ DUMP_WERD_CMD_EVENT
Definition: pgedit.cpp:53
@ SHOW_BLOB_FEATURES
Definition: pgedit.cpp:71
@ SHOW_POINT_CMD_EVENT
Definition: pgedit.cpp:54
@ IMAGE_CMD_EVENT
Definition: pgedit.cpp:63
@ RECOG_WERDS
Definition: pgedit.cpp:69
@ NULL_CMD_EVENT
Definition: pgedit.cpp:51
@ SHOW_DROPCAPS_CMD_EVENT
Definition: pgedit.cpp:80
@ SHOW_FIXEDPITCH_CMD_EVENT
Definition: pgedit.cpp:77
@ CHANGE_DISP_CMD_EVENT
Definition: pgedit.cpp:52
@ CORRECT_TEXT_CMD_EVENT
Definition: pgedit.cpp:59
@ BOUNDING_BOX_CMD_EVENT
Definition: pgedit.cpp:58
@ BLOCKS_CMD_EVENT
Definition: pgedit.cpp:64
@ POLYGONAL_CMD_EVENT
Definition: pgedit.cpp:60
@ UNIFORM_DISP_CMD_EVENT
Definition: pgedit.cpp:66
@ QUIT_CMD_EVENT
Definition: pgedit.cpp:68
@ SHOW_SMALLCAPS_CMD_EVENT
Definition: pgedit.cpp:79
@ SHOW_ITALIC_CMD_EVENT
Definition: pgedit.cpp:74

◆ CMD_EVENTS [2/2]

Enumerator
NULL_CMD_EVENT 
CHANGE_DISP_CMD_EVENT 
DUMP_WERD_CMD_EVENT 
SHOW_POINT_CMD_EVENT 
SHOW_BLN_WERD_CMD_EVENT 
DEBUG_WERD_CMD_EVENT 
BLAMER_CMD_EVENT 
BOUNDING_BOX_CMD_EVENT 
CORRECT_TEXT_CMD_EVENT 
POLYGONAL_CMD_EVENT 
BL_NORM_CMD_EVENT 
BITMAP_CMD_EVENT 
IMAGE_CMD_EVENT 
BLOCKS_CMD_EVENT 
BASELINES_CMD_EVENT 
UNIFORM_DISP_CMD_EVENT 
REFRESH_CMD_EVENT 
QUIT_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
SHOW_BLOB_FEATURES 
SHOW_SUBSCRIPT_CMD_EVENT 
SHOW_SUPERSCRIPT_CMD_EVENT 
SHOW_ITALIC_CMD_EVENT 
SHOW_BOLD_CMD_EVENT 
SHOW_UNDERLINE_CMD_EVENT 
SHOW_FIXEDPITCH_CMD_EVENT 
SHOW_SERIF_CMD_EVENT 
SHOW_SMALLCAPS_CMD_EVENT 
SHOW_DROPCAPS_CMD_EVENT 
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 

Definition at line 463 of file tessedit.cpp.

◆ ColorationMode

Enumerator
CM_RAINBOW 
CM_SUBSCRIPT 
CM_SUPERSCRIPT 
CM_ITALIC 
CM_BOLD 
CM_UNDERLINE 
CM_FIXEDPITCH 
CM_SERIF 
CM_SMALLCAPS 
CM_DROPCAPS 

Definition at line 83 of file pgedit.cpp.

83 {
88 CM_BOLD,
94};
@ CM_ITALIC
Definition: pgedit.cpp:87
@ CM_SUBSCRIPT
Definition: pgedit.cpp:85
@ CM_RAINBOW
Definition: pgedit.cpp:84
@ CM_FIXEDPITCH
Definition: pgedit.cpp:90
@ CM_BOLD
Definition: pgedit.cpp:88
@ CM_SMALLCAPS
Definition: pgedit.cpp:92
@ CM_SUPERSCRIPT
Definition: pgedit.cpp:86
@ CM_SERIF
Definition: pgedit.cpp:91
@ CM_DROPCAPS
Definition: pgedit.cpp:93
@ CM_UNDERLINE
Definition: pgedit.cpp:89

◆ ColSegType

Enumerator
COL_UNKNOWN 
COL_TEXT 
COL_TABLE 
COL_MIXED 
COL_COUNT 

Definition at line 29 of file tablefind.h.

◆ ColumnSpanningType

Enumerator
CST_NOISE 
CST_FLOWING 
CST_HEADING 
CST_PULLOUT 
CST_COUNT 

Definition at line 47 of file colpartition.h.

47 {
48 CST_NOISE, // Strictly between columns.
49 CST_FLOWING, // Strictly within a single column.
50 CST_HEADING, // Spans multiple columns.
51 CST_PULLOUT, // Touches multiple columns, but doesn't span them.
52 CST_COUNT // Number of entries.
53};

◆ CountTypes

Enumerator
CT_UNICHAR_TOP_OK 
CT_UNICHAR_TOP1_ERR 
CT_UNICHAR_TOP2_ERR 
CT_UNICHAR_TOPN_ERR 
CT_UNICHAR_TOPTOP_ERR 
CT_OK_MULTI_UNICHAR 
CT_OK_JOINED 
CT_OK_BROKEN 
CT_REJECT 
CT_FONT_ATTR_ERR 
CT_OK_MULTI_FONT 
CT_NUM_RESULTS 
CT_RANK 
CT_REJECTED_JUNK 
CT_ACCEPTED_JUNK 
CT_SIZE 

Definition at line 69 of file errorcounter.h.

69 {
70 CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id.
71 // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of
72 // kRatingEpsilon from the first result in each group. The real top choice
73 // is measured using TOPTOP.
74 CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id.
75 CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id.
76 CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id.
77 CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct.
78 CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others.
79 CT_OK_JOINED, // Top shape id is correct but marked joined.
80 CT_OK_BROKEN, // Top shape id is correct but marked broken.
81 CT_REJECT, // Classifier hates this.
82 CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect.
83 CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs.
84 CT_NUM_RESULTS, // Number of answers produced.
85 CT_RANK, // Rank of correct answer.
86 CT_REJECTED_JUNK, // Junk that was correctly rejected.
87 CT_ACCEPTED_JUNK, // Junk that was incorrectly classified otherwise.
88
89 CT_SIZE // Number of types for array sizing.
90};
@ CT_UNICHAR_TOPN_ERR
Definition: errorcounter.h:76
@ CT_UNICHAR_TOP_OK
Definition: errorcounter.h:70
@ CT_UNICHAR_TOP1_ERR
Definition: errorcounter.h:74
@ CT_UNICHAR_TOP2_ERR
Definition: errorcounter.h:75
@ CT_OK_MULTI_FONT
Definition: errorcounter.h:83
@ CT_REJECTED_JUNK
Definition: errorcounter.h:86
@ CT_UNICHAR_TOPTOP_ERR
Definition: errorcounter.h:77
@ CT_FONT_ATTR_ERR
Definition: errorcounter.h:82
@ CT_ACCEPTED_JUNK
Definition: errorcounter.h:87
@ CT_OK_MULTI_UNICHAR
Definition: errorcounter.h:78
@ CT_NUM_RESULTS
Definition: errorcounter.h:84

◆ CRUNCH_MODE

Enumerator
CR_NONE 
CR_KEEP_SPACE 
CR_LOOSE_SPACE 
CR_DELETE 

Definition at line 160 of file pageres.h.

◆ DawgType

Enumerator
DAWG_TYPE_PUNCTUATION 
DAWG_TYPE_WORD 
DAWG_TYPE_NUMBER 
DAWG_TYPE_PATTERN 
DAWG_TYPE_COUNT 

Definition at line 64 of file dawg.h.

64 {
69
70 DAWG_TYPE_COUNT // number of enum entries
71};
@ DAWG_TYPE_NUMBER
Definition: dawg.h:67
@ DAWG_TYPE_PATTERN
Definition: dawg.h:68
@ DAWG_TYPE_WORD
Definition: dawg.h:66
@ DAWG_TYPE_PUNCTUATION
Definition: dawg.h:65
@ DAWG_TYPE_COUNT
Definition: dawg.h:70

◆ DIRECTION

enum tesseract::DIRECTION : uint8_t
Enumerator
north 
south 
east 
west 
northeast 
northwest 
southeast 
southwest 

Definition at line 30 of file mfoutline.h.

@ northeast
Definition: mfoutline.h:30
@ southeast
Definition: mfoutline.h:30
@ northwest
Definition: mfoutline.h:30
@ southwest
Definition: mfoutline.h:30

◆ DISPLAY_FLAGS

Enumerator
DF_BOX 

Bounding box.

DF_TEXT 

Correct ascii.

DF_POLYGONAL 

Polyg approx.

DF_EDGE_STEP 

Edge steps.

DF_BN_POLYGONAL 

BL normalised polyapx.

DF_BLAMER 

Blamer information.

Definition at line 46 of file werd.h.

46 {
47 /* Display flags bit number allocations */
48 DF_BOX,
49 DF_TEXT,
54};
@ DF_POLYGONAL
Polyg approx.
Definition: werd.h:50
@ DF_BLAMER
Blamer information.
Definition: werd.h:53
@ DF_BOX
Bounding box.
Definition: werd.h:48
@ DF_BN_POLYGONAL
BL normalised polyapx.
Definition: werd.h:52
@ DF_EDGE_STEP
Edge steps.
Definition: werd.h:51
@ DF_TEXT
Correct ascii.
Definition: werd.h:49

◆ DISTRIBUTION

Enumerator
normal 
uniform 
D_random 
DISTRIBUTION_COUNT 

Definition at line 65 of file cluster.h.

◆ ErrorTypes

Enumerator
ET_RMS 
ET_DELTA 
ET_WORD_RECERR 
ET_CHAR_ERROR 
ET_SKIP_RATIO 
ET_COUNT 

Definition at line 41 of file lstmtrainer.h.

41 {
42 ET_RMS, // RMS activation error.
43 ET_DELTA, // Number of big errors in deltas.
44 ET_WORD_RECERR, // Output text string word recall error.
45 ET_CHAR_ERROR, // Output text string total char error.
46 ET_SKIP_RATIO, // Fraction of samples skipped.
47 ET_COUNT // For array sizing.
48};
@ ET_WORD_RECERR
Definition: lstmtrainer.h:44
@ ET_SKIP_RATIO
Definition: lstmtrainer.h:46
@ ET_CHAR_ERROR
Definition: lstmtrainer.h:45

◆ FactorNames

Enumerator
FN_INCOLOR 
FN_Y0 
FN_Y1 
FN_Y2 
FN_Y3 
FN_X0 
FN_X1 
FN_SHEAR 
FN_NUM_FACTORS 

Definition at line 39 of file degradeimage.cpp.

39 {
41 FN_Y0,
42 FN_Y1,
43 FN_Y2,
44 FN_Y3,
45 FN_X0,
46 FN_X1,
48 // x2 = x1 - shear
49 // x3 = x0 + shear
51};

◆ FlexDimensions

Enumerator
FD_BATCH 
FD_HEIGHT 
FD_WIDTH 
FD_DIMSIZE 

Definition at line 32 of file stridemap.h.

32 {
33 FD_BATCH, // Index of multiple images.
34 FD_HEIGHT, // y-coordinate in image.
35 FD_WIDTH, // x-coordinate in image.
36 FD_DIMSIZE, // Number of flexible non-depth dimensions.
37};
@ FD_WIDTH
Definition: stridemap.h:35
@ FD_DIMSIZE
Definition: stridemap.h:36
@ FD_BATCH
Definition: stridemap.h:33
@ FD_HEIGHT
Definition: stridemap.h:34

◆ GARBAGE_LEVEL

Enumerator
G_NEVER_CRUNCH 
G_OK 
G_DODGY 
G_TERRIBLE 

Definition at line 30 of file docqual.h.

@ G_TERRIBLE
Definition: docqual.h:30
@ G_NEVER_CRUNCH
Definition: docqual.h:30
@ G_OK
Definition: docqual.h:30
@ G_DODGY
Definition: docqual.h:30

◆ GeoParams

Enumerator
GeoBottom 
GeoTop 
GeoWidth 
GeoCount 

Definition at line 35 of file picofeat.h.

35 {
36 GeoBottom, // Bounding box bottom in baseline space (0-255).
37 GeoTop, // Bounding box top in baseline space (0-255).
38 GeoWidth, // Bounding box width in baseline space (0-255).
39
40 GeoCount // Number of geo features.
41};
@ GeoCount
Definition: picofeat.h:40
@ GeoTop
Definition: picofeat.h:37
@ GeoWidth
Definition: picofeat.h:38
@ GeoBottom
Definition: picofeat.h:36

◆ GraphemeNorm

enum class tesseract::GraphemeNorm
strong
Enumerator
kNone 
kNormalize 

Definition at line 51 of file normstrngs.h.

◆ GraphemeNormMode

enum class tesseract::GraphemeNormMode
strong
Enumerator
kSingleString 
kCombined 
kGlyphSplit 
kIndividualUnicodes 

Definition at line 36 of file validator.h.

36 {
37 // Validation result is a single string, even if input is multi-word.
39 // Standard unicode graphemes are validated and output as grapheme units.
41 // Graphemes are validated and sub-divided. For virama-using scripts, units
42 // that correspond to repeatable glyphs are generated. (Mostly single unicodes
43 // but viramas and joiners are paired with the most sensible neighbor.)
44 // For non-virama scripts, this means that base/accent pairs are separated,
45 // ie the output is individual unicodes.
47 // The output is always single unicodes, regardless of the script.
49};

◆ IncorrectResultReason

Enumerator
IRR_CORRECT 
IRR_CLASSIFIER 
IRR_CHOPPER 
IRR_CLASS_LM_TRADEOFF 
IRR_PAGE_LAYOUT 
IRR_SEGSEARCH_HEUR 
IRR_SEGSEARCH_PP 
IRR_CLASS_OLD_LM_TRADEOFF 
IRR_ADAPTION 
IRR_NO_TRUTH_SPLIT 
IRR_NO_TRUTH 
IRR_UNKNOWN 
IRR_NUM_REASONS 

Definition at line 56 of file blamer.h.

56 {
57 // The text recorded in best choice == truth text
59 // Either: Top choice is incorrect and is a dictionary word (language model
60 // is unlikely to help correct such errors, so blame the classifier).
61 // Or: the correct unichar was not included in shortlist produced by the
62 // classifier at all.
64 // Chopper has not found one or more splits that correspond to the correct
65 // character bounding boxes recorded in BlamerBundle::truth_word.
67 // Classifier did include correct unichars for each blob in the correct
68 // segmentation, however its rating could have been too bad to allow the
69 // language model to pull out the correct choice. On the other hand the
70 // strength of the language model might have been too weak to favor the
71 // correct answer, thus we call this case a classifier-language model
72 // tradeoff error.
74 // Page layout failed to produce the correct bounding box. Blame page layout
75 // if the truth was not found for the word, which implies that the bounding
76 // box of the word was incorrect (no truth word had a similar bounding box).
78 // SegSearch heuristic prevented one or more blobs from the correct
79 // segmentation state to be classified (e.g. the blob was too wide).
81 // The correct segmentation state was not explored because of poor SegSearch
82 // pain point prioritization. We blame SegSearch pain point prioritization
83 // if the best rating of a choice constructed from correct segmentation is
84 // better than that of the best choice (i.e. if we got to explore the correct
85 // segmentation state, language model would have picked the correct choice).
87 // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
88 // and thus use the old language model (permuters).
89 // TODO(antonova): integrate the new language model with chopper
91 // If there is an incorrect adaptive template match with a better score than
92 // a correct one (either pre-trained or adapted), mark this as adaption error.
94 // split_and_recog_word() failed to find a suitable split in truth.
96 // Truth is not available for this word (e.g. when words in corrected content
97 // file are turned into ~~~~ because an appropriate alignment was not found).
99 // The text recorded in best choice != truth text, but none of the above
100 // reasons are set.
102
104};
@ IRR_CLASS_OLD_LM_TRADEOFF
Definition: blamer.h:90
@ IRR_SEGSEARCH_HEUR
Definition: blamer.h:80
@ IRR_CORRECT
Definition: blamer.h:58
@ IRR_SEGSEARCH_PP
Definition: blamer.h:86
@ IRR_CHOPPER
Definition: blamer.h:66
@ IRR_PAGE_LAYOUT
Definition: blamer.h:77
@ IRR_UNKNOWN
Definition: blamer.h:101
@ IRR_CLASS_LM_TRADEOFF
Definition: blamer.h:73
@ IRR_NUM_REASONS
Definition: blamer.h:103
@ IRR_CLASSIFIER
Definition: blamer.h:63
@ IRR_NO_TRUTH
Definition: blamer.h:98
@ IRR_NO_TRUTH_SPLIT
Definition: blamer.h:95
@ IRR_ADAPTION
Definition: blamer.h:93

◆ IntmatcherDebugAction

Enumerator
IDA_ADAPTIVE 
IDA_STATIC 
IDA_SHAPE_INDEX 
IDA_BOTH 

Definition at line 139 of file intproto.h.

◆ IntParams

Enumerator
IntX 
IntY 
IntDir 

Definition at line 28 of file picofeat.h.

28 {
29 IntX, // x-position (0-255).
30 IntY, // y-position (0-255).
31 IntDir // Direction (0-255, circular).
32};
@ IntDir
Definition: picofeat.h:31

◆ kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT 
PTRAIN_DIGITS_MED 
PTRAIN_DIGITS_LONG 
PTRAIN_NUM_SHORT 
PTRAIN_NUM_MED 
PTRAIN_NUM_LONG 
PTRAIN_DOC_SHORT 
PTRAIN_DOC_MED 
PTRAIN_DOC_LONG 
PTRAIN_DICT_SHORT 
PTRAIN_DICT_MED 
PTRAIN_DICT_LONG 
PTRAIN_FREQ_SHORT 
PTRAIN_FREQ_MED 
PTRAIN_FREQ_LONG 
PTRAIN_SHAPE_COST_PER_CHAR 
PTRAIN_NGRAM_COST_PER_CHAR 
PTRAIN_NUM_BAD_PUNC 
PTRAIN_NUM_BAD_CASE 
PTRAIN_XHEIGHT_CONSISTENCY 
PTRAIN_NUM_BAD_CHAR_TYPE 
PTRAIN_NUM_BAD_SPACING 
PTRAIN_NUM_BAD_FONT 
PTRAIN_RATING_PER_CHAR 
PTRAIN_NUM_FEATURE_TYPES 

Definition at line 39 of file params_training_featdef.h.

39 {
40 // Digits
44 // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
46 PTRAIN_NUM_MED, // 4
47 PTRAIN_NUM_LONG, // 5
48 // Document word (DOC_DAWG_PERM)
50 PTRAIN_DOC_MED, // 7
51 PTRAIN_DOC_LONG, // 8
52 // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
54 PTRAIN_DICT_MED, // 10
55 PTRAIN_DICT_LONG, // 11
56 // Frequent word (FREQ_DAWG_PERM)
58 PTRAIN_FREQ_MED, // 13
59 PTRAIN_FREQ_LONG, // 14
69
71};

◆ LeftOrRight

Enumerator
LR_LEFT 
LR_RIGHT 

Definition at line 38 of file strokewidth.h.

38{ LR_LEFT, LR_RIGHT };

◆ LineType

Enumerator
LT_START 
LT_BODY 
LT_UNKNOWN 
LT_MULTIPLE 

Definition at line 48 of file paragraphs_internal.h.

48 {
49 LT_START = 'S', // First line of a paragraph.
50 LT_BODY = 'C', // Continuation line of a paragraph.
51 LT_UNKNOWN = 'U', // No clues.
52 LT_MULTIPLE = 'M', // Matches for both LT_START and LT_BODY.
53};

◆ LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER 
LM_PPTYPE_AMBIG 
LM_PPTYPE_PATH 
LM_PPTYPE_SHAPE 
LM_PPTYPE_NUM 

Definition at line 39 of file lm_pain_points.h.

◆ LossType

Enumerator
LT_NONE 
LT_CTC 
LT_SOFTMAX 
LT_LOGISTIC 

Definition at line 29 of file static_shape.h.

29 {
30 LT_NONE, // Undefined.
31 LT_CTC, // Softmax with standard CTC for training/decoding.
32 LT_SOFTMAX, // Outputs sum to 1 in fixed positions.
33 LT_LOGISTIC, // Logistic outputs with independent values.
34};

◆ MicroFeatureParameter

Enumerator
MFXPosition 
MFYPosition 
MFLength 
MFDirection 
MFBulge1 
MFBulge2 
MFCount 

Definition at line 25 of file mfdefs.h.

◆ NeighbourPartitionType

Enumerator
NPT_HTEXT 
NPT_VTEXT 
NPT_WEAK_HTEXT 
NPT_WEAK_VTEXT 
NPT_IMAGE 
NPT_COUNT 

Definition at line 1548 of file colpartitiongrid.cpp.

1548 {
1549 NPT_HTEXT, // Definite horizontal text.
1550 NPT_VTEXT, // Definite vertical text.
1551 NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
1552 // image for image and VTEXT.
1553 NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
1554 // image for image and HTEXT.
1555 NPT_IMAGE, // Definite non-text.
1556 NPT_COUNT // Number of array elements.
1557};

◆ NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR 
NF_ADAM 

Definition at line 83 of file network.h.

83 {
84 // Network forward/backprop behavior.
85 NF_LAYER_SPECIFIC_LR = 64, // Separate learning rate for each layer.
86 NF_ADAM = 128, // Weight-specific learning rate.
87};
@ NF_LAYER_SPECIFIC_LR
Definition: network.h:85
@ NF_ADAM
Definition: network.h:86

◆ NetworkType

Enumerator
NT_NONE 
NT_INPUT 
NT_CONVOLVE 
NT_MAXPOOL 
NT_PARALLEL 
NT_REPLICATED 
NT_PAR_RL_LSTM 
NT_PAR_UD_LSTM 
NT_PAR_2D_LSTM 
NT_SERIES 
NT_RECONFIG 
NT_XREVERSED 
NT_YREVERSED 
NT_XYTRANSPOSE 
NT_LSTM 
NT_LSTM_SUMMARY 
NT_LOGISTIC 
NT_POSCLIP 
NT_SYMCLIP 
NT_TANH 
NT_RELU 
NT_LINEAR 
NT_SOFTMAX 
NT_SOFTMAX_NO_CTC 
NT_LSTM_SOFTMAX 
NT_LSTM_SOFTMAX_ENCODED 
NT_TENSORFLOW 
NT_COUNT 

Definition at line 41 of file network.h.

41 {
42 NT_NONE, // The naked base class.
43 NT_INPUT, // Inputs from an image.
44 // Plumbing networks combine other networks or rearrange the inputs.
45 NT_CONVOLVE, // Duplicates inputs in a sliding window neighborhood.
46 NT_MAXPOOL, // Chooses the max result from a rectangle.
47 NT_PARALLEL, // Runs networks in parallel.
48 NT_REPLICATED, // Runs identical networks in parallel.
49 NT_PAR_RL_LSTM, // Runs LTR and RTL LSTMs in parallel.
50 NT_PAR_UD_LSTM, // Runs Up and Down LSTMs in parallel.
51 NT_PAR_2D_LSTM, // Runs 4 LSTMs in parallel.
52 NT_SERIES, // Executes a sequence of layers.
53 NT_RECONFIG, // Scales the time/y size but makes the output deeper.
54 NT_XREVERSED, // Reverses the x direction of the inputs/outputs.
55 NT_YREVERSED, // Reverses the y-direction of the inputs/outputs.
56 NT_XYTRANSPOSE, // Transposes x and y (for just a single op).
57 // Functional networks actually calculate stuff.
58 NT_LSTM, // Long-Short-Term-Memory block.
59 NT_LSTM_SUMMARY, // LSTM that only keeps its last output.
60 NT_LOGISTIC, // Fully connected logistic nonlinearity.
61 NT_POSCLIP, // Fully connected rect lin version of logistic.
62 NT_SYMCLIP, // Fully connected rect lin version of tanh.
63 NT_TANH, // Fully connected with tanh nonlinearity.
64 NT_RELU, // Fully connected with rectifier nonlinearity.
65 NT_LINEAR, // Fully connected with no nonlinearity.
66 NT_SOFTMAX, // Softmax uses exponential normalization, with CTC.
67 NT_SOFTMAX_NO_CTC, // Softmax uses exponential normalization, no CTC.
68 // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with
69 // the outputs fed back to the input of the LSTM at the next timestep.
70 // The ENCODED version binary encodes the softmax outputs, providing log2 of
71 // the number of outputs as additional inputs, and the other version just
72 // provides all the softmax outputs as additional inputs.
73 NT_LSTM_SOFTMAX, // 1-d LSTM with built-in fully connected softmax.
74 NT_LSTM_SOFTMAX_ENCODED, // 1-d LSTM with built-in binary encoded softmax.
75 // A TensorFlow graph encapsulated as a Tesseract network.
77
78 NT_COUNT // Array size.
79};
@ NT_LINEAR
Definition: network.h:65
@ NT_MAXPOOL
Definition: network.h:46
@ NT_RELU
Definition: network.h:64
@ NT_XREVERSED
Definition: network.h:54
@ NT_LSTM
Definition: network.h:58
@ NT_CONVOLVE
Definition: network.h:45
@ NT_SOFTMAX
Definition: network.h:66
@ NT_NONE
Definition: network.h:42
@ NT_LOGISTIC
Definition: network.h:60
@ NT_PAR_UD_LSTM
Definition: network.h:50
@ NT_LSTM_SOFTMAX_ENCODED
Definition: network.h:74
@ NT_PARALLEL
Definition: network.h:47
@ NT_SYMCLIP
Definition: network.h:62
@ NT_PAR_2D_LSTM
Definition: network.h:51
@ NT_LSTM_SUMMARY
Definition: network.h:59
@ NT_YREVERSED
Definition: network.h:55
@ NT_RECONFIG
Definition: network.h:53
@ NT_INPUT
Definition: network.h:43
@ NT_TENSORFLOW
Definition: network.h:76
@ NT_POSCLIP
Definition: network.h:61
@ NT_LSTM_SOFTMAX
Definition: network.h:73
@ NT_XYTRANSPOSE
Definition: network.h:56
@ NT_SERIES
Definition: network.h:52
@ NT_SOFTMAX_NO_CTC
Definition: network.h:67
@ NT_TANH
Definition: network.h:63
@ NT_PAR_RL_LSTM
Definition: network.h:49
@ NT_COUNT
Definition: network.h:78
@ NT_REPLICATED
Definition: network.h:48

◆ NodeContinuation

Enumerator
NC_ANYTHING 
NC_ONLY_DUP 
NC_NO_DUP 
NC_COUNT 

Definition at line 72 of file recodebeam.h.

72 {
73 NC_ANYTHING, // This node used just its own score, so anything can follow.
74 NC_ONLY_DUP, // The current node combined another score with the score for
75 // itself, without a stand-alone duplicate before, so must be
76 // followed by a stand-alone duplicate.
77 NC_NO_DUP, // The current node combined another score with the score for
78 // itself, after a stand-alone, so can only be followed by
79 // something other than a duplicate of the current node.
81};
@ NC_ANYTHING
Definition: recodebeam.h:73
@ NC_ONLY_DUP
Definition: recodebeam.h:74

◆ NORM_METHOD

Enumerator
baseline 
character 

Definition at line 53 of file mfoutline.h.

@ character
Definition: mfoutline.h:53
@ baseline
Definition: mfoutline.h:53

◆ NORM_PARAM_NAME

Enumerator
CharNormY 
CharNormLength 
CharNormRx 
CharNormRy 

Definition at line 30 of file normfeat.h.

◆ NormalizationMode

Enumerator
NM_BASELINE 
NM_CHAR_ISOTROPIC 
NM_CHAR_ANISOTROPIC 

Definition at line 46 of file normalis.h.

46 {
47 NM_BASELINE = -3, // The original BL normalization mode.
48 NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic.
49 NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
50};
@ NM_BASELINE
Definition: normalis.h:47
@ NM_CHAR_ANISOTROPIC
Definition: normalis.h:49
@ NM_CHAR_ISOTROPIC
Definition: normalis.h:48

◆ OcrEngineMode

When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY 
OEM_LSTM_ONLY 
OEM_TESSERACT_LSTM_COMBINED 
OEM_DEFAULT 
OEM_COUNT 

Definition at line 263 of file publictypes.h.

263 {
264 OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
265 OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
266 OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
267 // to Tesseract when things get difficult.
268 // deprecated
269 OEM_DEFAULT, // Specify this mode when calling init_*(),
270 // to indicate that any of the above modes
271 // should be automatically inferred from the
272 // variables in the language-specific config,
273 // command-line configs, or if not specified
274 // in any of the above should be set to the
275 // default OEM_TESSERACT_ONLY.
276 OEM_COUNT // Number of OEMs
277};
@ OEM_DEFAULT
Definition: capi.h:69
@ OEM_TESSERACT_ONLY
Definition: capi.h:66
@ OEM_LSTM_ONLY
Definition: capi.h:67
@ OEM_TESSERACT_LSTM_COMBINED
Definition: capi.h:68

◆ OCRNorm

enum class tesseract::OCRNorm
strong
Enumerator
kNone 
kNormalize 

Definition at line 43 of file normstrngs.h.

43 {
44 kNone,
46};

◆ OldUncleanUnichars

enum class tesseract::OldUncleanUnichars
strong
Enumerator
kFalse 
kTrue 

Definition at line 45 of file unicharset.h.

◆ Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP 
ORIENTATION_PAGE_RIGHT 
ORIENTATION_PAGE_DOWN 
ORIENTATION_PAGE_LEFT 

Definition at line 114 of file publictypes.h.

114 {
119};
@ ORIENTATION_PAGE_DOWN
Definition: capi.h:116
@ ORIENTATION_PAGE_RIGHT
Definition: capi.h:115
@ ORIENTATION_PAGE_UP
Definition: capi.h:114
@ ORIENTATION_PAGE_LEFT
Definition: capi.h:117

◆ OUTLINE_FEAT_PARAM_NAME

Enumerator
OutlineFeatX 
OutlineFeatY 
OutlineFeatLength 
OutlineFeatDir 

Definition at line 27 of file outfeat.h.

27 {
OUTLINE_FEAT_PARAM_NAME
Definition: outfeat.h:27
@ OutlineFeatLength
Definition: outfeat.h:30
@ OutlineFeatY
Definition: outfeat.h:29
@ OutlineFeatX
Definition: outfeat.h:28
@ OutlineFeatDir
Definition: outfeat.h:31

◆ OUTLINETYPE

Enumerator
outer 
hole 

Definition at line 51 of file mfoutline.h.

51{ outer, hole };

◆ OVERLAP_STATE

Enumerator
ASSIGN 
REJECT 
NEW_ROW 

Definition at line 30 of file makerow.h.

30 {
31 ASSIGN, // assign it to row
32 REJECT, // reject it - dual overlap
34};
@ ASSIGN
Definition: makerow.h:31
@ NEW_ROW
Definition: makerow.h:33
@ REJECT
Definition: makerow.h:32

◆ PageIteratorLevel

enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK 
RIL_PARA 
RIL_TEXTLINE 
RIL_WORD 
RIL_SYMBOL 

Definition at line 214 of file publictypes.h.

214 {
215 RIL_BLOCK, // Block of text/image/separator line.
216 RIL_PARA, // Paragraph within a block.
217 RIL_TEXTLINE, // Line within a paragraph.
218 RIL_WORD, // Word within a textline.
219 RIL_SYMBOL // Symbol/character within a word.
220};
@ RIL_TEXTLINE
Definition: capi.h:91
@ RIL_PARA
Definition: capi.h:90
@ RIL_BLOCK
Definition: capi.h:89
@ RIL_WORD
Definition: capi.h:92
@ RIL_SYMBOL
Definition: capi.h:93

◆ PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY 

Orientation and script detection only.

PSM_AUTO_OSD 

Automatic page segmentation with orientation and script detection. (OSD)

PSM_AUTO_ONLY 

Automatic page segmentation, but no OSD, or OCR.

PSM_AUTO 

Fully automatic page segmentation, but no OSD.

PSM_SINGLE_COLUMN 

Assume a single column of text of variable sizes.

PSM_SINGLE_BLOCK_VERT_TEXT 

Assume a single uniform block of vertically aligned text.

PSM_SINGLE_BLOCK 

Assume a single uniform block of text. (Default.)

PSM_SINGLE_LINE 

Treat the image as a single text line.

PSM_SINGLE_WORD 

Treat the image as a single word.

PSM_CIRCLE_WORD 

Treat the image as a single word in a circle.

PSM_SINGLE_CHAR 

Treat the image as a single character.

PSM_SPARSE_TEXT 

Find as much text as possible in no particular order.

PSM_SPARSE_TEXT_OSD 

Sparse text with orientation and script det.

PSM_RAW_LINE 

Treat the image as a single text line, bypassing hacks that are Tesseract-specific.

PSM_COUNT 

Number of enum entries.

Definition at line 157 of file publictypes.h.

157 {
158 PSM_OSD_ONLY = 0,
159 PSM_AUTO_OSD = 1,
161 PSM_AUTO_ONLY = 2,
162 PSM_AUTO = 3,
166 PSM_SINGLE_BLOCK = 6,
167 PSM_SINGLE_LINE = 7,
168 PSM_SINGLE_WORD = 8,
169 PSM_CIRCLE_WORD = 9,
170 PSM_SINGLE_CHAR = 10,
172 11,
174 PSM_RAW_LINE = 13,
176
177 PSM_COUNT
178};
@ PSM_AUTO
Definition: capi.h:75
@ PSM_OSD_ONLY
Definition: capi.h:72
@ PSM_SINGLE_CHAR
Definition: capi.h:82
@ PSM_AUTO_OSD
Definition: capi.h:73
@ PSM_CIRCLE_WORD
Definition: capi.h:81
@ PSM_SINGLE_BLOCK_VERT_TEXT
Definition: capi.h:77
@ PSM_SINGLE_COLUMN
Definition: capi.h:76
@ PSM_SPARSE_TEXT
Definition: capi.h:83
@ PSM_COUNT
Definition: capi.h:86
@ PSM_SINGLE_LINE
Definition: capi.h:79
@ PSM_SINGLE_BLOCK
Definition: capi.h:78
@ PSM_RAW_LINE
Definition: capi.h:85
@ PSM_AUTO_ONLY
Definition: capi.h:74
@ PSM_SINGLE_WORD
Definition: capi.h:80
@ PSM_SPARSE_TEXT_OSD
Definition: capi.h:84

◆ ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN 
JUSTIFICATION_LEFT 
JUSTIFICATION_CENTER 
JUSTIFICATION_RIGHT 

Definition at line 246 of file publictypes.h.

246 {
251};
@ JUSTIFICATION_LEFT
Definition: capi.h:121
@ JUSTIFICATION_RIGHT
Definition: capi.h:123
@ JUSTIFICATION_CENTER
Definition: capi.h:122
@ JUSTIFICATION_UNKNOWN
Definition: capi.h:120

◆ ParamType

Enumerator
VT_INTEGER 
VT_BOOLEAN 
VT_STRING 
VT_DOUBLE 

Definition at line 40 of file paramsd.h.

@ VT_INTEGER
Definition: paramsd.h:40
@ VT_STRING
Definition: paramsd.h:40
@ VT_BOOLEAN
Definition: paramsd.h:40
@ VT_DOUBLE
Definition: paramsd.h:40

◆ PartitionFindResult

Enumerator
PFR_OK 
PFR_SKEW 
PFR_NOISE 

Definition at line 42 of file strokewidth.h.

42 {
43 PFR_OK, // Everything is OK.
44 PFR_SKEW, // Skew was detected and rotated.
45 PFR_NOISE // Noise was detected and removed.
46};

◆ PermuterType

Enumerator
NO_PERM 
PUNC_PERM 
TOP_CHOICE_PERM 
LOWER_CASE_PERM 
UPPER_CASE_PERM 
NGRAM_PERM 
NUMBER_PERM 
USER_PATTERN_PERM 
SYSTEM_DAWG_PERM 
DOC_DAWG_PERM 
USER_DAWG_PERM 
FREQ_DAWG_PERM 
COMPOUND_PERM 
NUM_PERMUTER_TYPES 

Definition at line 235 of file ratngs.h.

235 {
236 NO_PERM, // 0
237 PUNC_PERM, // 1
238 TOP_CHOICE_PERM, // 2
239 LOWER_CASE_PERM, // 3
240 UPPER_CASE_PERM, // 4
241 NGRAM_PERM, // 5
242 NUMBER_PERM, // 6
244 SYSTEM_DAWG_PERM, // 8
245 DOC_DAWG_PERM, // 9
246 USER_DAWG_PERM, // 10
247 FREQ_DAWG_PERM, // 11
248 COMPOUND_PERM, // 12
249
251};
@ UPPER_CASE_PERM
Definition: ratngs.h:240
@ NGRAM_PERM
Definition: ratngs.h:241
@ LOWER_CASE_PERM
Definition: ratngs.h:239
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:244
@ TOP_CHOICE_PERM
Definition: ratngs.h:238
@ NUMBER_PERM
Definition: ratngs.h:242
@ COMPOUND_PERM
Definition: ratngs.h:248
@ NO_PERM
Definition: ratngs.h:236
@ NUM_PERMUTER_TYPES
Definition: ratngs.h:250
@ PUNC_PERM
Definition: ratngs.h:237
@ USER_DAWG_PERM
Definition: ratngs.h:246
@ USER_PATTERN_PERM
Definition: ratngs.h:243
@ DOC_DAWG_PERM
Definition: ratngs.h:245
@ FREQ_DAWG_PERM
Definition: ratngs.h:247

◆ PICO_FEAT_PARAM_NAME

Enumerator
PicoFeatY 
PicoFeatDir 
PicoFeatX 

Definition at line 43 of file picofeat.h.

◆ PITCH_TYPE

Enumerator
PITCH_DUNNO 
PITCH_DEF_FIXED 
PITCH_MAYBE_FIXED 
PITCH_DEF_PROP 
PITCH_MAYBE_PROP 
PITCH_CORR_FIXED 
PITCH_CORR_PROP 

Definition at line 47 of file blobbox.h.

47 {
48 PITCH_DUNNO, // insufficient data
49 PITCH_DEF_FIXED, // definitely fixed
50 PITCH_MAYBE_FIXED, // could be
55};
@ PITCH_DUNNO
Definition: blobbox.h:48
@ PITCH_MAYBE_FIXED
Definition: blobbox.h:50
@ PITCH_DEF_FIXED
Definition: blobbox.h:49
@ PITCH_MAYBE_PROP
Definition: blobbox.h:52
@ PITCH_DEF_PROP
Definition: blobbox.h:51
@ PITCH_CORR_FIXED
Definition: blobbox.h:53
@ PITCH_CORR_PROP
Definition: blobbox.h:54

◆ PolyBlockType

Possible types for a POLY_BLOCK or ColPartition. Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions below, as well as kPolyBlockNames in layout_test.cc. Used extensively by ColPartition and POLY_BLOCK.

Enumerator
PT_UNKNOWN 
PT_FLOWING_TEXT 
PT_HEADING_TEXT 
PT_PULLOUT_TEXT 
PT_EQUATION 
PT_INLINE_EQUATION 
PT_TABLE 
PT_VERTICAL_TEXT 
PT_CAPTION_TEXT 
PT_FLOWING_IMAGE 
PT_HEADING_IMAGE 
PT_PULLOUT_IMAGE 
PT_HORZ_LINE 
PT_VERT_LINE 
PT_NOISE 
PT_COUNT 

Definition at line 51 of file publictypes.h.

51 {
52 PT_UNKNOWN, // Type is not yet known. Keep as the first element.
53 PT_FLOWING_TEXT, // Text that lives inside a column.
54 PT_HEADING_TEXT, // Text that spans more than one column.
55 PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
56 PT_EQUATION, // Partition belonging to an equation region.
57 PT_INLINE_EQUATION, // Partition has inline equation.
58 PT_TABLE, // Partition belonging to a table region.
59 PT_VERTICAL_TEXT, // Text-line runs vertically.
60 PT_CAPTION_TEXT, // Text that belongs to an image.
61 PT_FLOWING_IMAGE, // Image that lives inside a column.
62 PT_HEADING_IMAGE, // Image that spans more than one column.
63 PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
64 PT_HORZ_LINE, // Horizontal Line.
65 PT_VERT_LINE, // Vertical Line.
66 PT_NOISE, // Lies outside of any column.
68};
@ PT_VERT_LINE
Definition: capi.h:109
@ PT_PULLOUT_TEXT
Definition: capi.h:99
@ PT_COUNT
Definition: capi.h:111
@ PT_HEADING_TEXT
Definition: capi.h:98
@ PT_TABLE
Definition: capi.h:102
@ PT_NOISE
Definition: capi.h:110
@ PT_PULLOUT_IMAGE
Definition: capi.h:107
@ PT_HEADING_IMAGE
Definition: capi.h:106
@ PT_INLINE_EQUATION
Definition: capi.h:101
@ PT_FLOWING_TEXT
Definition: capi.h:97
@ PT_UNKNOWN
Definition: capi.h:96
@ PT_HORZ_LINE
Definition: capi.h:108
@ PT_VERTICAL_TEXT
Definition: capi.h:103
@ PT_EQUATION
Definition: capi.h:100
@ PT_FLOWING_IMAGE
Definition: capi.h:105
@ PT_CAPTION_TEXT
Definition: capi.h:104

◆ PROTOSTYLE

Enumerator
spherical 
elliptical 
mixed 
automatic 

Definition at line 53 of file cluster.h.

◆ REJ_FLAGS

Enumerator
R_TESS_FAILURE 
R_SMALL_XHT 
R_EDGE_CHAR 
R_1IL_CONFLICT 
R_POSTNN_1IL 
R_REJ_CBLOB 
R_MM_REJECT 
R_BAD_REPETITION 
R_POOR_MATCH 
R_NOT_TESS_ACCEPTED 
R_CONTAINS_BLANKS 
R_BAD_PERMUTER 
R_HYPHEN 
R_DUBIOUS 
R_NO_ALPHANUMS 
R_MOSTLY_REJ 
R_XHT_FIXUP 
R_BAD_QUALITY 
R_DOC_REJ 
R_BLOCK_REJ 
R_ROW_REJ 
R_UNLV_REJ 
R_NN_ACCEPT 
R_HYPHEN_ACCEPT 
R_MM_ACCEPT 
R_QUALITY_ACCEPT 
R_MINIMAL_REJ_ACCEPT 

Definition at line 51 of file rejctmap.h.

51 {
52 /* Reject modes which are NEVER overridden */
53 R_TESS_FAILURE, // PERM Tess didn't classify
54 R_SMALL_XHT, // PERM Xht too small
55 R_EDGE_CHAR, // PERM Too close to edge of image
56 R_1IL_CONFLICT, // PERM 1Il confusion
57 R_POSTNN_1IL, // PERM 1Il unrejected by NN
58 R_REJ_CBLOB, // PERM Odd blob
59 R_MM_REJECT, // PERM Matrix match rejection (m's)
60 R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend
61
62 /* Initial reject modes (pre NN_ACCEPT) */
63 R_POOR_MATCH, // TEMP Ray's original heuristic (Not used)
64 R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD
65 R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD
66 R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD
67
68 /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
69 R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop
70 R_DUBIOUS, // TEMP Post NN dodgy chars
71 R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN
72 R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest
73 R_XHT_FIXUP, // TEMP Xht tests unsure
74
75 /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
76 R_BAD_QUALITY, // TEMP Quality metrics bad for WERD
77
78 /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/
79 R_DOC_REJ, // TEMP Document rejection
80 R_BLOCK_REJ, // TEMP Block rejection
81 R_ROW_REJ, // TEMP Row rejection
82 R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space
83
84 /* Accept modes which occur between the above rejection groups */
85 R_NN_ACCEPT, // NN acceptance
86 R_HYPHEN_ACCEPT, // Hyphen acceptance
87 R_MM_ACCEPT, // Matrix match acceptance
88 R_QUALITY_ACCEPT, // Accept word in good quality doc
89 R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures
90};
@ R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:89
@ R_ROW_REJ
Definition: rejctmap.h:81
@ R_NO_ALPHANUMS
Definition: rejctmap.h:71
@ R_TESS_FAILURE
Definition: rejctmap.h:53
@ R_QUALITY_ACCEPT
Definition: rejctmap.h:88
@ R_DOC_REJ
Definition: rejctmap.h:79
@ R_MM_ACCEPT
Definition: rejctmap.h:87
@ R_MOSTLY_REJ
Definition: rejctmap.h:72
@ R_XHT_FIXUP
Definition: rejctmap.h:73
@ R_POOR_MATCH
Definition: rejctmap.h:63
@ R_SMALL_XHT
Definition: rejctmap.h:54
@ R_BAD_PERMUTER
Definition: rejctmap.h:66
@ R_BAD_REPETITION
Definition: rejctmap.h:60
@ R_BLOCK_REJ
Definition: rejctmap.h:80
@ R_HYPHEN_ACCEPT
Definition: rejctmap.h:86
@ R_HYPHEN
Definition: rejctmap.h:69
@ R_CONTAINS_BLANKS
Definition: rejctmap.h:65
@ R_POSTNN_1IL
Definition: rejctmap.h:57
@ R_REJ_CBLOB
Definition: rejctmap.h:58
@ R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:64
@ R_BAD_QUALITY
Definition: rejctmap.h:76
@ R_UNLV_REJ
Definition: rejctmap.h:82
@ R_NN_ACCEPT
Definition: rejctmap.h:85
@ R_DUBIOUS
Definition: rejctmap.h:70
@ R_MM_REJECT
Definition: rejctmap.h:59
@ R_1IL_CONFLICT
Definition: rejctmap.h:56
@ R_EDGE_CHAR
Definition: rejctmap.h:55

◆ ROW_CATEGORY

Enumerator
ROW_ASCENDERS_FOUND 
ROW_DESCENDERS_FOUND 
ROW_UNKNOWN 
ROW_INVALID 

Definition at line 36 of file makerow.h.

36 {
41};
@ ROW_ASCENDERS_FOUND
Definition: makerow.h:37
@ ROW_DESCENDERS_FOUND
Definition: makerow.h:38
@ ROW_INVALID
Definition: makerow.h:40
@ ROW_UNKNOWN
Definition: makerow.h:39

◆ ScriptPos

Enumerator
SP_NORMAL 
SP_SUBSCRIPT 
SP_SUPERSCRIPT 
SP_DROPCAP 

Definition at line 254 of file ratngs.h.

◆ SerializeAmount

Enumerator
LIGHT 
NO_BEST_TRAINER 
FULL 

Definition at line 60 of file lstmtrainer.h.

60 {
61 LIGHT, // Minimal data for remote training.
62 NO_BEST_TRAINER, // Save an empty vector in place of best_trainer_.
63 FULL, // All data including best_trainer_.
64};
@ NO_BEST_TRAINER
Definition: lstmtrainer.h:62

◆ SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE 
SET_PARAM_CONSTRAINT_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_INIT_ONLY 

Definition at line 39 of file params.h.

◆ SpacingNeighbourhood

Enumerator
PN_ABOVE2 
PN_ABOVE1 
PN_UPPER 
PN_LOWER 
PN_BELOW1 
PN_BELOW2 
PN_COUNT 

Definition at line 42 of file colpartition.cpp.

◆ SpecialUnicharCodes

Enumerator
UNICHAR_SPACE 
UNICHAR_JOINED 
UNICHAR_BROKEN 
SPECIAL_UNICHAR_CODES_COUNT 

Definition at line 35 of file unicharset.h.

35 {
39
41};
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ UNICHAR_BROKEN
Definition: unicharset.h:38
@ SPECIAL_UNICHAR_CODES_COUNT
Definition: unicharset.h:40
@ UNICHAR_JOINED
Definition: unicharset.h:37

◆ StrongScriptDirection

Enumerator
DIR_NEUTRAL 
DIR_LEFT_TO_RIGHT 
DIR_RIGHT_TO_LEFT 
DIR_MIX 

Definition at line 41 of file unichar.h.

41 {
42 DIR_NEUTRAL = 0, // Text contains only neutral characters.
43 DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
44 DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
45 DIR_MIX = 3, // Text contains a mixture of left-to-right
46 // and right-to-left characters.
47};
@ DIR_MIX
Definition: unichar.h:45
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:43
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:44
@ DIR_NEUTRAL
Definition: unichar.h:42

◆ SubTrainerResult

Enumerator
STR_NONE 
STR_UPDATED 
STR_REPLACED 

Definition at line 67 of file lstmtrainer.h.

67 {
68 STR_NONE, // Did nothing as not good enough.
69 STR_UPDATED, // Subtrainer was updated, but didn't replace *this.
70 STR_REPLACED // Subtrainer replaced *this.
71};
@ STR_REPLACED
Definition: lstmtrainer.h:70

◆ SVEventType

Enumerator
SVET_DESTROY 
SVET_EXIT 
SVET_CLICK 
SVET_SELECTION 
SVET_INPUT 
SVET_MOUSE 
SVET_MOTION 
SVET_HOVER 
SVET_POPUP 
SVET_MENU 
SVET_ANY 
SVET_COUNT 

Definition at line 53 of file scrollview.h.

53 {
54 SVET_DESTROY, // Window has been destroyed by user.
55 SVET_EXIT, // User has destroyed the last window by clicking on the 'X'.
56 SVET_CLICK, // Left button pressed.
57 SVET_SELECTION, // Left button selection.
58 SVET_INPUT, // There is some input (single key or a whole string).
59 SVET_MOUSE, // The mouse has moved with a button pressed.
60 SVET_MOTION, // The mouse has moved with no button pressed.
61 SVET_HOVER, // The mouse has stayed still for a second.
62 SVET_POPUP, // A command selected through a popup menu.
63 SVET_MENU, // A command selected through the menubar.
64 SVET_ANY, // Any of the above.
65
66 SVET_COUNT // Array sizing.
67};
@ SVET_SELECTION
Definition: scrollview.h:57
@ SVET_MOTION
Definition: scrollview.h:60
@ SVET_COUNT
Definition: scrollview.h:66
@ SVET_DESTROY
Definition: scrollview.h:54
@ SVET_POPUP
Definition: scrollview.h:62
@ SVET_CLICK
Definition: scrollview.h:56
@ SVET_MOUSE
Definition: scrollview.h:59
@ SVET_INPUT
Definition: scrollview.h:58
@ SVET_HOVER
Definition: scrollview.h:61

◆ SWITCH_TYPE

Enumerator
StartSwitch 
EndSwitch 
LastSwitch 

Definition at line 69 of file intproto.cpp.

◆ TabAlignment

Enumerator
TA_LEFT_ALIGNED 
TA_LEFT_RAGGED 
TA_CENTER_JUSTIFIED 
TA_RIGHT_ALIGNED 
TA_RIGHT_RAGGED 
TA_SEPARATOR 
TA_COUNT 

Definition at line 41 of file tabvector.h.

41 {
49};
@ TA_COUNT
Definition: tabvector.h:48
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:45
@ TA_RIGHT_RAGGED
Definition: tabvector.h:46
@ TA_LEFT_ALIGNED
Definition: tabvector.h:42
@ TA_SEPARATOR
Definition: tabvector.h:47
@ TA_LEFT_RAGGED
Definition: tabvector.h:43
@ TA_CENTER_JUSTIFIED
Definition: tabvector.h:44

◆ TabType

Enumerator
TT_NONE 
TT_DELETED 
TT_MAYBE_RAGGED 
TT_MAYBE_ALIGNED 
TT_CONFIRMED 
TT_VLINE 

Definition at line 61 of file blobbox.h.

61 {
62 TT_NONE, // Not a tab.
63 TT_DELETED, // Not a tab after detailed analysis.
64 TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
65 TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
66 TT_CONFIRMED, // Aligned with neighbours.
67 TT_VLINE // Detected as a vertical line.
68};
@ TT_MAYBE_RAGGED
Definition: blobbox.h:64
@ TT_VLINE
Definition: blobbox.h:67
@ TT_MAYBE_ALIGNED
Definition: blobbox.h:65
@ TT_CONFIRMED
Definition: blobbox.h:66
@ TT_DELETED
Definition: blobbox.h:63
@ TT_NONE
Definition: blobbox.h:62

◆ TessdataType

Enumerator
TESSDATA_LANG_CONFIG 
TESSDATA_UNICHARSET 
TESSDATA_AMBIGS 
TESSDATA_INTTEMP 
TESSDATA_PFFMTABLE 
TESSDATA_NORMPROTO 
TESSDATA_PUNC_DAWG 
TESSDATA_SYSTEM_DAWG 
TESSDATA_NUMBER_DAWG 
TESSDATA_FREQ_DAWG 
TESSDATA_FIXED_LENGTH_DAWGS 
TESSDATA_CUBE_UNICHARSET 
TESSDATA_CUBE_SYSTEM_DAWG 
TESSDATA_SHAPE_TABLE 
TESSDATA_BIGRAM_DAWG 
TESSDATA_UNAMBIG_DAWG 
TESSDATA_PARAMS_MODEL 
TESSDATA_LSTM 
TESSDATA_LSTM_PUNC_DAWG 
TESSDATA_LSTM_SYSTEM_DAWG 
TESSDATA_LSTM_NUMBER_DAWG 
TESSDATA_LSTM_UNICHARSET 
TESSDATA_LSTM_RECODER 
TESSDATA_VERSION 
TESSDATA_NUM_ENTRIES 

Definition at line 58 of file tessdatamanager.h.

58 {
61 TESSDATA_AMBIGS, // 2
69 TESSDATA_FIXED_LENGTH_DAWGS, // 10 // deprecated
70 TESSDATA_CUBE_UNICHARSET, // 11 // deprecated
71 TESSDATA_CUBE_SYSTEM_DAWG, // 12 // deprecated
76 TESSDATA_LSTM, // 17
82 TESSDATA_VERSION, // 23
83
85};
@ TESSDATA_UNAMBIG_DAWG
@ TESSDATA_LSTM_SYSTEM_DAWG
@ TESSDATA_LSTM_UNICHARSET
@ TESSDATA_CUBE_SYSTEM_DAWG
@ TESSDATA_PARAMS_MODEL
@ TESSDATA_NUMBER_DAWG
@ TESSDATA_CUBE_UNICHARSET
@ TESSDATA_LSTM_PUNC_DAWG
@ TESSDATA_BIGRAM_DAWG
@ TESSDATA_LSTM_RECODER
@ TESSDATA_LANG_CONFIG
@ TESSDATA_LSTM_NUMBER_DAWG
@ TESSDATA_NUM_ENTRIES
@ TESSDATA_SHAPE_TABLE
@ TESSDATA_FIXED_LENGTH_DAWGS
@ TESSDATA_SYSTEM_DAWG

◆ TessErrorLogCode

Enumerator
DBG 
TESSLOG 
TESSEXIT 
ABORT 

Definition at line 27 of file errcode.h.

27 {
28 DBG = -1, /*log without alert */
29 TESSLOG = 0, /*alert user */
30 TESSEXIT = 1, /*exit after error */
31 ABORT = 2 /*abort after error */
32};
@ ABORT
@ TESSLOG
Definition: errcode.h:29
@ TESSEXIT
Definition: errcode.h:30

◆ TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT 
TEXTLINE_ORDER_RIGHT_TO_LEFT 
TEXTLINE_ORDER_TOP_TO_BOTTOM 

Definition at line 146 of file publictypes.h.

146 {
150};
@ TEXTLINE_ORDER_TOP_TO_BOTTOM
Definition: capi.h:133
@ TEXTLINE_ORDER_RIGHT_TO_LEFT
Definition: capi.h:132
@ TEXTLINE_ORDER_LEFT_TO_RIGHT
Definition: capi.h:131

◆ TextModelInputType

Enumerator
PCONT 
PSTART 
PNONE 

Definition at line 31 of file paragraphs_test.cc.

31 {
32 PCONT = 0, // Continuation line of a paragraph (default).
33 PSTART = 1, // First line of a paragraph.
34 PNONE = 2, // Not a paragraph line.
35};

◆ ThresholdMethod

enum class tesseract::ThresholdMethod
strong
Enumerator
Otsu 
LeptonicaOtsu 
Sauvola 
Max 

Definition at line 30 of file thresholder.h.

30 {
31 Otsu, // Tesseract's legacy Otsu
32 LeptonicaOtsu, // Leptonica's Otsu
33 Sauvola, // Leptonica's Sauvola
34 Max, // Number of Thresholding methods
35};

◆ TopNState

Enumerator
TN_TOP2 
TN_TOPN 
TN_ALSO_RAN 
TN_COUNT 

Definition at line 84 of file recodebeam.h.

84 {
85 TN_TOP2, // Winner or 2nd.
86 TN_TOPN, // Runner up in top-n, but not 1st or 2nd.
87 TN_ALSO_RAN, // Not in the top-n.
89};
@ TN_ALSO_RAN
Definition: recodebeam.h:87

◆ Trainability

Enumerator
TRAINABLE 
PERFECT 
UNENCODABLE 
HI_PRECISION_ERR 
NOT_BOXED 

Definition at line 51 of file lstmtrainer.h.

51 {
52 TRAINABLE, // Non-zero delta error.
53 PERFECT, // Zero delta error.
54 UNENCODABLE, // Not trainable due to coding/alignment trouble.
55 HI_PRECISION_ERR, // Hi confidence disagreement.
56 NOT_BOXED, // Early in training and has no character boxes.
57};
@ HI_PRECISION_ERR
Definition: lstmtrainer.h:55

◆ TrainingFlags

Enumerator
TF_INT_MODE 
TF_COMPRESS_UNICHARSET 

Definition at line 44 of file lstmrecognizer.h.

44 {
45 TF_INT_MODE = 1,
47};
@ TF_COMPRESS_UNICHARSET

◆ TrainingState

Enumerator
TS_DISABLED 
TS_ENABLED 
TS_TEMP_DISABLE 
TS_RE_ENABLE 

Definition at line 90 of file network.h.

90 {
91 // Valid states of training_.
92 TS_DISABLED, // Disabled permanently.
93 TS_ENABLED, // Enabled for backprop and to write a training dump.
94 // Re-enable from ANY disabled state.
95 TS_TEMP_DISABLE, // Temporarily disabled to write a recognition dump.
96 // Valid only for SetEnableTraining.
97 TS_RE_ENABLE, // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED.
98};
@ TS_TEMP_DISABLE
Definition: network.h:95
@ TS_ENABLED
Definition: network.h:93
@ TS_DISABLED
Definition: network.h:92
@ TS_RE_ENABLE
Definition: network.h:97

◆ UnicodeNormMode

enum class tesseract::UnicodeNormMode
strong
Enumerator
kNFD 
kNFC 
kNFKD 
kNFKC 

Definition at line 34 of file normstrngs.h.

◆ ViramaScript

enum class tesseract::ViramaScript : char32
strong
Enumerator
kNonVirama 
kDevanagari 
kBengali 
kGurmukhi 
kGujarati 
kOriya 
kTamil 
kTelugu 
kKannada 
kMalayalam 
kSinhala 
kMyanmar 
kKhmer 
kJavanese 

Definition at line 55 of file validator.h.

◆ WERD_FLAGS

Enumerator
W_SEGMENTED 

correctly segmented

W_ITALIC 

italic text

W_BOLD 

bold text

W_BOL 

start of line

W_EOL 

end of line

W_NORMALIZED 

flags

W_SCRIPT_HAS_XHEIGHT 

x-height concept makes sense.

W_SCRIPT_IS_LATIN 

Special case latin for y. splitting.

W_DONT_CHOP 

fixed pitch chopped

W_REP_CHAR 

repeated character

W_FUZZY_SP 

fuzzy space

W_FUZZY_NON 

fuzzy nonspace

W_INVERSE 

white on black

Definition at line 30 of file werd.h.

30 {
32 W_ITALIC,
33 W_BOLD,
34 W_BOL,
35 W_EOL,
44};
@ W_NORMALIZED
flags
Definition: werd.h:36
@ W_ITALIC
italic text
Definition: werd.h:32
@ W_SEGMENTED
correctly segmented
Definition: werd.h:31
@ W_BOL
start of line
Definition: werd.h:34
@ W_INVERSE
white on black
Definition: werd.h:43
@ W_BOLD
bold text
Definition: werd.h:33
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:41
@ W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
Definition: werd.h:37
@ W_EOL
end of line
Definition: werd.h:35
@ W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
Definition: werd.h:38
@ W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:39
@ W_REP_CHAR
repeated character
Definition: werd.h:40
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:42

◆ WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT 
WRITING_DIRECTION_RIGHT_TO_LEFT 
WRITING_DIRECTION_TOP_TO_BOTTOM 

Definition at line 129 of file publictypes.h.

129 {
133};
@ WRITING_DIRECTION_RIGHT_TO_LEFT
Definition: capi.h:127
@ WRITING_DIRECTION_TOP_TO_BOTTOM
Definition: capi.h:128
@ WRITING_DIRECTION_LEFT_TO_RIGHT
Definition: capi.h:126

◆ XHeightConsistencyEnum

Enumerator
XH_GOOD 
XH_SUBNORMAL 
XH_INCONSISTENT 

Definition at line 81 of file dict.h.

@ XH_GOOD
Definition: dict.h:81
@ XH_SUBNORMAL
Definition: dict.h:81
@ XH_INCONSISTENT
Definition: dict.h:81

Function Documentation

◆ AccumulateVector()

void tesseract::AccumulateVector ( int  n,
const TFloat src,
TFloat dest 
)
inline

Definition at line 215 of file functions.h.

215 {
216 for (int i = 0; i < n; ++i) {
217 dest[i] += src[i];
218 }
219}
dest
Definition: upload.py:409

◆ ActualOutlineLength()

float tesseract::ActualOutlineLength ( FEATURE  Feature)

Return the length of the outline in baseline normalized form.

Definition at line 27 of file normfeat.cpp.

27 {
28 return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
29}
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
std::vector< float > Params
Definition: ocrfeatures.h:66

◆ AddAdaptedClass()

void tesseract::AddAdaptedClass ( ADAPT_TEMPLATES_STRUCT Templates,
ADAPT_CLASS_STRUCT Class,
CLASS_ID  ClassId 
)

This routine adds a new adapted class to an existing set of adapted templates.

Parameters
Templates: set of templates to add new class to
Class: new class to add to templates
ClassId: class id to associate with new class
Note
Globals: none

Definition at line 41 of file adaptive.cpp.

41 {
42 assert(Templates != nullptr);
43 assert(Class != nullptr);
44 assert(LegalClassId(ClassId));
45 assert(UnusedClassIdIn(Templates->Templates, ClassId));
46 assert(Class->NumPermConfigs == 0);
47
48 auto IntClass = new INT_CLASS_STRUCT(1, 1);
49 AddIntClass(Templates->Templates, ClassId, IntClass);
50
51 assert(Templates->Class[ClassId] == nullptr);
52 Templates->Class[ClassId] = Class;
53
54} /* AddAdaptedClass */
#define UnusedClassIdIn(T, c)
Definition: intproto.h:155
#define LegalClassId(c)
Definition: intproto.h:154
void AddIntClass(INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:220
ADAPT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: adaptive.h:75
INT_TEMPLATES_STRUCT * Templates
Definition: adaptive.h:72

◆ AddConfigToClass()

TESS_API int tesseract::AddConfigToClass ( CLASS_TYPE  Class)

Definition at line 49 of file protos.cpp.

49 {
50 int NewNumConfigs;
51 int NewConfig;
52 int MaxNumProtos;
54
55 MaxNumProtos = Class->MaxNumProtos;
56 ASSERT_HOST(MaxNumProtos <= MAX_NUM_PROTOS);
57
58 if (Class->NumConfigs >= Class->MaxNumConfigs) {
59 /* add configs in CONFIG_INCREMENT chunks at a time */
60 NewNumConfigs =
62
63 Class->Configurations.resize(NewNumConfigs);
64 Class->MaxNumConfigs = NewNumConfigs;
65 }
66 NewConfig = Class->NumConfigs++;
67 Config = NewBitVector(MAX_NUM_PROTOS);
68 Class->Configurations[NewConfig] = Config;
69 zero_all_bits(Config, WordsInVectorOfSize(MAX_NUM_PROTOS));
70
71 return (NewConfig);
72}
#define ASSERT_HOST(x)
Definition: errcode.h:54
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define CONFIG_INCREMENT
Definition: protos.cpp:36
CLUSTERCONFIG Config
std::vector< BIT_VECTOR > Configurations
Definition: protos.h:46
int16_t MaxNumConfigs
Definition: protos.h:44
int16_t MaxNumProtos
Definition: protos.h:42

◆ AddFeature()

bool tesseract::AddFeature ( FEATURE_SET  FeatureSet,
FEATURE  Feature 
)

Add a feature to a feature set. If the feature set is already full, Feature is deleted and false is returned to indicate that the feature could not be added to the set; otherwise, Feature is stored in the next free slot of the set and true is returned.

Parameters
FeatureSet: set of features to add Feature to
Feature: feature to be added to FeatureSet
Returns
true if feature added to set, false if set is already full.

Definition at line 40 of file ocrfeatures.cpp.

40 {
41 if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) {
42 delete Feature;
43 return false;
44 }
45
46 FeatureSet->Features[FeatureSet->NumFeatures++] = Feature;
47 return true;
48} /* AddFeature */
std::vector< FEATURE_STRUCT * > Features
Definition: ocrfeatures.h:85

◆ AddIntClass()

void tesseract::AddIntClass ( INT_TEMPLATES_STRUCT Templates,
CLASS_ID  ClassId,
INT_CLASS_STRUCT Class 
)

This routine adds a new class structure to a set of templates. Classes have to be added to Templates in the order of increasing ClassIds.

Parameters
Templates: templates to add new class to
ClassId: class id to associate new class with
Class: class data structure to add to templates

Globals: none


Public Function Prototypes

Definition at line 220 of file intproto.cpp.

220 {
221 int Pruner;
222
223 assert(LegalClassId(ClassId));
224 if (static_cast<unsigned>(ClassId) != Templates->NumClasses) {
225 fprintf(stderr,
226 "Please make sure that classes are added to templates"
227 " in increasing order of ClassIds\n");
228 exit(1);
229 }
230 ClassForClassId(Templates, ClassId) = Class;
231 Templates->NumClasses++;
232
233 if (Templates->NumClasses > MaxNumClassesIn(Templates)) {
234 Pruner = Templates->NumClassPruners++;
235 Templates->ClassPruners[Pruner] = new CLASS_PRUNER_STRUCT;
236 memset(Templates->ClassPruners[Pruner], 0, sizeof(CLASS_PRUNER_STRUCT));
237 }
238} /* AddIntClass */
#define ClassForClassId(T, c)
Definition: intproto.h:156
#define MaxNumClassesIn(T)
Definition: intproto.h:153
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:112

◆ AddIntConfig()

int tesseract::AddIntConfig ( INT_CLASS_STRUCT Class)

This routine allocates the next free config in Class (incrementing NumConfigs and setting the new config's length to zero) and returns its index.

Parameters
Class: class to add new configuration to

Globals: none

Returns
Index of next free config.

Definition at line 250 of file intproto.cpp.

250 {
251 int Index;
252
253 assert(Class->NumConfigs < MAX_NUM_CONFIGS);
254
255 Index = Class->NumConfigs++;
256 Class->ConfigLengths[Index] = 0;
257 return Index;
258} /* AddIntConfig */
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:102

◆ AddIntProto()

int tesseract::AddIntProto ( INT_CLASS_STRUCT Class)

This routine allocates the next free proto in Class and returns its index.

Parameters
Class: class to add new proto to

Globals: none

Returns
Proto index of new proto.

Definition at line 270 of file intproto.cpp.

270 {
271 if (Class->NumProtos >= MAX_NUM_PROTOS) {
272 return (NO_PROTO);
273 }
274
275 int Index = Class->NumProtos++;
276
277 if (Class->NumProtos > MaxNumIntProtosIn(Class)) {
278 int ProtoSetId = Class->NumProtoSets++;
279 auto ProtoSet = new PROTO_SET_STRUCT;
280 Class->ProtoSets[ProtoSetId] = ProtoSet;
281 memset(ProtoSet, 0, sizeof(*ProtoSet));
282
283 /* reallocate space for the proto lengths and install in class */
284 Class->ProtoLengths.resize(MaxNumIntProtosIn(Class));
285 }
286
287 /* initialize proto so its length is zero and it isn't in any configs */
288 Class->ProtoLengths[Index] = 0;
289 auto Proto = ProtoForProtoId(Class, Index);
290 for (uint32_t *Word = Proto->Configs; Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0) {
291 }
292
293 return (Index);
294}
#define NO_PROTO
Definition: matchdefs.h:41
#define MaxNumIntProtosIn(C)
Definition: intproto.h:145
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:65
#define ProtoForProtoId(C, P)
Definition: intproto.h:148
PROTO_SET_STRUCT * ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:100
std::vector< uint8_t > ProtoLengths
Definition: intproto.h:101

◆ AddOutlineFeatureToSet()

void tesseract::AddOutlineFeatureToSet ( FPOINT Start,
FPOINT End,
FEATURE_SET  FeatureSet 
)

This routine computes the midpoint between Start and End to obtain the x,y position of the outline-feature. It also computes the direction from Start to End as the direction of the outline-feature and the distance from Start to End as the length of the outline-feature. This feature is then inserted into the next feature slot in FeatureSet.

Parameters
Start: starting point of outline-feature
End: ending point of outline-feature
FeatureSet: set to add outline-feature to

Definition at line 78 of file outfeat.cpp.

78 {
79 auto Feature = new FEATURE_STRUCT(&OutlineFeatDesc);
80 Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0);
81 Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x);
82 Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y);
83 Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End);
84 AddFeature(FeatureSet, Feature);
85
86} /* AddOutlineFeatureToSet */
#define AverageOf(A, B)
Definition: mfoutline.h:58
float DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:29
float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale)
Definition: fpoint.cpp:44
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:40
const FEATURE_DESC_STRUCT OutlineFeatDesc
float y
Definition: fpoint.h:30
float x
Definition: fpoint.h:30

◆ AddProtoToClass()

TESS_API int tesseract::AddProtoToClass ( CLASS_TYPE  Class)

Definition at line 82 of file protos.cpp.

82 {
83 if (Class->NumProtos >= Class->MaxNumProtos) {
84 /* add protos in PROTO_INCREMENT chunks at a time */
85 int NewNumProtos =
86 (((Class->MaxNumProtos + PROTO_INCREMENT) / PROTO_INCREMENT) * PROTO_INCREMENT);
87
88 Class->Prototypes.resize(NewNumProtos);
89
90 Class->MaxNumProtos = NewNumProtos;
91 ASSERT_HOST(NewNumProtos <= MAX_NUM_PROTOS);
92 }
93 int NewProto = Class->NumProtos++;
95 return (NewProto);
96}
#define PROTO_INCREMENT
Definition: protos.cpp:35
std::vector< PROTO_STRUCT > Prototypes
Definition: protos.h:45

◆ AddProtoToClassPruner()

void tesseract::AddProtoToClassPruner ( PROTO_STRUCT Proto,
CLASS_ID  ClassId,
INT_TEMPLATES_STRUCT Templates 
)

This routine adds Proto to the class pruning tables for the specified class in Templates.

Globals:

  • classify_num_cp_levels: number of levels used in the class pruner

Parameters
Proto: floating-pt proto to add to class pruner
ClassId: class id corresponding to Proto
Templates: set of templates containing class pruner

Definition at line 306 of file intproto.cpp.

308{
309 CLASS_PRUNER_STRUCT *Pruner;
310 uint32_t ClassMask;
311 uint32_t ClassCount;
312 uint32_t WordIndex;
313 int Level;
314 float EndPad, SidePad, AnglePad;
315 TABLE_FILLER TableFiller;
316 FILL_SPEC FillSpec;
317
318 Pruner = CPrunerFor(Templates, ClassId);
319 WordIndex = CPrunerWordIndexFor(ClassId);
320 ClassMask = CPrunerMaskFor(MAX_LEVEL, ClassId);
321
322 for (Level = classify_num_cp_levels - 1; Level >= 0; Level--) {
323 GetCPPadsForLevel(Level, &EndPad, &SidePad, &AnglePad);
324 ClassCount = CPrunerMaskFor(Level, ClassId);
325 InitTableFiller(EndPad, SidePad, AnglePad, Proto, &TableFiller);
326
327 while (!FillerDone(&TableFiller)) {
328 GetNextFill(&TableFiller, &FillSpec);
329 DoFill(&FillSpec, Pruner, ClassMask, ClassCount, WordIndex);
330 }
331 }
332} /* AddProtoToClassPruner */
#define CPrunerWordIndexFor(c)
Definition: intproto.h:160
#define CPrunerMaskFor(L, c)
Definition: intproto.h:162
#define CPrunerFor(T, c)
Definition: intproto.h:159
#define MAX_LEVEL
bool FillerDone(TABLE_FILLER *Filler)
Definition: intproto.cpp:1063
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill)
Definition: intproto.cpp:1295
void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *Proto, TABLE_FILLER *Filler)
Definition: intproto.cpp:1340
void DoFill(FILL_SPEC *FillSpec, CLASS_PRUNER_STRUCT *Pruner, uint32_t ClassMask, uint32_t ClassCount, uint32_t WordIndex)
Definition: intproto.cpp:1021
void GetCPPadsForLevel(int Level, float *EndPad, float *SidePad, float *AnglePad)
Definition: intproto.cpp:1235

◆ AddProtoToProtoPruner()

void tesseract::AddProtoToProtoPruner ( PROTO_STRUCT Proto,
int  ProtoId,
INT_CLASS_STRUCT Class,
bool  debug 
)

This routine updates the proto pruner lookup tables for Class to include a new proto identified by ProtoId and described by Proto.

Parameters
Proto: floating-pt proto to be added to proto pruner
ProtoId: id of proto
Class: integer class that contains desired proto pruner
debug: debug flag
Note
Globals: none

Definition at line 344 of file intproto.cpp.

344 {
345 float X, Y, Length;
346 float Pad;
347
348 if (ProtoId >= Class->NumProtos) {
349 tprintf("AddProtoToProtoPruner:assert failed: %d < %d", ProtoId, Class->NumProtos);
350 }
351 assert(ProtoId < Class->NumProtos);
352
353 int Index = IndexForProto(ProtoId);
354 auto ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
355
356 float Angle = Proto->Angle;
357#ifndef _WIN32
358 assert(!std::isnan(Angle));
359#endif
360
361 FillPPCircularBits(ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, Angle + ANGLE_SHIFT,
362 classify_pp_angle_pad / 360.0, debug);
363
364 Angle *= 2.0 * M_PI;
365 Length = Proto->Length;
366
367 X = Proto->X + X_SHIFT;
368 Pad = std::max(fabs(std::cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
369 fabs(std::sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
370
371 FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug);
372
373 Y = Proto->Y + Y_SHIFT;
374 Pad = std::max(fabs(std::sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
375 fabs(std::cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
376
377 FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
378} /* AddProtoToProtoPruner */
#define ANGLE_SHIFT
Definition: intproto.h:40
#define IndexForProto(P)
Definition: intproto.h:147
#define X_SHIFT
Definition: intproto.h:41
#define PRUNER_Y
Definition: intproto.h:36
#define PRUNER_ANGLE
Definition: intproto.h:37
#define SetForProto(P)
Definition: intproto.h:146
#define Y_SHIFT
Definition: intproto.h:42
#define PRUNER_X
Definition: intproto.h:35
#define GetPicoFeatureLength()
Definition: picofeat.h:56
void FillPPCircularBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, float Center, float Spread, bool debug)
Definition: intproto.cpp:1085
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, float Center, float Spread, bool debug)
Definition: intproto.cpp:1130

◆ AddToNormProtosList()

TESS_COMMON_TRAINING_API void tesseract::AddToNormProtosList ( LIST NormProtoList,
LIST  ProtoList,
const std::string &  CharName 
)

Definition at line 722 of file commontraining.cpp.

722 {
723 auto LabeledProtoList = new LABELEDLISTNODE(CharName.c_str());
724 iterate(ProtoList) {
725 auto Proto = reinterpret_cast<PROTOTYPE *>(ProtoList->first_node());
726 LabeledProtoList->List = push(LabeledProtoList->List, Proto);
727 }
728 *NormProtoList = push(*NormProtoList, LabeledProtoList);
729}
#define iterate(l)
Definition: oldlist.h:91
LIST push(LIST list, void *element)
Definition: oldlist.cpp:178
list_rec * first_node()
Definition: oldlist.h:107

◆ adjust_row_limits()

void tesseract::adjust_row_limits ( TO_BLOCK block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1129 of file makerow.cpp.

1131 {
1132 TO_ROW *row; // current row
1133 float size; // size of row
1134 float ymax; // top of row
1135 float ymin; // bottom of row
1136 TO_ROW_IT row_it = block->get_rows();
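1137
1138 if (textord_show_expanded_rows) {
1139 tprintf("Adjusting row limits for block(%d,%d)\n", block->block->pdblk.bounding_box().left(),
1140 block->block->pdblk.bounding_box().top());
1141 }
1142 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1143 row = row_it.data();
1144 size = row->max_y() - row->min_y();
1145 if (textord_show_expanded_rows) {
1146 tprintf("Row at %f has min %f, max %f, size %f\n", row->intercept(), row->min_y(),
1147 row->max_y(), size);
1148 }
1149 size /= tesseract::CCStruct::kXHeightFraction + tesseract::CCStruct::kAscenderFraction +
1150 tesseract::CCStruct::kDescenderFraction;
1151 ymax = size * (tesseract::CCStruct::kXHeightFraction + tesseract::CCStruct::kAscenderFraction);
1152 ymin = -size * tesseract::CCStruct::kDescenderFraction;
1153 row->set_limits(row->intercept() + ymin, row->intercept() + ymax);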
1154 row->merged = false;
1155 }
1156}
bool textord_show_expanded_rows
Definition: makerow.cpp:49
float max_y() const
Definition: blobbox.h:568
float min_y() const
Definition: blobbox.h:571
void set_limits(float new_min, float new_max)
Definition: blobbox.h:628
float intercept() const
Definition: blobbox.h:598
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
static const double kXHeightFraction
Definition: ccstruct.h:32
static const double kDescenderFraction
Definition: ccstruct.h:31
static const double kAscenderFraction
Definition: ccstruct.h:33
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67

◆ allocNormProtos()

void tesseract::allocNormProtos ( )

◆ ApproximateOutline()

TESSLINE * tesseract::ApproximateOutline ( bool  allow_detailed_fx,
C_OUTLINE c_outline 
)

Definition at line 529 of file polyaprx.cpp.

529 {
530 EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path
531 EDGEPT *edgepts = stack_edgepts;
532
533 // Use heap memory if the stack buffer is not big enough.
534 if (c_outline->pathlength() > FASTEDGELENGTH) {
535 edgepts = new EDGEPT[c_outline->pathlength()];
536 }
537
538 // bounding box
539 const auto &loop_box = c_outline->bounding_box();
540 int32_t area = loop_box.height();
541 if (!poly_wide_objects_better && loop_box.width() > area) {
542 area = loop_box.width();
543 }
544 area *= area;
545 edgesteps_to_edgepts(c_outline, edgepts);
546 fix2(edgepts, area);
547 EDGEPT *edgept = poly2(edgepts, area); // 2nd approximation.
548 EDGEPT *startpt = edgept;
549 EDGEPT *result = nullptr;
550 EDGEPT *prev_result = nullptr;
551 do {
552 auto *new_pt = new EDGEPT;
553 new_pt->pos = edgept->pos;
554 new_pt->prev = prev_result;
555 if (prev_result == nullptr) {
556 result = new_pt;
557 } else {
558 prev_result->next = new_pt;
559 new_pt->prev = prev_result;
560 }
561 if (allow_detailed_fx) {
562 new_pt->src_outline = edgept->src_outline;
563 new_pt->start_step = edgept->start_step;
564 new_pt->step_count = edgept->step_count;
565 }
566 prev_result = new_pt;
567 edgept = edgept->next;
568 } while (edgept != startpt);
569 prev_result->next = result;
570 result->prev = prev_result;
571 if (edgepts != stack_edgepts) {
572 delete[] edgepts;
573 }
574 return TESSLINE::BuildFromOutlineList(result);
575}
#define FASTEDGELENGTH
Definition: polyaprx.cpp:34
int32_t pathlength() const
Definition: coutln.h:134
const TBOX & bounding_box() const
Definition: coutln.h:113
TDimension height() const
Definition: rect.h:118

◆ AsciiLikelyListItem()

TESS_API bool tesseract::AsciiLikelyListItem ( const std::string &  word)

Definition at line 282 of file paragraphs.cpp.

282 {
283 return LikelyListMark(word) || LikelyListNumeral(word);
284}

◆ AsciiToRowInfo()

void tesseract::AsciiToRowInfo ( const char *  text,
int  row_number,
RowInfo info 
)

Definition at line 49 of file paragraphs_test.cc.

49 {
50 const int kCharWidth = 10;
51 const int kLineSpace = 30;
52 info->text = text;
53 info->has_leaders = strstr(text, "...") != nullptr || strstr(text, ". . .") != nullptr;
54 info->has_drop_cap = false;
55 info->pix_ldistance = info->pix_rdistance = 0;
56 info->average_interword_space = kCharWidth;
57 info->pix_xheight = kCharWidth;
58 info->lword_text = info->rword_text = "";
59 info->ltr = true;
60
61 std::vector<std::string> words = split(text, ' ');
62 info->num_words = words.size();
63 if (info->num_words < 1) {
64 return;
65 }
66
67 info->lword_text = words[0].c_str();
68 info->rword_text = words[words.size() - 1].c_str();
69 int lspace = 0;
70 while (lspace < info->text.size() && text[lspace] == ' ') {
71 lspace++;
72 }
73 int rspace = 0;
74 while (rspace < info->text.size() && text[info->text.size() - rspace - 1] == ' ') {
75 rspace++;
76 }
77
78 int top = -kLineSpace * row_number;
79 int bottom = top - kLineSpace;
80 int row_right = kCharWidth * info->text.size();
81 int lword_width = kCharWidth * info->lword_text.size();
82 int rword_width = kCharWidth * info->rword_text.size();
83 info->pix_ldistance = lspace * kCharWidth;
84 info->pix_rdistance = rspace * kCharWidth;
85 info->lword_box = TBOX(info->pix_ldistance, bottom, info->pix_ldistance + lword_width, top);
86 info->rword_box = TBOX(row_right - info->pix_rdistance - rword_width, bottom,
87 row_right - info->pix_rdistance, top);
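88 LeftWordAttributes(nullptr, nullptr, info->lword_text, &info->lword_indicates_list_item,
89 &info->lword_likely_starts_idea, &info->lword_likely_ends_idea);
90 RightWordAttributes(nullptr, nullptr, info->rword_text, &info->rword_indicates_list_item,
91 &info->rword_likely_starts_idea, &info->rword_likely_ends_idea);
92}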
@ TBOX
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
Definition: paragraphs.cpp:477
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
Definition: paragraphs.cpp:431
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:43
bool lword_likely_ends_idea
Definition: paragraphs.h:71
bool rword_likely_ends_idea
Definition: paragraphs.h:75
int average_interword_space
Definition: paragraphs.h:50
bool rword_likely_starts_idea
Definition: paragraphs.h:74
std::string rword_text
Definition: paragraphs.h:57
std::string text
Definition: paragraphs.h:41
std::string lword_text
Definition: paragraphs.h:56
bool lword_indicates_list_item
Definition: paragraphs.h:69
bool rword_indicates_list_item
Definition: paragraphs.h:73
bool lword_likely_starts_idea
Definition: paragraphs.h:70

◆ ASSERT_FAILED()

constexpr ERRCODE tesseract::ASSERT_FAILED ( "Assert failed"  )
constexpr

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2 ( Image  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 162 of file tordmain.cpp.

164 { // output list
165 BLOCK_IT block_it = blocks;
166 C_BLOB_IT blob_it; // iterator
167 BLOBNBOX_IT port_box_it; // iterator
168 // destination iterator
169 TO_BLOCK_IT port_block_it = port_blocks;
170
171 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
172 auto block = block_it.data();
173 auto port_block = new TO_BLOCK(block);
174
175 // Convert the good outlines to block->blob_list
176 port_box_it.set_to_list(&port_block->blobs);
177 blob_it.set_to_list(block->blob_list());
178 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
179 auto blob = blob_it.extract();
180 auto newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
181 newblob->set_owns_cblob(true);
182 SetBlobStrokeWidth(pix, newblob);
183 port_box_it.add_after_then_move(newblob);
184 }
185
186 // Put the rejected outlines in block->noise_blobs, which allows them to
187 // be reconsidered and sorted back into rows and recover outlines mistakenly
188 // rejected.
189 port_box_it.set_to_list(&port_block->noise_blobs);
190 blob_it.set_to_list(block->reject_blobs());
191 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
192 auto blob = blob_it.extract();
193 auto newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
194 newblob->set_owns_cblob(true);
195 SetBlobStrokeWidth(pix, newblob);
196 port_box_it.add_after_then_move(newblob);
197 }
198
199 port_block_it.add_after_then_move(port_block);
200 }
201}
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob)
Definition: tordmain.cpp:68

◆ assign_blobs_to_rows()

void tesseract::assign_blobs_to_rows ( TO_BLOCK block,
float *  gradient,
int  pass,
bool  reject_misses,
bool  make_new_rows,
bool  drawing_skew 
)

Definition at line 2272 of file makerow.cpp.

2279 {
2280 OVERLAP_STATE overlap_result; // what to do with it
2281 float ycoord; // current y
2282 float top, bottom; // of blob
2283 float g_length = 1.0f; // from gradient
2284 int16_t row_count; // no of rows
2285 int16_t left_x; // left edge
2286 int16_t last_x; // previous edge
2287 float block_skew; // y delta
2288 float smooth_factor; // for new coords
2289 float near_dist; // dist to nearest row
2290 ICOORD testpt; // testing only
2291 BLOBNBOX *blob; // current blob
2292 TO_ROW *row; // current row
2293 TO_ROW *dest_row = nullptr; // row to put blob in
2294 // iterators
2295 BLOBNBOX_IT blob_it = &block->blobs;
2296 TO_ROW_IT row_it = block->get_rows();
2297
2298 ycoord =
2299 (block->block->pdblk.bounding_box().bottom() + block->block->pdblk.bounding_box().top()) /
2300 2.0f;
2301 if (gradient != nullptr) {
2302 g_length = std::sqrt(1 + *gradient * *gradient);
2303 }
2304#ifndef GRAPHICS_DISABLED
2305 if (drawing_skew) {
2306 to_win->SetCursor(block->block->pdblk.bounding_box().left(), ycoord);
2307 }
2308#endif
2309 testpt = ICOORD(textord_test_x, textord_test_y);
2310 blob_it.sort(blob_x_order);
2311 smooth_factor = 1.0;
2312 block_skew = 0.0f;
2313 row_count = row_it.length(); // might have rows
2314 if (!blob_it.empty()) {
2315 left_x = blob_it.data()->bounding_box().left();
2316 } else {
2317 left_x = block->block->pdblk.bounding_box().left();
2318 }
2319 last_x = left_x;
2320 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
2321 blob = blob_it.data();
2322 if (gradient != nullptr) {
2323 block_skew = (1 - 1 / g_length) * blob->bounding_box().bottom() +
2324 *gradient / g_length * blob->bounding_box().left();
2325 } else if (blob->bounding_box().left() - last_x > block->line_size / 2 &&
2326 last_x - left_x > block->line_size * 2 && textord_interpolating_skew) {
2327 // tprintf("Interpolating skew from %g",block_skew);
2328 block_skew *= static_cast<float>(blob->bounding_box().left() - left_x) / (last_x - left_x);
2329 // tprintf("to %g\n",block_skew);
2330 }
2331 last_x = blob->bounding_box().left();
2332 top = blob->bounding_box().top() - block_skew;
2333 bottom = blob->bounding_box().bottom() - block_skew;
2334#ifndef GRAPHICS_DISABLED
2335 if (drawing_skew) {
2336 to_win->DrawTo(blob->bounding_box().left(), ycoord + block_skew);
2337 }
2338#endif
2339 if (!row_it.empty()) {
2340 for (row_it.move_to_first(); !row_it.at_last() && row_it.data()->min_y() > top;
2341 row_it.forward()) {
2342 }
2343 row = row_it.data();
2344 if (row->min_y() <= top && row->max_y() >= bottom) {
2345 // any overlap
2346 dest_row = row;
2347 overlap_result = most_overlapping_row(&row_it, dest_row, top, bottom, block->line_size,
2348 blob->bounding_box().contains(testpt));
2349 if (overlap_result == NEW_ROW && !reject_misses) {
2350 overlap_result = ASSIGN;
2351 }
2352 } else {
2353 overlap_result = NEW_ROW;
2354 if (!make_new_rows) {
2355 near_dist = row_it.data_relative(-1)->min_y() - top;
2356 // below bottom
2357 if (bottom < row->min_y()) {
2358 if (row->min_y() - bottom <= (block->line_spacing - block->line_size) *
2359 tesseract::CCStruct::kDescenderFraction) {
2360 // done it
2361 overlap_result = ASSIGN;
2362 dest_row = row;
2363 }
2364 } else if (near_dist > 0 && near_dist < bottom - row->max_y()) {
2365 row_it.backward();
2366 dest_row = row_it.data();
2367 if (dest_row->min_y() - bottom <= (block->line_spacing - block->line_size) *
2368 tesseract::CCStruct::kDescenderFraction) {
2369 // done it
2370 overlap_result = ASSIGN;
2371 }
2372 } else {
2373 if (top - row->max_y() <=
2374 (block->line_spacing - block->line_size) *
2375 (textord_overlap_x + tesseract::CCStruct::kAscenderFraction)) {
2376 // done it
2377 overlap_result = ASSIGN;
2378 dest_row = row;
2379 }
2380 }
2381 }
2382 }
2383 if (overlap_result == ASSIGN) {
2384 dest_row->add_blob(blob_it.extract(), top, bottom, block->line_size);
2385 }
2386 if (overlap_result == NEW_ROW) {
2387 if (make_new_rows && top - bottom < block->max_blob_size) {
2388 dest_row = new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2389 row_count++;
2390 if (bottom > row_it.data()->min_y()) {
2391 row_it.add_before_then_move(dest_row);
2392 // insert in right place
2393 } else {
2394 row_it.add_after_then_move(dest_row);
2395 }
2396 smooth_factor = 1.0 / (row_count * textord_skew_lag + textord_skewsmooth_offset);
2397 } else {
2398 overlap_result = REJECT;
2399 }
2400 }
2401 } else if (make_new_rows && top - bottom < block->max_blob_size) {
2402 overlap_result = NEW_ROW;
2403 dest_row = new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2404 row_count++;
2405 row_it.add_after_then_move(dest_row);
2406 smooth_factor = 1.0 / (row_count * textord_skew_lag + textord_skewsmooth_offset2);
2407 } else {
2408 overlap_result = REJECT;
2409 }
2410 if (blob->bounding_box().contains(testpt) && textord_debug_blob) {
2411 if (overlap_result != REJECT) {
2412 tprintf("Test blob assigned to row at (%g,%g) on pass %d\n", dest_row->min_y(),
2413 dest_row->max_y(), pass);
2414 } else {
2415 tprintf("Test blob assigned to no row on pass %d\n", pass);
2416 }
2417 }
2418 if (overlap_result != REJECT) {
2419 while (!row_it.at_first() && row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2420 row = row_it.extract();
2421 row_it.backward();
2422 row_it.add_before_then_move(row);
2423 }
2424 while (!row_it.at_last() && row_it.data()->min_y() < row_it.data_relative(1)->min_y()) {
2425 row = row_it.extract();
2426 row_it.forward();
2427 // Keep rows in order.
2428 row_it.add_after_then_move(row);
2429 }
2430 BLOBNBOX_IT added_blob_it(dest_row->blob_list());
2431 added_blob_it.move_to_last();
2432 TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2433 if (dest_row->blob_list()->singleton() || !prev_box.major_x_overlap(blob->bounding_box())) {
2434 block_skew = (1 - smooth_factor) * block_skew +
2435 smooth_factor * (blob->bounding_box().bottom() - dest_row->initial_min_y());
2436 }
2437 }
2438 }
2439 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2440 if (row_it.data()->blob_list()->empty()) {
2441 delete row_it.extract(); // Discard empty rows.
2442 }
2443 }
2444}
int textord_test_y
Definition: makerow.cpp:65
int textord_test_x
Definition: makerow.cpp:64
double textord_skew_lag
Definition: makerow.cpp:73
ScrollView * to_win
Definition: drawtord.cpp:37
OVERLAP_STATE
Definition: makerow.h:30
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2451
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2542
bool textord_debug_blob
Definition: makerow.cpp:96
const TBOX & bounding_box() const
Definition: blobbox.h:239
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:734
float initial_min_y() const
Definition: blobbox.h:577
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:608
BLOBNBOX_LIST blobs
Definition: blobbox.h:776
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
TDimension bottom() const
Definition: rect.h:75
bool contains(const FCOORD pt) const
Definition: rect.h:344
void SetCursor(int x, int y)
Definition: scrollview.cpp:485
void DrawTo(int x, int y)
Definition: scrollview.cpp:491

◆ BAD_PARAMETER()

constexpr ERRCODE tesseract::BAD_PARAMETER ( "List parameter error"  )
constexpr

◆ BADBLOCKLINE()

constexpr ERRCODE tesseract::BADBLOCKLINE ( "Y coordinate in block out of bounds"  )
constexpr

◆ BADERRACTION()

constexpr ERRCODE tesseract::BADERRACTION ( "Illegal error action"  )
constexpr

◆ blob_x_order()

int tesseract::blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2542 of file makerow.cpp.

2544 {
2545 // converted ptr
2546 const BLOBNBOX *blob1 = *reinterpret_cast<const BLOBNBOX *const *>(item1);
2547 // converted ptr
2548 const BLOBNBOX *blob2 = *reinterpret_cast<const BLOBNBOX *const *>(item2);
2549
2550 if (blob1->bounding_box().left() < blob2->bounding_box().left()) {
2551 return -1;
2552 } else if (blob1->bounding_box().left() > blob2->bounding_box().left()) {
2553 return 1;
2554 } else {
2555 return 0;
2556 }
2557}

◆ BlobMicroFeatures()

MICROFEATURES tesseract::BlobMicroFeatures ( TBLOB Blob,
const DENORM cn_denorm 
)

This routine extracts micro-features from the specified blob and returns a list of the micro-features. All micro-features are normalized according to the specified line statistics.

Parameters
Blob: blob to extract micro-features from
cn_denorm: control parameter to feature extractor
Returns
List of micro-features extracted from the blob.

Definition at line 54 of file mfx.cpp.

54 {
55 MICROFEATURES MicroFeatures;
56 LIST Outlines;
57 LIST RemainingOutlines;
58
59 if (Blob != nullptr) {
60 Outlines = ConvertBlob(Blob);
61
62 RemainingOutlines = Outlines;
63 iterate(RemainingOutlines) {
64 auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
65 CharNormalizeOutline(Outline, cn_denorm);
66 }
67
68 RemainingOutlines = Outlines;
69 iterate(RemainingOutlines) {
70 auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
71 FindDirectionChanges(Outline, classify_min_slope, classify_max_slope);
72 MarkDirectionChanges(Outline);
73 MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures);
74 }
75 FreeOutlines(Outlines);
76 }
77 return MicroFeatures;
78} /* BlobMicroFeatures */
LIST MFOUTLINE
Definition: mfoutline.h:28
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:151
void MarkDirectionChanges(MFOUTLINE Outline)
Definition: mfoutline.cpp:166
double classify_max_slope
Definition: mfx.cpp:31
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:34
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm)
Definition: mfoutline.cpp:298
MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURES MicroFeatures)
Definition: mfx.cpp:91
double classify_min_slope
Definition: mfx.cpp:30
std::forward_list< MicroFeature > MICROFEATURES
Definition: mfdefs.h:37
void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope)
Definition: mfoutline.cpp:104

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample ( const TBLOB blob,
bool  nonlinear_norm,
INT_FX_RESULT_STRUCT fx_info,
std::vector< INT_FEATURE_STRUCT > *  bl_features 
)

Definition at line 79 of file intfx.cpp.

81 {
82 std::vector<INT_FEATURE_STRUCT> cn_features;
83 Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, fx_info, nullptr);
84 // TODO(rays) Use blob->PreciseBoundingBox() instead.
85 TBOX box = blob.bounding_box();
86 TrainingSample *sample = nullptr;
87 int num_features = fx_info->NumCN;
88 if (num_features > 0) {
89 sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], num_features);
90 }
91 if (sample != nullptr) {
92 // Set the bounding box (in original image coordinates) in the sample.
93 TPOINT topleft, botright;
94 topleft.x = box.left();
95 topleft.y = box.top();
96 botright.x = box.right();
97 botright.y = box.bottom();
98 TPOINT original_topleft, original_botright;
99 blob.denorm().DenormTransform(nullptr, topleft, &original_topleft);
100 blob.denorm().DenormTransform(nullptr, botright, &original_botright);
101 sample->set_bounding_box(
102 TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y));
103 }
104 return sample;
105}
@ TPOINT
TBOX bounding_box() const
Definition: blobs.cpp:466
const DENORM & denorm() const
Definition: blobs.h:368
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:401
TDimension right() const
Definition: rect.h:89
void set_bounding_box(const TBOX &box)

◆ block_edges()

void tesseract::block_edges ( Image  t_pix,
PDBLK *  block,
C_OUTLINE_IT *  outline_it 
)

Definition at line 62 of file scanedg.cpp.

64 {
65 ICOORD bleft; // bounding box
66 ICOORD tright;
67 BLOCK_LINE_IT line_it = block; // line iterator
68
69 int width = pixGetWidth(t_pix);
70 int height = pixGetHeight(t_pix);
71 int wpl = pixGetWpl(t_pix);
72 // lines in progress
73 std::unique_ptr<CRACKEDGE *[]> ptrline(new CRACKEDGE *[width + 1]);
74 CRACKEDGE *free_cracks = nullptr;
75
76 block->bounding_box(bleft, tright); // block box
77 ASSERT_HOST(tright.x() <= width);
78 ASSERT_HOST(tright.y() <= height);
79 int block_width = tright.x() - bleft.x();
80 for (int x = block_width; x >= 0; x--) {
81 ptrline[x] = nullptr; // no lines in progress
82 }
83
84 std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]);
85
86 const uint8_t margin = WHITE_PIX;
87
88 for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) {
89 if (y >= bleft.y() && y < tright.y()) {
90 // Get the binary pixels from the image.
91 l_uint32 *line = pixGetData(t_pix) + wpl * (height - 1 - y);
92 for (int x = 0; x < block_width; ++x) {
93 bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1;
94 }
95 make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y);
96 } else {
97 memset(bwline.get(), margin, block_width * sizeof(bwline[0]));
98 }
99 line_edges(bleft.x(), y, block_width, margin, bwline.get(), ptrline.get(), &free_cracks,
100 outline_it);
101 }
102
103 free_crackedges(free_cracks); // really free them
104}
#define WHITE_PIX
Definition: scanedg.cpp:31
const double y
rectangle iterator
Definition: pdblock.h:156
TDimension y() const
access function
Definition: points.h:62
TDimension x() const
access function
Definition: points.h:58

◆ BOOL_VAR_H() [1/40]

tesseract::BOOL_VAR_H ( devanagari_split_debugimage  )

◆ BOOL_VAR_H() [2/40]

tesseract::BOOL_VAR_H ( disable_character_fragments  )

◆ BOOL_VAR_H() [3/40]

tesseract::BOOL_VAR_H ( gapmap_debug  )

◆ BOOL_VAR_H() [4/40]

tesseract::BOOL_VAR_H ( gapmap_no_isolated_quanta  )

◆ BOOL_VAR_H() [5/40]

tesseract::BOOL_VAR_H ( gapmap_use_ends  )

◆ BOOL_VAR_H() [6/40]

tesseract::BOOL_VAR_H ( textord_blockndoc_fixed  )

◆ BOOL_VAR_H() [7/40]

tesseract::BOOL_VAR_H ( textord_blocksall_fixed  )

◆ BOOL_VAR_H() [8/40]

tesseract::BOOL_VAR_H ( textord_blocksall_prop  )

◆ BOOL_VAR_H() [9/40]

tesseract::BOOL_VAR_H ( textord_chopper_test  )

◆ BOOL_VAR_H() [10/40]

tesseract::BOOL_VAR_H ( textord_debug_blob  )

◆ BOOL_VAR_H() [11/40]

tesseract::BOOL_VAR_H ( textord_debug_pitch_metric  )

◆ BOOL_VAR_H() [12/40]

tesseract::BOOL_VAR_H ( textord_debug_pitch_test  )

◆ BOOL_VAR_H() [13/40]

tesseract::BOOL_VAR_H ( textord_debug_printable  )

◆ BOOL_VAR_H() [14/40]

tesseract::BOOL_VAR_H ( textord_debug_xheights  )

◆ BOOL_VAR_H() [15/40]

tesseract::BOOL_VAR_H ( textord_fast_pitch_test  )

◆ BOOL_VAR_H() [16/40]

tesseract::BOOL_VAR_H ( textord_fix_makerow_bug  )

◆ BOOL_VAR_H() [17/40]

tesseract::BOOL_VAR_H ( textord_fix_xheight_bug  )

◆ BOOL_VAR_H() [18/40]

tesseract::BOOL_VAR_H ( textord_force_make_prop_words  )

◆ BOOL_VAR_H() [19/40]

tesseract::BOOL_VAR_H ( textord_heavy_nr  )

◆ BOOL_VAR_H() [20/40]

tesseract::BOOL_VAR_H ( textord_new_initial_xheight  )

◆ BOOL_VAR_H() [21/40]

tesseract::BOOL_VAR_H ( textord_old_baselines  )

◆ BOOL_VAR_H() [22/40]

tesseract::BOOL_VAR_H ( textord_old_xheight  )

◆ BOOL_VAR_H() [23/40]

tesseract::BOOL_VAR_H ( textord_oldbl_debug  )

◆ BOOL_VAR_H() [24/40]

tesseract::BOOL_VAR_H ( textord_parallel_baselines  )

◆ BOOL_VAR_H() [25/40]

tesseract::BOOL_VAR_H ( textord_pitch_scalebigwords  )

◆ BOOL_VAR_H() [26/40]

tesseract::BOOL_VAR_H ( textord_restore_underlines  )

◆ BOOL_VAR_H() [27/40]

tesseract::BOOL_VAR_H ( textord_show_expanded_rows  )

◆ BOOL_VAR_H() [28/40]

tesseract::BOOL_VAR_H ( textord_show_final_blobs  )

◆ BOOL_VAR_H() [29/40]

tesseract::BOOL_VAR_H ( textord_show_final_rows  )

◆ BOOL_VAR_H() [30/40]

tesseract::BOOL_VAR_H ( textord_show_fixed_cuts  )

◆ BOOL_VAR_H() [31/40]

tesseract::BOOL_VAR_H ( textord_show_initial_rows  )

◆ BOOL_VAR_H() [32/40]

tesseract::BOOL_VAR_H ( textord_show_initial_words  )

◆ BOOL_VAR_H() [33/40]

tesseract::BOOL_VAR_H ( textord_show_page_cuts  )

◆ BOOL_VAR_H() [34/40]

tesseract::BOOL_VAR_H ( textord_show_parallel_rows  )

◆ BOOL_VAR_H() [35/40]

tesseract::BOOL_VAR_H ( textord_show_row_cuts  )

◆ BOOL_VAR_H() [36/40]

tesseract::BOOL_VAR_H ( textord_straight_baselines  )

◆ BOOL_VAR_H() [37/40]

tesseract::BOOL_VAR_H ( textord_test_landscape  )

◆ BOOL_VAR_H() [38/40]

tesseract::BOOL_VAR_H ( wordrec_blob_pause  )

◆ BOOL_VAR_H() [39/40]

tesseract::BOOL_VAR_H ( wordrec_display_all_blobs  )

◆ BOOL_VAR_H() [40/40]

tesseract::BOOL_VAR_H ( wordrec_display_splits  )

◆ box_next()

TBOX tesseract::box_next ( BLOBNBOX_IT *  it)

Definition at line 638 of file blobbox.cpp.

640 {
641 BLOBNBOX *blob; // current blob
642 TBOX result; // total box
643
644 blob = it->data();
645 result = blob->bounding_box();
646 do {
647 it->forward();
648 blob = it->data();
649 if (blob->cblob() == nullptr) {
650 // was pre-chopped
651 result += blob->bounding_box();
652 }
653 }
654 // until next real blob
655 while ((blob->cblob() == nullptr) || blob->joined_to_prev());
656 return result;
657}
C_BLOB * cblob() const
Definition: blobbox.h:277
bool joined_to_prev() const
Definition: blobbox.h:265

◆ box_next_pre_chopped()

TBOX tesseract::box_next_pre_chopped ( BLOBNBOX_IT *  it)

Definition at line 667 of file blobbox.cpp.

669 {
670 BLOBNBOX *blob; // current blob
671 TBOX result; // total box
672
673 blob = it->data();
674 result = blob->bounding_box();
675 do {
676 it->forward();
677 blob = it->data();
678 }
679 // until next real blob
680 while (blob->joined_to_prev());
681 return result;
682}

◆ Bucket16For()

uint16_t tesseract::Bucket16For ( float  param,
float  offset,
int  num_buckets 
)

Definition at line 389 of file intproto.cpp.

389 {
390 int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
391 return static_cast<uint16_t>(ClipToRange<int>(bucket, 0, num_buckets - 1));
392}
#define MapParam(P, O, N)
Definition: intproto.cpp:105
int IntCastRounded(double x)
Definition: helpers.h:170

◆ Bucket8For()

uint8_t tesseract::Bucket8For ( float  param,
float  offset,
int  num_buckets 
)

Returns a quantized bucket for the given param shifted by offset, notionally (param + offset) * num_buckets, but clipped and cast to the appropriate type.

Definition at line 385 of file intproto.cpp.

385 {
386 int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
387 return static_cast<uint8_t>(ClipToRange<int>(bucket, 0, num_buckets - 1));
388}

◆ BucketEnd()

float tesseract::BucketEnd ( int  Bucket,
float  Offset,
int  NumBuckets 
)

This routine returns the parameter value which corresponds to the end of the specified bucket. The bucket number should have been generated using the BucketFor() function with parameters Offset and NumBuckets.

Parameters
Bucket: bucket whose end is to be computed
Offset: offset used to map params to buckets
NumBuckets: total number of buckets
Returns
Param value corresponding to end position of Bucket.
Note
Globals: none

Definition at line 1007 of file intproto.cpp.

1007 {
1008 return static_cast<float>(Bucket + 1) / NumBuckets - Offset;
1009} /* BucketEnd */

◆ BucketStart()

float tesseract::BucketStart ( int  Bucket,
float  Offset,
int  NumBuckets 
)

This routine returns the parameter value which corresponds to the beginning of the specified bucket. The bucket number should have been generated using the BucketFor() function with parameters Offset and NumBuckets.

Parameters
Bucket: bucket whose start is to be computed
Offset: offset used to map params to buckets
NumBuckets: total number of buckets
Returns
Param value corresponding to start position of Bucket.
Note
Globals: none

Definition at line 991 of file intproto.cpp.

991 {
992 return static_cast<float>(Bucket) / NumBuckets - Offset;
993
994} /* BucketStart */

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults ( std::vector< PARA * > *  row_owners,
PARA_LIST *  paragraphs 
)

Definition at line 2288 of file paragraphs.cpp.

2288 {
2289 std::vector<PARA *> &rows = *row_owners;
2290 paragraphs->clear();
2291 PARA_IT out(paragraphs);
2292 PARA *formerly_null = nullptr;
2293 for (unsigned i = 0; i < rows.size(); i++) {
2294 if (rows[i] == nullptr) {
2295 if (i == 0 || rows[i - 1] != formerly_null) {
2296 rows[i] = formerly_null = new PARA();
2297 } else {
2298 rows[i] = formerly_null;
2299 continue;
2300 }
2301 } else if (i > 0 && rows[i - 1] == rows[i]) {
2302 continue;
2303 }
2304 out.add_after_then_move(rows[i]);
2305 }
2306}

◆ CANTOPENFILE()

constexpr ERRCODE tesseract::CANTOPENFILE ( "Can't open file" )
constexpr

◆ ChangeDirection()

void tesseract::ChangeDirection ( MFOUTLINE  Start,
MFOUTLINE  End,
DIRECTION  Direction 
)

Change the direction of every vector in the specified outline segment to Direction. The segment to be changed starts at Start and ends at End. Note that the previous direction of End must also be changed to reflect the change in direction of the point before it.

Parameters
Start: defines start of segment of outline to be modified
End: defines end of segment of outline to be modified
Direction: new direction to assign to segment

Definition at line 280 of file mfoutline.cpp.

280 {
281 MFOUTLINE Current;
282
283 for (Current = Start; Current != End; Current = NextPointAfter(Current)) {
284 PointAt(Current)->Direction = Direction;
285 }
286
287 PointAt(End)->PreviousDirection = Direction;
288
289} /* ChangeDirection */

◆ CharNormalizeOutline()

void tesseract::CharNormalizeOutline ( MFOUTLINE  Outline,
const DENORM &  cn_denorm 
)

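This routine normalizes each point in Outline by translating it to the specified center and scaling it anisotropically according to the given scale factors. The center and scale factors are not separate arguments: each point is transformed with cn_denorm.LocalNormTransform(), and the result is then offset by UINT8_MAX / 2 and multiplied by MF_SCALE_FACTOR.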

Parameters
Outline: outline to be character normalized
cn_denorm: DENORM whose LocalNormTransform() is applied to each outline point

Definition at line 298 of file mfoutline.cpp.

298 {
299 MFOUTLINE First, Current;
300 MFEDGEPT *CurrentPoint;
301
302 if (Outline == NIL_LIST) {
303 return;
304 }
305
306 First = Outline;
307 Current = First;
308 do {
309 CurrentPoint = PointAt(Current);
310 FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y);
311 cn_denorm.LocalNormTransform(pos, &pos);
312 CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
313 CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
314
315 Current = NextPointAfter(Current);
316 } while (Current != First);
317
318} /* CharNormalizeOutline */
#define NIL_LIST
Definition: oldlist.h:75
const float MF_SCALE_FACTOR
Definition: mfoutline.h:61
void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:310

◆ check_path_legal()

ScrollView::Color tesseract::check_path_legal ( CRACKEDGE *  start)

Definition at line 67 of file edgloop.cpp.

69 {
70 int lastchain; // last chain code
71 int chaindiff; // chain code diff
72 int32_t length; // length of loop
73 int32_t chainsum; // sum of chain diffs
74 CRACKEDGE *edgept; // current point
75 constexpr ERRCODE ED_ILLEGAL_SUM("Illegal sum of chain codes");
76
77 length = 0;
78 chainsum = 0; // sum of chain codes
79 edgept = start;
80 lastchain = edgept->prev->stepdir; // previous chain code
81 do {
82 length++;
83 if (edgept->stepdir != lastchain) {
84 // chain code difference
85 chaindiff = edgept->stepdir - lastchain;
86 if (chaindiff > 2) {
87 chaindiff -= 4;
88 } else if (chaindiff < -2) {
89 chaindiff += 4;
90 }
91 chainsum += chaindiff; // sum differences
92 lastchain = edgept->stepdir;
93 }
94 edgept = edgept->next;
95 } while (edgept != start && length < C_OUTLINE::kMaxOutlineLength);
96
97 if ((chainsum != 4 && chainsum != -4) || edgept != start || length < MINEDGELENGTH) {
98 if (edgept != start) {
99 return ScrollView::YELLOW;
100 } else if (length < MINEDGELENGTH) {
101 return ScrollView::MAGENTA;
102 } else {
103 ED_ILLEGAL_SUM.error("check_path_legal", TESSLOG, "chainsum=%d", chainsum);
104 return ScrollView::GREEN;
105 }
106 }
107 // colour on inside
108 return chainsum < 0 ? ScrollView::BLUE : ScrollView::RED;
109}
#define MINEDGELENGTH
Definition: edgloop.cpp:30
CRACKEDGE * next
Definition: crakedge.h:37
CRACKEDGE * prev
Definition: crakedge.h:36

◆ check_pitch_sync()

double tesseract::check_pitch_sync ( BLOBNBOX_IT *  blob_it,
int16_t  blob_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
FPSEGPT_LIST *  seg_list 
)

Definition at line 138 of file pitsync1.cpp.

145 {
146 int16_t x; // current coord
147 int16_t min_index; // blob number
148 int16_t max_index; // blob number
149 int16_t left_edge; // of word
150 int16_t right_edge; // of word
151 int16_t right_max; // max allowed x
152 int16_t min_x; // in this region
153 int16_t max_x;
154 int16_t region_index;
155 int16_t best_region_index = 0; // for best result
156 int16_t offset; // dist to legal area
157 int16_t left_best_x; // edge of good region
158 int16_t right_best_x; // right edge
159 TBOX min_box; // bounding box
160 TBOX max_box; // bounding box
161 TBOX next_box; // box of next blob
162 FPSEGPT *segpt; // segment point
163 FPSEGPT_LIST *segpts; // points in a segment
164 double best_cost; // best path
165 double mean_sum; // computes result
166 FPSEGPT *best_end; // end of best path
167 BLOBNBOX_IT min_it; // copy iterator
168 BLOBNBOX_IT max_it; // copy iterator
169 FPSEGPT_IT segpt_it; // iterator
170 // output segments
171 FPSEGPT_IT outseg_it = seg_list;
172 FPSEGPT_LIST_CLIST lattice; // list of lists
173 // region iterator
174 FPSEGPT_LIST_C_IT lattice_it = &lattice;
175
176 // tprintf("Computing sync on word of %d blobs with pitch %d\n",
177 // blob_count, pitch);
178 // if (blob_count==8 && pitch==27)
179 // projection->print(stdout,true);
180 if (pitch < 3) {
181 pitch = 3; // nothing ludicrous
182 }
183 if ((pitch - 3) / 2 < pitch_error) {
184 pitch_error = (pitch - 3) / 2;
185 }
186 min_it = *blob_it;
187 min_box = box_next(&min_it); // get box
188 // if (blob_count==8 && pitch==27)
189 // tprintf("1st box at (%d,%d)->(%d,%d)\n",
190 // min_box.left(),min_box.bottom(),
191 // min_box.right(),min_box.top());
192 // left of word
193 left_edge = min_box.left() + pitch_error;
194 for (min_index = 1; min_index < blob_count; min_index++) {
195 min_box = box_next(&min_it);
196 // if (blob_count==8 && pitch==27)
197 // tprintf("Box at (%d,%d)->(%d,%d)\n",
198 // min_box.left(),min_box.bottom(),
199 // min_box.right(),min_box.top());
200 }
201 right_edge = min_box.right(); // end of word
202 max_x = left_edge;
203 // min permissible
204 min_x = max_x - pitch + pitch_error * 2 + 1;
205 right_max = right_edge + pitch - pitch_error - 1;
206 segpts = new FPSEGPT_LIST; // list of points
207 segpt_it.set_to_list(segpts);
208 for (x = min_x; x <= max_x; x++) {
209 segpt = new FPSEGPT(x); // make a new one
210 // put in list
211 segpt_it.add_after_then_move(segpt);
212 }
213 // first segment
214 lattice_it.add_before_then_move(segpts);
215 min_index = 0;
216 region_index = 1;
217 best_cost = FLT_MAX;
218 best_end = nullptr;
219 min_it = *blob_it;
220 min_box = box_next(&min_it); // first box
221 do {
222 left_best_x = -1;
223 right_best_x = -1;
224 segpts = new FPSEGPT_LIST; // list of points
225 segpt_it.set_to_list(segpts);
226 min_x += pitch - pitch_error; // next limits
227 max_x += pitch + pitch_error;
228 while (min_box.right() < min_x && min_index < blob_count) {
229 min_index++;
230 min_box = box_next(&min_it);
231 }
232 max_it = min_it;
233 max_index = min_index;
234 max_box = min_box;
235 next_box = box_next(&max_it);
236 for (x = min_x; x <= max_x && x <= right_max; x++) {
237 while (x < right_edge && max_index < blob_count && x > max_box.right()) {
238 max_index++;
239 max_box = next_box;
240 next_box = box_next(&max_it);
241 }
242 if (x <= max_box.left() + pitch_error || x >= max_box.right() - pitch_error ||
243 x >= right_edge || (max_index < blob_count - 1 && x >= next_box.left()) ||
244 (x - max_box.left() > pitch * pitsync_joined_edge &&
245 max_box.right() - x > pitch * pitsync_joined_edge)) {
246 // || projection->local_min(x))
247 if (x - max_box.left() > 0 && x - max_box.left() <= pitch_error) {
248 // dist to real break
249 offset = x - max_box.left();
250 } else if (max_box.right() - x > 0 && max_box.right() - x <= pitch_error &&
251 (max_index >= blob_count - 1 || x < next_box.left())) {
252 offset = max_box.right() - x;
253 } else {
254 offset = 0;
255 }
256 // offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
257 segpt = new FPSEGPT(x, false, offset, region_index, pitch, pitch_error, lattice_it.data());
258 } else {
259 offset = projection->pile_count(x);
260 segpt = new FPSEGPT(x, true, offset, region_index, pitch, pitch_error, lattice_it.data());
261 }
262 if (segpt->previous() != nullptr) {
263 segpt_it.add_after_then_move(segpt);
264 if (x >= right_edge - pitch_error) {
265 segpt->terminal = true; // no more wanted
266 if (segpt->cost_function() < best_cost) {
267 best_cost = segpt->cost_function();
268 // find least
269 best_end = segpt;
270 best_region_index = region_index;
271 left_best_x = x;
272 right_best_x = x;
273 } else if (segpt->cost_function() == best_cost && right_best_x == x - 1) {
274 right_best_x = x;
275 }
276 }
277 } else {
278 delete segpt; // no good
279 }
280 }
281 if (segpts->empty()) {
282 if (best_end != nullptr) {
283 break; // already found one
284 }
285 make_illegal_segment(lattice_it.data(), min_box, min_it, region_index, pitch, pitch_error,
286 segpts);
287 } else {
288 if (right_best_x > left_best_x + 1) {
289 left_best_x = (left_best_x + right_best_x + 1) / 2;
290 for (segpt_it.mark_cycle_pt();
291 !segpt_it.cycled_list() && segpt_it.data()->position() != left_best_x;
292 segpt_it.forward()) {
293 ;
294 }
295 if (segpt_it.data()->position() == left_best_x) {
296 // middle of region
297 best_end = segpt_it.data();
298 }
299 }
300 }
301 // new segment
302 lattice_it.add_before_then_move(segpts);
303 region_index++;
304 } while (min_x < right_edge);
305 ASSERT_HOST(best_end != nullptr); // must always find some
306
307 for (lattice_it.mark_cycle_pt(); !lattice_it.cycled_list(); lattice_it.forward()) {
308 segpts = lattice_it.data();
309 segpt_it.set_to_list(segpts);
310 // if (blob_count==8 && pitch==27)
311 // {
312 // for
313 // (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
314 // {
315 // segpt=segpt_it.data();
316 // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g,
317 // pred=%x\n",
318 // segpt->position(),segpt,segpt->cost_function(),
319 // segpt->sum(),segpt->squares(),segpt->previous());
320 // }
321 // tprintf("\n");
322 // }
323 for (segpt_it.mark_cycle_pt(); !segpt_it.cycled_list() && segpt_it.data() != best_end;
324 segpt_it.forward()) {
325 ;
326 }
327 if (segpt_it.data() == best_end) {
328 // save good one
329 segpt = segpt_it.extract();
330 outseg_it.add_before_then_move(segpt);
331 best_end = segpt->previous();
332 }
333 }
334 ASSERT_HOST(best_end == nullptr);
335 ASSERT_HOST(!outseg_it.empty());
336 outseg_it.move_to_last();
337 mean_sum = outseg_it.data()->sum();
338 mean_sum = mean_sum * mean_sum / best_region_index;
339 if (outseg_it.data()->squares() - mean_sum < 0) {
340 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", outseg_it.data()->squares(),
341 outseg_it.data()->sum(), best_region_index);
342 }
343 lattice.deep_clear(); // shift the lot
344 return outseg_it.data()->squares() - mean_sum;
345}
double pitsync_joined_edge
Definition: pitsync1.cpp:27
void make_illegal_segment(FPSEGPT_LIST *prev_list, TBOX blob_box, BLOBNBOX_IT blob_it, int16_t region_index, int16_t pitch, int16_t pitch_error, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:353
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:638
int32_t pile_count(int32_t value) const
Definition: statistc.h:72
FPSEGPT * previous()
Definition: pitsync1.h:61
double cost_function()
Definition: pitsync1.h:52

◆ check_pitch_sync2()

double tesseract::check_pitch_sync2 ( BLOBNBOX_IT *  blob_it,
int16_t  blob_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 292 of file pithsync.cpp.

304 {
305 bool faking; // illegal cut pt
306 bool mid_cut; // cheap cut pt.
307 int16_t x; // current coord
308 int16_t blob_index; // blob number
309 int16_t left_edge; // of word
310 int16_t right_edge; // of word
311 int16_t array_origin; // x coord of array
312 int16_t offset; // dist to legal area
313 int16_t zero_count; // projection zero
314 int16_t best_left_x = 0; // for equals
315 int16_t best_right_x = 0; // right edge
316 TBOX this_box; // bounding box
317 TBOX next_box; // box of next blob
318 FPSEGPT *segpt; // segment point
319 double best_cost; // best path
320 double mean_sum; // computes result
321 FPCUTPT *best_end; // end of best path
322 int16_t best_fake; // best fake level
323 int16_t best_count; // no of cuts
324 BLOBNBOX_IT this_it; // copy iterator
325 FPSEGPT_IT seg_it = seg_list; // output iterator
326
327 // tprintf("Computing sync on word of %d blobs with pitch %d\n",
328 // blob_count, pitch);
329 // if (blob_count==8 && pitch==27)
330 // projection->print(stdout,true);
331 zero_count = 0;
332 if (pitch < 3) {
333 pitch = 3; // nothing ludicrous
334 }
335 if ((pitch - 3) / 2 < pitch_error) {
336 pitch_error = (pitch - 3) / 2;
337 }
338 this_it = *blob_it;
339 this_box = box_next(&this_it); // get box
340 // left_edge=this_box.left(); //left of word right_edge=this_box.right();
341 // for (blob_index=1;blob_index<blob_count;blob_index++)
342 // {
343 // this_box=box_next(&this_it);
344 // if (this_box.right()>right_edge)
345 // right_edge=this_box.right();
346 // }
347 for (left_edge = projection_left;
348 projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
349 ;
350 }
351 for (right_edge = projection_right;
352 projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
353 ;
354 }
355 ASSERT_HOST(right_edge >= left_edge);
356 if (pitsync_linear_version >= 4) {
357 return check_pitch_sync3(projection_left, projection_right, zero_count, pitch, pitch_error,
358 projection, projection_scale, occupation_count, seg_list, start, end);
359 }
360 array_origin = left_edge - pitch;
361 // array of points
362 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
363 for (x = array_origin; x < left_edge; x++) {
364 // free cuts
365 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
366 }
367 for (offset = 0; offset <= pitch_error; offset++, x++) {
368 // not quite free
369 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
370 offset);
371 }
372
373 this_it = *blob_it;
374 best_cost = FLT_MAX;
375 best_end = nullptr;
376 this_box = box_next(&this_it); // first box
377 next_box = box_next(&this_it); // second box
378 blob_index = 1;
379 while (x < right_edge - pitch_error) {
380 if (x > this_box.right() + pitch_error && blob_index < blob_count) {
381 this_box = next_box;
382 next_box = box_next(&this_it);
383 blob_index++;
384 }
385 faking = false;
386 mid_cut = false;
387 if (x <= this_box.left()) {
388 offset = 0;
389 } else if (x <= this_box.left() + pitch_error) {
390 offset = x - this_box.left();
391 } else if (x >= this_box.right()) {
392 offset = 0;
393 } else if (x >= next_box.left() && blob_index < blob_count) {
394 offset = x - next_box.left();
395 if (this_box.right() - x < offset) {
396 offset = this_box.right() - x;
397 }
398 } else if (x >= this_box.right() - pitch_error) {
399 offset = this_box.right() - x;
400 } else if (x - this_box.left() > pitch * pitsync_joined_edge &&
401 this_box.right() - x > pitch * pitsync_joined_edge) {
402 mid_cut = true;
403 offset = 0;
404 } else {
405 faking = true;
406 offset = projection->pile_count(x);
407 }
408 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
409 projection, projection_scale, zero_count, pitch, pitch_error);
410 x++;
411 }
412
413 best_fake = INT16_MAX;
414 best_cost = INT32_MAX;
415 best_count = INT16_MAX;
416 while (x < right_edge + pitch) {
417 offset = x < right_edge ? right_edge - x : 0;
418 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
419 projection_scale, zero_count, pitch, pitch_error);
420 cutpts[x - array_origin].terminal = true;
421 if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
422 best_count + best_fake) {
423 if (cutpts[x - array_origin].fake_count < best_fake ||
424 (cutpts[x - array_origin].fake_count == best_fake &&
425 cutpts[x - array_origin].cost_function() < best_cost)) {
426 best_fake = cutpts[x - array_origin].fake_count;
427 best_cost = cutpts[x - array_origin].cost_function();
428 best_left_x = x;
429 best_right_x = x;
430 best_count = cutpts[x - array_origin].index();
431 } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
432 cutpts[x - array_origin].cost_function() == best_cost) {
433 // exactly equal
434 best_right_x = x;
435 }
436 }
437 x++;
438 }
439 ASSERT_HOST(best_fake < INT16_MAX);
440
441 best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
442 if (this_box.right() == textord_test_x && this_box.top() == textord_test_y) {
443 for (x = left_edge - pitch; x < right_edge + pitch; x++) {
444 tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", x, cutpts[x - array_origin].cost_function(),
445 cutpts[x - array_origin].sum(), cutpts[x - array_origin].squares(),
446 cutpts[x - array_origin].previous()->position());
447 }
448 }
449 occupation_count = -1;
450 do {
451 for (x = best_end->position() - pitch + pitch_error;
452 x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
453 ;
454 }
455 if (x < best_end->position() - pitch_error) {
456 occupation_count++;
457 }
458 // copy it
459 segpt = new FPSEGPT(best_end);
460 seg_it.add_before_then_move(segpt);
461 best_end = best_end->previous();
462 } while (best_end != nullptr);
463 seg_it.move_to_last();
464 mean_sum = seg_it.data()->sum();
465 mean_sum = mean_sum * mean_sum / best_count;
466 if (seg_it.data()->squares() - mean_sum < 0) {
467 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
468 seg_it.data()->sum(), best_count);
469 }
470 // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
471 // blob_count,pitch,seg_it.data()->squares()-mean_sum,
472 // occupation_count);
473 return seg_it.data()->squares() - mean_sum;
474}
int pitsync_linear_version
Definition: pitsync1.cpp:26
double check_pitch_sync3(int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:484
double sum()
Definition: pithsync.h:77
int32_t position()
Definition: pithsync.h:68
FPCUTPT * previous()
Definition: pithsync.h:80

◆ check_pitch_sync3()

double tesseract::check_pitch_sync3 ( int16_t  projection_left,
int16_t  projection_right,
int16_t  zero_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 484 of file pithsync.cpp.

495 {
496 bool faking; // illegal cut pt
497 bool mid_cut; // cheap cut pt.
498 int16_t left_edge; // of word
499 int16_t right_edge; // of word
500 int16_t x; // current coord
501 int16_t array_origin; // x coord of array
502 int16_t offset; // dist to legal area
503 int16_t projection_offset; // from scaled projection
504 int16_t prev_zero; // previous zero dist
505 int16_t next_zero; // next zero dist
506 int16_t zero_offset; // scan window
507 int16_t best_left_x = 0; // for equals
508 int16_t best_right_x = 0; // right edge
509 FPSEGPT *segpt; // segment point
510 int minindex; // next input position
511 int test_index; // index to mins
512 double best_cost; // best path
513 double mean_sum; // computes result
514 FPCUTPT *best_end; // end of best path
515 int16_t best_fake; // best fake level
516 int16_t best_count; // no of cuts
517 FPSEGPT_IT seg_it = seg_list; // output iterator
518
519 end = (end - start) % pitch;
520 if (pitch < 3) {
521 pitch = 3; // nothing ludicrous
522 }
523 if ((pitch - 3) / 2 < pitch_error) {
524 pitch_error = (pitch - 3) / 2;
525 }
526 // min dist of zero
527 zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
528 for (left_edge = projection_left;
529 projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
530 ;
531 }
532 for (right_edge = projection_right;
533 projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
534 ;
535 }
536 array_origin = left_edge - pitch;
537 // array of points
538 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
539 // local min results
540 std::vector<bool> mins(pitch_error * 2 + 1);
541 for (x = array_origin; x < left_edge; x++) {
542 // free cuts
543 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
544 }
545 prev_zero = left_edge - 1;
546 for (offset = 0; offset <= pitch_error; offset++, x++) {
547 // not quite free
548 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
549 offset);
550 }
551
552 best_cost = FLT_MAX;
553 best_end = nullptr;
554 for (offset = -pitch_error, minindex = 0; offset < pitch_error; offset++, minindex++) {
555 mins[minindex] = projection->local_min(x + offset);
556 }
557 next_zero = x + zero_offset + 1;
558 for (offset = next_zero - 1; offset >= x; offset--) {
559 if (projection->pile_count(offset) <= zero_count) {
560 next_zero = offset;
561 break;
562 }
563 }
564 while (x < right_edge - pitch_error) {
565 mins[minindex] = projection->local_min(x + pitch_error);
566 minindex++;
567 if (minindex > pitch_error * 2) {
568 minindex = 0;
569 }
570 faking = false;
571 mid_cut = false;
572 offset = 0;
573 if (projection->pile_count(x) <= zero_count) {
574 prev_zero = x;
575 } else {
576 for (offset = 1; offset <= pitch_error; offset++) {
577 if (projection->pile_count(x + offset) <= zero_count ||
578 projection->pile_count(x - offset) <= zero_count) {
579 break;
580 }
581 }
582 }
583 if (offset > pitch_error) {
584 if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
585 for (offset = 0; offset <= pitch_error; offset++) {
586 test_index = minindex + pitch_error + offset;
587 if (test_index > pitch_error * 2) {
588 test_index -= pitch_error * 2 + 1;
589 }
590 if (mins[test_index]) {
591 break;
592 }
593 test_index = minindex + pitch_error - offset;
594 if (test_index > pitch_error * 2) {
595 test_index -= pitch_error * 2 + 1;
596 }
597 if (mins[test_index]) {
598 break;
599 }
600 }
601 }
602 if (offset > pitch_error) {
603 offset = projection->pile_count(x);
604 faking = true;
605 } else {
606 projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale);
607 if (projection_offset > offset) {
608 offset = projection_offset;
609 }
610 mid_cut = true;
611 }
612 }
613 if ((start == 0 && end == 0) || !textord_fast_pitch_test ||
614 (x - projection_left - start) % pitch <= end) {
615 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
616 projection, projection_scale, zero_count, pitch, pitch_error);
617 } else {
618 cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, faking, mid_cut, offset,
619 projection, projection_scale, zero_count, pitch,
620 pitch_error);
621 }
622 x++;
623 if (next_zero < x || next_zero == x + zero_offset) {
624 next_zero = x + zero_offset + 1;
625 }
626 if (projection->pile_count(x + zero_offset) <= zero_count) {
627 next_zero = x + zero_offset;
628 }
629 }
630
631 best_fake = INT16_MAX;
632 best_cost = INT32_MAX;
633 best_count = INT16_MAX;
634 while (x < right_edge + pitch) {
635 offset = x < right_edge ? right_edge - x : 0;
636 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
637 projection_scale, zero_count, pitch, pitch_error);
638 cutpts[x - array_origin].terminal = true;
639 if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
640 best_count + best_fake) {
641 if (cutpts[x - array_origin].fake_count < best_fake ||
642 (cutpts[x - array_origin].fake_count == best_fake &&
643 cutpts[x - array_origin].cost_function() < best_cost)) {
644 best_fake = cutpts[x - array_origin].fake_count;
645 best_cost = cutpts[x - array_origin].cost_function();
646 best_left_x = x;
647 best_right_x = x;
648 best_count = cutpts[x - array_origin].index();
649 } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
650 cutpts[x - array_origin].cost_function() == best_cost) {
651 // exactly equal
652 best_right_x = x;
653 }
654 }
655 x++;
656 }
657 ASSERT_HOST(best_fake < INT16_MAX);
658
659 best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
660 // for (x=left_edge-pitch;x<right_edge+pitch;x++)
661 // {
662 // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
663 // x,cutpts[x-array_origin].cost_function(),
664 // cutpts[x-array_origin].sum(),
665 // cutpts[x-array_origin].squares(),
666 // cutpts[x-array_origin].previous()->position());
667 // }
668 occupation_count = -1;
669 do {
670 for (x = best_end->position() - pitch + pitch_error;
671 x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
672 }
673 if (x < best_end->position() - pitch_error) {
674 occupation_count++;
675 }
676 // copy it
677 segpt = new FPSEGPT(best_end);
678 seg_it.add_before_then_move(segpt);
679 best_end = best_end->previous();
680 } while (best_end != nullptr);
681 seg_it.move_to_last();
682 mean_sum = seg_it.data()->sum();
683 mean_sum = mean_sum * mean_sum / best_count;
684 if (seg_it.data()->squares() - mean_sum < 0) {
685 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
686 seg_it.data()->sum(), best_count);
687 }
688 return seg_it.data()->squares() - mean_sum;
689}
bool textord_fast_pitch_test
Definition: topitch.cpp:44
bool local_min(int32_t x) const
Definition: statistc.cpp:268

◆ chomp_string()

void tesseract::chomp_string ( char *  str)
inline

Definition at line 91 of file helpers.h.

// Strips trailing '\n' and '\r' characters from str in place by overwriting
// each of them with '\0'.
91 {
// last_index is -1 for an empty string, so the loop below is skipped.
92 int last_index = static_cast<int>(strlen(str)) - 1;
93 while (last_index >= 0 && (str[last_index] == '\n' || str[last_index] == '\r')) {
94 str[last_index--] = '\0';
95 }
96}

◆ choose_partition()

int tesseract::choose_partition ( float  diff,
float  partdiffs[],
int  lastpart,
float  jumplimit,
float *  drift,
float *  lastdelta,
int *  partcount 
)

Definition at line 910 of file oldbasel.cpp.

// Picks the partition (an index into partdiffs) that diff belongs to and
// returns it. May start a new partition (incrementing *partcount) and updates
// *drift and *lastdelta as a side effect.
// NOTE(review): listing lines 930 and 963 are absent from this extract; the
// closing braces on lines 932 and 965 belong to blocks whose opening line is
// not shown (presumably a debug-flag test around the tprintf calls).
916 {
917 int partition; /*partition no */
918 int bestpart; /*best new partition */
919 float bestdelta; /*best gap from a part */
920 float delta; /*diff from part */
921
// A negative lastpart means there is no previous point: seed partition 0
// with diff and reset the drift state.
922 if (lastpart < 0) {
923 partdiffs[0] = diff;
924 lastpart = 0; /*first point */
925 *drift = 0.0f;
926 *lastdelta = 0.0f;
927 }
928 /*adjusted diff from part */
929 delta = diff - partdiffs[lastpart] - *drift;
931 tprintf("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
932 }
// Only search the other partitions when the point is more than half a jump
// away from the last partition.
933 if (ABS(delta) > jumplimit / 2) {
934 /*delta on part 0 */
935 bestdelta = diff - partdiffs[0] - *drift;
936 bestpart = 0; /*0 best so far */
937 for (partition = 1; partition < *partcount; partition++) {
938 delta = diff - partdiffs[partition] - *drift;
939 if (ABS(delta) < ABS(bestdelta)) {
940 bestdelta = delta;
941 bestpart = partition; /*part with nearest jump */
942 }
943 }
944 delta = bestdelta;
945 /*too far away */
946 if (ABS(bestdelta) > jumplimit && *partcount < MAXPARTS) { /*and spare part left */
947 bestpart = (*partcount)++; /*best was new one */
948 /*start new one */
949 partdiffs[bestpart] = diff - *drift;
950 delta = 0.0f;
951 }
952 } else {
953 bestpart = lastpart; /*best was last one */
954 }
955
956 if (bestpart == lastpart &&
957 (ABS(delta - *lastdelta) < jumplimit / 2 || ABS(delta) < jumplimit / 2)) {
958 /*smooth the drift */
// NOTE(review): (3 * drift + delta) / 3 equals drift + delta / 3, i.e. the
// drift moves by a third of delta rather than being a weighted average of
// the two - confirm this is the intended smoothing.
959 *drift = (3 * *drift + delta) / 3;
960 }
961 *lastdelta = delta;
962
964 tprintf("P=%d\n", bestpart);
965 }
966
967 return bestpart;
968}
#define MAXPARTS
Definition: oldbasel.cpp:68
#define ABS(x)
Definition: oldbasel.cpp:71
bool textord_oldbl_debug
Definition: oldbasel.cpp:43

◆ CircBucketFor()

uint8_t tesseract::CircBucketFor ( float  param,
float  offset,
int  num_buckets 
)

Returns a quantized bucket for the given circular param shifted by offset, notionally (param + offset) * num_buckets, but reduced modulo num_buckets and cast to the appropriate type.

Definition at line 399 of file intproto.cpp.

// Rounds MapParam(param, offset, num_buckets) to an int, wraps it with
// Modulo(bucket, num_buckets) and narrows the result to uint8_t.
399 {
400 int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
401 return static_cast<uint8_t>(Modulo(bucket, num_buckets));
402} /* CircBucketFor */
int Modulo(int a, int b)
Definition: helpers.h:153

◆ ClassicProgressTester()

void tesseract::ClassicProgressTester ( const char *  imgname,
const char *  tessdatadir,
const char *  lang 
)

Definition at line 85 of file progress_test.cc.

// Test helper: initialises a TessBaseAPI for (tessdatadir, lang), loads
// imgname, runs Recognize() with a mocked progress sink and checks the
// progress callbacks that were reported.
// NOTE(review): listing lines 88-89 and 92-93 are absent from this extract;
// presumably they are the using-declarations for AtLeast, DoAll, Return and
// SaveArg, which are used below.
85 {
86 using ::testing::_;
87 using ::testing::AllOf;
90 using ::testing::Gt;
91 using ::testing::Le;
94
95 auto api = std::make_unique<tesseract::TessBaseAPI>();
96 ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
97 Image image = pixRead(imgname);
98 ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
99 api->SetImage(image);
100
101 ClassicMockProgressSink progressSink;
102
// Each classicProgress() argument must be greater than the previously saved
// value (Gt takes currentProgress by reference) and at most 100; SaveArg
// stores it so the next call is compared against it.
103 int currentProgress = -1;
104 EXPECT_CALL(progressSink, classicProgress(AllOf(Gt<int &>(currentProgress), Le(100))))
105 .Times(AtLeast(5))
106 .WillRepeatedly(DoAll(SaveArg<0>(&currentProgress), Return(false)));
107 EXPECT_CALL(progressSink, cancel(_)).Times(AtLeast(5)).WillRepeatedly(Return(false));
108
109 EXPECT_EQ(api->Recognize(&progressSink.monitor), false);
110 EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
111
112 api->End();
113 image.destroy();
114}
#define EXPECT_CALL(obj, call)
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define ASSERT_FALSE(condition)
Definition: gtest.h:1994
#define EXPECT_GE(val1, val2)
Definition: gtest.h:2051
#define ASSERT_TRUE(condition)
Definition: gtest.h:1990
internal::SaveArgAction< k, Ptr > SaveArg(Ptr pointer)
GTEST_API_ Cardinality AtLeast(int n)
internal::DoAllAction< typename std::decay< Action >::type... > DoAll(Action &&... action)
internal::ReturnAction< R > Return(R value)
void destroy()
Definition: image.cpp:32

◆ cleanup_rows_making()

void tesseract::cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 563 of file makerow.cpp.

// Calls fit_parallel_rows, delete_non_dropout_rows and expand_rows on the
// block, then moves every row's blob list back into block->blobs and calls
// assign_blobs_to_rows three times (passes 1, 2 and 3). block->large_blobs
// is appended before pass 2, and block->noise_blobs and block->small_blobs
// before pass 3.
570 {
571 // iterators
572 BLOBNBOX_IT blob_it = &block->blobs;
573 TO_ROW_IT row_it = block->get_rows();
574
575#ifndef GRAPHICS_DISABLED
576 if (textord_show_parallel_rows && testing_on) {
577 if (to_win == nullptr) {
578 create_to_win(page_tr);
579 }
580 }
581#endif
582 // get row coords
583 fit_parallel_rows(block, gradient, rotation, block_edge,
584 textord_show_parallel_rows && testing_on);
585 delete_non_dropout_rows(block, gradient, rotation, block_edge,
586 textord_show_parallel_rows && testing_on);
587 expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
// Re-seat both iterators on their lists before collecting the blobs.
588 blob_it.set_to_list(&block->blobs);
589 row_it.set_to_list(block->get_rows());
590 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
591 blob_it.add_list_after(row_it.data()->blob_list());
592 }
593 // give blobs back
594 assign_blobs_to_rows(block, &gradient, 1, false, false, false);
595 // now new rows must be genuine
596 blob_it.set_to_list(&block->blobs);
597 blob_it.add_list_after(&block->large_blobs);
598 assign_blobs_to_rows(block, &gradient, 2, true, true, false);
599 // safe to use big ones now
600 blob_it.set_to_list(&block->blobs);
601 // throw all blobs in
602 blob_it.add_list_after(&block->noise_blobs);
603 blob_it.add_list_after(&block->small_blobs);
604 assign_blobs_to_rows(block, &gradient, 3, false, false, false);
605}
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:976
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:612
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2272
bool textord_show_parallel_rows
Definition: makerow.cpp:48
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1928
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:779
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:780
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:778

◆ CleanUpUnusedData()

TESS_COMMON_TRAINING_API void tesseract::CleanUpUnusedData ( LIST  ProtoList)

Definition at line 529 of file commontraining.cpp.

// For every PROTOTYPE in ProtoList, frees the Elliptical arrays of its
// Variance, Magnitude and Weight members and resets those pointers to
// nullptr. The list nodes and the prototypes themselves are not freed here.
// NOTE(review): Variance, Magnitude and Weight are FLOATUNIONs; this assumes
// Elliptical is the active member for every prototype in the list - confirm
// against callers.
529 {
530 PROTOTYPE *Prototype;
531
532 iterate(ProtoList) {
533 Prototype = reinterpret_cast<PROTOTYPE *>(ProtoList->first_node());
534 delete[] Prototype->Variance.Elliptical;
535 Prototype->Variance.Elliptical = nullptr;
536 delete[] Prototype->Magnitude.Elliptical;
537 Prototype->Magnitude.Elliptical = nullptr;
538 delete[] Prototype->Weight.Elliptical;
539 Prototype->Weight.Elliptical = nullptr;
540 }
541}
float * Elliptical
Definition: cluster.h:69
FLOATUNION Magnitude
Definition: cluster.h:87
FLOATUNION Variance
Definition: cluster.h:86
FLOATUNION Weight
Definition: cluster.h:88

◆ clear_fx_win()

void tesseract::clear_fx_win ( )

Definition at line 61 of file drawfx.cpp.

// Clears fx_win and sets its pen to the grey (64, 64, 64).
// NOTE(review): listing lines 64-66 are absent from this extract; judging by
// the symbols referenced for this function (Line, WERDWIDTH, kBlnXHeight,
// kBlnBaselineOffset) they presumably draw guide lines - confirm against the
// original file.
61 { // make features win
62 fx_win->Clear();
63 fx_win->Pen(64, 64, 64);
67}
#define WERDWIDTH
Definition: drawfx.cpp:38
const int kBlnXHeight
Definition: normalis.h:33
ScrollView * fx_win
Definition: drawfx.cpp:42
const int kBlnBaselineOffset
Definition: normalis.h:34
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:498
void Pen(Color color)
Definition: scrollview.cpp:710

◆ ClearFeatureSpaceWindow()

TESS_API void tesseract::ClearFeatureSpaceWindow ( NORM_METHOD  norm_method,
ScrollView *  window 
)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

Definition at line 889 of file intproto.cpp.

// Clears the window, draws the feature-space limit rectangle in grey and,
// for baseline normalization, draws horizontal guide lines.
// NOTE(review): listing lines 897, 903, 905 and 906 are absent from this
// extract. The SetCursor calls on lines 896 and 902 have no visible matching
// DrawTo, and the else branch is empty here; presumably the missing lines
// hold those DrawTo calls and a Rectangle built from INT_XCENTER/INT_YCENTER
// and INT_XRADIUS/INT_YRADIUS - confirm against the original file.
889 {
890 window->Clear();
891
892 window->Pen(ScrollView::GREY);
893 // Draw the feature space limit rectangle.
894 window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
895 if (norm_method == baseline) {
896 window->SetCursor(0, INT_DESCENDER);
898 window->SetCursor(0, INT_BASELINE);
899 window->DrawTo(INT_MAX_X, INT_BASELINE);
900 window->SetCursor(0, INT_XHEIGHT);
901 window->DrawTo(INT_MAX_X, INT_XHEIGHT);
902 window->SetCursor(0, INT_CAPHEIGHT);
904 } else {
907 }
908}
#define INT_YRADIUS
Definition: intproto.cpp:60
#define INT_MAX_Y
Definition: intproto.cpp:64
#define INT_DESCENDER
Definition: intproto.cpp:52
#define INT_YCENTER
Definition: intproto.cpp:58
#define INT_XHEIGHT
Definition: intproto.cpp:54
#define INT_XCENTER
Definition: intproto.cpp:57
#define INT_MAX_X
Definition: intproto.cpp:63
#define INT_BASELINE
Definition: intproto.cpp:53
#define INT_XRADIUS
Definition: intproto.cpp:59
#define INT_CAPHEIGHT
Definition: intproto.cpp:55
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:576

◆ ClipToRange()

template<typename T >
T tesseract::ClipToRange ( const T &  x,
const T &  lower_bound,
const T &  upper_bound 
)
inline

Definition at line 105 of file helpers.h.

// Returns x limited to [lower_bound, upper_bound]: lower_bound if x is below
// it, upper_bound if x is above it, otherwise x itself. The lower bound is
// tested first, so it wins if the bounds are passed in the wrong order and x
// is below lower_bound.
105 {
106 if (x < lower_bound) {
107 return lower_bound;
108 }
109 if (x > upper_bound) {
110 return upper_bound;
111 }
112 return x;
113}

◆ ClipVector()

template<typename T >
void tesseract::ClipVector ( int  n,
T  lower,
T  upper,
T *  vec 
)
inline

Definition at line 251 of file functions.h.

// Clips each of the first n elements of vec to [lower, upper] in place using
// ClipToRange.
251 {
252 for (int i = 0; i < n; ++i) {
253 vec[i] = ClipToRange(vec[i], lower, upper);
254 }
255}
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:105

◆ close_to_win()

void tesseract::close_to_win ( )

Definition at line 56 of file drawtord.cpp.

// Despite the name, this does not destroy the window: if to_win exists it
// only calls Update() so the contents stay viewable.
56 {
57 // to_win is leaked, but this enables the user to view the contents.
58 if (to_win != nullptr) {
59 to_win->Update();
60 }
61}
static void Update()
Definition: scrollview.cpp:700

◆ ClusterSamples()

TESS_API LIST tesseract::ClusterSamples ( CLUSTERER *  Clusterer,
CLUSTERCONFIG *  Config 
)

This routine first checks to see if the samples in this clusterer have already been clustered before; if so, it does not bother to recreate the cluster tree. It simply recomputes the prototypes based on the new Config info.

If the samples have not been clustered before, the samples in the KD tree are formed into a cluster tree and then the prototypes are computed from the cluster tree.

In either case this routine returns a pointer to a list of prototypes that best represent the samples given the constraints specified in Config.

Parameters
Clusterer: data struct containing samples to be clustered
Config: parameters which control clustering process
Returns
Pointer to a list of prototypes

Definition at line 1543 of file cluster.cpp.

// Builds the cluster tree on first use (Clusterer->Root == nullptr), always
// discards the previous prototype list, recomputes prototypes for Config and
// returns Clusterer->ProtoList. The returned list is the one stored in the
// clusterer, not a copy.
1543 {
1544 // only create cluster tree if samples have never been clustered before
1545 if (Clusterer->Root == nullptr) {
1546 CreateClusterTree(Clusterer);
1547 }
1548
1549 // deallocate the old prototype list if one exists
1550 FreeProtoList(&Clusterer->ProtoList);
1551 Clusterer->ProtoList = NIL_LIST;
1552
1553 // compute prototypes starting at the root node in the tree
1554 ComputePrototypes(Clusterer, Config);
1555 // We don't need the cluster pointers in the protos any more, so null them
1556 // out, which makes it safe to delete the clusterer.
// proto_list is a local cursor for iterate(); Clusterer->ProtoList itself
// is left pointing at the head of the list.
1557 LIST proto_list = Clusterer->ProtoList;
1558 iterate(proto_list) {
1559 auto *proto = reinterpret_cast<PROTOTYPE *>(proto_list->first_node());
1560 proto->Cluster = nullptr;
1561 }
1562 return Clusterer->ProtoList;
1563} // ClusterSamples
list_rec * LIST
Definition: oldlist.h:125
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:1597
CLUSTER * Root
Definition: cluster.h:96

◆ CodeInBinary()

void tesseract::CodeInBinary ( int  n,
int  nf,
TFloat *  vec 
)
inline

Definition at line 259 of file functions.h.

// Finds the index of the largest of vec[0..n) and overwrites vec[0..nf) with
// the low nf bits of that index, least-significant bit first, encoded as
// 1.0 / 0.0. Elements vec[nf..n) are left untouched. Does nothing when
// nf <= 0 or n < nf.
259 {
260 if (nf <= 0 || n < nf) {
261 return;
262 }
// Strict '>' keeps the first maximum when several elements tie.
263 int index = 0;
264 TFloat best_score = vec[0];
265 for (int i = 1; i < n; ++i) {
266 if (vec[i] > best_score) {
267 best_score = vec[i];
268 index = i;
269 }
270 }
// mask walks 1, 2, 4, ... so vec[i] receives bit i of index.
271 int mask = 1;
272 for (int i = 0; i < nf; ++i, mask *= 2) {
273 vec[i] = (index & mask) ? 1.0 : 0.0;
274 }
275}
double TFloat
Definition: tesstypes.h:39

◆ CodepointList()

std::string tesseract::CodepointList ( const std::vector< char32 > &  str32)
inline

Definition at line 22 of file normstrngs_test.h.

// Returns a string in which every element of str32 is written in hexadecimal
// and wrapped in square brackets, with no separators between entries. An
// empty input yields an empty string.
22 {
23 std::stringstream result;
24 int total_chars = str32.size();
// std::hex is sticky, so it applies to every value streamed below.
25 result << std::hex;
26 for (int i = 0; i < total_chars; ++i) {
27 result << "[" << str32[i] << "]";
28 }
29 return result.str();
30}

◆ CombineLangModel()

TESS_UNICHARSET_TRAINING_API int tesseract::CombineLangModel ( const UNICHARSET &  unicharset,
const std::string &  script_dir,
const std::string &  version_str,
const std::string &  output_dir,
const std::string &  lang,
bool  pass_through_recoder,
const std::vector< std::string > &  words,
const std::vector< std::string > &  puncs,
const std::vector< std::string > &  numbers,
bool  lang_is_rtl,
FileReader  reader,
FileWriter  writer 
)

Definition at line 194 of file lang_model_helpers.cpp.

// Assembles a traineddata file for lang: writes the unicharset and recoder,
// adds the optional language config, converts the word lists to DAWGs when
// any are given, then serializes the result through writer. Returns
// EXIT_SUCCESS, or EXIT_FAILURE on the error paths that return below.
199 {
200 // Build the traineddata file.
201 TessdataManager traineddata;
202 if (!version_str.empty()) {
203 traineddata.SetVersionString(traineddata.VersionString() + ":" + version_str);
204 }
205 // Unicharset and recoder.
206 if (!WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
207 tprintf("Error writing unicharset!!\n");
208 return EXIT_FAILURE;
209 } else {
// NOTE(review): this message is printed whenever WriteUnicharset succeeds,
// before the config file has even been read; it looks like it belongs with
// the config handling below - confirm.
210 tprintf("Config file is optional, continuing...\n");
211 }
212 // If there is a config file, read it and add to traineddata.
213 std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config";
214 std::string config_file = ReadFile(config_filename, reader);
// An empty read result is treated as "no config" and silently skipped.
215 if (config_file.length() > 0) {
216 traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0], config_file.length());
217 }
218 std::string radical_filename = script_dir + "/radical-stroke.txt";
219 std::string radical_data = ReadFile(radical_filename, reader);
220 if (radical_data.empty()) {
221 tprintf("Error reading radical code table %s\n", radical_filename.c_str());
222 return EXIT_FAILURE;
223 }
// NOTE(review): unlike the other failures, a WriteRecoder failure is only
// logged and execution continues to write the traineddata - confirm this is
// deliberate.
224 if (!WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer, &radical_data,
225 &traineddata)) {
226 tprintf("Error writing recoder!!\n");
227 }
228 if (!words.empty() || !puncs.empty() || !numbers.empty()) {
229 if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset, &traineddata)) {
230 tprintf("Error during conversion of wordlists to DAWGs!!\n");
231 return EXIT_FAILURE;
232 }
233 }
234
235 // Traineddata file.
236 std::vector<char> traineddata_data;
237 traineddata.Serialize(&traineddata_data);
238 if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
239 tprintf("Error writing output traineddata file!!\n");
240 return EXIT_FAILURE;
241 }
// NOTE(review): this is the only message here without a trailing "\n".
242 tprintf("Created %s/%s/%s.traineddata", output_dir.c_str(), lang.c_str(), lang.c_str());
243 return EXIT_SUCCESS;
244}
bool WriteRecoder(const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, std::string *radical_table_data, TessdataManager *traineddata)
bool WriteFile(const std::string &output_dir, const std::string &lang, const std::string &suffix, const std::vector< char > &data, FileWriter writer)
bool WriteUnicharset(const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
std::string ReadFile(const std::string &filename, FileReader reader)
void OverwriteEntry(TessdataType type, const char *data, int size)
std::string VersionString() const
void SetVersionString(const std::string &v_str)
void Serialize(std::vector< char > *data) const

◆ complete_edge()

void tesseract::complete_edge ( CRACKEDGE *  start,
C_OUTLINE_IT *  outline_it 
)

Definition at line 38 of file edgloop.cpp.

// Checks the crack-edge loop beginning at start with check_path_legal and,
// when the returned colour is RED or BLUE, builds a C_OUTLINE from the loop
// and adds it after the current position of outline_it. For any other colour
// nothing is added.
39 {
40 ScrollView::Color colour; // colour to draw in
41 int16_t looplength; // steps in loop
42 ICOORD botleft; // bounding box
43 ICOORD topright;
44 C_OUTLINE *outline; // new outline
45
46 // check length etc.
47 colour = check_path_legal(start);
48
49 if (colour == ScrollView::RED || colour == ScrollView::BLUE) {
// loop_bounding_box takes start by reference, so it may move start as well
// as fill in botleft and topright.
50 looplength = loop_bounding_box(start, botleft, topright);
51 outline = new C_OUTLINE(start, botleft, topright, looplength);
52 // add to list
53 outline_it->add_after_then_move(outline);
54 }
55}
int16_t loop_bounding_box(CRACKEDGE *&start, ICOORD &botleft, ICOORD &topright)
Definition: edgloop.cpp:117
ScrollView::Color check_path_legal(CRACKEDGE *start)
Definition: edgloop.cpp:67

◆ compute_block_pitch()

void tesseract::compute_block_pitch ( TO_BLOCK *  block,
FCOORD  rotation,
int32_t  block_index,
bool  testing_on 
)

Definition at line 293 of file topitch.cpp.

// Initialises the block's default spacing fields from its xheight
// (min_space, max_nonspace, fixed_pitch, space_size, kern_size) and, when the
// block has rows, calls compute_rows_pitch on it.
// NOTE(review): listing lines 309, 310 and 313 are absent from this extract;
// judging by the symbols referenced for this function they presumably involve
// words_default_prop_nonspace, textord_spacesize_ratioprop and
// find_repeated_chars - confirm against the original file.
296 { // correct orientation
297 TBOX block_box; // bounding box
298
299 block_box = block->block->pdblk.bounding_box();
300 if (testing_on && textord_debug_pitch_test) {
301 tprintf("Block %d at (%d,%d)->(%d,%d)\n", block_index, block_box.left(), block_box.bottom(),
302 block_box.right(), block_box.top());
303 }
// min_space rounds down and max_nonspace rounds up.
304 block->min_space = static_cast<int32_t>(floor(block->xheight * textord_words_default_minspace));
305 block->max_nonspace = static_cast<int32_t>(ceil(block->xheight * textord_words_default_nonspace));
306 block->fixed_pitch = 0.0f;
307 block->space_size = static_cast<float>(block->min_space);
308 block->kern_size = static_cast<float>(block->max_nonspace);
311 if (!block->get_rows()->empty()) {
312 ASSERT_HOST(block->xheight > 0);
314#ifndef GRAPHICS_DISABLED
315 if (textord_show_initial_words && testing_on) {
316 // overlap_picture_ops(true);
317 ScrollView::Update();
318 }
319#endif
320 compute_rows_pitch(block, block_index, textord_debug_pitch_test && testing_on);
321 }
322}
double textord_words_default_nonspace
Definition: tovars.cpp:36
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1660
bool textord_debug_pitch_test
Definition: topitch.cpp:42
double words_default_prop_nonspace
Definition: tovars.cpp:48
bool textord_show_initial_words
Definition: tovars.cpp:25
double textord_words_default_minspace
Definition: tovars.cpp:34
double textord_spacesize_ratioprop
Definition: tovars.cpp:52
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:330
int32_t min_space
Definition: blobbox.h:796
int32_t max_nonspace
Definition: blobbox.h:797

◆ compute_dropout_distances()

void tesseract::compute_dropout_distances ( int32_t *  occupation,
int32_t *  thresholds,
int32_t  line_count 
)

Definition at line 933 of file makerow.cpp.

// Overwrites thresholds[0..line_count) in place with signed distance values
// derived from where occupation rises to its threshold: negative counts are
// written while scanning forward, and positive counts 1, 2, ... are
// back-filled from each such point.
// NOTE(review): the do/while writes thresholds[0] before line_count is
// tested, so this presumably requires line_count >= 1 - confirm.
937 {
938 int32_t line_index; // of thresholds line
939 int32_t distance; // from prev dropout
940 int32_t next_dist; // to next dropout
941 int32_t back_index; // for back filling
942 int32_t prev_threshold; // before overwrite
943
// Starting at -line_count makes the initial run more negative than any
// back-filled value can reach.
944 distance = -line_count;
945 line_index = 0;
946 do {
// Scan forward, saving each original threshold before overwriting it. The
// loop stops at the first line whose occupation reaches its threshold
// while the previous line's occupation was below its original threshold.
947 do {
948 distance--;
949 prev_threshold = thresholds[line_index];
950 // distance from prev
951 thresholds[line_index] = distance;
952 line_index++;
953 } while (line_index < line_count && (occupation[line_index] < thresholds[line_index] ||
954 occupation[line_index - 1] >= prev_threshold));
955 if (line_index < line_count) {
// Walk backwards replacing the negative values with positive distances.
// distance is incremented in step, so the two counts meet part-way.
956 back_index = line_index - 1;
957 next_dist = 1;
958 while (next_dist < -distance && back_index >= 0) {
959 thresholds[back_index] = next_dist;
960 back_index--;
961 next_dist++;
962 distance++;
963 }
// Becomes 0 after the decrement at the top of the next inner iteration.
964 distance = 1;
965 }
966 } while (line_index < line_count);
967}
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44

◆ compute_fixed_pitch()

void tesseract::compute_fixed_pitch ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 75 of file topitch.cpp.

// Runs three passes over port_blocks: (1) compute_block_pitch on every block;
// (2) unless try_doc_fixed succeeds, try_block_fixed per block with
// try_rows_fixed as the fallback; (3) fix_row_pitch on every row of every
// text block. Block and row indices passed to the helpers are 1-based.
79 { // correct orientation
80 TO_BLOCK_IT block_it; // iterator
81 TO_BLOCK *block; // current block;
82 TO_ROW *row; // current row
83 int block_index; // block number
84 int row_index; // row number
85
86#ifndef GRAPHICS_DISABLED
87 if (textord_show_initial_words && testing_on) {
88 if (to_win == nullptr) {
89 create_to_win(page_tr);
90 }
91 }
92#endif
93
94 block_it.set_to_list(port_blocks);
95 block_index = 1;
96 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
97 block = block_it.data();
98 compute_block_pitch(block, rotation, block_index, testing_on);
99 block_index++;
100 }
101
102 if (!try_doc_fixed(page_tr, port_blocks, gradient)) {
103 block_index = 1;
104 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
105 block = block_it.data();
106 if (!try_block_fixed(block, block_index)) {
107 try_rows_fixed(block, block_index, testing_on);
108 }
109 block_index++;
110 }
111 }
112
113 block_index = 1;
114 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
115 block = block_it.data();
116 POLY_BLOCK *pb = block->block->pdblk.poly_block();
// The 'continue' also skips block_index++, so non-text blocks do not
// consume a block number in this pass, unlike the two passes above.
117 if (pb != nullptr && !pb->IsText()) {
118 continue; // Non-text doesn't exist!
119 }
120 // row iterator
121 TO_ROW_IT row_it(block->get_rows());
122 row_index = 1;
123 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
124 row = row_it.data();
125 fix_row_pitch(row, block, port_blocks, row_index, block_index);
126 row_index++;
127 }
128 block_index++;
129 }
130#ifndef GRAPHICS_DISABLED
131 if (textord_show_initial_words && testing_on) {
132 ScrollView::Update();
133 }
134#endif
135}
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:502
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
Definition: topitch.cpp:293
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:515
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
Definition: topitch.cpp:144
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
Definition: topitch.cpp:371
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59

◆ compute_fixed_pitch_cjk()

void tesseract::compute_fixed_pitch_cjk ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 1103 of file cjkpitch.cpp.

// Drives an FPAnalyzer over port_blocks: two rounds of Pass1Analyze +
// EstimatePitch(true), an early exit when maybe_fixed_pitch() is false, then
// a merge/finalize/estimate loop that repeats while Pass2Analyze() returns
// true and the iteration limit has not been reached, and finally
// OutputEstimations().
// NOTE(review): listing lines 1119, 1133 and 1139 are absent from this
// extract; the closing braces on lines 1121, 1136 and 1141 belong to blocks
// whose opening line is not shown (presumably a debug-flag test around each
// diagnostic call).
1103 {
1104 FPAnalyzer analyzer(page_tr, port_blocks);
1105 if (analyzer.num_rows() == 0) {
1106 return;
1107 }
1108
1109 analyzer.Pass1Analyze();
1110 analyzer.EstimatePitch(true);
1111
1112 // Perform pass1 analysis again with the initial estimation of row
1113 // pitches, for better estimation.
1114 analyzer.Pass1Analyze();
1115 analyzer.EstimatePitch(true);
1116
1117 // Early exit if the page doesn't seem to contain fixed pitch rows.
1118 if (!analyzer.maybe_fixed_pitch()) {
1120 tprintf("Page doesn't seem to contain fixed pitch rows\n");
1121 }
1122 return;
1123 }
1124
// The body always runs at least once; iteration counts completed rounds.
1125 unsigned iteration = 0;
1126 do {
1127 analyzer.MergeFragments();
1128 analyzer.FinalizeLargeChars();
1129 analyzer.EstimatePitch(false);
1130 iteration++;
1131 } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1132
1134 tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration,
1135 analyzer.max_iteration());
1136 }
1137
1138 analyzer.OutputEstimations();
1140 analyzer.DebugOutputResult();
1141 }
1142}

◆ compute_height_modes()

int32_t tesseract::compute_height_modes ( STATS *  heights,
int32_t  min_height,
int32_t  max_height,
int32_t *  modes,
int32_t  maxmodes 
)

Definition at line 1629 of file makerow.cpp.

// Scans heights over [min_height, max_height] and stores in modes[] up to
// maxmodes height values that have a non-zero pile count, in increasing
// height order. Once modes[] is full, a further height whose count is at
// least the smallest stored count evicts that smallest entry and is appended
// at the end. Returns the number of entries in modes[].
1633 { // size of modes
1634 int32_t pile_count; // no in source pile
1635 int32_t src_count; // no of source entries
1636 int32_t src_index; // current entry
1637 int32_t least_count; // height of smalllest
1638 int32_t least_index; // index of least
1639 int32_t dest_count; // index in modes
1640
1641 src_count = max_height + 1 - min_height;
1642 dest_count = 0;
1643 least_count = INT32_MAX;
1644 least_index = -1;
1645 for (src_index = 0; src_index < src_count; src_index++) {
1646 pile_count = heights->pile_count(min_height + src_index);
1647 if (pile_count > 0) {
// Still room: append, tracking the smallest stored count as we go.
1648 if (dest_count < maxmodes) {
1649 if (pile_count < least_count) {
1650 // find smallest in array
1651 least_count = pile_count;
1652 least_index = dest_count;
1653 }
1654 modes[dest_count++] = min_height + src_index;
1655 } else if (pile_count >= least_count) {
// Full: drop the smallest entry by shifting the later ones down one slot.
1656 while (least_index < maxmodes - 1) {
1657 modes[least_index] = modes[least_index + 1];
1658 // shuffle up
1659 least_index++;
1660 }
1661 // new one on end
1662 modes[maxmodes - 1] = min_height + src_index;
1663 if (pile_count == least_count) {
1664 // new smallest
1665 least_index = maxmodes - 1;
1666 } else {
// dest_count is reused as the loop variable here; it leaves the loop equal
// to maxmodes, which is the entry count once the array is full.
1667 least_count = heights->pile_count(modes[0]);
1668 least_index = 0;
1669 for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1670 pile_count = heights->pile_count(modes[dest_count]);
1671 if (pile_count < least_count) {
1672 // find smallest
1673 least_count = pile_count;
1674 least_index = dest_count;
1675 }
1676 }
1677 }
1678 }
1679 }
1680 }
1681 return dest_count;
1682}

◆ compute_line_occupation()

void tesseract::compute_line_occupation ( TO_BLOCK *  block,
float  gradient,
int32_t  min_y,
int32_t  max_y,
int32_t *  occupation,
int32_t *  deltas 
)

Definition at line 799 of file makerow.cpp.

// Fills occupation[0..max_y - min_y] with, for each scan line, the summed
// widths of the de-skewed blob boxes covering that line. deltas[] is used as
// scratch: each blob adds its width at its bottom line and subtracts it at
// its top line, and occupation is the running sum of deltas.
// Asserts that every de-skewed box bottom and top lies within [min_y, max_y].
806 {
807 int32_t line_count; // maxy-miny+1
808 int32_t line_index; // of scan line
809 int index; // array index for daft compilers
810 TO_ROW *row; // current row
811 TO_ROW_IT row_it = block->get_rows();
812 BLOBNBOX *blob; // current blob
813 BLOBNBOX_IT blob_it; // iterator
814 float length; // of skew vector
815 TBOX blob_box; // bounding box
816 FCOORD rotation; // inverse of skew
817
818 line_count = max_y - min_y + 1;
// Unit vector along (1, -gradient): rotating by it undoes the skew.
819 length = std::sqrt(gradient * gradient + 1);
820 rotation = FCOORD(1 / length, -gradient / length);
821 for (line_index = 0; line_index < line_count; line_index++) {
822 deltas[line_index] = 0;
823 }
824 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
825 row = row_it.data();
826 blob_it.set_to_list(row->blob_list());
827 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
828 blob = blob_it.data();
829 blob_box = blob->bounding_box();
830 blob_box.rotate(rotation); // de-skew it
831 int32_t width = blob_box.right() - blob_box.left();
832 index = blob_box.bottom() - min_y;
833 ASSERT_HOST(index >= 0 && index < line_count);
834 // count transitions
835 deltas[index] += width;
836 index = blob_box.top() - min_y;
837 ASSERT_HOST(index >= 0 && index < line_count);
838 deltas[index] -= width;
839 }
840 }
// Prefix sum of the deltas gives the occupation per line.
841 occupation[0] = deltas[0];
842 for (line_index = 1; line_index < line_count; line_index++) {
843 occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
844 }
845}
void rotate(const FCOORD &vec)
Definition: rect.h:210

◆ compute_occupation_threshold()

void tesseract::compute_occupation_threshold ( int32_t  low_window,
int32_t  high_window,
int32_t  line_count,
int32_t *  occupation,
int32_t *  thresholds 
)

compute_occupation_threshold

Compute, for each line of the occupation array, the threshold used to decide whether that line is part of a text line or not.

Definition at line 852 of file makerow.cpp.

// Fills thresholds[0..line_count) with (sum - min_occ) / divisor + min_occ,
// where sum and min_occ are the sum and minimum of occupation over a window
// of low_window + high_window lines that slides along the array. When the
// window is not smaller than line_count, a single sum and minimum over the
// whole array are used for every line.
// NOTE(review): divisor is ceil((low_window + high_window) /
// textord_occupancy_threshold) and is used as an integer divisor; it is 0
// when both window sizes are 0 - presumably callers pass positive sizes,
// confirm.
858 {
859 int32_t line_index; // of thresholds line
860 int32_t low_index; // in occupation
861 int32_t high_index; // in occupation
862 int32_t sum; // current average
863 int32_t divisor; // to get thresholds
864 int32_t min_index; // of min occ
865 int32_t min_occ; // min in locality
866 int32_t test_index; // for finding min
867
868 divisor = static_cast<int32_t>(ceil((low_window + high_window) / textord_occupancy_threshold));
869 if (low_window + high_window < line_count) {
// Prime the window: after these two loops sum covers
// occupation[0 .. low_window + high_window) and high_index is its end.
870 for (sum = 0, high_index = 0; high_index < low_window; high_index++) {
871 sum += occupation[high_index];
872 }
873 for (low_index = 0; low_index < high_window; low_index++, high_index++) {
874 sum += occupation[high_index];
875 }
876 min_occ = occupation[0];
877 min_index = 0;
878 for (test_index = 1; test_index < high_index; test_index++) {
879 if (occupation[test_index] <= min_occ) {
880 min_occ = occupation[test_index];
881 min_index = test_index; // find min in region
882 }
883 }
// The first low_window lines all share the initial window's value.
884 for (line_index = 0; line_index < low_window; line_index++) {
885 thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
886 }
887 // same out to end
// Slide the window one line at a time, updating sum incrementally and
// rescanning for the minimum only when the old minimum has left the window.
888 for (low_index = 0; high_index < line_count; low_index++, high_index++) {
889 sum -= occupation[low_index];
890 sum += occupation[high_index];
891 if (occupation[high_index] <= min_occ) {
892 // find min in region
893 min_occ = occupation[high_index];
894 min_index = high_index;
895 }
896 // lost min from region
897 if (min_index <= low_index) {
898 min_occ = occupation[low_index + 1];
899 min_index = low_index + 1;
900 for (test_index = low_index + 2; test_index <= high_index; test_index++) {
901 if (occupation[test_index] <= min_occ) {
902 min_occ = occupation[test_index];
903 // find min in region
904 min_index = test_index;
905 }
906 }
907 }
908 thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
909 }
910 } else {
// Window covers everything: one global sum and minimum.
911 min_occ = occupation[0];
912 min_index = 0;
913 for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
914 if (occupation[low_index] < min_occ) {
915 min_occ = occupation[low_index];
916 min_index = low_index;
917 }
918 sum += occupation[low_index];
919 }
920 line_index = 0;
921 }
// Remaining lines (the last high_window lines in the windowed case, or all
// lines otherwise) reuse the final sum and minimum.
922 for (; line_index < line_count; line_index++) {
923 thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
924 }
925 // same out to end
926}
double textord_occupancy_threshold
Definition: makerow.cpp:82

◆ compute_page_skew()

void tesseract::compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 315 of file makerow.cpp.

// Sets page_m and page_err to the textord_skew_ile quantile of the row
// gradients (line_m) and row errors (line_error) collected from the rows of
// all text blocks. Both outputs are 0 when there are no rows.
// Depending on textord_biased_skewcalc, a row contributes either a number of
// samples derived from its blob count and error, or a single sample when it
// has at least textord_min_blobs_in_row blobs. If that yields no samples,
// every row contributes one sample.
319 {
320 int32_t row_count; // total rows
321 int32_t blob_count; // total_blobs
322 int32_t row_err; // integer error
323 int32_t row_index; // of total
324 TO_ROW *row; // current row
325 TO_BLOCK_IT block_it = blocks; // iterator
326
327 row_count = 0;
328 blob_count = 0;
329 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
330 POLY_BLOCK *pb = block_it.data()->block->pdblk.poly_block();
331 if (pb != nullptr && !pb->IsText()) {
332 continue; // Pretend non-text blocks don't exist.
333 }
334 row_count += block_it.data()->get_rows()->length();
335 // count up rows
336 TO_ROW_IT row_it(block_it.data()->get_rows());
337 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
338 blob_count += row_it.data()->blob_list()->length();
339 }
340 }
341 if (row_count == 0) {
342 page_m = 0.0f;
343 page_err = 0.0f;
344 return;
345 }
// NOTE(review): both vectors are sized by the total blob count, but the
// "desperate" fallback below stores one entry per row; this assumes the
// number of rows never exceeds the number of blobs - confirm.
346 // of rows
347 std::vector<float> gradients(blob_count);
348 // of rows
349 std::vector<float> errors(blob_count);
350
351 row_index = 0;
352 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
353 POLY_BLOCK *pb = block_it.data()->block->pdblk.poly_block();
354 if (pb != nullptr && !pb->IsText()) {
355 continue; // Pretend non-text blocks don't exist.
356 }
357 TO_ROW_IT row_it(block_it.data()->get_rows());
358 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
359 row = row_it.data();
// From here on blob_count is reused as the current row's blob count.
360 blob_count = row->blob_list()->length();
361 row_err = static_cast<int32_t>(std::ceil(row->line_error()));
362 if (row_err <= 0) {
363 row_err = 1;
364 }
365 if (textord_biased_skewcalc) {
// NOTE(review): blob_count is divided by row_err here and again in the for
// initialiser on the next line, so the row contributes
// blob_count / row_err / row_err samples - confirm the double division is
// intended.
366 blob_count /= row_err;
367 for (blob_count /= row_err; blob_count > 0; blob_count--) {
368 gradients[row_index] = row->line_m();
369 errors[row_index] = row->line_error();
370 row_index++;
371 }
372 } else if (blob_count >= textord_min_blobs_in_row) {
373 // get gradient
374 gradients[row_index] = row->line_m();
375 errors[row_index] = row->line_error();
376 row_index++;
377 }
378 }
379 }
380 if (row_index == 0) {
381 // desperate
382 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
383 POLY_BLOCK *pb = block_it.data()->block->pdblk.poly_block();
384 if (pb != nullptr && !pb->IsText()) {
385 continue; // Pretend non-text blocks don't exist.
386 }
387 TO_ROW_IT row_it(block_it.data()->get_rows());
388 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
389 row = row_it.data();
390 gradients[row_index] = row->line_m();
391 errors[row_index] = row->line_error();
392 row_index++;
393 }
394 }
395 }
// row_count now means "number of samples collected". The quantile index
// below is only in range while textord_skew_ile < 1.
396 row_count = row_index;
397 row_index = static_cast<int32_t>(row_count * textord_skew_ile);
398 gradients.resize(row_count);
399 std::nth_element(gradients.begin(), gradients.begin() + row_index, gradients.end());
400 page_m = gradients[row_index];
401 row_index = static_cast<int32_t>(row_count * textord_skew_ile);
402 errors.resize(row_count);
403 std::nth_element(errors.begin(), errors.begin() + row_index, errors.end());
404 page_err = errors[row_index];
405}
double textord_skew_ile
Definition: makerow.cpp:72
int textord_min_blobs_in_row
Definition: makerow.cpp:66
float line_error() const
Definition: blobbox.h:586
float line_m() const
Definition: blobbox.h:580
bool IsText() const
Definition: polyblk.h:52

◆ compute_pitch_sd()

float tesseract::compute_pitch_sd ( TO_ROW row,
STATS projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float  initial_pitch,
float &  sp_sd,
int16_t &  mid_cuts,
ICOORDELT_LIST *  row_cells,
bool  testing_on,
int16_t  start,
int16_t  end 
)

Definition at line 1289 of file topitch.cpp.

1302 {
1303 int16_t occupation; // no of cells in word.
1304 // blobs
1305 BLOBNBOX_IT blob_it = row->blob_list();
1306 BLOBNBOX_IT start_it; // start of word
1307 BLOBNBOX_IT plot_it; // for plotting
1308 int16_t blob_count; // no of blobs
1309 TBOX blob_box; // bounding box
1310 TBOX prev_box; // of super blob
1311 int32_t prev_right; // of word sync
1312 int scale_factor; // on scores for big words
1313 int32_t sp_count; // spaces
1314 FPSEGPT_LIST seg_list; // char cells
1315 FPSEGPT_IT seg_it; // iterator
1316 int16_t segpos; // position of segment
1317 int16_t cellpos; // previous cell boundary
1318 // iterator
1319 ICOORDELT_IT cell_it = row_cells;
1320 ICOORDELT *cell; // new cell
1321 double sqsum; // sum of squares
1322 double spsum; // of spaces
1323 double sp_var; // space error
1324 double word_sync; // result for word
1325 int32_t total_count; // total blobs
1326
1327 if ((pitsync_linear_version & 3) > 1) {
1328 word_sync = compute_pitch_sd2(row, projection, projection_left, projection_right, initial_pitch,
1329 occupation, mid_cuts, row_cells, testing_on, start, end);
1330 sp_sd = occupation;
1331 return word_sync;
1332 }
1333 mid_cuts = 0;
1334 cellpos = 0;
1335 total_count = 0;
1336 sqsum = 0;
1337 sp_count = 0;
1338 spsum = 0;
1339 prev_right = -1;
1340 if (blob_it.empty()) {
1341 return space_size * 10;
1342 }
1343#ifndef GRAPHICS_DISABLED
1344 if (testing_on && to_win != nullptr) {
1345 blob_box = blob_it.data()->bounding_box();
1346 projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
1347 }
1348#endif
1349 start_it = blob_it;
1350 blob_count = 0;
1351 blob_box = box_next(&blob_it); // first blob
1352 blob_it.mark_cycle_pt();
1353 do {
1354 for (; blob_count > 0; blob_count--) {
1355 box_next(&start_it);
1356 }
1357 do {
1358 prev_box = blob_box;
1359 blob_count++;
1360 blob_box = box_next(&blob_it);
1361 } while (!blob_it.cycled_list() && blob_box.left() - prev_box.right() < space_size);
1362 plot_it = start_it;
1363 if (pitsync_linear_version & 3) {
1364 word_sync = check_pitch_sync2(&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1365 projection, projection_left, projection_right,
1366 row->xheight * textord_projection_scale, occupation, &seg_list,
1367 start, end);
1368 } else {
1369 word_sync = check_pitch_sync(&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1370 projection, &seg_list);
1371 }
1372 if (testing_on) {
1373 tprintf("Word ending at (%d,%d), len=%d, sync rating=%g, ", prev_box.right(), prev_box.top(),
1374 seg_list.length() - 1, word_sync);
1375 seg_it.set_to_list(&seg_list);
1376 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1377 if (seg_it.data()->faked) {
1378 tprintf("(F)");
1379 }
1380 tprintf("%d, ", seg_it.data()->position());
1381 // tprintf("C=%g, s=%g, sq=%g\n",
1382 // seg_it.data()->cost_function(),
1383 // seg_it.data()->sum(),
1384 // seg_it.data()->squares());
1385 }
1386 tprintf("\n");
1387 }
1388#ifndef GRAPHICS_DISABLED
1389 if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) {
1390 plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1391 }
1392#endif
1393 seg_it.set_to_list(&seg_list);
1394 if (prev_right >= 0) {
1395 sp_var = seg_it.data()->position() - prev_right;
1396 sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
1397 sp_var *= sp_var;
1398 spsum += sp_var;
1399 sp_count++;
1400 }
1401 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1402 segpos = seg_it.data()->position();
1403 if (cell_it.empty() || segpos > cellpos + initial_pitch / 2) {
1404 // big gap
1405 while (!cell_it.empty() && segpos > cellpos + initial_pitch * 3 / 2) {
1406 cell = new ICOORDELT(cellpos + static_cast<int16_t>(initial_pitch), 0);
1407 cell_it.add_after_then_move(cell);
1408 cellpos += static_cast<int16_t>(initial_pitch);
1409 }
1410 // make new one
1411 cell = new ICOORDELT(segpos, 0);
1412 cell_it.add_after_then_move(cell);
1413 cellpos = segpos;
1414 } else if (segpos > cellpos - initial_pitch / 2) {
1415 cell = cell_it.data();
1416 // average positions
1417 cell->set_x((cellpos + segpos) / 2);
1418 cellpos = cell->x();
1419 }
1420 }
1421 seg_it.move_to_last();
1422 prev_right = seg_it.data()->position();
1424 scale_factor = (seg_list.length() - 2) / 2;
1425 if (scale_factor < 1) {
1426 scale_factor = 1;
1427 }
1428 } else {
1429 scale_factor = 1;
1430 }
1431 sqsum += word_sync * scale_factor;
1432 total_count += (seg_list.length() - 1) * scale_factor;
1433 seg_list.clear();
1434 } while (!blob_it.cycled_list());
1435 sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
1436 return total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
1437}
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:353
bool textord_show_fixed_cuts
Definition: drawtord.cpp:35
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1446
double textord_projection_scale
Definition: topitch.cpp:49
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:292
bool textord_pitch_scalebigwords
Definition: tovars.cpp:45
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:138
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:596

◆ compute_pitch_sd2()

float tesseract::compute_pitch_sd2 ( TO_ROW row,
STATS projection,
int16_t  projection_left,
int16_t  projection_right,
float  initial_pitch,
int16_t &  occupation,
int16_t &  mid_cuts,
ICOORDELT_LIST *  row_cells,
bool  testing_on,
int16_t  start,
int16_t  end 
)

Definition at line 1446 of file topitch.cpp.

1458 {
1459 // blobs
1460 BLOBNBOX_IT blob_it = row->blob_list();
1461 BLOBNBOX_IT plot_it;
1462 int16_t blob_count; // no of blobs
1463 TBOX blob_box; // bounding box
1464 FPSEGPT_LIST seg_list; // char cells
1465 FPSEGPT_IT seg_it; // iterator
1466 int16_t segpos; // position of segment
1467 // iterator
1468 ICOORDELT_IT cell_it = row_cells;
1469 ICOORDELT *cell; // new cell
1470 double word_sync; // result for word
1471
1472 mid_cuts = 0;
1473 if (blob_it.empty()) {
1474 occupation = 0;
1475 return initial_pitch * 10;
1476 }
1477#ifndef GRAPHICS_DISABLED
1478 if (testing_on && to_win != nullptr) {
1479 projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
1480 }
1481#endif
1482 blob_count = 0;
1483 blob_it.mark_cycle_pt();
1484 do {
1485 // first blob
1486 blob_box = box_next(&blob_it);
1487 blob_count++;
1488 } while (!blob_it.cycled_list());
1489 plot_it = blob_it;
1490 word_sync = check_pitch_sync2(
1491 &blob_it, blob_count, static_cast<int16_t>(initial_pitch), 2, projection, projection_left,
1492 projection_right, row->xheight * textord_projection_scale, occupation, &seg_list, start, end);
1493 if (testing_on) {
1494 tprintf("Row ending at (%d,%d), len=%d, sync rating=%g, ", blob_box.right(), blob_box.top(),
1495 seg_list.length() - 1, word_sync);
1496 seg_it.set_to_list(&seg_list);
1497 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1498 if (seg_it.data()->faked) {
1499 tprintf("(F)");
1500 }
1501 tprintf("%d, ", seg_it.data()->position());
1502 // tprintf("C=%g, s=%g, sq=%g\n",
1503 // seg_it.data()->cost_function(),
1504 // seg_it.data()->sum(),
1505 // seg_it.data()->squares());
1506 }
1507 tprintf("\n");
1508 }
1509#ifndef GRAPHICS_DISABLED
1510 if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) {
1511 plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1512 }
1513#endif
1514 seg_it.set_to_list(&seg_list);
1515 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1516 segpos = seg_it.data()->position();
1517 // make new one
1518 cell = new ICOORDELT(segpos, 0);
1519 cell_it.add_after_then_move(cell);
1520 if (seg_it.at_last()) {
1521 mid_cuts = seg_it.data()->cheap_cuts();
1522 }
1523 }
1524 seg_list.clear();
1525 return occupation > 0 ? sqrt(word_sync / occupation) : initial_pitch * 10;
1526}

◆ compute_reject_threshold()

float tesseract::compute_reject_threshold ( WERD_CHOICE word)

Definition at line 227 of file reject.cpp.

227 {
228 float threshold; // rejection threshold
229 float bestgap = 0.0f; // biggest gap
230 float gapstart; // bottom of gap
231
232 auto blob_count = word->length();
233 std::vector<float> ratings;
234 ratings.reserve(blob_count);
235 for (unsigned i = 0; i < blob_count; ++i) {
236 ratings.push_back(word->certainty(i));
237 }
238 std::sort(ratings.begin(), ratings.end());
239 gapstart = ratings[0] - 1; // all reject if none better
240 if (blob_count >= 3) {
241 for (unsigned index = 0; index < blob_count - 1; index++) {
242 if (ratings[index + 1] - ratings[index] > bestgap) {
243 bestgap = ratings[index + 1] - ratings[index];
244 // find biggest
245 gapstart = ratings[index];
246 }
247 }
248 }
249 threshold = gapstart + bestgap / 2;
250
251 return threshold;
252}
float certainty() const
Definition: ratngs.h:315
unsigned length() const
Definition: ratngs.h:287

◆ compute_row_descdrop()

int32_t tesseract::compute_row_descdrop ( TO_ROW row,
float  gradient,
int  xheight_blob_count,
STATS asc_heights 
)

Definition at line 1576 of file makerow.cpp.

1577 {
1578 // Count how many potential ascenders are in this row.
1579 int i_min = asc_heights->min_bucket();
1580 if ((i_min / row->xheight) < textord_ascx_ratio_min) {
1581 i_min = static_cast<int>(floor(row->xheight * textord_ascx_ratio_min + 0.5));
1582 }
1583 int i_max = asc_heights->max_bucket();
1584 if ((i_max / row->xheight) > textord_ascx_ratio_max) {
1585 i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
1586 }
1587 int num_potential_asc = 0;
1588 for (int i = i_min; i <= i_max; ++i) {
1589 num_potential_asc += asc_heights->pile_count(i);
1590 }
1591 auto min_height = static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
1592 auto max_height = static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
1593 float xcentre; // centre of blob
1594 float height; // height of blob
1595 BLOBNBOX_IT blob_it = row->blob_list();
1596 BLOBNBOX *blob; // current blob
1597 STATS heights(min_height, max_height);
1598 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1599 blob = blob_it.data();
1600 if (!blob->joined_to_prev()) {
1601 xcentre = (blob->bounding_box().left() + blob->bounding_box().right()) / 2.0f;
1602 height = (gradient * xcentre + row->parallel_c() - blob->bounding_box().bottom());
1603 if (height >= min_height && height <= max_height) {
1604 heights.add(static_cast<int>(floor(height + 0.5)), 1);
1605 }
1606 }
1607 }
1608 int blob_index = heights.mode(); // find mode
1609 int blob_count = heights.pile_count(blob_index); // get count of mode
1610 float total_fraction = (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
1611 if (static_cast<float>(blob_count + num_potential_asc) < xheight_blob_count * total_fraction) {
1612 blob_count = 0;
1613 }
1614 int descdrop = blob_count > 0 ? -blob_index : 0;
1616 tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n", descdrop, num_potential_asc,
1617 blob_count);
1618 heights.print();
1619 }
1620 return descdrop;
1621}
double textord_ascheight_mode_fraction
Definition: makerow.cpp:87
double textord_descx_ratio_min
Definition: makerow.cpp:91
bool textord_debug_xheights
Definition: makerow.cpp:59
double textord_ascx_ratio_max
Definition: makerow.cpp:90
double textord_ascx_ratio_min
Definition: makerow.cpp:89
double textord_descx_ratio_max
Definition: makerow.cpp:92
float parallel_c() const
Definition: blobbox.h:589
int32_t min_bucket() const
Definition: statistc.cpp:204
int32_t max_bucket() const
Definition: statistc.cpp:221

◆ compute_row_stats()

void tesseract::compute_row_stats ( TO_BLOCK block,
bool  testing_on 
)

Definition at line 1163 of file makerow.cpp.

1166 {
1167 int32_t row_index; // of median
1168 TO_ROW *row; // current row
1169 TO_ROW *prev_row; // previous row
1170 float iqr; // inter quartile range
1171 TO_ROW_IT row_it = block->get_rows();
1172 // number of rows
1173 int16_t rowcount = row_it.length();
1174 // for choose nth
1175 std::vector<TO_ROW *> rows(rowcount);
1176 rowcount = 0;
1177 prev_row = nullptr;
1178 row_it.move_to_last(); // start at bottom
1179 do {
1180 row = row_it.data();
1181 if (prev_row != nullptr) {
1182 rows[rowcount++] = prev_row;
1183 prev_row->spacing = row->intercept() - prev_row->intercept();
1184 if (prev_row->spacing < 0.1 && prev_row->spacing > -0.1) {
1185 // Avoid small spacing values which give a small disp_quant_factor_.
1186 // That can cause large memory allocations with out-of-memory.
1187 prev_row->spacing = 0;
1188 }
1189 if (testing_on) {
1190 tprintf("Row at %g yields spacing of %g\n", row->intercept(), prev_row->spacing);
1191 }
1192 }
1193 prev_row = row;
1194 row_it.backward();
1195 } while (!row_it.at_last());
1196 block->key_row = prev_row;
1197 block->baseline_offset = std::fmod(prev_row->parallel_c(), block->line_spacing);
1198 if (testing_on) {
1199 tprintf("Blob based spacing=(%g,%g), offset=%g", block->line_size, block->line_spacing,
1200 block->baseline_offset);
1201 }
1202 if (rowcount > 0) {
1203 rows.resize(rowcount);
1204 row_index = rowcount * 3 / 4;
1205 std::nth_element(rows.begin(), rows.begin() + row_index, rows.end(), row_spacing_order);
1206 iqr = rows[row_index]->spacing;
1207 row_index = rowcount / 4;
1208 std::nth_element(rows.begin(), rows.begin() + row_index, rows.end(), row_spacing_order);
1209 iqr -= rows[row_index]->spacing;
1210 row_index = rowcount / 2;
1211 std::nth_element(rows.begin(), rows.begin() + row_index, rows.end(), row_spacing_order);
1212 block->key_row = rows[row_index];
1213 if (testing_on) {
1214 tprintf(" row based=%g(%g)", rows[row_index]->spacing, iqr);
1215 }
1216 if (rowcount > 2 && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
1218 if (rows[row_index]->spacing < block->line_spacing &&
1219 rows[row_index]->spacing > block->line_size) {
1220 // within range
1221 block->line_size = rows[row_index]->spacing;
1222 // spacing=size
1223 } else if (rows[row_index]->spacing > block->line_spacing) {
1224 block->line_size = block->line_spacing;
1225 }
1226 // too big so use max
1227 } else {
1228 if (rows[row_index]->spacing < block->line_spacing) {
1229 block->line_size = rows[row_index]->spacing;
1230 } else {
1231 block->line_size = block->line_spacing;
1232 }
1233 // too big so use max
1234 }
1235 if (block->line_size < textord_min_xheight) {
1236 block->line_size = (float)textord_min_xheight;
1237 }
1238 block->line_spacing = rows[row_index]->spacing;
1240 }
1241 block->baseline_offset = std::fmod(rows[row_index]->intercept(), block->line_spacing);
1242 }
1243 if (testing_on) {
1244 tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n", block->line_size,
1245 block->line_spacing, block->baseline_offset);
1246 }
1247}
int textord_min_xheight
Definition: makerow.cpp:70
double textord_excess_blobsize
Definition: makerow.cpp:81
bool textord_new_initial_xheight
Definition: makerow.cpp:95
double textord_linespace_iqrlimit
Definition: makerow.cpp:74
float baseline_offset
Definition: blobbox.h:791
TO_ROW * key_row
Definition: blobbox.h:802

◆ compute_rows_pitch()

bool tesseract::compute_rows_pitch ( TO_BLOCK block,
int32_t  block_index,
bool  testing_on 
)

Definition at line 330 of file topitch.cpp.

334 {
335 int32_t maxwidth; // of spaces
336 TO_ROW *row; // current row
337 int32_t row_index; // row number.
338 float lower, upper; // cluster thresholds
339 TO_ROW_IT row_it = block->get_rows();
340
341 row_index = 1;
342 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
343 row = row_it.data();
344 ASSERT_HOST(row->xheight > 0);
346 maxwidth = static_cast<int32_t>(ceil(row->xheight * textord_words_maxspace));
347 if (row_pitch_stats(row, maxwidth, testing_on) &&
348 find_row_pitch(row, maxwidth, textord_dotmatrix_gap + 1, block, block_index, row_index,
349 testing_on)) {
350 if (row->fixed_pitch == 0) {
351 lower = row->pr_nonsp;
352 upper = row->pr_space;
353 row->space_size = upper;
354 row->kern_size = lower;
355 }
356 } else {
357 row->fixed_pitch = 0.0f; // insufficient data
359 }
360 row_index++;
361 }
362 return false;
363}
int textord_dotmatrix_gap
Definition: tovars.cpp:28
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
Definition: topitch.cpp:784
double textord_words_maxspace
Definition: tovars.cpp:32
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
Definition: topitch.cpp:648
float space_size
Definition: blobbox.h:673
float fixed_pitch
Definition: blobbox.h:657
void compute_vertical_projection()
Definition: blobbox.cpp:799
PITCH_TYPE pitch_decision
Definition: blobbox.h:656

◆ compute_xheight_from_modes()

int tesseract::compute_xheight_from_modes ( STATS heights,
STATS floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1480 of file makerow.cpp.

1481 {
1482 int blob_index = heights->mode(); // find mode
1483 int blob_count = heights->pile_count(blob_index); // get count of mode
1485 tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", min_height, max_height,
1486 blob_index, blob_count, heights->get_total());
1487 heights->print();
1488 floating_heights->print();
1489 }
1490 if (blob_count == 0) {
1491 return 0;
1492 }
1493 int modes[MAX_HEIGHT_MODES]; // biggest piles
1494 bool in_best_pile = false;
1495 int prev_size = -INT32_MAX;
1496 int best_count = 0;
1497 int mode_count = compute_height_modes(heights, min_height, max_height, modes, MAX_HEIGHT_MODES);
1498 if (cap_only && mode_count > 1) {
1499 mode_count = 1;
1500 }
1501 int x;
1503 tprintf("found %d modes: ", mode_count);
1504 for (x = 0; x < mode_count; x++) {
1505 tprintf("%d ", modes[x]);
1506 }
1507 tprintf("\n");
1508 }
1509
1510 for (x = 0; x < mode_count - 1; x++) {
1511 if (modes[x] != prev_size + 1) {
1512 in_best_pile = false; // had empty height
1513 }
1514 int modes_x_count = heights->pile_count(modes[x]) - floating_heights->pile_count(modes[x]);
1515 if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1516 (in_best_pile || modes_x_count > best_count)) {
1517 for (int asc = x + 1; asc < mode_count; asc++) {
1518 float ratio = static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1519 if (textord_ascx_ratio_min < ratio && ratio < textord_ascx_ratio_max &&
1520 (heights->pile_count(modes[asc]) >= blob_count * textord_ascheight_mode_fraction)) {
1521 if (modes_x_count > best_count) {
1522 in_best_pile = true;
1523 best_count = modes_x_count;
1524 }
1526 tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", modes[x], modes[asc] - modes[x],
1527 modes_x_count, ratio);
1528 }
1529 prev_size = modes[x];
1530 *xheight = static_cast<float>(modes[x]);
1531 *ascrise = static_cast<float>(modes[asc] - modes[x]);
1532 }
1533 }
1534 }
1535 }
1536 if (*xheight == 0) { // single mode
1537 // Remove counts of the "floating" blobs (the one whose height is too
1538 // small in relation to it's top end of the bounding box) from heights
1539 // before computing the single-mode xheight.
1540 // Restore the counts in heights after the mode is found, since
1541 // floating blobs might be useful for determining potential ascenders
1542 // in compute_row_descdrop().
1543 if (floating_heights->get_total() > 0) {
1544 for (x = min_height; x < max_height; ++x) {
1545 heights->add(x, -(floating_heights->pile_count(x)));
1546 }
1547 blob_index = heights->mode(); // find the modified mode
1548 for (x = min_height; x < max_height; ++x) {
1549 heights->add(x, floating_heights->pile_count(x));
1550 }
1551 }
1552 *xheight = static_cast<float>(blob_index);
1553 *ascrise = 0.0f;
1554 best_count = heights->pile_count(blob_index);
1556 tprintf("Single mode xheight set to %g\n", *xheight);
1557 }
1558 } else if (textord_debug_xheights) {
1559 tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1560 }
1561 return best_count;
1562}
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:98
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
Definition: makerow.cpp:1629
double textord_xheight_mode_fraction
Definition: makerow.cpp:86
void print() const
Definition: statistc.cpp:547
void add(int32_t value, int32_t count)
Definition: statistc.cpp:99
int32_t get_total() const
Definition: statistc.h:85
int32_t mode() const
Definition: statistc.cpp:112

◆ ComputeBlobCenter()

void tesseract::ComputeBlobCenter ( TBLOB Blob,
TPOINT BlobCenter 
)

Public Function Prototypes

◆ ComputeDirection()

void tesseract::ComputeDirection ( MFEDGEPT Start,
MFEDGEPT Finish,
float  MinSlope,
float  MaxSlope 
)

This routine computes the slope from Start to Finish and then computes the approximate direction of the line segment from Start to Finish. The direction is quantized into 8 buckets: N, S, E, W, NE, NW, SE, SW. Both the slope and the direction are then stored into the appropriate fields of the Start edge point. The direction is also stored into the PreviousDirection field of the Finish edge point.

Parameters
Start: starting point to compute direction from
Finish: finishing point to compute direction to
MinSlope: slope below which lines are horizontal
MaxSlope: slope above which lines are vertical

Definition at line 335 of file mfoutline.cpp.

335 {
336 FVECTOR Delta;
337
338 Delta.x = Finish->Point.x - Start->Point.x;
339 Delta.y = Finish->Point.y - Start->Point.y;
340 if (Delta.x == 0) {
341 if (Delta.y < 0) {
342 Start->Slope = -FLT_MAX;
343 Start->Direction = south;
344 } else {
345 Start->Slope = FLT_MAX;
346 Start->Direction = north;
347 }
348 } else {
349 Start->Slope = Delta.y / Delta.x;
350 if (Delta.x > 0) {
351 if (Delta.y > 0) {
352 if (Start->Slope > MinSlope) {
353 if (Start->Slope < MaxSlope) {
354 Start->Direction = northeast;
355 } else {
356 Start->Direction = north;
357 }
358 } else {
359 Start->Direction = east;
360 }
361 } else if (Start->Slope < -MinSlope) {
362 if (Start->Slope > -MaxSlope) {
363 Start->Direction = southeast;
364 } else {
365 Start->Direction = south;
366 }
367 } else {
368 Start->Direction = east;
369 }
370 } else if (Delta.y > 0) {
371 if (Start->Slope < -MinSlope) {
372 if (Start->Slope > -MaxSlope) {
373 Start->Direction = northwest;
374 } else {
375 Start->Direction = north;
376 }
377 } else {
378 Start->Direction = west;
379 }
380 } else if (Start->Slope > MinSlope) {
381 if (Start->Slope < MaxSlope) {
382 Start->Direction = southwest;
383 } else {
384 Start->Direction = south;
385 }
386 } else {
387 Start->Direction = west;
388 }
389 }
390 Finish->PreviousDirection = Start->Direction;
391}
Definition: fpoint.h:29
DIRECTION Direction
Definition: mfoutline.h:47
DIRECTION PreviousDirection
Definition: mfoutline.h:48

◆ ComputeDistance()

TESS_API float tesseract::ComputeDistance ( int  k,
PARAM_DESC dim,
float  p1[],
float  p2[] 
)

Definition at line 400 of file kdtree.cpp.

400 {
401 return std::sqrt(DistanceSquared(k, dim, p1, p2));
402}
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:378

◆ contains()

template<class T >
bool tesseract::contains ( const std::vector< T > &  data,
const T &  value 
)
inline

Definition at line 39 of file helpers.h.

39 {
40 return std::find(data.begin(), data.end(), value) != data.end();
41}
int value

◆ ConvertBlob()

LIST tesseract::ConvertBlob ( TBLOB blob)

Convert a blob into a list of MFOUTLINEs (float-based microfeature format).

Definition at line 34 of file mfoutline.cpp.

34 {
35 LIST outlines = NIL_LIST;
36 return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer);
37}
LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type)
Definition: mfoutline.cpp:79
TESSLINE * outlines
Definition: blobs.h:404

◆ ConvertConfig()

void tesseract::ConvertConfig ( BIT_VECTOR  Config,
int  ConfigId,
INT_CLASS_STRUCT Class 
)

This operation updates the config vectors of all protos in Class to indicate that the protos with 1's in Config belong to a new configuration identified by ConfigId. It is assumed that the length of the Config bit vector is equal to the number of protos in Class.

Parameters
Config: config to be added to class
ConfigId: id to be used for new config
Class: class to add new config to

Definition at line 430 of file intproto.cpp.

430 {
431 int ProtoId;
432 INT_PROTO_STRUCT *Proto;
433 int TotalLength;
434
435 for (ProtoId = 0, TotalLength = 0; ProtoId < Class->NumProtos; ProtoId++) {
436 if (test_bit(Config, ProtoId)) {
437 Proto = ProtoForProtoId(Class, ProtoId);
438 SET_BIT(Proto->Configs, ConfigId);
439 TotalLength += Class->ProtoLengths[ProtoId];
440 }
441 }
442 Class->ConfigLengths[ConfigId] = TotalLength;
443} /* ConvertConfig */
#define test_bit(array, bit)
Definition: bitvec.h:59
#define SET_BIT(array, bit)
Definition: bitvec.h:55
uint32_t Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:81

◆ ConvertOutline()

MFOUTLINE tesseract::ConvertOutline ( TESSLINE outline)

Convert a TESSLINE into the float-based MFOUTLINE micro-feature format.

Definition at line 41 of file mfoutline.cpp.

41 {
42 auto MFOutline = NIL_LIST;
43
44 if (outline == nullptr || outline->loop == nullptr) {
45 return MFOutline;
46 }
47
48 auto StartPoint = outline->loop;
49 auto EdgePoint = StartPoint;
50 do {
51 auto NextPoint = EdgePoint->next;
52
53 /* filter out duplicate points */
54 if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) {
55 auto NewPoint = new MFEDGEPT;
56 NewPoint->ClearMark();
57 NewPoint->Hidden = EdgePoint->IsHidden();
58 NewPoint->Point.x = EdgePoint->pos.x;
59 NewPoint->Point.y = EdgePoint->pos.y;
60 MFOutline = push(MFOutline, NewPoint);
61 }
62 EdgePoint = NextPoint;
63 } while (EdgePoint != StartPoint);
64
65 if (MFOutline != nullptr) {
66 MakeOutlineCircular(MFOutline);
67 }
68 return MFOutline;
69}
EDGEPT * next
Definition: blobs.h:200
bool IsHidden() const
Definition: blobs.h:184
EDGEPT * loop
Definition: blobs.h:287

◆ ConvertOutlines()

LIST tesseract::ConvertOutlines ( TESSLINE outline,
LIST  mf_outlines,
OUTLINETYPE  outline_type 
)

Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).

Parameters
outline: first outline to be converted
mf_outlines: list to add converted outlines to
outline_type: are the outlines outer or holes?

Definition at line 79 of file mfoutline.cpp.

79 {
80 MFOUTLINE mf_outline;
81
82 while (outline != nullptr) {
83 mf_outline = ConvertOutline(outline);
84 if (mf_outline != nullptr) {
85 mf_outlines = push(mf_outlines, mf_outline);
86 }
87 outline = outline->next;
88 }
89 return mf_outlines;
90}
MFOUTLINE ConvertOutline(TESSLINE *outline)
Definition: mfoutline.cpp:41
TESSLINE * next
Definition: blobs.h:288

◆ ConvertSegmentToPicoFeat()

void tesseract::ConvertSegmentToPicoFeat ( FPOINT Start,
FPOINT End,
FEATURE_SET  FeatureSet 
)

This routine converts an entire segment of an outline into a set of pico features which are added to FeatureSet. The length of the segment is rounded to the nearest whole number of pico-features. The pico-features are spaced evenly over the entire segment. Results are placed in FeatureSet. Globals:

  • classify_pico_feature_length length of a single pico-feature
    Parameters
    Start: starting point of pico-feature
    End: ending point of pico-feature
    FeatureSet: set to add pico-feature to

Definition at line 95 of file picofeat.cpp.

95 {
96 float Angle;
97 float Length;
98 int NumFeatures;
99 FPOINT Center;
100 FPOINT Delta;
101 int i;
102
103 Angle = NormalizedAngleFrom(Start, End, 1.0);
104 Length = DistanceBetween(*Start, *End);
105 NumFeatures = static_cast<int>(floor(Length / classify_pico_feature_length + 0.5));
106 if (NumFeatures < 1) {
107 NumFeatures = 1;
108 }
109
110 /* compute vector for one pico feature */
111 Delta.x = XDelta(*Start, *End) / NumFeatures;
112 Delta.y = YDelta(*Start, *End) / NumFeatures;
113
114 /* compute position of first pico feature */
115 Center.x = Start->x + Delta.x / 2.0;
116 Center.y = Start->y + Delta.y / 2.0;
117
118 /* compute each pico feature in segment and add to feature set */
119 for (i = 0; i < NumFeatures; i++) {
120 auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
121 Feature->Params[PicoFeatDir] = Angle;
122 Feature->Params[PicoFeatX] = Center.x;
123 Feature->Params[PicoFeatY] = Center.y;
124 AddFeature(FeatureSet, Feature);
125
126 Center.x += Delta.x;
127 Center.y += Delta.y;
128 }
129} /* ConvertSegmentToPicoFeat */
#define XDelta(A, B)
Definition: fpoint.h:38
#define YDelta(A, B)
Definition: fpoint.h:39
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
double classify_pico_feature_length
Definition: picofeat.cpp:37

◆ ConvertToMicroFeatures()

MICROFEATURES tesseract::ConvertToMicroFeatures ( MFOUTLINE  Outline,
MICROFEATURES  MicroFeatures 
)

Convert Outline to MicroFeatures

Parameters
Outline: outline to extract micro-features from
MicroFeatures: list of micro-features to add to
Returns
List of micro-features with new features added to front.
Note
Globals: none

Definition at line 91 of file mfx.cpp.

91 {
92 MFOUTLINE Current;
93 MFOUTLINE Last;
94 MFOUTLINE First;
95
96 if (DegenerateOutline(Outline)) {
97 return (MicroFeatures);
98 }
99
100 First = NextExtremity(Outline);
101 Last = First;
102 do {
103 Current = NextExtremity(Last);
104 if (!PointAt(Current)->Hidden) {
105 auto NewFeature = ExtractMicroFeature(Last, Current);
106 MicroFeatures.push_front(NewFeature);
107 }
108 Last = Current;
109 } while (Last != First);
110
111 return MicroFeatures;
112} /* ConvertToMicroFeatures */
MicroFeature ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End)
Definition: mfx.cpp:127
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint)
Definition: mfoutline.cpp:196

◆ ConvertToOutlineFeatures()

void tesseract::ConvertToOutlineFeatures ( MFOUTLINE  Outline,
FEATURE_SET  FeatureSet 
)

This routine converts each section in the specified outline to a feature described by its x, y position, length and angle. Results are returned in FeatureSet.

Parameters
Outline: outline to extract outline-features from
FeatureSet: set of features to add outline-features to

Definition at line 97 of file outfeat.cpp.

97 {
98 MFOUTLINE Next;
99 MFOUTLINE First;
100 FPOINT FeatureStart;
101 FPOINT FeatureEnd;
102
103 if (DegenerateOutline(Outline)) {
104 return;
105 }
106
107 First = Outline;
108 Next = First;
109 do {
110 FeatureStart = PointAt(Next)->Point;
111 Next = NextPointAfter(Next);
112
113 /* note that an edge is hidden if the ending point of the edge is
114 marked as hidden. This situation happens because the order of
115 the outlines is reversed when they are converted from the old
116 format. In the old format, a hidden edge is marked by the
117 starting point for that edge. */
118 if (!PointAt(Next)->Hidden) {
119 FeatureEnd = PointAt(Next)->Point;
120 AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet);
121 }
122 } while (Next != First);
123} /* ConvertToOutlineFeatures */
void AddOutlineFeatureToSet(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:78

◆ ConvertToPicoFeatures2()

void tesseract::ConvertToPicoFeatures2 ( MFOUTLINE  Outline,
FEATURE_SET  FeatureSet 
)

This routine steps through the specified outline and cuts it up into pieces of equal length. These pieces become the desired pico-features. Each segment in the outline is converted into an integral number of pico-features. Results are returned in FeatureSet.

Globals:

  • classify_pico_feature_length: length of features to be extracted

Parameters
Outline: outline to extract pico-features from
FeatureSet: set of features to add pico-features to

Definition at line 144 of file picofeat.cpp.

144 {
145 MFOUTLINE Next;
146 MFOUTLINE First;
147 MFOUTLINE Current;
148
149 if (DegenerateOutline(Outline)) {
150 return;
151 }
152
153 First = Outline;
154 Current = First;
155 Next = NextPointAfter(Current);
156 do {
157 /* note that an edge is hidden if the ending point of the edge is
158 marked as hidden. This situation happens because the order of
159 the outlines is reversed when they are converted from the old
160 format. In the old format, a hidden edge is marked by the
161 starting point for that edge. */
162 if (!(PointAt(Next)->Hidden)) {
163 ConvertSegmentToPicoFeat(&(PointAt(Current)->Point), &(PointAt(Next)->Point), FeatureSet);
164 }
165
166 Current = Next;
167 Next = NextPointAfter(Current);
168 } while (Current != First);
169
170} /* ConvertToPicoFeatures2 */
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:95

◆ CopyVector()

void tesseract::CopyVector ( unsigned  n,
const TFloat src,
TFloat dest 
)
inline

Definition at line 210 of file functions.h.

210 {
211 memcpy(dest, src, n * sizeof(dest[0]));
212}

◆ correct_row_xheight()

void tesseract::correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1690 of file makerow.cpp.

1690 {
1691 ROW_CATEGORY row_category = get_row_category(row);
1693 tprintf(
1694 "correcting row xheight: row->xheight %.4f"
1695 ", row->acrise %.4f row->descdrop %.4f\n",
1696 row->xheight, row->ascrise, row->descdrop);
1697 }
1698 bool normal_xheight = within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
1699 bool cap_xheight =
1701 // Use the average xheight/ascrise for the following cases:
1702 // -- the xheight of the row could not be determined at all
1703 // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1704 // and its xheight is close to either cap height or average xheight
1705 // -- the row does not have ascenders or descenders, but its xheight
1706 // is close to the average block xheight (e.g. row with "www.mmm.com")
1707 if (row_category == ROW_ASCENDERS_FOUND) {
1708 if (row->descdrop >= 0) {
1709 row->descdrop = row->xheight * (descdrop / xheight);
1710 }
1711 } else if (row_category == ROW_INVALID ||
1712 (row_category == ROW_DESCENDERS_FOUND && (normal_xheight || cap_xheight)) ||
1713 (row_category == ROW_UNKNOWN && normal_xheight)) {
1715 tprintf("using average xheight\n");
1716 }
1717 row->xheight = xheight;
1718 row->ascrise = ascrise;
1719 row->descdrop = descdrop;
1720 } else if (row_category == ROW_DESCENDERS_FOUND) {
1721 // Assume this is a row with mostly lowercase letters and it's xheight
1722 // is computed correctly (unfortunately there is no way to distinguish
1723 // this from the case when descenders are found, but the most common
1724 // height is capheight).
1726 tprintf("lowercase, corrected ascrise\n");
1727 }
1728 row->ascrise = row->xheight * (ascrise / xheight);
1729 } else if (row_category == ROW_UNKNOWN) {
1730 // Otherwise assume this row is an all-caps or small-caps row
1731 // and adjust xheight and ascrise of the row.
1732
1733 row->all_caps = true;
1734 if (cap_xheight) { // regular all caps
1736 tprintf("all caps\n");
1737 }
1738 row->xheight = xheight;
1739 row->ascrise = ascrise;
1740 row->descdrop = descdrop;
1741 } else { // small caps or caps with an odd xheight
1743 if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1744 tprintf("small caps\n");
1745 } else {
1746 tprintf("all caps with irregular xheight\n");
1747 }
1748 }
1749 row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1750 row->xheight -= row->ascrise;
1751 row->descdrop = row->xheight * (descdrop / xheight);
1752 }
1753 }
1755 tprintf(
1756 "corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1757 " = %.4f\n",
1758 row->xheight, row->ascrise, row->descdrop);
1759 }
1760}
double textord_xheight_error_margin
Definition: makerow.cpp:93
ROW_CATEGORY
Definition: makerow.h:36
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:102
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:94

◆ count_block_votes()

void tesseract::count_block_votes ( TO_BLOCK block,
int32_t &  def_fixed,
int32_t &  def_prop,
int32_t &  maybe_fixed,
int32_t &  maybe_prop,
int32_t &  corr_fixed,
int32_t &  corr_prop,
int32_t &  dunno 
)

Definition at line 606 of file topitch.cpp.

610 {
611 TO_ROW *row; // current row
612 TO_ROW_IT row_it = block->get_rows();
613
614 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
615 row = row_it.data();
616 switch (row->pitch_decision) {
617 case PITCH_DUNNO:
618 dunno++;
619 break;
620 case PITCH_DEF_PROP:
621 def_prop++;
622 break;
623 case PITCH_MAYBE_PROP:
624 maybe_prop++;
625 break;
626 case PITCH_DEF_FIXED:
627 def_fixed++;
628 break;
630 maybe_fixed++;
631 break;
632 case PITCH_CORR_PROP:
633 corr_prop++;
634 break;
635 case PITCH_CORR_FIXED:
636 corr_fixed++;
637 break;
638 }
639 }
640}

◆ count_pitch_stats()

bool tesseract::count_pitch_stats ( TO_ROW row,
STATS gap_stats,
STATS pitch_stats,
float  initial_pitch,
float  min_space,
bool  ignore_outsize,
bool  split_outsize,
int32_t  dm_gap 
)

Definition at line 1008 of file topitch.cpp.

1017 {
1018 bool prev_valid; // not word broken
1019 BLOBNBOX *blob; // current blob
1020 // blobs
1021 BLOBNBOX_IT blob_it = row->blob_list();
1022 int32_t prev_right; // end of prev blob
1023 int32_t prev_centre; // centre of previous blob
1024 int32_t x_centre; // centre of this blob
1025 int32_t blob_width; // width of blob
1026 int32_t width_units; // no of widths in blob
1027 float width; // blob width
1028 TBOX blob_box; // bounding box
1029 TBOX joined_box; // of super blob
1030
1031 gap_stats->clear();
1032 pitch_stats->clear();
1033 if (blob_it.empty()) {
1034 return false;
1035 }
1036 prev_valid = false;
1037 prev_centre = 0;
1038 prev_right = 0; // stop compiler warning
1039 joined_box = blob_it.data()->bounding_box();
1040 do {
1041 blob_it.forward();
1042 blob = blob_it.data();
1043 if (!blob->joined_to_prev()) {
1044 blob_box = blob->bounding_box();
1045 if ((blob_box.left() - joined_box.right() < dm_gap && !blob_it.at_first()) ||
1046 blob->cblob() == nullptr) {
1047 joined_box += blob_box; // merge blobs
1048 } else {
1049 blob_width = joined_box.width();
1050 if (split_outsize) {
1051 width_units =
1052 static_cast<int32_t>(floor(static_cast<float>(blob_width) / initial_pitch + 0.5));
1053 if (width_units < 1) {
1054 width_units = 1;
1055 }
1056 width_units--;
1057 } else if (ignore_outsize) {
1058 width = static_cast<float>(blob_width) / initial_pitch;
1059 width_units =
1060 width < 1 + words_default_fixed_limit && width > 1 - words_default_fixed_limit ? 0
1061 : -1;
1062 } else {
1063 width_units = 0; // everything in
1064 }
1065 x_centre = static_cast<int32_t>(joined_box.left() +
1066 (blob_width - width_units * initial_pitch) / 2);
1067 if (prev_valid && width_units >= 0) {
1068 // if (width_units>0)
1069 // {
1070 // tprintf("wu=%d,
1071 // width=%d,
1072 // xc=%d, adding
1073 // %d\n",
1074 // width_units,blob_width,x_centre,x_centre-prev_centre);
1075 // }
1076 gap_stats->add(joined_box.left() - prev_right, 1);
1077 pitch_stats->add(x_centre - prev_centre, 1);
1078 }
1079 prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch);
1080 prev_right = joined_box.right();
1081 prev_valid = blob_box.left() - joined_box.right() < min_space;
1082 prev_valid = prev_valid && width_units >= 0;
1083 joined_box = blob_box;
1084 }
1085 }
1086 } while (!blob_it.at_first());
1087 return gap_stats->get_total() >= 3;
1088}
double words_default_fixed_limit
Definition: tovars.cpp:50
TDimension width() const
Definition: rect.h:126

◆ countof()

template<typename T , size_t N>
constexpr size_t tesseract::countof ( T   const(&)[N])
constexprnoexcept

Definition at line 34 of file serialis.h.

34 {
35 return N;
36}

◆ create_fx_win()

void tesseract::create_fx_win ( )

Definition at line 50 of file drawfx.cpp.

50 { // make features win
52 WERDWIDTH * 2, BLN_MAX * 2, true);
53}
#define FXDEMOYSIZE
Definition: drawfx.cpp:36
#define FXDEMOWIN
Definition: drawfx.cpp:32
#define FXDEMOXSIZE
Definition: drawfx.cpp:35
#define FXDEMOXPOS
Definition: drawfx.cpp:33
#define BLN_MAX
Definition: drawfx.cpp:37
#define FXDEMOYPOS
Definition: drawfx.cpp:34

◆ create_fxdebug_win()

void tesseract::create_fxdebug_win ( )

Definition at line 77 of file drawfx.cpp.

77 { // make gradients win
78}

◆ create_to_win()

ScrollView * tesseract::create_to_win ( ICOORD  page_tr)

Definition at line 47 of file drawtord.cpp.

47 {
48 if (to_win != nullptr) {
49 return to_win;
50 }
51 to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, page_tr.x() + 1, page_tr.y() + 1,
52 page_tr.x(), page_tr.y(), true);
53 return to_win;
54}
#define TO_WIN_YPOS
Definition: drawtord.cpp:31
#define TO_WIN_NAME
Definition: drawtord.cpp:32
#define TO_WIN_XPOS
Definition: drawtord.cpp:30

◆ create_todebug_win()

void tesseract::create_todebug_win ( )

◆ CreateFeatureSpaceWindow()

TESS_API ScrollView * tesseract::CreateFeatureSpaceWindow ( const char *  name,
int  xpos,
int  ypos 
)

Creates a window of the appropriate size for displaying elements in feature space.

Definition at line 1622 of file intproto.cpp.

1622 {
1623 return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true);
1624}

◆ crotate_cblob()

C_BLOB * tesseract::crotate_cblob ( C_BLOB blob,
FCOORD  rotation 
)

Definition at line 614 of file blobbox.cpp.

617 {
618 C_OUTLINE_LIST out_list; // output outlines
619 // input outlines
620 C_OUTLINE_IT in_it = blob->out_list();
621 // output outlines
622 C_OUTLINE_IT out_it = &out_list;
623
624 for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) {
625 out_it.add_after_then_move(new C_OUTLINE(in_it.data(), rotation));
626 }
627 return new C_BLOB(&out_list);
628}
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70

◆ CrownCompatible()

bool tesseract::CrownCompatible ( const std::vector< RowScratchRegisters > *  rows,
int  a,
int  b,
const ParagraphModel model 
)

Definition at line 1349 of file paragraphs.cpp.

1350 {
1351 if (model != kCrownRight && model != kCrownLeft) {
1352 tprintf("CrownCompatible() should only be called with crown models!\n");
1353 return false;
1354 }
1355 auto &row_a = (*rows)[a];
1356 auto &row_b = (*rows)[b];
1357 if (model == kCrownRight) {
1358 return NearlyEqual(row_a.rindent_ + row_a.rmargin_, row_b.rindent_ + row_b.rmargin_,
1359 Epsilon(row_a.ri_->average_interword_space));
1360 }
1361 return NearlyEqual(row_a.lindent_ + row_a.lmargin_, row_b.lindent_ + row_b.lmargin_,
1362 Epsilon(row_a.ri_->average_interword_space));
1363}
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:51
const ParagraphModel * kCrownLeft
Definition: paragraphs.cpp:56
const ParagraphModel * kCrownRight
Definition: paragraphs.cpp:58

◆ DECLARE_INT_PARAM_FLAG()

TESS_COMMON_TRAINING_API tesseract::DECLARE_INT_PARAM_FLAG ( debug_level  )

◆ DECLARE_STRING_PARAM_FLAG() [1/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( )

◆ DECLARE_STRING_PARAM_FLAG() [2/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( )

◆ DECLARE_STRING_PARAM_FLAG() [3/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( fontconfig_tmpdir  )

◆ DECLARE_STRING_PARAM_FLAG() [4/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( fonts_dir  )

◆ DECLARE_STRING_PARAM_FLAG() [5/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( )

◆ DECLARE_STRING_PARAM_FLAG() [6/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( output_trainer  )

◆ DECLARE_STRING_PARAM_FLAG() [7/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( test_ch  )

◆ DECLARE_STRING_PARAM_FLAG() [8/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( )

◆ DECLARE_STRING_PARAM_FLAG() [9/9]

TESS_COMMON_TRAINING_API tesseract::DECLARE_STRING_PARAM_FLAG ( )

◆ DefineFeature() [1/3]

MicroFeatureParams CharNormParams EndParamDesc EndParamDesc tesseract::DefineFeature ( GeoFeatDesc  ,
,
,
kGeoFeatureType  ,
GeoFeatParams   
)

◆ DefineFeature() [2/3]

MicroFeatureParams CharNormParams EndParamDesc tesseract::DefineFeature ( IntFeatDesc  ,
,
,
kIntFeatureType  ,
IntFeatParams   
)

◆ DefineFeature() [3/3]

EndParamDesc tesseract::DefineFeature ( PicoFeatDesc  ,
,
,
"pf"  ,
PicoFeatParams   
)

◆ DefineParam() [1/6]

EndParamDesc tesseract::DefineParam ( ,
,
-0.25,
0.75
)

◆ DefineParam() [2/6]

MicroFeatureParams tesseract::DefineParam ( ,
,
0.0,
1.0
)

◆ DefineParam() [3/6]

MicroFeatureParams CharNormParams EndParamDesc tesseract::DefineParam ( ,
,
0.0,
255.0
)

◆ DefineParam() [4/6]

tesseract::DefineParam ( ,
,
-0.5,
0.5
)

◆ DefineParam() [5/6]

MicroFeatureParams tesseract::DefineParam ( ,
,
0.0,
1.0
)

◆ DefineParam() [6/6]

EndParamDesc tesseract::DefineParam ( ,
,
0.0,
1.0
)

◆ DegradeImage()

Image tesseract::DegradeImage ( Image  input,
int  exposure,
TRand randomizer,
float *  rotation 
)

Definition at line 89 of file degradeimage.cpp.

89 {
90 Image pix = pixConvertTo8(input, false);
91 input.destroy();
92 input = pix;
93 int width = pixGetWidth(input);
94 int height = pixGetHeight(input);
95
96 if (exposure >= 2) {
97 // An erosion simulates the spreading darkening of a dark copy.
98 // This is backwards to binary morphology,
99 // see http://www.leptonica.com/grayscale-morphology.html
100 pix = input;
101 input = pixErodeGray(pix, 3, 3);
102 pix.destroy();
103 }
104 // A convolution is essential to any mode as no scanner produces an
105 // image as sharp as the electronic image.
106 pix = pixBlockconv(input, 1, 1);
107 input.destroy();
108 // A small random rotation helps to make the edges jaggy in a realistic way.
109 if (rotation != nullptr) {
110 float radians_clockwise = 0.0f;
111 if (*rotation) {
112 radians_clockwise = *rotation;
113 } else if (randomizer != nullptr) {
114 radians_clockwise = randomizer->SignedRand(kRotationRange);
115 }
116
117 input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
118 // Rotate the boxes to match.
119 *rotation = radians_clockwise;
120 pix.destroy();
121 } else {
122 input = pix;
123 }
124
125 if (exposure >= 3 || exposure == 1) {
126 // Erosion after the convolution is not as heavy as before, so it is
127 // good for level 1 and in addition as a level 3.
128 // This is backwards to binary morphology,
129 // see http://www.leptonica.com/grayscale-morphology.html
130 pix = input;
131 input = pixErodeGray(pix, 3, 3);
132 pix.destroy();
133 }
134 // The convolution really needed to be 2x2 to be realistic enough, but
135 // we only have 3x3, so we have to bias the image darker or lose thin
136 // strokes.
137 int erosion_offset = 0;
138 // For light and 0 exposure, there is no dilation, so compensate for the
139 // convolution with a big darkening bias which is undone for lighter
140 // exposures.
141 if (exposure <= 0) {
142 erosion_offset = -3 * kExposureFactor;
143 }
144 // Add in a general offset of the greyscales for the exposure level so
145 // a threshold of 128 gives a reasonable binary result.
146 erosion_offset -= exposure * kExposureFactor;
147 // Add a gradual fade over the page and a small amount of salt and pepper
148 // noise to simulate noise in the sensor/paper fibres and varying
149 // illumination.
150 l_uint32 *data = pixGetData(input);
151 for (int y = 0; y < height; ++y) {
152 for (int x = 0; x < width; ++x) {
153 int pixel = GET_DATA_BYTE(data, x);
154 if (randomizer != nullptr) {
155 pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper;
156 }
157 if (height + width > kMinRampSize) {
158 pixel -= (2 * x + y) * 32 / (height + width);
159 }
160 pixel += erosion_offset;
161 if (pixel < 0) {
162 pixel = 0;
163 }
164 if (pixel > 255) {
165 pixel = 255;
166 }
167 SET_DATA_BYTE(data, x, pixel);
168 }
169 data += pixGetWpl(input);
170 }
171 return input;
172}
const float kRotationRange
const int kExposureFactor
const int kSaltnPepper
const int kMinRampSize
double SignedRand(double range)
Definition: helpers.h:78
int32_t IntRand()
Definition: helpers.h:74

◆ delete_d()

LIST tesseract::delete_d ( LIST  list,
void *  key,
int_compare  is_equal 
)

Definition at line 88 of file oldlist.cpp.

88 {
89 LIST result = NIL_LIST;
90 LIST last_one = NIL_LIST;
91
92 if (is_equal == nullptr) {
93 is_equal = is_same;
94 }
95
96 while (list != NIL_LIST) {
97 if (!(*is_equal)(list->first_node(), key)) {
98 if (last_one == NIL_LIST) {
99 last_one = list;
100 list = list->list_rest();
101 result = last_one;
102 set_rest(last_one, NIL_LIST);
103 } else {
104 set_rest(last_one, list);
105 last_one = list;
106 list = list->list_rest();
107 set_rest(last_one, NIL_LIST);
108 }
109 } else {
110 list = pop(list);
111 }
112 }
113 return (result);
114}
#define is_equal(p1, p2)
Definition: outlines.h:93
#define set_rest(l, cell)
Definition: oldlist.h:101
LIST pop(LIST list)
Definition: oldlist.cpp:166
list_rec * list_rest()
Definition: oldlist.h:111

◆ delete_non_dropout_rows()

void tesseract::delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 612 of file makerow.cpp.

618 {
619 TBOX block_box; // deskewed block
620 int32_t max_y; // in block
621 int32_t min_y;
622 int32_t line_index; // of scan line
623 int32_t line_count; // no of scan lines
624 int32_t distance; // to drop-out
625 int32_t xleft; // of block
626 int32_t ybottom; // of block
627 TO_ROW *row; // current row
628 TO_ROW_IT row_it = block->get_rows();
629 BLOBNBOX_IT blob_it = &block->blobs;
630
631 if (row_it.empty()) {
632 return; // empty block
633 }
634 block_box = deskew_block_coords(block, gradient);
635 xleft = block->block->pdblk.bounding_box().left();
636 ybottom = block->block->pdblk.bounding_box().bottom();
637 min_y = block_box.bottom() - 1;
638 max_y = block_box.top() + 1;
639 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
640 line_index = static_cast<int32_t>(std::floor(row_it.data()->intercept()));
641 if (line_index <= min_y) {
642 min_y = line_index - 1;
643 }
644 if (line_index >= max_y) {
645 max_y = line_index + 1;
646 }
647 }
648 line_count = max_y - min_y + 1;
649 if (line_count <= 0) {
650 return; // empty block
651 }
652 // change in occupation
653 std::vector<int32_t> deltas(line_count);
654 // of pixel coords
655 std::vector<int32_t> occupation(line_count);
656
657 compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]);
659 static_cast<int32_t>(ceil(block->line_spacing * (tesseract::CCStruct::kDescenderFraction +
661 static_cast<int32_t>(ceil(block->line_spacing * (tesseract::CCStruct::kXHeightFraction +
663 max_y - min_y + 1, &occupation[0], &deltas[0]);
664#ifndef GRAPHICS_DISABLED
665 if (testing_on) {
666 draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
667 }
668#endif
669 compute_dropout_distances(&occupation[0], &deltas[0], line_count);
670 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
671 row = row_it.data();
672 line_index = static_cast<int32_t>(std::floor(row->intercept()));
673 distance = deltas[line_index - min_y];
674 if (find_best_dropout_row(row, distance, block->line_spacing / 2, line_index, &row_it,
675 testing_on)) {
676#ifndef GRAPHICS_DISABLED
677 if (testing_on) {
678 plot_parallel_row(row, gradient, block_edge, ScrollView::WHITE, rotation);
679 }
680#endif
681 blob_it.add_list_after(row_it.data()->blob_list());
682 delete row_it.extract(); // too far away
683 }
684 }
685 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
686 blob_it.add_list_after(row_it.data()->blob_list());
687 }
688}
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:765
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:122
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
Definition: makerow.cpp:852
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
Definition: makerow.cpp:933
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
Definition: makerow.cpp:799
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
Definition: makerow.cpp:696
void draw_occupation(int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])
Definition: drawtord.cpp:161

◆ DeleteObject()

template<typename T >
void tesseract::DeleteObject ( T *  object)

Definition at line 156 of file tablefind.cpp.

156 {
157 delete object;
158}

◆ DeSerialize() [1/2]

template<typename T >
bool tesseract::DeSerialize ( bool  swap,
FILE *  fp,
std::vector< T > &  data 
)

Definition at line 205 of file helpers.h.

205 {
206 uint32_t size;
207 if (fread(&size, sizeof(size), 1, fp) != 1) {
208 return false;
209 }
210 if (swap) {
211 Reverse32(&size);
212 }
213 // Arbitrarily limit the number of elements to protect against bad data.
214 assert(size <= UINT16_MAX);
215 if (size > UINT16_MAX) {
216 return false;
217 }
218 // TODO: optimize.
219 data.resize(size);
220 if (size > 0) {
221 if (fread(&data[0], sizeof(T), size, fp) != size) {
222 return false;
223 }
224 if (swap) {
225 for (uint32_t i = 0; i < size; ++i) {
226 ReverseN(&data[i], sizeof(T));
227 }
228 }
229 }
230 return true;
231}
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:184
void Reverse32(void *ptr)
Definition: helpers.h:196

◆ DeSerialize() [2/2]

template<typename T >
bool tesseract::DeSerialize ( FILE *  fp,
T *  data,
size_t  n = 1 
)

Definition at line 49 of file serialis.h.

49 {
50 return fread(data, sizeof(T), n, fp) == n;
51}

◆ deskew_block_coords()

TBOX tesseract::deskew_block_coords ( TO_BLOCK block,
float  gradient 
)

Definition at line 765 of file makerow.cpp.

768 {
769 TBOX result; // block bounds
770 TBOX blob_box; // of block
771 FCOORD rotation; // deskew vector
772 float length; // of gradient vector
773 TO_ROW_IT row_it = block->get_rows();
774 TO_ROW *row; // current row
775 BLOBNBOX *blob; // current blob
776 BLOBNBOX_IT blob_it; // iterator
777
778 length = std::sqrt(gradient * gradient + 1);
779 rotation = FCOORD(1 / length, -gradient / length);
780 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
781 row = row_it.data();
782 blob_it.set_to_list(row->blob_list());
783 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
784 blob = blob_it.data();
785 blob_box = blob->bounding_box();
786 blob_box.rotate(rotation); // de-skew it
787 result += blob_box;
788 }
789 }
790 return result;
791}

◆ destroy()

TESS_API LIST tesseract::destroy ( LIST  list)

Definition at line 121 of file oldlist.cpp.

121 {
122 LIST next;
123
124 while (list != NIL_LIST) {
125 next = list->list_rest();
126 delete list;
127 list = next;
128 }
129 return (NIL_LIST);
130}
def next(obj)
Definition: ast.py:56

◆ destroy_nodes()

void tesseract::destroy_nodes ( LIST  list,
void_dest  destructor 
)

Definition at line 137 of file oldlist.cpp.

137 {
138 ASSERT_HOST(destructor != nullptr);
139
140 while (list != NIL_LIST) {
141 if (list->first_node() != nullptr) {
142 (*destructor)(list->first_node());
143 }
144 list = pop(list);
145 }
146}

◆ DetectParagraphs() [1/2]

TESS_API void tesseract::DetectParagraphs ( int  debug_level,
bool  after_text_recognition,
const MutableIterator block_start,
std::vector< ParagraphModel * > *  models 
)

Definition at line 2562 of file paragraphs.cpp.

2563 {
2564 // Clear out any preconceived notions.
2565 if (block_start->Empty(RIL_TEXTLINE)) {
2566 return;
2567 }
2568 BLOCK *block = block_start->PageResIt()->block()->block;
2569 block->para_list()->clear();
2570 bool is_image_block = block->pdblk.poly_block() && !block->pdblk.poly_block()->IsText();
2571
2572 // Convert the Tesseract structures to RowInfos
2573 // for the paragraph detection algorithm.
2574 MutableIterator row(*block_start);
2575 if (row.Empty(RIL_TEXTLINE)) {
2576 return; // end of input already.
2577 }
2578
2579 std::vector<RowInfo> row_infos;
2580 do {
2581 if (!row.PageResIt()->row()) {
2582 continue; // empty row.
2583 }
2584 row.PageResIt()->row()->row->set_para(nullptr);
2585 row_infos.emplace_back();
2586 RowInfo &ri = row_infos.back();
2587 InitializeRowInfo(after_text_recognition, row, &ri);
2588 } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) && row.Next(RIL_TEXTLINE));
2589
2590 // If we're called before text recognition, we might not have
2591 // tight block bounding boxes, so trim by the minimum on each side.
2592 if (!row_infos.empty()) {
2593 int min_lmargin = row_infos[0].pix_ldistance;
2594 int min_rmargin = row_infos[0].pix_rdistance;
2595 for (unsigned i = 1; i < row_infos.size(); i++) {
2596 if (row_infos[i].pix_ldistance < min_lmargin) {
2597 min_lmargin = row_infos[i].pix_ldistance;
2598 }
2599 if (row_infos[i].pix_rdistance < min_rmargin) {
2600 min_rmargin = row_infos[i].pix_rdistance;
2601 }
2602 }
2603 if (min_lmargin > 0 || min_rmargin > 0) {
2604 for (auto &row_info : row_infos) {
2605 row_info.pix_ldistance -= min_lmargin;
2606 row_info.pix_rdistance -= min_rmargin;
2607 }
2608 }
2609 }
2610
2611 // Run the paragraph detection algorithm.
2612 std::vector<PARA *> row_owners;
2613 std::vector<PARA *> the_paragraphs;
2614 if (!is_image_block) {
2615 DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(), models);
2616 } else {
2617 row_owners.resize(row_infos.size());
2618 CanonicalizeDetectionResults(&row_owners, block->para_list());
2619 }
2620
2621 // Now stitch in the row_owners into the rows.
2622 row = *block_start;
2623 for (auto &row_owner : row_owners) {
2624 while (!row.PageResIt()->row()) {
2625 row.Next(RIL_TEXTLINE);
2626 }
2627 row.PageResIt()->row()->row->set_para(row_owner);
2628 row.Next(RIL_TEXTLINE);
2629 }
2630}
void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, std::vector< ParagraphModel * > *models)
void CanonicalizeDetectionResults(std::vector< PARA * > *row_owners, PARA_LIST *paragraphs)
bool Empty(PageIteratorLevel level) const
bool Next(PageIteratorLevel level) override
const PAGE_RES_IT * PageResIt() const
PARA_LIST * para_list()
Definition: ocrblock.h:119
BLOCK_RES * block() const
Definition: pageres.h:769

◆ DetectParagraphs() [2/2]

TESS_API void tesseract::DetectParagraphs ( int  debug_level,
std::vector< RowInfo > *  row_infos,
std::vector< PARA * > *  row_owners,
PARA_LIST *  paragraphs,
std::vector< ParagraphModel * > *  models 
)

Definition at line 2318 of file paragraphs.cpp.

2320 {
2321 ParagraphTheory theory(models);
2322
2323 // Initialize row_owners to be a bunch of nullptr pointers.
2324 row_owners->clear();
2325 row_owners->resize(row_infos->size());
2326
2327 // Set up row scratch registers for the main algorithm.
2328 std::vector<RowScratchRegisters> rows(row_infos->size());
2329 for (unsigned i = 0; i < row_infos->size(); i++) {
2330 rows[i].Init((*row_infos)[i]);
2331 }
2332
2333 // Pass 1:
2334 // Detect sequences of lines that all contain leader dots (.....)
2335 // These are likely Tables of Contents. If there are three text lines in
2336 // a row with leader dots, it's pretty safe to say the middle one should
2337 // be a paragraph of its own.
2338 SeparateSimpleLeaderLines(&rows, 0, rows.size(), &theory);
2339
2340 DebugDump(debug_level > 1, "End of Pass 1", theory, rows);
2341
2342 std::vector<Interval> leftovers;
2343 LeftoverSegments(rows, &leftovers, 0, rows.size());
2344 for (auto &leftover : leftovers) {
2345 // Pass 2a:
2346 // Find any strongly evidenced start-of-paragraph lines. If they're
2347 // followed by two lines that look like body lines, make a paragraph
2348 // model for that and see if that model applies throughout the text
2349 // (that is, "smear" it).
2350 StrongEvidenceClassify(debug_level, &rows, leftover.begin, leftover.end, &theory);
2351
2352 // Pass 2b:
2353 // If we had any luck in pass 2a, we got part of the page and didn't
2354 // know how to classify a few runs of rows. Take the segments that
2355 // didn't find a model and reprocess them individually.
2356 std::vector<Interval> leftovers2;
2357 LeftoverSegments(rows, &leftovers2, leftover.begin, leftover.end);
2358 bool pass2a_was_useful =
2359 leftovers2.size() > 1 ||
2360 (leftovers2.size() == 1 && (leftovers2[0].begin != 0 || static_cast<size_t>(leftovers2[0].end) != rows.size()));
2361 if (pass2a_was_useful) {
2362 for (auto &leftover2 : leftovers2) {
2363 StrongEvidenceClassify(debug_level, &rows, leftover2.begin, leftover2.end, &theory);
2364 }
2365 }
2366 }
2367
2368 DebugDump(debug_level > 1, "End of Pass 2", theory, rows);
2369
2370 // Pass 3:
2371 // These are the dregs for which we didn't have enough strong textual
2372 // and geometric clues to form matching models for. Let's see if
2373 // the geometric clues are simple enough that we could just use those.
2374 LeftoverSegments(rows, &leftovers, 0, rows.size());
2375 for (auto &leftover : leftovers) {
2376 GeometricClassify(debug_level, &rows, leftover.begin, leftover.end, &theory);
2377 }
2378
2379 // Undo any flush models for which there's little evidence.
2380 DowngradeWeakestToCrowns(debug_level, &theory, &rows);
2381
2382 DebugDump(debug_level > 1, "End of Pass 3", theory, rows);
2383
2384 // Pass 4:
2385 // Take everything that's still not marked up well and clear all markings.
2386 LeftoverSegments(rows, &leftovers, 0, rows.size());
2387 for (auto &leftover : leftovers) {
2388 for (int j = leftover.begin; j < leftover.end; j++) {
2389 rows[j].SetUnknown();
2390 }
2391 }
2392
2393 DebugDump(debug_level > 1, "End of Pass 4", theory, rows);
2394
2395 // Convert all of the unique hypothesis runs to PARAs.
2396 ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners, &theory);
2397
2398 DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows);
2399
2400 // Finally, clean up any dangling nullptr row paragraph parents.
2401 CanonicalizeDetectionResults(row_owners, paragraphs);
2402}

◆ determine_newline_type()

char tesseract::determine_newline_type ( WERD word,
BLOCK block,
WERD next_word,
BLOCK next_block 
)

test line ends

Parameters
word: word to do
block: current block
next_word: next word
next_block: block of next word

Definition at line 207 of file output.cpp.

212 {
213 int16_t end_gap; // to right edge
214 int16_t width; // of next word
215 TBOX word_box; // bounding
216 TBOX next_box; // next word
217 TBOX block_box; // block bounding
218
219 if (!word->flag(W_EOL)) {
220 return false; // not end of line
221 }
222 if (next_word == nullptr || next_block == nullptr || block != next_block) {
223 return CTRL_NEWLINE;
224 }
225 if (next_word->space() > 0) {
226 return CTRL_HARDLINE; // it is tabbed
227 }
228 word_box = word->bounding_box();
229 next_box = next_word->bounding_box();
230 block_box = block->pdblk.bounding_box();
231 // gap to eol
232 end_gap = block_box.right() - word_box.right();
233 end_gap -= static_cast<int32_t>(block->space());
234 width = next_box.right() - next_box.left();
235 // tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n",
236 // block_box.right(),word_box.right(),end_gap,
237 // next_box.right(),next_box.left(),width,
238 // end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE);
239 return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;
240}
#define CTRL_NEWLINE
Definition: output.cpp:35
#define CTRL_HARDLINE
Definition: output.cpp:36
int16_t space() const
return spacing
Definition: ocrblock.h:93
bool flag(WERD_FLAGS mask) const
Definition: werd.h:128
TBOX bounding_box() const
Definition: werd.cpp:155
uint8_t space() const
Definition: werd.h:100

◆ DirOtherWay()

BlobNeighbourDir tesseract::DirOtherWay ( BlobNeighbourDir  dir)
inline

Definition at line 102 of file blobbox.h.

102 {
103 return static_cast<BlobNeighbourDir>(dir ^ 2);
104}
BlobNeighbourDir
Definition: blobbox.h:89

◆ display_blob()

void tesseract::display_blob ( TBLOB blob,
ScrollView::Color  color 
)

Definition at line 54 of file render.cpp.

54 {
55 /* Size of drawable */
56 if (blob_window == nullptr) {
57 blob_window = new ScrollView("Blobs", 520, 10, 500, 256, 2000, 256, true);
58 } else {
60 }
61
62 render_blob(blob_window, blob, color);
63}
ScrollView * blob_window
Definition: render.cpp:36
void render_blob(ScrollView *window, TBLOB *blob, ScrollView::Color color)
Definition: render.cpp:71

◆ display_edgepts()

void tesseract::display_edgepts ( LIST  outlines)

Definition at line 47 of file plotedges.cpp.

47 {
48 /* Set up window */
49 if (edge_window == nullptr) {
50 edge_window = new ScrollView("Edges", 750, 150, 400, 128, 800, 256, true);
51 } else {
53 }
54 /* Render the outlines */
55 auto window = edge_window;
56 /* Reclaim old memory */
57 iterate(outlines) {
58 render_edgepts(window, reinterpret_cast<EDGEPT *>(outlines->first_node()), ScrollView::WHITE);
59 }
60}
void render_edgepts(ScrollView *window, EDGEPT *edgept, ScrollView::Color color)
Definition: render.cpp:86
ScrollView * edge_window
Definition: plotedges.cpp:37

◆ DisplayIntFeature()

void tesseract::DisplayIntFeature ( const INT_FEATURE_STRUCT Feature,
float  Evidence 
)

This routine renders the specified feature into a global display list.

Globals:

  • FeatureShapes global display list for features
    Parameters
    Feature: pico-feature to be displayed
    Evidence: best evidence for this feature (0-1)

Definition at line 543 of file intproto.cpp.

543 {
544 ScrollView::Color color = GetMatchColorFor(Evidence);
545 RenderIntFeature(IntMatchWindow, Feature, color);
546 if (FeatureDisplayWindow) {
547 RenderIntFeature(FeatureDisplayWindow, Feature, color);
548 }
549} /* DisplayIntFeature */
ScrollView::Color GetMatchColorFor(float Evidence)
Definition: intproto.cpp:1272
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1500

◆ DisplayIntProto()

void tesseract::DisplayIntProto ( INT_CLASS_STRUCT Class,
PROTO_ID  ProtoId,
float  Evidence 
)

This routine renders the specified proto into a global display list.

Globals:

  • ProtoShapes global display list for protos
    Parameters
    Class: class to take proto from
    ProtoId: id of proto in Class to be displayed
    Evidence: total evidence for proto (0-1)

Definition at line 561 of file intproto.cpp.

561 {
562 ScrollView::Color color = GetMatchColorFor(Evidence);
563 RenderIntProto(IntMatchWindow, Class, ProtoId, color);
564 if (ProtoDisplayWindow) {
565 RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color);
566 }
567} /* DisplayIntProto */
void RenderIntProto(ScrollView *window, INT_CLASS_STRUCT *Class, PROTO_ID ProtoId, ScrollView::Color color)
Definition: intproto.cpp:1534

◆ DistanceSquared()

float tesseract::DistanceSquared ( int  k,
PARAM_DESC dim,
float  p1[],
float  p2[] 
)

Returns the Euclidean distance squared between p1 and p2 for all essential dimensions.

Parameters
k: keys are in k-space
dim: dimension descriptions (essential, circular, etc)
p1, p2: two different points in K-D space

Definition at line 378 of file kdtree.cpp.

378 {
379 float total_distance = 0;
380
381 for (; k > 0; k--, p1++, p2++, dim++) {
382 if (dim->NonEssential) {
383 continue;
384 }
385
386 float dimension_distance = *p1 - *p2;
387
388 /* if this dimension is circular - check wraparound distance */
389 if (dim->Circular) {
390 dimension_distance = Magnitude(dimension_distance);
391 float wrap_distance = dim->Max - dim->Min - dimension_distance;
392 dimension_distance = std::min(dimension_distance, wrap_distance);
393 }
394
395 total_distance += dimension_distance * dimension_distance;
396 }
397 return total_distance;
398}
#define Magnitude(X)
Definition: kdtree.cpp:30

◆ divide_blobs()

void tesseract::divide_blobs ( TBLOB blob,
TBLOB other_blob,
bool  italic_blob,
const TPOINT location 
)

Definition at line 970 of file blobs.cpp.

970 {
972 TESSLINE *outline1 = nullptr;
973 TESSLINE *outline2 = nullptr;
974
975 TESSLINE *outline = blob->outlines;
976 blob->outlines = nullptr;
977 int location_prod = location.cross(vertical);
978
979 while (outline != nullptr) {
980 TPOINT mid_pt((outline->topleft.x + outline->botright.x) / 2,
981 (outline->topleft.y + outline->botright.y) / 2);
982 int mid_prod = mid_pt.cross(vertical);
983 if (mid_prod < location_prod) {
984 // Outline is in left blob.
985 if (outline1) {
986 outline1->next = outline;
987 } else {
988 blob->outlines = outline;
989 }
990 outline1 = outline;
991 } else {
992 // Outline is in right blob.
993 if (outline2) {
994 outline2->next = outline;
995 } else {
996 other_blob->outlines = outline;
997 }
998 outline2 = outline;
999 }
1000 outline = outline->next;
1001 }
1002
1003 if (outline1) {
1004 outline1->next = nullptr;
1005 }
1006 if (outline2) {
1007 outline2->next = nullptr;
1008 }
1009}
const TPOINT kDivisibleVerticalUpright(0, 1)
const TPOINT kDivisibleVerticalItalic(1, 5)
TDimension x
Definition: blobs.h:89
int cross(const TPOINT &other) const
Definition: blobs.h:75
TDimension y
Definition: blobs.h:90
TPOINT botright
Definition: blobs.h:284
TPOINT topleft
Definition: blobs.h:283

◆ divisible_blob()

bool tesseract::divisible_blob ( TBLOB blob,
bool  italic_blob,
TPOINT location 
)

Definition at line 923 of file blobs.cpp.

923 {
924 if (blob->outlines == nullptr || blob->outlines->next == nullptr) {
925 return false; // Need at least 2 outlines for it to be possible.
926 }
927 int max_gap = 0;
929 for (TESSLINE *outline1 = blob->outlines; outline1 != nullptr; outline1 = outline1->next) {
930 if (outline1->is_hole) {
931 continue; // Holes do not count as separable.
932 }
933 TPOINT mid_pt1((outline1->topleft.x + outline1->botright.x) / 2,
934 (outline1->topleft.y + outline1->botright.y) / 2);
935 int mid_prod1 = mid_pt1.cross(vertical);
936 int min_prod1, max_prod1;
937 outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
938 for (TESSLINE *outline2 = outline1->next; outline2 != nullptr; outline2 = outline2->next) {
939 if (outline2->is_hole) {
940 continue; // Holes do not count as separable.
941 }
942 TPOINT mid_pt2((outline2->topleft.x + outline2->botright.x) / 2,
943 (outline2->topleft.y + outline2->botright.y) / 2);
944 int mid_prod2 = mid_pt2.cross(vertical);
945 int min_prod2, max_prod2;
946 outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2);
947 int mid_gap = abs(mid_prod2 - mid_prod1);
948 int overlap = std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2);
949 if (mid_gap - overlap / 4 > max_gap) {
950 max_gap = mid_gap - overlap / 4;
951 *location = mid_pt1;
952 *location += mid_pt2;
953 *location /= 2;
954 }
955 }
956 }
957 // Use the y component of the vertical vector as an approximation to its
958 // length.
959 return max_gap > vertical.y;
960}

◆ DivRounded()

int tesseract::DivRounded ( int  a,
int  b 
)
inline

Definition at line 162 of file helpers.h.

162 {
163 if (b < 0) {
164 return -DivRounded(a, -b);
165 }
166 return a >= 0 ? (a + b / 2) / b : (a - b / 2) / b;
167}
int DivRounded(int a, int b)
Definition: helpers.h:162

◆ DoFill()

void tesseract::DoFill ( FILL_SPEC FillSpec,
CLASS_PRUNER_STRUCT Pruner,
uint32_t  ClassMask,
uint32_t  ClassCount,
uint32_t  WordIndex 
)

This routine fills in the section of a class pruner corresponding to a single x value for a single proto of a class.

Parameters
FillSpec: specifies which bits to fill in pruner
Pruner: class pruner to be filled
ClassMask: indicates which bits to change in each word
ClassCount: indicates what to change bits to
WordIndex: indicates which word to change

Definition at line 1021 of file intproto.cpp.

1022 {
1023 int X, Y, Angle;
1024 uint32_t OldWord;
1025
1026 X = FillSpec->X;
1027 if (X < 0) {
1028 X = 0;
1029 }
1030 if (X >= NUM_CP_BUCKETS) {
1031 X = NUM_CP_BUCKETS - 1;
1032 }
1033
1034 if (FillSpec->YStart < 0) {
1035 FillSpec->YStart = 0;
1036 }
1037 if (FillSpec->YEnd >= NUM_CP_BUCKETS) {
1038 FillSpec->YEnd = NUM_CP_BUCKETS - 1;
1039 }
1040
1041 for (Y = FillSpec->YStart; Y <= FillSpec->YEnd; Y++) {
1042 for (Angle = FillSpec->AngleStart;; CircularIncrement(Angle, NUM_CP_BUCKETS)) {
1043 OldWord = Pruner->p[X][Y][Angle][WordIndex];
1044 if (ClassCount > (OldWord & ClassMask)) {
1045 OldWord &= ~ClassMask;
1046 OldWord |= ClassCount;
1047 Pruner->p[X][Y][Angle][WordIndex] = OldWord;
1048 }
1049 if (Angle == FillSpec->AngleEnd) {
1050 break;
1051 }
1052 }
1053 }
1054} /* DoFill */
#define NUM_CP_BUCKETS
Definition: intproto.h:53
#define CircularIncrement(i, r)
Definition: intproto.cpp:102
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:73

◆ DominatesInMerge()

bool tesseract::DominatesInMerge ( BlobTextFlowType  type1,
BlobTextFlowType  type2 
)
inline

Definition at line 125 of file blobbox.h.

125 {
126 // LEADER always loses.
127 if (type1 == BTFT_LEADER) {
128 return false;
129 }
130 if (type2 == BTFT_LEADER) {
131 return true;
132 }
133 // With those out of the way, the ordering of the enum determines the result.
134 return type1 >= type2;
135}

◆ dont_allow_1Il()

void tesseract::dont_allow_1Il ( WERD_RES word)

◆ DotProductAVX()

TFloat tesseract::DotProductAVX ( const TFloat u,
const TFloat v,
int  n 
)

◆ DotProductAVX512F()

TFloat tesseract::DotProductAVX512F ( const TFloat u,
const TFloat v,
int  n 
)

◆ DotProductFMA()

TFloat tesseract::DotProductFMA ( const TFloat u,
const TFloat v,
int  n 
)

◆ DotProductNative()

TFloat tesseract::DotProductNative ( const TFloat u,
const TFloat v,
int  n 
)

Definition at line 22 of file dotproduct.cpp.

22 {
23 TFloat total = 0;
24#if defined(OPENMP_SIMD) || defined(_OPENMP)
25#pragma omp simd reduction(+:total)
26#endif
27 for (int k = 0; k < n; k++) {
28 total += u[k] * v[k];
29 }
30 return total;
31}

◆ DotProductNEON()

TFloat tesseract::DotProductNEON ( const TFloat u,
const TFloat v,
int  n 
)

◆ DotProductSSE()

TFloat tesseract::DotProductSSE ( const TFloat u,
const TFloat v,
int  n 
)

◆ double_VAR_H() [1/55]

tesseract::double_VAR_H ( classify_max_slope  )

◆ double_VAR_H() [2/55]

tesseract::double_VAR_H ( classify_min_slope  )

◆ double_VAR_H() [3/55]

tesseract::double_VAR_H ( classify_norm_adj_curl  )

◆ double_VAR_H() [4/55]

tesseract::double_VAR_H ( classify_norm_adj_midpoint  )

◆ double_VAR_H() [5/55]

tesseract::double_VAR_H ( classify_pico_feature_length  )

◆ double_VAR_H() [6/55]

tesseract::double_VAR_H ( gapmap_big_gaps  )

◆ double_VAR_H() [7/55]

tesseract::double_VAR_H ( pitsync_joined_edge  )

◆ double_VAR_H() [8/55]

tesseract::double_VAR_H ( pitsync_offset_freecut_fraction  )

◆ double_VAR_H() [9/55]

tesseract::double_VAR_H ( textord_ascheight_mode_fraction  )

◆ double_VAR_H() [10/55]

tesseract::double_VAR_H ( textord_ascx_ratio_max  )

◆ double_VAR_H() [11/55]

tesseract::double_VAR_H ( textord_ascx_ratio_min  )

◆ double_VAR_H() [12/55]

tesseract::double_VAR_H ( textord_balance_factor  )

◆ double_VAR_H() [13/55]

tesseract::double_VAR_H ( textord_chop_width  )

◆ double_VAR_H() [14/55]

tesseract::double_VAR_H ( textord_descx_ratio_max  )

◆ double_VAR_H() [15/55]

tesseract::double_VAR_H ( textord_descx_ratio_min  )

◆ double_VAR_H() [16/55]

tesseract::double_VAR_H ( textord_excess_blobsize  )

◆ double_VAR_H() [17/55]

tesseract::double_VAR_H ( textord_fpiqr_ratio  )

◆ double_VAR_H() [18/55]

tesseract::double_VAR_H ( textord_linespace_iqrlimit  )

◆ double_VAR_H() [19/55]

tesseract::double_VAR_H ( textord_max_pitch_iqr  )

◆ double_VAR_H() [20/55]

tesseract::double_VAR_H ( textord_min_blob_height_fraction  )

◆ double_VAR_H() [21/55]

tesseract::double_VAR_H ( textord_min_linesize  )

◆ double_VAR_H() [22/55]

tesseract::double_VAR_H ( textord_minxh  )

◆ double_VAR_H() [23/55]

tesseract::double_VAR_H ( textord_occupancy_threshold  )

◆ double_VAR_H() [24/55]

tesseract::double_VAR_H ( textord_pitch_rowsimilarity  )

◆ double_VAR_H() [25/55]

tesseract::double_VAR_H ( textord_projection_scale  )

◆ double_VAR_H() [26/55]

tesseract::double_VAR_H ( textord_skew_ile  )

◆ double_VAR_H() [27/55]

tesseract::double_VAR_H ( textord_skew_lag  )

◆ double_VAR_H() [28/55]

tesseract::double_VAR_H ( textord_spacesize_ratioprop  )

◆ double_VAR_H() [29/55]

tesseract::double_VAR_H ( textord_spline_shift_fraction  )

◆ double_VAR_H() [30/55]

tesseract::double_VAR_H ( textord_tabvector_vertical_box_ratio  )

◆ double_VAR_H() [31/55]

tesseract::double_VAR_H ( textord_tabvector_vertical_gap_fraction  )

◆ double_VAR_H() [32/55]

tesseract::double_VAR_H ( textord_underline_offset  )

◆ double_VAR_H() [33/55]

tesseract::double_VAR_H ( textord_underline_threshold  )

◆ double_VAR_H() [34/55]

tesseract::double_VAR_H ( textord_underline_width  )

◆ double_VAR_H() [35/55]

tesseract::double_VAR_H ( textord_width_limit  )

◆ double_VAR_H() [36/55]

tesseract::double_VAR_H ( textord_words_def_fixed  )

◆ double_VAR_H() [37/55]

tesseract::double_VAR_H ( textord_words_def_prop  )

◆ double_VAR_H() [38/55]

tesseract::double_VAR_H ( textord_words_default_maxspace  )

◆ double_VAR_H() [39/55]

tesseract::double_VAR_H ( textord_words_default_minspace  )

◆ double_VAR_H() [40/55]

tesseract::double_VAR_H ( textord_words_default_nonspace  )

◆ double_VAR_H() [41/55]

tesseract::double_VAR_H ( textord_words_definite_spread  )

◆ double_VAR_H() [42/55]

tesseract::double_VAR_H ( textord_words_initial_lower  )

◆ double_VAR_H() [43/55]

tesseract::double_VAR_H ( textord_words_initial_upper  )

◆ double_VAR_H() [44/55]

tesseract::double_VAR_H ( textord_words_maxspace  )

◆ double_VAR_H() [45/55]

tesseract::double_VAR_H ( textord_words_min_minspace  )

◆ double_VAR_H() [46/55]

tesseract::double_VAR_H ( textord_words_minlarge  )

◆ double_VAR_H() [47/55]

tesseract::double_VAR_H ( textord_words_pitchsd_threshold  )

◆ double_VAR_H() [48/55]

tesseract::double_VAR_H ( textord_wordstats_smooth_factor  )

◆ double_VAR_H() [49/55]

tesseract::double_VAR_H ( textord_xheight_error_margin  )

◆ double_VAR_H() [50/55]

tesseract::double_VAR_H ( textord_xheight_mode_fraction  )

◆ double_VAR_H() [51/55]

tesseract::double_VAR_H ( words_default_fixed_limit  )

◆ double_VAR_H() [52/55]

tesseract::double_VAR_H ( words_default_fixed_space  )

◆ double_VAR_H() [53/55]

tesseract::double_VAR_H ( words_default_prop_nonspace  )

◆ double_VAR_H() [54/55]

tesseract::double_VAR_H ( words_initial_lower  )

◆ double_VAR_H() [55/55]

tesseract::double_VAR_H ( words_initial_upper  )

◆ draw_blob_edges()

void tesseract::draw_blob_edges ( TBLOB blob)

Definition at line 67 of file plotedges.cpp.

67 {
69 LIST edge_list = NIL_LIST;
70 for (TESSLINE *ol = blob->outlines; ol != nullptr; ol = ol->next) {
71 edge_list = push(edge_list, ol->loop);
72 }
73 display_edgepts(edge_list);
74 destroy(edge_list);
75 }
76}
LIST destroy(LIST list)
Definition: oldlist.cpp:121
bool wordrec_display_splits
Definition: split.cpp:41
void display_edgepts(LIST outlines)
Definition: plotedges.cpp:47

◆ draw_meanlines()

void tesseract::draw_meanlines ( TO_BLOCK block,
float  gradient,
int32_t  left,
ScrollView::Color  colour,
FCOORD  rotation 
)

Definition at line 203 of file drawtord.cpp.

209 {
210 FCOORD plot_pt; // point to plot
211 // rows
212 TO_ROW_IT row_it = block->get_rows();
213 TO_ROW *row; // current row
214 BLOBNBOX_IT blob_it; // blobs
215 float right; // end of row
216 to_win->Pen(colour);
217 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
218 row = row_it.data();
219 blob_it.set_to_list(row->blob_list());
220 blob_it.move_to_last();
221 right = blob_it.data()->bounding_box().right();
222 plot_pt = FCOORD(static_cast<float>(left), gradient * left + row->parallel_c() + row->xheight);
223 plot_pt.rotate(rotation);
224 to_win->SetCursor(plot_pt.x(), plot_pt.y());
225 plot_pt = FCOORD(right, gradient * right + row->parallel_c() + row->xheight);
226 plot_pt.rotate(rotation);
227 to_win->DrawTo(plot_pt.x(), plot_pt.y());
228 }
229}
void rotate(const FCOORD vec)
Definition: points.h:712
float y() const
Definition: points.h:209
float x() const
Definition: points.h:206

◆ draw_occupation()

void tesseract::draw_occupation ( int32_t  xleft,
int32_t  ybottom,
int32_t  min_y,
int32_t  max_y,
int32_t  occupation[],
int32_t  thresholds[] 
)

Definition at line 161 of file drawtord.cpp.

167 {
168 int32_t line_index; // pixel coord
169 ScrollView::Color colour; // of histogram
170 auto fleft = static_cast<float>(xleft); // float version
171
172 colour = ScrollView::WHITE;
173 to_win->Pen(colour);
174 to_win->SetCursor(fleft, static_cast<float>(ybottom));
175 for (line_index = min_y; line_index <= max_y; line_index++) {
176 if (occupation[line_index - min_y] < thresholds[line_index - min_y]) {
177 if (colour != ScrollView::BLUE) {
178 colour = ScrollView::BLUE;
179 to_win->Pen(colour);
180 }
181 } else {
182 if (colour != ScrollView::WHITE) {
183 colour = ScrollView::WHITE;
184 to_win->Pen(colour);
185 }
186 }
187 to_win->DrawTo(fleft + occupation[line_index - min_y] / 10.0, static_cast<float>(line_index));
188 }
189 colour = ScrollView::STEEL_BLUE;
190 to_win->Pen(colour);
191 to_win->SetCursor(fleft, static_cast<float>(ybottom));
192 for (line_index = min_y; line_index <= max_y; line_index++) {
193 to_win->DrawTo(fleft + thresholds[line_index - min_y] / 10.0, static_cast<float>(line_index));
194 }
195}

◆ EMPTY_LIST()

constexpr ERRCODE tesseract::EMPTY_LIST ( "List is empty"  )
constexpr

◆ EMPTY_LLSQ()

constexpr ERRCODE tesseract::EMPTY_LLSQ ( "Can't delete from an empty LLSQ"  )
constexpr

◆ EqualIgnoringCaseAndTerminalPunct()

bool tesseract::EqualIgnoringCaseAndTerminalPunct ( const WERD_CHOICE word1,
const WERD_CHOICE word2 
)

Definition at line 773 of file ratngs.cpp.

773 {
774 const UNICHARSET *uchset = word1.unicharset();
775 if (word2.unicharset() != uchset) {
776 return false;
777 }
778 unsigned w1start, w1end;
779 word1.punct_stripped(&w1start, &w1end);
780 unsigned w2start, w2end;
781 word2.punct_stripped(&w2start, &w2end);
782 if (w1end - w1start != w2end - w2start) {
783 return false;
784 }
785 for (unsigned i = 0; i < w1end - w1start; i++) {
786 if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
787 uchset->to_lower(word2.unichar_id(w2start + i))) {
788 return false;
789 }
790 }
791 return true;
792}
void punct_stripped(unsigned *start_core, unsigned *end_core) const
Definition: ratngs.cpp:367
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:299
const UNICHARSET * unicharset() const
Definition: ratngs.h:281
UNICHAR_ID to_lower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:730

◆ EvaluateParagraphDetection()

void tesseract::EvaluateParagraphDetection ( const TextAndModel correct,
int  n,
const std::vector< PARA * > &  detector_output 
)

Definition at line 105 of file paragraphs_test.cc.

106 {
107 int incorrect_breaks = 0;
108 int missed_breaks = 0;
109 int poorly_matched_models = 0;
110 int bad_crowns = 0;
111 int bad_list_items = 0;
112 ASSERT_EQ(detector_output.size(), n);
113 for (int i = 1; i < n; i++) {
114 bool has_break = correct[i].model_type != PCONT;
115 bool detected_break = (detector_output[i - 1] != detector_output[i]);
116 if (has_break && !detected_break) {
117 missed_breaks++;
118 }
119 if (detected_break && !has_break) {
120 incorrect_breaks++;
121 }
122 if (has_break) {
123 if (correct[i].model_type == PNONE) {
124 if (detector_output[i]->model != nullptr) {
125 poorly_matched_models++;
126 }
127 } else {
128 if (correct[i].model.justification() != kUnknown &&
129 (detector_output[i]->model == nullptr ||
130 !correct[i].model.Comparable(*detector_output[i]->model))) {
131 poorly_matched_models++;
132 }
133 }
134 if (correct[i].is_very_first_or_continuation ^
135 detector_output[i]->is_very_first_or_continuation) {
136 bad_crowns++;
137 }
138 if (correct[i].is_list_item ^ detector_output[i]->is_list_item) {
139 bad_list_items++;
140 }
141 }
142 }
143 EXPECT_EQ(incorrect_breaks, 0);
144 EXPECT_EQ(missed_breaks, 0);
145 EXPECT_EQ(poorly_matched_models, 0);
146 EXPECT_EQ(bad_list_items, 0);
147 EXPECT_EQ(bad_crowns, 0);
148 if (incorrect_breaks || missed_breaks || poorly_matched_models || bad_list_items || bad_crowns) {
149 std::vector<std::string> dbg_lines;
150 dbg_lines.emplace_back("# ==========================");
151 dbg_lines.emplace_back("# Correct paragraph breaks:");
152 dbg_lines.emplace_back("# ==========================");
153 for (int i = 0; i < n; i++) {
154 if (correct[i].model_type != PCONT) {
155 std::string s = std::string(correct[i].ascii) + " # " +
156 correct[i].model.ToString() +
157 (correct[i].is_very_first_or_continuation ? " crown" : "") +
158 (correct[i].is_list_item ? " li" : "");
159 dbg_lines.push_back(s);
160 } else {
161 dbg_lines.emplace_back(correct[i].ascii);
162 }
163 }
164 dbg_lines.emplace_back("");
165 dbg_lines.emplace_back("# ==========================");
166 dbg_lines.emplace_back("# Paragraph detector output:");
167 dbg_lines.emplace_back("# ==========================");
168 for (int i = 0; i < n; i++) {
169 std::string annotation;
170 if (i == 0 || (detector_output[i - 1] != detector_output[i])) {
171 if (detector_output[i] && detector_output[i]->model) {
172 annotation +=
173 " # " + detector_output[i]->model->ToString() +
174 (detector_output[i]->is_very_first_or_continuation ? " crown" : "") +
175 (detector_output[i]->is_list_item ? " li" : "");
176 } else {
177 annotation = " # Unmodeled paragraph.";
178 }
179 }
180 std::string s = correct[i].ascii + annotation;
181 dbg_lines.push_back(s);
182 }
183 std::string s;
184 for (auto &dbg_line : dbg_lines) {
185 s += dbg_line + "\n";
186 }
187 LOG(INFO) << "Discrepancy!\n" << s;
188 }
189}
@ LOG
@ INFO
Definition: log.h:28
#define ASSERT_EQ(val1, val2)
Definition: gtest.h:2073
const ParagraphJustification kUnknown
bool Comparable(const ParagraphModel &other) const
Definition: ocrpara.cpp:73
std::string ToString() const
Definition: ocrpara.cpp:85
TextModelInputType model_type

◆ expand_rows()

void tesseract::expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

Definition at line 976 of file makerow.cpp.

983 {
984 bool swallowed_row; // eaten a neighbour
985 float y_max, y_min; // new row limits
986 float y_bottom, y_top; // allowed limits
987 TO_ROW *test_row; // next row
988 TO_ROW *row; // current row
989 // iterators
990 BLOBNBOX_IT blob_it = &block->blobs;
991 TO_ROW_IT row_it = block->get_rows();
992
993#ifndef GRAPHICS_DISABLED
994 if (textord_show_expanded_rows && testing_on) {
995 if (to_win == nullptr) {
996 create_to_win(page_tr);
997 }
998 }
999#endif
1000
1001 adjust_row_limits(block); // shift min,max.
1003 if (block->get_rows()->empty()) {
1004 return;
1005 }
1006 compute_row_stats(block, textord_show_expanded_rows && testing_on);
1007 }
1008 assign_blobs_to_rows(block, &gradient, 4, true, false, false);
1009 // get real membership
1010 if (block->get_rows()->empty()) {
1011 return;
1012 }
1013 fit_parallel_rows(block, gradient, rotation, block_edge,
1014 textord_show_expanded_rows && testing_on);
1016 compute_row_stats(block, textord_show_expanded_rows && testing_on);
1017 }
1018 row_it.move_to_last();
1019 do {
1020 row = row_it.data();
1021 y_max = row->max_y(); // get current limits
1022 y_min = row->min_y();
1023 y_bottom = row->intercept() - block->line_size * textord_expansion_factor *
1025 y_top = row->intercept() +
1026 block->line_size * textord_expansion_factor *
1028 if (y_min > y_bottom) { // expansion allowed
1029 if (textord_show_expanded_rows && testing_on) {
1030 tprintf("Expanding bottom of row at %f from %f to %f\n", row->intercept(), y_min, y_bottom);
1031 }
1032 // expandable
1033 swallowed_row = true;
1034 while (swallowed_row && !row_it.at_last()) {
1035 swallowed_row = false;
1036 // get next one
1037 test_row = row_it.data_relative(1);
1038 // overlaps space
1039 if (test_row->max_y() > y_bottom) {
1040 if (test_row->min_y() > y_bottom) {
1041 if (textord_show_expanded_rows && testing_on) {
1042 tprintf("Eating row below at %f\n", test_row->intercept());
1043 }
1044 row_it.forward();
1045#ifndef GRAPHICS_DISABLED
1046 if (textord_show_expanded_rows && testing_on) {
1047 plot_parallel_row(test_row, gradient, block_edge, ScrollView::WHITE, rotation);
1048 }
1049#endif
1050 blob_it.set_to_list(row->blob_list());
1051 blob_it.add_list_after(test_row->blob_list());
1052 // swallow complete row
1053 delete row_it.extract();
1054 row_it.backward();
1055 swallowed_row = true;
1056 } else if (test_row->max_y() < y_min) {
1057 // shorter limit
1058 y_bottom = test_row->max_y();
1059 if (textord_show_expanded_rows && testing_on) {
1060 tprintf("Truncating limit to %f due to touching row at %f\n", y_bottom,
1061 test_row->intercept());
1062 }
1063 } else {
1064 y_bottom = y_min; // can't expand it
1065 if (textord_show_expanded_rows && testing_on) {
1066 tprintf("Not expanding limit beyond %f due to touching row at %f\n", y_bottom,
1067 test_row->intercept());
1068 }
1069 }
1070 }
1071 }
1072 y_min = y_bottom; // expand it
1073 }
1074 if (y_max < y_top) { // expansion allowed
1075 if (textord_show_expanded_rows && testing_on) {
1076 tprintf("Expanding top of row at %f from %f to %f\n", row->intercept(), y_max, y_top);
1077 }
1078 swallowed_row = true;
1079 while (swallowed_row && !row_it.at_first()) {
1080 swallowed_row = false;
1081 // get one above
1082 test_row = row_it.data_relative(-1);
1083 if (test_row->min_y() < y_top) {
1084 if (test_row->max_y() < y_top) {
1085 if (textord_show_expanded_rows && testing_on) {
1086 tprintf("Eating row above at %f\n", test_row->intercept());
1087 }
1088 row_it.backward();
1089 blob_it.set_to_list(row->blob_list());
1090#ifndef GRAPHICS_DISABLED
1091 if (textord_show_expanded_rows && testing_on) {
1092 plot_parallel_row(test_row, gradient, block_edge, ScrollView::WHITE, rotation);
1093 }
1094#endif
1095 blob_it.add_list_after(test_row->blob_list());
1096 // swallow complete row
1097 delete row_it.extract();
1098 row_it.forward();
1099 swallowed_row = true;
1100 } else if (test_row->min_y() < y_max) {
1101 // shorter limit
1102 y_top = test_row->min_y();
1103 if (textord_show_expanded_rows && testing_on) {
1104 tprintf("Truncating limit to %f due to touching row at %f\n", y_top,
1105 test_row->intercept());
1106 }
1107 } else {
1108 y_top = y_max; // can't expand it
1109 if (textord_show_expanded_rows && testing_on) {
1110 tprintf("Not expanding limit beyond %f due to touching row at %f\n", y_top,
1111 test_row->intercept());
1112 }
1113 }
1114 }
1115 }
1116 y_max = y_top;
1117 }
1118 // new limits
1119 row->set_limits(y_min, y_max);
1120 row_it.backward();
1121 } while (!row_it.at_last());
1122}
void compute_row_stats(TO_BLOCK *block, bool testing_on)
Definition: makerow.cpp:1163
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1129

◆ ExpectGraphemeModeResults()

void tesseract::ExpectGraphemeModeResults ( const std::string &  str,
UnicodeNormMode  u_mode,
int  unicode_count,
int  glyph_count,
int  grapheme_count,
const std::string &  target_str 
)
inline

Definition at line 48 of file normstrngs_test.h.

50 {
51 std::vector<std::string> glyphs;
52 std::string s;
54 u_mode, OCRNorm::kNone, GraphemeNormMode::kIndividualUnicodes, true, str.c_str(), &glyphs));
55 EXPECT_EQ(glyphs.size(), unicode_count) << PrintStringVectorWithUnicodes(glyphs);
56 for (auto &glyph : glyphs) {
57 s += glyph;
58 }
59 EXPECT_EQ(target_str, s);
60 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kGlyphSplit,
61 true, str.c_str(), &glyphs));
62 EXPECT_EQ(glyphs.size(), glyph_count) << PrintStringVectorWithUnicodes(glyphs);
63 s.clear();
64 for (auto &glyph : glyphs) {
65 s += glyph;
66 }
67 EXPECT_EQ(target_str, s);
68 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kCombined,
69 true, str.c_str(), &glyphs));
70 EXPECT_EQ(glyphs.size(), grapheme_count) << PrintStringVectorWithUnicodes(glyphs);
71 s.clear();
72 for (auto &glyph : glyphs) {
73 s += glyph;
74 }
75 EXPECT_EQ(target_str, s);
76 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kSingleString,
77 true, str.c_str(), &glyphs));
78 EXPECT_EQ(glyphs.size(), 1) << PrintStringVectorWithUnicodes(glyphs);
79 EXPECT_EQ(target_str, glyphs[0]);
80 std::string result;
82 NormalizeUTF8String(u_mode, OCRNorm::kNone, GraphemeNorm::kNormalize, str.c_str(), &result));
83 EXPECT_EQ(target_str, result);
84}
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
std::string PrintStringVectorWithUnicodes(const std::vector< std::string > &glyphs)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
Definition: normstrngs.cpp:179
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:152

◆ extract_edges()

void tesseract::extract_edges ( Image  pix,
BLOCK block 
)

Definition at line 347 of file edgblob.cpp.

348 { // block to scan
349 C_OUTLINE_LIST outlines; // outlines in block
350 C_OUTLINE_IT out_it = &outlines;
351
352 block_edges(pix, &(block->pdblk), &out_it);
353 ICOORD bleft; // block box
354 ICOORD tright;
355 block->pdblk.bounding_box(bleft, tright);
356 // make blobs
357 outlines_to_blobs(block, bleft, tright, &outlines);
358}
void block_edges(Image t_pix, PDBLK *block, C_OUTLINE_IT *outline_it)
Definition: scanedg.cpp:62
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
Definition: edgblob.cpp:460

◆ ExtractBlobsFromSegmentation()

void tesseract::ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 440 of file ocrblock.cpp.

440 {
441 C_BLOB_IT return_list_it(output_blob_list);
442 BLOCK_IT block_it(blocks);
443 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
444 BLOCK *block = block_it.data();
445 ROW_IT row_it(block->row_list());
446 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
447 ROW *row = row_it.data();
448 // Iterate over all werds in the row.
449 WERD_IT werd_it(row->word_list());
450 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
451 WERD *werd = werd_it.data();
452 return_list_it.move_to_last();
453 return_list_it.add_list_after(werd->cblob_list());
454 return_list_it.move_to_last();
455 return_list_it.add_list_after(werd->rej_cblob_list());
456 }
457 }
458 }
459}
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:111
WERD_LIST * word_list()
Definition: ocrrow.h:57
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:91
C_BLOB_LIST * cblob_list()
Definition: werd.h:96

◆ ExtractCharNormFeatures()

FEATURE_SET tesseract::ExtractCharNormFeatures ( const INT_FX_RESULT_STRUCT fx_info)

Return the character normalization feature for a blob.

The features returned are in a scale where the x-height has been normalized to live in the region y = [-0.25 .. 0.25]. Example ranges for English below are based on the Linux font collection on 2009-12-04:

  • Params[CharNormY]
    • The y coordinate of the grapheme's centroid.
    • English: [-0.27, 0.71]
  • Params[CharNormLength]
    • The length of the grapheme's outline (tiny segments discarded), divided by 10.0=LENGTH_COMPRESSION.
    • English: [0.16, 0.85]
  • Params[CharNormRx]
    • The radius of gyration about the x axis, as measured from CharNormY.
    • English: [0.011, 0.34]
  • Params[CharNormRy]
    • The radius of gyration about the y axis, as measured from the x center of the grapheme's bounding box.
    • English: [0.011, 0.31]

Definition at line 56 of file normfeat.cpp.

56 {
57 auto feature_set = new FEATURE_SET_STRUCT(1);
58 auto feature = new FEATURE_STRUCT(&CharNormDesc);
59
60 feature->Params[CharNormY] = MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
61 feature->Params[CharNormLength] = MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
62 feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
63 feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
64
65 AddFeature(feature_set, feature);
66
67 return feature_set;
68} /* ExtractCharNormFeatures */
const FEATURE_DESC_STRUCT CharNormDesc

◆ ExtractMicroFeature()

MicroFeature tesseract::ExtractMicroFeature ( MFOUTLINE  Start,
MFOUTLINE  End 
)

This routine computes the feature parameters which describe the micro-feature that starts at Start and ends at End. A new micro-feature is allocated, filled with the feature parameters, and returned. The routine assumes that Start and End are not the same point. If they are the same point, nullptr is returned, a warning message is printed, and the current outline is dumped to stdout.

Parameters
Start: starting point of micro-feature
End: ending point of micro-feature
Returns
New micro-feature or nullptr if the feature was rejected.
Note
Globals: none

Definition at line 127 of file mfx.cpp.

127 {
128 MFEDGEPT *P1, *P2;
129
130 P1 = PointAt(Start);
131 P2 = PointAt(End);
132
133 MicroFeature NewFeature;
134 NewFeature[(int)MicroFeatureParameter::MFXPosition] = AverageOf(P1->Point.x, P2->Point.x);
135 NewFeature[(int)MicroFeatureParameter::MFYPosition] = AverageOf(P1->Point.y, P2->Point.y);
136 NewFeature[(int)MicroFeatureParameter::MFLength] = DistanceBetween(P1->Point, P2->Point);
137 NewFeature[(int)MicroFeatureParameter::MFDirection] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0);
138 NewFeature[(int)MicroFeatureParameter::MFBulge1] = 0.0f; // deprecated
139 NewFeature[(int)MicroFeatureParameter::MFBulge2] = 0.0f; // deprecated
140
141 return NewFeature;
142} /* ExtractMicroFeature */
std::array< float,(int) MicroFeatureParameter::MFCount > MicroFeature
Definition: mfdefs.h:36

◆ ExtractMicros()

FEATURE_SET tesseract::ExtractMicros ( TBLOB Blob,
const DENORM cn_denorm 
)

Call the old micro-feature extractor and then copy the features into the new format. Then deallocate the old micro-features.

Parameters
Blob: blob to extract micro-features from
cn_denorm: control parameter to feature extractor.
Returns
Micro-features for Blob.

Definition at line 41 of file mf.cpp.

41 {
42 auto features = BlobMicroFeatures(Blob, cn_denorm);
43 if (features.empty()) {
44 return nullptr;
45 }
46 int n = 0;
47 for ([[maybe_unused]] auto &f: features) {
48 ++n;
49 }
50 auto FeatureSet = new FEATURE_SET_STRUCT(n);
51
52 for (auto &f : features) {
53 auto Feature = new FEATURE_STRUCT(&MicroFeatureDesc);
54 for (int i = 0; i < (int)MicroFeatureParameter::MFCount; ++i)
55 Feature->Params[i] = f[i];
56 // Bulge features are deprecated and should not be used. Set to 0.
57 Feature->Params[(int)MicroFeatureParameter::MFBulge1] = 0.0f;
58 Feature->Params[(int)MicroFeatureParameter::MFBulge2] = 0.0f;
59
60#ifndef _WIN32
61 // Assert that feature parameters are well defined.
62 for (int i = 0; i < Feature->Type->NumParams; i++) {
63 ASSERT_HOST(!std::isnan(Feature->Params[i]));
64 }
65#endif
66
67 AddFeature(FeatureSet, Feature);
68 }
69 return FeatureSet;
70} /* ExtractMicros */
MICROFEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mfx.cpp:54
const FEATURE_DESC_STRUCT MicroFeatureDesc

◆ FeatureDirection()

TESS_API FCOORD tesseract::FeatureDirection ( uint8_t  theta)

Definition at line 70 of file intfx.cpp.

70 {
71 return FCOORD(cos_table[theta], sin_table[theta]);
72}

◆ fill_heights()

void tesseract::fill_heights ( TO_ROW row,
float  gradient,
int  min_height,
int  max_height,
STATS heights,
STATS floating_heights 
)

Definition at line 1418 of file makerow.cpp.

1419 {
1420 float xcentre; // centre of blob
1421 float top; // top y coord of blob
1422 float height; // height of blob
1423 BLOBNBOX *blob; // current blob
1424 int repeated_set;
1425 BLOBNBOX_IT blob_it = row->blob_list();
1426 if (blob_it.empty()) {
1427 return; // no blobs in this row
1428 }
1429 bool has_rep_chars = row->rep_chars_marked() && row->num_repeated_sets() > 0;
1430 do {
1431 blob = blob_it.data();
1432 if (!blob->joined_to_prev()) {
1433 xcentre = (blob->bounding_box().left() + blob->bounding_box().right()) / 2.0f;
1434 top = blob->bounding_box().top();
1435 height = blob->bounding_box().height();
1437 top -= row->baseline.y(xcentre);
1438 } else {
1439 top -= gradient * xcentre + row->parallel_c();
1440 }
1441 if (top >= min_height && top <= max_height) {
1442 heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
1443 if (height / top < textord_min_blob_height_fraction) {
1444 floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
1445 }
1446 }
1447 }
1448 // Skip repeated chars, since they are likely to skew the height stats.
1449 if (has_rep_chars && blob->repeated_set() != 0) {
1450 repeated_set = blob->repeated_set();
1451 blob_it.forward();
1452 while (!blob_it.at_first() && blob_it.data()->repeated_set() == repeated_set) {
1453 blob_it.forward();
1455 tprintf("Skipping repeated char when computing xheight\n");
1456 }
1457 }
1458 } else {
1459 blob_it.forward();
1460 }
1461 } while (!blob_it.at_first());
1462}
double textord_min_blob_height_fraction
Definition: makerow.cpp:85
bool textord_fix_xheight_bug
Definition: makerow.cpp:57
int repeated_set() const
Definition: blobbox.h:271
bool rep_chars_marked() const
Definition: blobbox.h:637
QSPLINE baseline
Definition: blobbox.h:676
int num_repeated_sets() const
Definition: blobbox.h:643
double y(double x) const
Definition: quspline.cpp:203

◆ FillABC()

TESS_API void tesseract::FillABC ( PROTO_STRUCT Proto)

Definition at line 103 of file protos.cpp.

103 {
104 float Slope, Intercept, Normalizer;
105
106 Slope = tan(Proto->Angle * 2.0 * M_PI);
107 Intercept = Proto->Y - Slope * Proto->X;
108 Normalizer = 1.0 / sqrt(Slope * Slope + 1.0);
109 Proto->A = Slope * Normalizer;
110 Proto->B = -Normalizer;
111 Proto->C = Intercept * Normalizer;
112}

◆ FillerDone()

bool tesseract::FillerDone ( TABLE_FILLER Filler)

Return true if the specified table filler is done, i.e. if it has no more lines to fill.

Parameters
Filler: table filler to check if done
Returns
true if no more lines to fill, false otherwise.
Note
Globals: none

Definition at line 1063 of file intproto.cpp.

1063 {
1064 FILL_SWITCH *Next;
1065
1066 Next = &(Filler->Switch[Filler->NextSwitch]);
1067
1068 return Filler->X > Next->X && Next->Type == LastSwitch;
1069
1070} /* FillerDone */
FILL_SWITCH Switch[MAX_NUM_SWITCHES]
Definition: intproto.cpp:85

◆ FillPPCircularBits()

void tesseract::FillPPCircularBits ( uint32_t  ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
int  Bit,
float  Center,
float  Spread,
bool  debug 
)

This routine sets Bit in each bit vector whose bucket lies within the range Center +- Spread. The fill is done for a circular dimension, i.e. bucket 0 is adjacent to the last bucket. It is assumed that Center and Spread are expressed in a circular coordinate system whose range is 0 to 1.

Parameters
ParamTable: table of bit vectors, one per param bucket
Bit: bit position in vectors to be filled
Center: center of filled area
Spread: spread of filled area
debug: debug flag

Definition at line 1085 of file intproto.cpp.

1086 {
1087 int i, FirstBucket, LastBucket;
1088
1089 if (Spread > 0.5) {
1090 Spread = 0.5;
1091 }
1092
1093 FirstBucket = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS));
1094 if (FirstBucket < 0) {
1095 FirstBucket += NUM_PP_BUCKETS;
1096 }
1097
1098 LastBucket = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS));
1099 if (LastBucket >= NUM_PP_BUCKETS) {
1100 LastBucket -= NUM_PP_BUCKETS;
1101 }
1102 if (debug) {
1103 tprintf("Circular fill from %d to %d", FirstBucket, LastBucket);
1104 }
1105 for (i = FirstBucket; true; CircularIncrement(i, NUM_PP_BUCKETS)) {
1106 SET_BIT(ParamTable[i], Bit);
1107
1108 /* exit loop after we have set the bit for the last bucket */
1109 if (i == LastBucket) {
1110 break;
1111 }
1112 }
1113
1114} /* FillPPCircularBits */
#define NUM_PP_BUCKETS
Definition: intproto.h:52

◆ FillPPLinearBits()

void tesseract::FillPPLinearBits ( uint32_t  ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
int  Bit,
float  Center,
float  Spread,
bool  debug 
)

This routine sets Bit in each bit vector whose bucket lies within the range Center +- Spread. The fill is done for a linear dimension, i.e. there is no wrap-around for this dimension. It is assumed that Center and Spread are expressed in a linear coordinate system whose range is approximately 0 to 1. Values outside this range will be clipped.

Parameters
ParamTable: table of bit vectors, one per param bucket
Bit: bit number being filled
Center: center of filled area
Spread: spread of filled area
debug: debug flag

Definition at line 1130 of file intproto.cpp.

1131 {
1132 int i, FirstBucket, LastBucket;
1133
1134 FirstBucket = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS));
1135 if (FirstBucket < 0) {
1136 FirstBucket = 0;
1137 }
1138
1139 LastBucket = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS));
1140 if (LastBucket >= NUM_PP_BUCKETS) {
1141 LastBucket = NUM_PP_BUCKETS - 1;
1142 }
1143
1144 if (debug) {
1145 tprintf("Linear fill from %d to %d", FirstBucket, LastBucket);
1146 }
1147 for (i = FirstBucket; i <= LastBucket; i++) {
1148 SET_BIT(ParamTable[i], Bit);
1149 }
1150
1151} /* FillPPLinearBits */

◆ FilterEdgeNoise()

void tesseract::FilterEdgeNoise ( MFOUTLINE  Outline,
float  NoiseSegmentLength 
)

◆ find_best_dropout_row()

bool tesseract::find_best_dropout_row ( TO_ROW row,
int32_t  distance,
float  dist_limit,
int32_t  line_index,
TO_ROW_IT *  row_it,
bool  testing_on 
)

Definition at line 696 of file makerow.cpp.

703 {
704 int32_t next_index; // of neighbouring row
705 int32_t row_offset; // from current row
706 int32_t abs_dist; // absolute distance
707 int8_t row_inc; // increment to row_index
708 TO_ROW *next_row; // neighbouring row
709
710 if (testing_on) {
711 tprintf("Row at %g(%g), dropout dist=%d,", row->intercept(), row->parallel_c(), distance);
712 }
713 if (distance < 0) {
714 row_inc = 1;
715 abs_dist = -distance;
716 } else {
717 row_inc = -1;
718 abs_dist = distance;
719 }
720 if (abs_dist > dist_limit) {
721 if (testing_on) {
722 tprintf(" too far - deleting\n");
723 }
724 return true;
725 }
726 if ((distance < 0 && !row_it->at_last()) || (distance >= 0 && !row_it->at_first())) {
727 row_offset = row_inc;
728 do {
729 next_row = row_it->data_relative(row_offset);
730 next_index = static_cast<int32_t>(std::floor(next_row->intercept()));
731 if ((distance < 0 && next_index < line_index &&
732 next_index > line_index + distance + distance) ||
733 (distance >= 0 && next_index > line_index &&
734 next_index < line_index + distance + distance)) {
735 if (testing_on) {
736 tprintf(" nearer neighbour (%d) at %g\n", line_index + distance - next_index,
737 next_row->intercept());
738 }
739 return true; // other is nearer
740 } else if (next_index == line_index || next_index == line_index + distance + distance) {
741 if (row->believability() <= next_row->believability()) {
742 if (testing_on) {
743 tprintf(" equal but more believable at %g (%g/%g)\n", next_row->intercept(),
744 row->believability(), next_row->believability());
745 }
746 return true; // other is more believable
747 }
748 }
749 row_offset += row_inc;
750 } while ((next_index == line_index || next_index == line_index + distance + distance) &&
751 row_offset < row_it->length());
752 if (testing_on) {
753 tprintf(" keeping\n");
754 }
755 }
756 return false;
757}
float believability() const
Definition: blobbox.h:595

◆ find_cblob_hlimits()

void tesseract::find_cblob_hlimits ( C_BLOB blob,
float  bottomy,
float  topy,
float &  xmin,
float &  xmax 
)

Definition at line 579 of file blobbox.cpp.

584 {
585 int16_t stepindex; // current point
586 ICOORD pos; // current coords
587 ICOORD vec; // rotated step
588 C_OUTLINE *outline; // current outline
589 // outlines
590 C_OUTLINE_IT out_it = blob->out_list();
591
592 xmin = static_cast<float>(INT32_MAX);
593 xmax = static_cast<float>(-INT32_MAX);
594 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
595 outline = out_it.data();
596 pos = outline->start_pos(); // get coords
597 for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
598 // inside
599 if (pos.y() >= bottomy && pos.y() <= topy) {
600 UpdateRange(pos.x(), &xmin, &xmax);
601 }
602 vec = outline->step(stepindex);
603 pos += vec; // move to next
604 }
605 }
606}
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:117
ICOORD step(int index) const
Definition: coutln.h:143
const ICOORD & start_pos() const
Definition: coutln.h:147

◆ find_cblob_limits()

void tesseract::find_cblob_limits ( C_BLOB blob,
float  leftx,
float  rightx,
FCOORD  rotation,
float &  ymin,
float &  ymax 
)

Definition at line 504 of file blobbox.cpp.

510 {
511 int16_t stepindex; // current point
512 ICOORD pos; // current coords
513 ICOORD vec; // rotated step
514 C_OUTLINE *outline; // current outline
515 // outlines
516 C_OUTLINE_IT out_it = blob->out_list();
517
518 ymin = static_cast<float>(INT32_MAX);
519 ymax = static_cast<float>(-INT32_MAX);
520 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
521 outline = out_it.data();
522 pos = outline->start_pos(); // get coords
523 pos.rotate(rotation);
524 for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
525 // inside
526 if (pos.x() >= leftx && pos.x() <= rightx) {
527 UpdateRange(pos.y(), &ymin, &ymax);
528 }
529 vec = outline->step(stepindex);
530 vec.rotate(rotation);
531 pos += vec; // move to next
532 }
533 }
534}
void rotate(const FCOORD &vec)
Definition: points.h:511

◆ find_cblob_vlimits()

void tesseract::find_cblob_vlimits ( C_BLOB blob,
float  leftx,
float  rightx,
float &  ymin,
float &  ymax 
)

Definition at line 543 of file blobbox.cpp.

548 {
549 int16_t stepindex; // current point
550 ICOORD pos; // current coords
551 ICOORD vec; // rotated step
552 C_OUTLINE *outline; // current outline
553 // outlines
554 C_OUTLINE_IT out_it = blob->out_list();
555
556 ymin = static_cast<float>(INT32_MAX);
557 ymax = static_cast<float>(-INT32_MAX);
558 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
559 outline = out_it.data();
560 pos = outline->start_pos(); // get coords
561 for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
562 // inside
563 if (pos.x() >= leftx && pos.x() <= rightx) {
564 UpdateRange(pos.y(), &ymin, &ymax);
565 }
566 vec = outline->step(stepindex);
567 pos += vec; // move to next
568 }
569 }
570}

◆ find_lesser_parts()

void tesseract::find_lesser_parts ( TO_ROW row,
TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  partcount,
int  bestpart 
)

Definition at line 1262 of file oldbasel.cpp.

1270 {
1271 int blobindex; /*index of blob */
1272 int partition; /*current partition */
1273 int xcentre; /*centre of blob */
1274 int poscount; /*count of best up step */
1275 int negcount; /*count of best down step */
1276 float partsteps[MAXPARTS]; /*average step to part */
1277 float bestneg; /*best down step */
1278 int runlength; /*length of bad run */
1279 int biggestrun; /*biggest bad run */
1280
1281 biggestrun = 0;
1282 for (partition = 0; partition < partcount; partition++) {
1283 partsteps[partition] = 0.0; /*zero accumulators */
1284 }
1285 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1286 xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1;
1287 /*in other parts */
1288 int part_id = static_cast<int>(static_cast<unsigned char>(partids[blobindex]));
1289 if (part_id != bestpart) {
1290 runlength++; /*run of non bests */
1291 if (runlength > biggestrun) {
1292 biggestrun = runlength;
1293 }
1294 partsteps[part_id] += blobcoords[blobindex].bottom() - row->baseline.y(xcentre);
1295 } else {
1296 runlength = 0;
1297 }
1298 }
1299 if (biggestrun > MAXBADRUN) {
1300 row->xheight = -1.0f; /*failed */
1301 } else {
1302 row->xheight = 1.0f; /*success */
1303 }
1304 poscount = negcount = 0;
1305 bestneg = 0.0; /*no step yet */
1306 for (partition = 0; partition < partcount; partition++) {
1307 if (partition != bestpart) {
1308 // by jetsoft divide by zero possible
1309 if (partsizes[partition] == 0) {
1310 partsteps[partition] = 0;
1311 } else {
1312 partsteps[partition] /= partsizes[partition];
1313 }
1314 //
1315
1316 if (partsteps[partition] >= MINASCRISE && partsizes[partition] > poscount) {
1317 poscount = partsizes[partition];
1318 }
1319 if (partsteps[partition] <= -MINASCRISE && partsizes[partition] > negcount) {
1320 /*ascender rise */
1321 bestneg = partsteps[partition];
1322 /*2nd most popular */
1323 negcount = partsizes[partition];
1324 }
1325 }
1326 }
1327 /*average x-height */
1328 partsteps[bestpart] /= blobcount;
1329 row->descdrop = bestneg;
1330}
#define MAXBADRUN
Definition: oldbasel.cpp:65
#define MINASCRISE
Definition: oldbasel.cpp:61

◆ find_repeated_chars()

void tesseract::find_repeated_chars ( TO_BLOCK block,
bool  testing_on 
)

Definition at line 1660 of file topitch.cpp.

1661 { // Debug mode.
1662 POLY_BLOCK *pb = block->block->pdblk.poly_block();
1663 if (pb != nullptr && !pb->IsText()) {
1664 return; // Don't find repeated chars in non-text blocks.
1665 }
1666
1667 TO_ROW *row;
1668 BLOBNBOX_IT box_it;
1669 BLOBNBOX_IT search_it; // forward search
1670 WERD *word; // new word
1671 TBOX word_box; // for plotting
1672 int blobcount, repeated_set;
1673
1674 TO_ROW_IT row_it = block->get_rows();
1675 if (row_it.empty()) {
1676 return; // empty block
1677 }
1678 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1679 row = row_it.data();
1680 box_it.set_to_list(row->blob_list());
1681 if (box_it.empty()) {
1682 continue; // no blobs in this row
1683 }
1684 if (!row->rep_chars_marked()) {
1686 }
1687 if (row->num_repeated_sets() == 0) {
1688 continue; // nothing to do for this row
1689 }
1690 // new words
1691 WERD_IT word_it(&row->rep_words);
1692 do {
1693 if (box_it.data()->repeated_set() != 0 && !box_it.data()->joined_to_prev()) {
1694 blobcount = 1;
1695 repeated_set = box_it.data()->repeated_set();
1696 search_it = box_it;
1697 search_it.forward();
1698 while (!search_it.at_first() && search_it.data()->repeated_set() == repeated_set) {
1699 blobcount++;
1700 search_it.forward();
1701 }
1702 // After the call to make_real_word() all the blobs from this
1703 // repeated set will be removed from the blob list. box_it will be
1704 // set to point to the blob after the end of the extracted sequence.
1705 word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
1706 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1707 tprintf("Bad box joined to prev at");
1708 box_it.data()->bounding_box().print();
1709 tprintf("After repeated word:");
1710 word->bounding_box().print();
1711 }
1712 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1713 word->set_flag(W_REP_CHAR, true);
1714 word->set_flag(W_DONT_CHOP, true);
1715 word_it.add_after_then_move(word);
1716 } else {
1717 box_it.forward();
1718 }
1719 } while (!box_it.at_first());
1720 }
1721}
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2565
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
Definition: wordseg.cpp:559

◆ find_row_pitch()

bool tesseract::find_row_pitch ( TO_ROW row,
int32_t  maxwidth,
int32_t  dm_gap,
TO_BLOCK block,
int32_t  block_index,
int32_t  row_index,
bool  testing_on 
)

Definition at line 784 of file topitch.cpp.

792 {
793 bool used_dm_model; // looks like dot matrix
794 float min_space; // estimate threshold
795 float non_space; // gap size
796 float gap_iqr; // interquartile range
797 float pitch_iqr;
798 float dm_gap_iqr; // interquartile range
799 float dm_pitch_iqr;
800 float dm_pitch; // pitch with dm on
801 float pitch; // revised estimate
802 float initial_pitch; // guess at pitch
803 STATS gap_stats(0, maxwidth - 1);
804 // centre-centre
805 STATS pitch_stats(0, maxwidth - 1);
806
807 row->fixed_pitch = 0.0f;
808 initial_pitch = row->fp_space;
809 if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) {
810 initial_pitch = row->xheight; // keep pitch decent
811 }
812 non_space = row->fp_nonsp;
813 if (non_space > initial_pitch) {
814 non_space = initial_pitch;
815 }
816 min_space = (initial_pitch + non_space) / 2;
817
818 if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space, true, false,
819 dm_gap)) {
820 dm_gap_iqr = 0.0001f;
821 dm_pitch_iqr = maxwidth * 2.0f;
822 dm_pitch = initial_pitch;
823 } else {
824 dm_gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
825 dm_pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
826 dm_pitch = pitch_stats.ile(0.5);
827 }
828 gap_stats.clear();
829 pitch_stats.clear();
830 if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space, true, false, 0)) {
831 gap_iqr = 0.0001f;
832 pitch_iqr = maxwidth * 3.0f;
833 } else {
834 gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
835 pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
836 if (testing_on) {
837 tprintf(
838 "First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
839 "pitch=%g\n",
840 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5));
841 }
842 initial_pitch = pitch_stats.ile(0.5);
843 if (min_space > initial_pitch && count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch,
844 initial_pitch, true, false, 0)) {
845 min_space = initial_pitch;
846 gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
847 pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
848 if (testing_on) {
849 tprintf(
850 "Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
851 "pitch=%g\n",
852 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5));
853 }
854 initial_pitch = pitch_stats.ile(0.5);
855 }
856 }
858 tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", block_index,
859 row_index, 'X', pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
860 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth
861 ? 'D'
862 : (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
863 }
864 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
867 tprintf("\n");
868 }
869 return false; // insufficient data
870 }
871 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
872 if (testing_on) {
873 tprintf(
874 "Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
875 "dm_gap_iqr=%g\n",
876 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
877 }
878 gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
879 pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
880 pitch = pitch_stats.ile(0.5);
881 used_dm_model = false;
882 } else {
883 if (testing_on) {
884 tprintf(
885 "Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
886 "dm_gap_iqr=%g\n",
887 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
888 }
889 gap_iqr = dm_gap_iqr;
890 pitch_iqr = dm_pitch_iqr;
891 pitch = dm_pitch;
892 used_dm_model = true;
893 }
895 tprintf("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", pitch_iqr, gap_iqr, pitch);
896 tprintf("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
897 pitch_iqr < gap_iqr * textord_fpiqr_ratio &&
898 pitch_iqr < block->xheight * textord_max_pitch_iqr &&
899 pitch < block->xheight * textord_words_default_maxspace
900 ? 'F'
901 : 'P');
902 }
903 if (pitch_iqr < gap_iqr * textord_fpiqr_ratio &&
904 pitch_iqr < block->xheight * textord_max_pitch_iqr &&
905 pitch < block->xheight * textord_words_default_maxspace) {
907 } else {
909 }
910 row->fixed_pitch = pitch;
911 row->kern_size = gap_stats.ile(0.5);
912 row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
913 if (row->min_space > row->fixed_pitch) {
914 row->min_space = static_cast<int32_t>(row->fixed_pitch);
915 }
916 row->max_nonspace = row->min_space;
917 row->space_size = row->fixed_pitch;
918 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
919 row->used_dm_model = used_dm_model;
920 return true;
921}
double textord_words_default_maxspace
Definition: tovars.cpp:33
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
Definition: topitch.cpp:1008
bool textord_debug_pitch_metric
Definition: topitch.cpp:45
double textord_fpiqr_ratio
Definition: tovars.cpp:53
double textord_max_pitch_iqr
Definition: tovars.cpp:54
int32_t min_space
Definition: blobbox.h:669
int32_t max_nonspace
Definition: blobbox.h:670
bool used_dm_model
Definition: blobbox.h:653
int32_t space_threshold
Definition: blobbox.h:671

◆ find_top_modes()

void tesseract::find_top_modes ( STATS stats,
int  statnum,
int  modelist[],
int  modenum 
)

Definition at line 1508 of file oldbasel.cpp.

1512 {
1513 int mode_count;
1514 int last_i = 0;
1515 int last_max = INT32_MAX;
1516 int i;
1517 int mode;
1518 int total_max = 0;
1519 int mode_factor = textord_ocropus_mode ? kMinModeFactorOcropus : kMinModeFactor;
1520
1521 for (mode_count = 0; mode_count < modenum; mode_count++) {
1522 mode = 0;
1523 for (i = 0; i < statnum; i++) {
1524 if (stats->pile_count(i) > stats->pile_count(mode)) {
1525 if ((stats->pile_count(i) < last_max) ||
1526 ((stats->pile_count(i) == last_max) && (i > last_i))) {
1527 mode = i;
1528 }
1529 }
1530 }
1531 last_i = mode;
1532 last_max = stats->pile_count(last_i);
1533 total_max += last_max;
1534 if (last_max <= total_max / mode_factor) {
1535 mode = 0;
1536 }
1537 modelist[mode_count] = mode;
1538 }
1539}
const int kMinModeFactor
Definition: oldbasel.cpp:1506
const int kMinModeFactorOcropus
Definition: oldbasel.cpp:1505

◆ find_underlined_blobs()

void tesseract::find_underlined_blobs ( BLOBNBOX u_line,
QSPLINE baseline,
float  xheight,
float  baseline_offset,
ICOORDELT_LIST *  chop_cells 
)

Definition at line 158 of file underlin.cpp.

164 {
165 ICOORD blob_chop; // sides of blob
166 TBOX blob_box = u_line->bounding_box();
167 // cell iterator
168 ICOORDELT_IT cell_it = chop_cells;
169 STATS upper_proj(blob_box.left(), blob_box.right());
170 STATS middle_proj(blob_box.left(), blob_box.right());
171 STATS lower_proj(blob_box.left(), blob_box.right());
172 C_OUTLINE_IT out_it; // outlines of blob
173
174 ASSERT_HOST(u_line->cblob() != nullptr);
175
176 out_it.set_to_list(u_line->cblob()->out_list());
177 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
178 vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, &lower_proj,
179 &middle_proj, &upper_proj);
180 }
181
182 for (auto x = blob_box.left(); x < blob_box.right(); x++) {
183 if (middle_proj.pile_count(x) > 0) {
184 auto y = x + 1;
185 for (; y < blob_box.right() && middle_proj.pile_count(y) > 0; y++) {
186 ;
187 }
188 blob_chop = ICOORD(x, y);
189 cell_it.add_after_then_move(new ICOORDELT(blob_chop));
190 x = y;
191 }
192 }
193}
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:202

◆ FindClass()

TESS_COMMON_TRAINING_API MERGE_CLASS tesseract::FindClass ( LIST  List,
const std::string &  Label 
)

Definition at line 597 of file commontraining.cpp.

597 {
598 MERGE_CLASS MergeClass;
599
600 iterate(List) {
601 MergeClass = reinterpret_cast<MERGE_CLASS>(List->first_node());
602 if (MergeClass->Label == Label) {
603 return (MergeClass);
604 }
605 }
606 return (nullptr);
607
608} /* FindClass */

◆ FindDirectionChanges()

void tesseract::FindDirectionChanges ( MFOUTLINE  Outline,
float  MinSlope,
float  MaxSlope 
)

This routine searches through the specified outline, computes a slope for each vector in the outline, and marks each vector as having one of the following directions: N, S, E, W, NE, NW, SE, SW. This information is then stored in the outline and the outline is returned.

Parameters
Outline: micro-feature outline to analyze
MinSlope: controls "snapping" of segments to horizontal
MaxSlope: controls "snapping" of segments to vertical

Definition at line 104 of file mfoutline.cpp.

104 {
105 MFEDGEPT *Current;
106 MFEDGEPT *Last;
107 MFOUTLINE EdgePoint;
108
109 if (DegenerateOutline(Outline)) {
110 return;
111 }
112
113 Last = PointAt(Outline);
114 Outline = NextPointAfter(Outline);
115 EdgePoint = Outline;
116 do {
117 Current = PointAt(EdgePoint);
118 ComputeDirection(Last, Current, MinSlope, MaxSlope);
119
120 Last = Current;
121 EdgePoint = NextPointAfter(EdgePoint);
122 } while (EdgePoint != Outline);
123
124} /* FindDirectionChanges */
void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope)
Definition: mfoutline.cpp:335

◆ FindList()

LABELEDLIST tesseract::FindList ( LIST  List,
const std::string &  Label 
)

This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.

Parameters
List: list to search
Label: label to search for
Returns
Labeled list with the specified label or nullptr.
Note
Globals: none

Definition at line 302 of file commontraining.cpp.

302 {
303 LABELEDLIST LabeledList;
304
305 iterate(List) {
306 LabeledList = reinterpret_cast<LABELEDLIST>(List->first_node());
307 if (LabeledList->Label == Label) {
308 return (LabeledList);
309 }
310 }
311 return (nullptr);
312
313} /* FindList */

◆ FindMatchingChoice()

BLOB_CHOICE * tesseract::FindMatchingChoice ( UNICHAR_ID  char_id,
BLOB_CHOICE_LIST *  bc_list 
)

Definition at line 177 of file ratngs.cpp.

177 {
178 // Find the corresponding best BLOB_CHOICE.
179 BLOB_CHOICE_IT choice_it(bc_list);
180 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
181 BLOB_CHOICE *choice = choice_it.data();
182 if (choice->unichar_id() == char_id) {
183 return choice;
184 }
185 }
186 return nullptr;
187}
UNICHAR_ID unichar_id() const
Definition: ratngs.h:81

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters before,
const RowScratchRegisters after 
)

Definition at line 1704 of file paragraphs.cpp.

1704 {
1705 if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
1706 return true;
1707 }
1708
1709 int available_space = before.lindent_;
1710 if (before.rindent_ > available_space) {
1711 available_space = before.rindent_;
1712 }
1713 available_space -= before.ri_->average_interword_space;
1714
1715 if (before.ri_->ltr) {
1716 return after.ri_->lword_box.width() < available_space;
1717 }
1718 return after.ri_->rword_box.width() < available_space;
1719}
IntAfterTypedTestSuiteP after
IntBeforeRegisterTypedTestSuiteP before

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters before,
const RowScratchRegisters after,
tesseract::ParagraphJustification  justification 
)

Definition at line 1678 of file paragraphs.cpp.

1679 {
1680 if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
1681 return true;
1682 }
1683
1684 if (justification == JUSTIFICATION_UNKNOWN) {
1685 tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
1686 }
1687 int available_space;
1688 if (justification == JUSTIFICATION_CENTER) {
1689 available_space = before.lindent_ + before.rindent_;
1690 } else {
1691 available_space = before.OffsideIndent(justification);
1692 }
1693 available_space -= before.ri_->average_interword_space;
1694
1695 if (before.ri_->ltr) {
1696 return after.ri_->lword_box.width() < available_space;
1697 }
1698 return after.ri_->rword_box.width() < available_space;
1699}

◆ fit_lms_line()

void tesseract::fit_lms_line ( TO_ROW row)

Definition at line 296 of file makerow.cpp.

296 {
297 float m, c; // fitted line
299 BLOBNBOX_IT blob_it = row->blob_list();
300
301 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
302 const TBOX &box = blob_it.data()->bounding_box();
303 lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
304 }
305 double error = lms.Fit(&m, &c);
306 row->set_line(m, c, error);
307}
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:612
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:50
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:73

◆ fit_parallel_lms()

void tesseract::fit_parallel_lms ( float  gradient,
TO_ROW * row 
)

Definition at line 1970 of file makerow.cpp.

1970 {
1971 float c; // fitted line
1972 int blobcount; // no of blobs
1974 BLOBNBOX_IT blob_it = row->blob_list();
1975
1976 blobcount = 0;
1977 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1978 if (!blob_it.data()->joined_to_prev()) {
1979 const TBOX &box = blob_it.data()->bounding_box();
1980 lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
1981 blobcount++;
1982 }
1983 }
1984 double error = lms.ConstrainedFit(gradient, &c);
1985 row->set_parallel_line(gradient, c, error);
1987 error = lms.Fit(&gradient, &c);
1988 }
1989 // set the other too
1990 row->set_line(gradient, c, error);
1991}
int textord_lms_line_trials
Definition: makerow.cpp:94
bool textord_straight_baselines
Definition: makerow.cpp:54
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:619
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:133

◆ fit_parallel_rows()

void tesseract::fit_parallel_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

Definition at line 1928 of file makerow.cpp.

1934 {
1935#ifndef GRAPHICS_DISABLED
1936 ScrollView::Color colour; // of row
1937#endif
1938 TO_ROW_IT row_it = block->get_rows();
1939
1940 row_it.move_to_first();
1941 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1942 if (row_it.data()->blob_list()->empty()) {
1943 delete row_it.extract(); // nothing in it
1944 } else {
1945 fit_parallel_lms(gradient, row_it.data());
1946 }
1947 }
1948#ifndef GRAPHICS_DISABLED
1949 if (testing_on) {
1950 colour = ScrollView::RED;
1951 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1952 plot_parallel_row(row_it.data(), gradient, block_edge, colour, rotation);
1953 colour = static_cast<ScrollView::Color>(colour + 1);
1954 if (colour > ScrollView::MAGENTA) {
1955 colour = ScrollView::RED;
1956 }
1957 }
1958 }
1959#endif
1960 row_it.sort(row_y_order); // may have gone out of order
1961}
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1970

◆ fix_row_pitch()

void tesseract::fix_row_pitch ( TO_ROW bad_row,
TO_BLOCK bad_block,
TO_BLOCK_LIST *  blocks,
int32_t  row_target,
int32_t  block_target 
)

Definition at line 144 of file topitch.cpp.

148 { // number of block
149 int16_t mid_cuts;
150 int block_votes; // votes in block
151 int like_votes; // votes over page
152 int other_votes; // votes of unlike blocks
153 int block_index; // number of block
154 int row_index; // number of row
155 int maxwidth; // max pitch
156 TO_BLOCK_IT block_it = blocks; // block iterator
157 TO_BLOCK *block; // current block
158 TO_ROW *row; // current row
159 float sp_sd; // space deviation
160 STATS block_stats; // pitches in block
161 STATS like_stats; // pitches in page
162
163 block_votes = like_votes = other_votes = 0;
164 maxwidth = static_cast<int32_t>(ceil(bad_row->xheight * textord_words_maxspace));
165 if (bad_row->pitch_decision != PITCH_DEF_FIXED && bad_row->pitch_decision != PITCH_DEF_PROP) {
166 block_stats.set_range(0, maxwidth - 1);
167 like_stats.set_range(0, maxwidth - 1);
168 block_index = 1;
169 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
170 block = block_it.data();
171 POLY_BLOCK *pb = block->block->pdblk.poly_block();
172 if (pb != nullptr && !pb->IsText()) {
173 continue; // Non text doesn't exist!
174 }
175 row_index = 1;
176 TO_ROW_IT row_it(block->get_rows());
177 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
178 row = row_it.data();
179 if ((bad_row->all_caps &&
180 row->xheight + row->ascrise <
181 (bad_row->xheight + bad_row->ascrise) * (1 + textord_pitch_rowsimilarity) &&
182 row->xheight + row->ascrise >
183 (bad_row->xheight + bad_row->ascrise) * (1 - textord_pitch_rowsimilarity)) ||
184 (!bad_row->all_caps &&
185 row->xheight < bad_row->xheight * (1 + textord_pitch_rowsimilarity) &&
186 row->xheight > bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
187 if (block_index == block_target) {
188 if (row->pitch_decision == PITCH_DEF_FIXED) {
189 block_votes += textord_words_veto_power;
190 block_stats.add(static_cast<int32_t>(row->fixed_pitch), textord_words_veto_power);
191 } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
193 block_votes++;
194 block_stats.add(static_cast<int32_t>(row->fixed_pitch), 1);
195 } else if (row->pitch_decision == PITCH_DEF_PROP) {
196 block_votes -= textord_words_veto_power;
197 } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
199 block_votes--;
200 }
201 } else {
202 if (row->pitch_decision == PITCH_DEF_FIXED) {
203 like_votes += textord_words_veto_power;
204 like_stats.add(static_cast<int32_t>(row->fixed_pitch), textord_words_veto_power);
205 } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
207 like_votes++;
208 like_stats.add(static_cast<int32_t>(row->fixed_pitch), 1);
209 } else if (row->pitch_decision == PITCH_DEF_PROP) {
210 like_votes -= textord_words_veto_power;
211 } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
213 like_votes--;
214 }
215 }
216 } else {
217 if (row->pitch_decision == PITCH_DEF_FIXED) {
218 other_votes += textord_words_veto_power;
219 } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
221 other_votes++;
222 } else if (row->pitch_decision == PITCH_DEF_PROP) {
223 other_votes -= textord_words_veto_power;
224 } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
226 other_votes--;
227 }
228 }
229 row_index++;
230 }
231 block_index++;
232 }
233 if (block_votes > textord_words_veto_power) {
234 bad_row->fixed_pitch = block_stats.ile(0.5);
236 } else if (block_votes <= textord_words_veto_power && like_votes > 0) {
237 bad_row->fixed_pitch = like_stats.ile(0.5);
239 } else {
241 if (block_votes == 0 && like_votes == 0 && other_votes > 0 &&
243 tprintf(
244 "Warning:row %d of block %d set prop with no like rows against "
245 "trend\n",
246 row_target, block_target);
247 }
248 }
249 }
251 tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", block_votes, like_votes, other_votes);
252 tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
253 }
254 if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
255 if (bad_row->fixed_pitch < textord_min_xheight) {
256 if (block_votes > 0) {
257 bad_row->fixed_pitch = block_stats.ile(0.5);
258 } else if (block_votes == 0 && like_votes > 0) {
259 bad_row->fixed_pitch = like_stats.ile(0.5);
260 } else {
261 tprintf("Warning:guessing pitch as xheight on row %d, block %d\n", row_target,
262 block_target);
263 bad_row->fixed_pitch = bad_row->xheight;
264 }
265 }
266 if (bad_row->fixed_pitch < textord_min_xheight) {
267 bad_row->fixed_pitch = (float)textord_min_xheight;
268 }
269 bad_row->kern_size = bad_row->fixed_pitch / 4;
270 bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
271 bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
272 bad_row->space_threshold = (bad_row->min_space + bad_row->max_nonspace) / 2;
273 bad_row->space_size = bad_row->fixed_pitch;
274 if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
275 tune_row_pitch(bad_row, &bad_row->projection, bad_row->projection_left,
276 bad_row->projection_right,
277 (bad_row->fixed_pitch + bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
278 sp_sd, mid_cuts, &bad_row->char_cells, false);
279 }
280 } else if (bad_row->pitch_decision == PITCH_CORR_PROP ||
281 bad_row->pitch_decision == PITCH_DEF_PROP) {
282 bad_row->fixed_pitch = 0.0f;
283 bad_row->char_cells.clear();
284 }
285}
int textord_words_veto_power
Definition: tovars.cpp:43
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1097
double textord_pitch_rowsimilarity
Definition: tovars.cpp:44
ICOORDELT_LIST char_cells
Definition: blobbox.h:675
STATS projection
Definition: blobbox.h:677
int16_t projection_left
Definition: blobbox.h:654
int16_t projection_right
Definition: blobbox.h:655
double ile(double frac) const
Definition: statistc.cpp:172
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value)
Definition: statistc.cpp:59

◆ fixed_pitch_row()

bool tesseract::fixed_pitch_row ( TO_ROW row,
BLOCK block,
int32_t  block_index 
)

Definition at line 931 of file topitch.cpp.

934 {
935 const char *res_string; // pitch result
936 int16_t mid_cuts; // no of cheap cuts
937 float non_space; // gap size
938 float pitch_sd; // error on pitch
939 float sp_sd = 0.0f; // space sd
940
941 non_space = row->fp_nonsp;
942 if (non_space > row->fixed_pitch) {
943 non_space = row->fixed_pitch;
944 }
945 POLY_BLOCK *pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
946 if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
947 // Set the decision to definitely proportional.
948 pitch_sd = textord_words_def_prop * row->fixed_pitch;
950 } else {
951 pitch_sd = tune_row_pitch(row, &row->projection, row->projection_left, row->projection_right,
952 (row->fixed_pitch + non_space * 3) / 4, row->fixed_pitch, sp_sd,
953 mid_cuts, &row->char_cells, block_index == textord_debug_block);
954 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch &&
955 ((pitsync_linear_version & 3) < 3 ||
956 ((pitsync_linear_version & 3) >= 3 &&
957 (row->used_dm_model || sp_sd > 20 || (pitch_sd == 0 && sp_sd > 10))))) {
958 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch && !row->all_caps &&
959 ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) {
961 } else {
963 }
964 } else if ((pitsync_linear_version & 3) < 3 || sp_sd > 20 || mid_cuts > 0 ||
965 pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
966 if (pitch_sd < textord_words_def_prop * row->fixed_pitch) {
968 } else {
970 }
971 } else {
973 }
974 }
975
977 res_string = "??";
978 switch (row->pitch_decision) {
979 case PITCH_DEF_PROP:
980 res_string = "DP";
981 break;
982 case PITCH_MAYBE_PROP:
983 res_string = "MP";
984 break;
985 case PITCH_DEF_FIXED:
986 res_string = "DF";
987 break;
989 res_string = "MF";
990 break;
991 default:
992 res_string = "??";
993 }
994 tprintf(":sd/p=%g:occ=%g:init_res=%s\n", pitch_sd / row->fixed_pitch, sp_sd, res_string);
995 }
996 return true;
997}
int textord_debug_block
Definition: tovars.cpp:29
double textord_words_pitchsd_threshold
Definition: tovars.cpp:40
double textord_words_def_prop
Definition: tovars.cpp:42

◆ fixed_pitch_words()

ROW * tesseract::fixed_pitch_words ( TO_ROW row,
FCOORD  rotation 
)

Definition at line 65 of file fpchop.cpp.

68 {
69 bool bol; // start of line
70 uint8_t blanks; // in front of word
71 uint8_t new_blanks; // blanks in empty cell
72 int16_t chop_coord; // chop boundary
73 int16_t prev_chop_coord; // start of cell
74 int16_t rep_left; // left edge of rep word
75 ROW *real_row; // output row
76 C_OUTLINE_LIST left_coutlines;
77 C_OUTLINE_LIST right_coutlines;
78 C_BLOB_LIST cblobs;
79 C_BLOB_IT cblob_it = &cblobs;
80 WERD_LIST words;
81 WERD_IT word_it = &words; // new words
82 // repeated blobs
83 WERD_IT rep_it = &row->rep_words;
84 WERD *word; // new word
85 int32_t xstarts[2]; // row ends
86 int32_t prev_x; // end of prev blob
87 // iterator
88 BLOBNBOX_IT box_it = row->blob_list();
89 // boundaries
90 ICOORDELT_IT cell_it = &row->char_cells;
91
92#ifndef GRAPHICS_DISABLED
93 if (textord_show_page_cuts && to_win != nullptr) {
94 plot_row_cells(to_win, ScrollView::RED, row, 0, &row->char_cells);
95 }
96#endif
97
98 prev_x = -INT16_MAX;
99 bol = true;
100 blanks = 0;
101 if (rep_it.empty()) {
102 rep_left = INT16_MAX;
103 } else {
104 rep_left = rep_it.data()->bounding_box().left();
105 }
106 if (box_it.empty()) {
107 return nullptr; // empty row
108 }
109 xstarts[0] = box_it.data()->bounding_box().left();
110 if (rep_left < xstarts[0]) {
111 xstarts[0] = rep_left;
112 }
113 if (cell_it.empty() || row->char_cells.singleton()) {
114 tprintf("Row without enough char cells!\n");
115 tprintf("Leftmost blob is at (%d,%d)\n", box_it.data()->bounding_box().left(),
116 box_it.data()->bounding_box().bottom());
117 return nullptr;
118 }
119 ASSERT_HOST(!cell_it.empty() && !row->char_cells.singleton());
120 prev_chop_coord = cell_it.data()->x();
121 word = nullptr;
122 while (rep_left < cell_it.data()->x()) {
123 word =
124 add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, &word_it);
125 }
126 cell_it.mark_cycle_pt();
127 if (prev_chop_coord >= cell_it.data()->x()) {
128 cell_it.forward();
129 }
130 for (; !cell_it.cycled_list(); cell_it.forward()) {
131 chop_coord = cell_it.data()->x();
132 while (!box_it.empty() && box_it.data()->bounding_box().left() <= chop_coord) {
133 if (box_it.data()->bounding_box().right() > prev_x) {
134 prev_x = box_it.data()->bounding_box().right();
135 }
136 split_to_blob(box_it.extract(), chop_coord, textord_fp_chop_error + 0.5f, &left_coutlines,
137 &right_coutlines);
138 box_it.forward();
139 while (!box_it.empty() && box_it.data()->cblob() == nullptr) {
140 delete box_it.extract();
141 box_it.forward();
142 }
143 }
144 if (!right_coutlines.empty() && left_coutlines.empty()) {
145 split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5f, &left_coutlines,
146 &right_coutlines);
147 }
148 if (!left_coutlines.empty()) {
149 cblob_it.add_after_then_move(new C_BLOB(&left_coutlines));
150 } else {
151 if (rep_left < chop_coord) {
152 if (rep_left > prev_chop_coord) {
153 new_blanks =
154 static_cast<uint8_t>(floor((rep_left - prev_chop_coord) / row->fixed_pitch + 0.5));
155 } else {
156 new_blanks = 0;
157 }
158 } else {
159 if (chop_coord > prev_chop_coord) {
160 new_blanks =
161 static_cast<uint8_t>(floor((chop_coord - prev_chop_coord) / row->fixed_pitch + 0.5));
162 } else {
163 new_blanks = 0;
164 }
165 }
166 if (!cblob_it.empty()) {
167 if (blanks < 1 && word != nullptr && !word->flag(W_REP_CHAR)) {
168 blanks = 1;
169 }
170 word = new WERD(&cblobs, blanks, nullptr);
171 cblob_it.set_to_list(&cblobs);
172 word->set_flag(W_DONT_CHOP, true);
173 word_it.add_after_then_move(word);
174 if (bol) {
175 word->set_flag(W_BOL, true);
176 bol = false;
177 }
178 blanks = new_blanks;
179 } else {
180 blanks += new_blanks;
181 }
182 while (rep_left < chop_coord) {
183 word = add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch,
184 &word_it);
185 }
186 }
187 if (prev_chop_coord < chop_coord) {
188 prev_chop_coord = chop_coord;
189 }
190 }
191 if (!cblob_it.empty()) {
192 word = new WERD(&cblobs, blanks, nullptr);
193 word->set_flag(W_DONT_CHOP, true);
194 word_it.add_after_then_move(word);
195 if (bol) {
196 word->set_flag(W_BOL, true);
197 }
198 }
199 ASSERT_HOST(word != nullptr);
200 while (!rep_it.empty()) {
201 add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, &word_it);
202 }
203 // at end of line
204 word_it.data()->set_flag(W_EOL, true);
205 if (prev_chop_coord > prev_x) {
206 prev_x = prev_chop_coord;
207 }
208 xstarts[1] = prev_x + 1;
209 real_row =
210 new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
211 word_it.set_to_list(real_row->word_list());
212 // put words in row
213 word_it.add_list_after(&words);
214 real_row->recalc_bounding_box();
215 return real_row;
216}
int textord_fp_chop_error
Definition: fpchop.cpp:34
void split_to_blob(BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:260
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
Definition: drawtord.cpp:387
bool textord_show_page_cuts
Definition: topitch.cpp:47
WERD_LIST rep_words
Definition: blobbox.h:674
void recalc_bounding_box()
Definition: ocrrow.cpp:100
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:131

◆ fixspace_dbg()

void tesseract::fixspace_dbg ( WERD_RES word)

Definition at line 806 of file fixspace.cpp.

806 {
807 TBOX box = word->word->bounding_box();
808 const bool show_map_detail = false;
809 int16_t i;
810
811 box.print();
812 tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
813 tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", word->word->cblob_list()->length(),
814 word->rebuild_word->NumBlobs(), word->box_word->length());
816 tprintf("\n");
817 if (show_map_detail) {
818 tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
819 for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
820 tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
822 }
823 }
824
825 tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
826 tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
827}
FILE * debug_fp
Definition: tessvars.cpp:24
unsigned NumBlobs() const
Definition: blobs.h:449
unsigned length() const
Definition: boxword.h:81
WERD_CHOICE * best_choice
Definition: pageres.h:239
tesseract::BoxWord * box_word
Definition: pageres.h:270
TWERD * rebuild_word
Definition: pageres.h:264
std::string & unichar_string()
Definition: ratngs.h:519
void print() const
Definition: rect.h:289
void print(FILE *fp) const
Definition: rejctmap.cpp:112
void full_print(FILE *fp) const
Definition: rejctmap.cpp:120

◆ flip_0O()

void tesseract::flip_0O ( WERD_RES word)

◆ flip_hyphens()

void tesseract::flip_hyphens ( WERD_RES word)

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo  f)

Definition at line 129 of file fontinfo.cpp.

129 {
130 if (f.spacing_vec != nullptr) {
131 for (auto data : *f.spacing_vec) {
132 delete data;
133 }
134 delete f.spacing_vec;
135 f.spacing_vec = nullptr;
136 }
137 delete[] f.name;
138 f.name = nullptr;
139}
std::vector< FontSpacingInfo * > * spacing_vec
Definition: fontinfo.h:142

◆ FreeClass()

TESS_API void tesseract::FreeClass ( CLASS_TYPE  Class)

Definition at line 119 of file protos.cpp.

119 {
120 if (Class) {
121 FreeClassFields(Class);
122 delete Class;
123 }
124}
void FreeClassFields(CLASS_TYPE Class)
Definition: protos.cpp:131

◆ FreeClassFields()

TESS_API void tesseract::FreeClassFields ( CLASS_TYPE  Class)

Definition at line 131 of file protos.cpp.

131 {
132 if (Class) {
133 for (int i = 0; i < Class->NumConfigs; i++) {
134 FreeBitVector(Class->Configurations[i]);
135 }
136 }
137}

◆ FreeClusterer()

TESS_API void tesseract::FreeClusterer ( CLUSTERER * Clusterer)

This routine frees all of the memory allocated to the specified data structure. It will not, however, free the memory used by the prototype list. The pointers to the clusters for each prototype in the list will be set to nullptr to indicate that the cluster data structures no longer exist. Any sample lists that have been obtained via calls to GetSamples are no longer valid.

Parameters
Clusterer - pointer to data structure to be freed

Definition at line 1575 of file cluster.cpp.

1575 {
1576 if (Clusterer != nullptr) {
1577 delete[] Clusterer->ParamDesc;
1578 delete Clusterer->KDTree;
1579 delete Clusterer->Root;
1580 // Free up all used buckets structures.
1581 for (auto &d : Clusterer->bucket_cache) {
1582 for (auto &c : d) {
1583 delete c;
1584 }
1585 }
1586
1587 delete Clusterer;
1588 }
1589} // FreeClusterer
PARAM_DESC * ParamDesc
Definition: cluster.h:93
KDTREE * KDTree
Definition: cluster.h:95
BUCKETS * bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS+1 - MINBUCKETS]
Definition: cluster.h:100

◆ FreeLabeledClassList()

TESS_COMMON_TRAINING_API void tesseract::FreeLabeledClassList ( LIST  ClassList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
ClassList - list of all fonts in document

Definition at line 616 of file commontraining.cpp.

616 {
617 MERGE_CLASS MergeClass;
618
619 LIST nodes = ClassList;
620 iterate(ClassList) /* iterate through all of the fonts */
621 {
622 MergeClass = reinterpret_cast<MERGE_CLASS>(ClassList->first_node());
623 FreeClass(MergeClass->Class);
624 delete MergeClass;
625 }
626 destroy(nodes);
627
628} /* FreeLabeledClassList */
void FreeClass(CLASS_TYPE Class)
Definition: protos.cpp:119
tesseract::CLASS_TYPE Class

◆ FreeLabeledList()

TESS_COMMON_TRAINING_API void tesseract::FreeLabeledList ( LABELEDLIST  LabeledList)

This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.

Parameters
LabeledList - labeled list to be freed
Note
Globals: none

Definition at line 417 of file commontraining.cpp.

417 {
418 destroy(LabeledList->List);
419 delete LabeledList;
420} /* FreeLabeledList */

◆ FreeMFOutline()

void tesseract::FreeMFOutline ( void *  arg)

This routine deallocates all of the memory consumed by a micro-feature outline.

Parameters
arg - micro-feature outline to be freed

Definition at line 132 of file mfoutline.cpp.

132 { // MFOUTLINE Outline)
133 auto Outline = static_cast<MFOUTLINE>(arg);
134
135 /* break the circular outline so we can use std. techniques to deallocate */
136 MFOUTLINE Start = Outline->list_rest();
137 set_rest(Outline, NIL_LIST);
138 while (Start != nullptr) {
139 delete reinterpret_cast<MFEDGEPT *>(Start->first_node());
140 Start = pop(Start);
141 }
142
143} /* FreeMFOutline */

◆ FreeNormProtoList()

TESS_COMMON_TRAINING_API void tesseract::FreeNormProtoList ( LIST  CharList)

Definition at line 706 of file commontraining.cpp.

708{
709 LABELEDLIST char_sample;
710
711 LIST nodes = CharList;
712 iterate(CharList) /* iterate through all of the fonts */
713 {
714 char_sample = reinterpret_cast<LABELEDLIST>(CharList->first_node());
715 FreeLabeledList(char_sample);
716 }
717 destroy(nodes);
718
719} // FreeNormProtoList
void FreeLabeledList(LABELEDLIST LabeledList)

◆ FreeOutlines()

void tesseract::FreeOutlines ( LIST  Outlines)

Release all memory consumed by the specified list of outlines.

Parameters
Outlines - list of mf-outlines to be freed

Definition at line 151 of file mfoutline.cpp.

151 {
152 destroy_nodes(Outlines, FreeMFOutline);
153} /* FreeOutlines */
void FreeMFOutline(void *arg)
Definition: mfoutline.cpp:132
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:137

◆ FreeProtoList()

TESS_API void tesseract::FreeProtoList ( LIST * ProtoList)

This routine frees all of the memory allocated to the specified list of prototypes. The clusters which are pointed to by the prototypes are not freed.

Parameters
ProtoList - pointer to list of prototypes to be freed

Definition at line 1597 of file cluster.cpp.

1597 {
1598 destroy_nodes(*ProtoList, FreePrototype);
1599} // FreeProtoList
void FreePrototype(void *arg)
Definition: cluster.cpp:1608

◆ FreePrototype()

void tesseract::FreePrototype ( void *  arg)

This routine deallocates the memory consumed by the specified prototype and modifies the corresponding cluster so that it is no longer marked as a prototype. The cluster is NOT deallocated by this routine.

Parameters
arg - prototype data structure to be deallocated

Definition at line 1608 of file cluster.cpp.

1608 { // PROTOTYPE *Prototype)
1609 auto *Prototype = static_cast<PROTOTYPE *>(arg);
1610
1611 // unmark the corresponding cluster (if there is one
1612 if (Prototype->Cluster != nullptr) {
1613 Prototype->Cluster->Prototype = false;
1614 }
1615
1616 // deallocate the prototype statistics and then the prototype itself
1617 if (Prototype->Style != spherical) {
1618 delete[] Prototype->Variance.Elliptical;
1619 delete[] Prototype->Magnitude.Elliptical;
1620 delete[] Prototype->Weight.Elliptical;
1621 }
1622 delete Prototype;
1623} // FreePrototype
CLUSTER * Cluster
Definition: cluster.h:81

◆ FreeTrainingSamples()

TESS_COMMON_TRAINING_API void tesseract::FreeTrainingSamples ( LIST  CharList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
CharList - list of all fonts in document

Definition at line 391 of file commontraining.cpp.

391 {
392 LABELEDLIST char_sample;
393 FEATURE_SET FeatureSet;
394 LIST FeatureList;
395
396 LIST nodes = CharList;
397 iterate(CharList) { /* iterate through all of the fonts */
398 char_sample = reinterpret_cast<LABELEDLIST>(CharList->first_node());
399 FeatureList = char_sample->List;
400 iterate(FeatureList) { /* iterate through all of the classes */
401 FeatureSet = reinterpret_cast<FEATURE_SET>(FeatureList->first_node());
402 delete FeatureSet;
403 }
404 FreeLabeledList(char_sample);
405 }
406 destroy(nodes);
407} /* FreeTrainingSamples */

◆ FullPageBlock()

void tesseract::FullPageBlock ( int  width,
int  height,
BLOCK_LIST *  blocks 
)

Definition at line 68 of file blread.cpp.

68 {
69 BLOCK_IT block_it(blocks);
70 auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
71 block_it.add_to_end(block);
72}

◆ FullwidthToHalfwidth()

TESS_UNICHARSET_TRAINING_API char32 tesseract::FullwidthToHalfwidth ( const char32  ch)

Definition at line 282 of file normstrngs.cpp.

282 {
283 // Return unchanged if not in the fullwidth-halfwidth Unicode block.
284 if (ch < 0xFF00 || ch > 0xFFEF || !IsValidCodepoint(ch)) {
285 if (ch != 0x3000) {
286 return ch;
287 }
288 }
289 // Special case for fullwidth left and right "white parentheses".
290 if (ch == 0xFF5F) {
291 return 0x2985;
292 }
293 if (ch == 0xFF60) {
294 return 0x2986;
295 }
296 // Construct a full-to-half width transliterator.
297 IcuErrorCode error_code;
298 icu::UnicodeString uch_str(static_cast<UChar32>(ch));
299 const icu::Transliterator *fulltohalf =
300 icu::Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, error_code);
301 error_code.assertSuccess();
302 error_code.reset();
303
304 fulltohalf->transliterate(uch_str);
305 delete fulltohalf;
306 ASSERT_HOST(uch_str.length() != 0);
307 return uch_str[0];
308}
bool IsValidCodepoint(const char32 ch)
Definition: normstrngs.cpp:223

◆ FuncInplace()

template<class Func >
void tesseract::FuncInplace ( int  n,
TFloat inout 
)
inline

Definition at line 164 of file functions.h.

164 {
165 Func f;
166 for (int i = 0; i < n; ++i) {
167 inout[i] = f(inout[i]);
168 }
169}

◆ FuncMultiply()

template<class Func >
void tesseract::FuncMultiply ( const TFloat u,
const TFloat v,
int  n,
TFloat out 
)
inline

Definition at line 173 of file functions.h.

173 {
174 Func f;
175 for (int i = 0; i < n; ++i) {
176 out[i] = f(u[i]) * v[i];
177 }
178}

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion ( int  width,
int  height,
TRand randomizer,
Image pix,
std::vector< TBOX > *  boxes 
)

Definition at line 222 of file degradeimage.cpp.

223 {
224 if (pix != nullptr && *pix != nullptr) {
225 width = pixGetWidth(*pix);
226 height = pixGetHeight(*pix);
227 }
228 float *im_coeffs = nullptr;
229 float *box_coeffs = nullptr;
230 l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
231 if (pix != nullptr && *pix != nullptr) {
232 // Transform the image.
233 Image transformed = pixProjective(*pix, im_coeffs, incolor);
234 if (transformed == nullptr) {
235 tprintf("Projective transformation failed!!\n");
236 return;
237 }
238 pix->destroy();
239 *pix = transformed;
240 }
241 if (boxes != nullptr) {
242 // Transform the boxes.
243 for (auto &b : *boxes) {
244 int x1, y1, x2, y2;
245 const TBOX &box = b;
246 projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1);
247 projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2);
248 TBOX new_box1(x1, height - y2, x2, height - y1);
249 projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1);
250 projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2);
251 TBOX new_box2(x1, height - y1, x2, height - y2);
252 b = new_box1.bounding_union(new_box2);
253 }
254 }
255 lept_free(im_coeffs);
256 lept_free(box_coeffs);
257}
int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)

◆ get_blob_coords()

int tesseract::get_blob_coords ( TO_ROW row,
int32_t  lineheight,
TBOX blobcoords,
bool &  holed_line,
int &  outcount 
)

Definition at line 416 of file oldbasel.cpp.

422 {
423 // blobs
424 BLOBNBOX_IT blob_it = row->blob_list();
425 int blobindex; /*no along text line */
426 int losscount; // lost blobs
427 int maxlosscount; // greatest lost blobs
428 /*height stat collection */
429 STATS heightstat(0, MAXHEIGHT - 1);
430
431 if (blob_it.empty()) {
432 return 0; // none
433 }
434 maxlosscount = 0;
435 losscount = 0;
436 blob_it.mark_cycle_pt();
437 blobindex = 0;
438 do {
439 blobcoords[blobindex] = box_next_pre_chopped(&blob_it);
440 if (blobcoords[blobindex].height() > lineheight * 0.25) {
441 heightstat.add(blobcoords[blobindex].height(), 1);
442 }
443 if (blobindex == 0 || blobcoords[blobindex].height() > lineheight * 0.25 ||
444 blob_it.cycled_list()) {
445 blobindex++; /*no of merged blobs */
446 losscount = 0;
447 } else {
448 if (blobcoords[blobindex].height() < blobcoords[blobindex].width() * oldbl_dot_error_size &&
449 blobcoords[blobindex].width() < blobcoords[blobindex].height() * oldbl_dot_error_size) {
450 // counts as dot
451 blobindex++;
452 losscount = 0;
453 } else {
454 losscount++; // lost it
455 if (losscount > maxlosscount) {
456 // remember max
457 maxlosscount = losscount;
458 }
459 }
460 }
461 } while (!blob_it.cycled_list());
462
463 holed_line = maxlosscount > oldbl_holed_losscount;
464 outcount = blobindex; /*total blobs */
465
466 if (heightstat.get_total() > 1) {
467 /*guess x-height */
468 return static_cast<int>(heightstat.ile(0.25));
469 } else {
470 return blobcoords[0].height();
471 }
472}
#define MAXHEIGHT
Definition: oldbasel.cpp:63
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:667

◆ get_min_max_xheight()

void tesseract::get_min_max_xheight ( int  block_linesize,
int *  min_height,
int *  max_height 
)
inline

Definition at line 86 of file makerow.h.

86 {
87 *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh));
88 if (*min_height < textord_min_xheight) {
89 *min_height = textord_min_xheight;
90 }
91 *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
92}
double textord_minxh
Definition: makerow.cpp:79

◆ get_row_category()

ROW_CATEGORY tesseract::get_row_category ( const TO_ROW row)
inline

Definition at line 94 of file makerow.h.

94 {
95 if (row->xheight <= 0) {
96 return ROW_INVALID;
97 }
98 return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND
100}

◆ get_ydiffs()

int tesseract::get_ydiffs ( TBOX  blobcoords[],
int  blobcount,
QSPLINE spline,
float  ydiffs[] 
)

Definition at line 860 of file oldbasel.cpp.

865 {
866 int blobindex; /*current blob */
867 int xcentre; /*xcoord */
868 int lastx; /*last xcentre */
869 float diffsum; /*sum of diffs */
870 float diff; /*current difference */
871 float drift; /*sum of spline steps */
872 float bestsum; /*smallest diffsum */
873 int bestindex; /*index of bestsum */
874
875 diffsum = 0.0f;
876 bestindex = 0;
877 bestsum = static_cast<float>(INT32_MAX);
878 drift = 0.0f;
879 lastx = blobcoords[0].left();
880 /*do each blob in row */
881 for (blobindex = 0; blobindex < blobcount; blobindex++) {
882 /*centre of blob */
883 xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1;
884 // step functions in spline
885 drift += spline->step(lastx, xcentre);
886 lastx = xcentre;
887 diff = blobcoords[blobindex].bottom();
888 diff -= spline->y(xcentre);
889 diff += drift;
890 ydiffs[blobindex] = diff; /*store difference */
891 if (blobindex > 2) {
892 /*remove old one */
893 diffsum -= ABS(ydiffs[blobindex - 3]);
894 }
895 diffsum += ABS(diff); /*add new one */
896 if (blobindex >= 2 && diffsum < bestsum) {
897 bestsum = diffsum; /*find min sum */
898 bestindex = blobindex - 1; /*middle of set */
899 }
900 }
901 return bestindex;
902}
double step(double x1, double x2)
Definition: quspline.cpp:180

◆ GetCleanedTextResult()

std::string tesseract::GetCleanedTextResult ( tesseract::TessBaseAPI tess,
Image  pix 
)

Definition at line 45 of file baseapi_test.cc.

45 {
46 tess->SetImage(pix);
47 char *result = tess->GetUTF8Text();
48 std::string ocr_result = result;
49 delete[] result;
50 trim(ocr_result);
51 return ocr_result;
52}
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:576

◆ GetCPPadsForLevel()

void tesseract::GetCPPadsForLevel ( int  Level,
float *  EndPad,
float *  SidePad,
float *  AnglePad 
)

This routine copies the appropriate global pad variables into EndPad, SidePad, and AnglePad. This is a kludge used to get around the fact that global control variables cannot be arrays. If the specified level is illegal, the tightest possible pads are returned.

Parameters
Level: "tightness" level to return pads for
EndPad: place to put end pad for Level
SidePad: place to put side pad for Level
AnglePad: place to put angle pad for Level

Definition at line 1235 of file intproto.cpp.

1235 {
1236 switch (Level) {
1237 case 0:
1238 *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength();
1239 *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength();
1240 *AnglePad = classify_cp_angle_pad_loose / 360.0;
1241 break;
1242
1243 case 1:
1244 *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength();
1245 *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength();
1246 *AnglePad = classify_cp_angle_pad_medium / 360.0;
1247 break;
1248
1249 case 2:
1250 *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength();
1251 *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength();
1252 *AnglePad = classify_cp_angle_pad_tight / 360.0;
1253 break;
1254
1255 default:
1256 *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength();
1257 *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength();
1258 *AnglePad = classify_cp_angle_pad_tight / 360.0;
1259 break;
1260 }
1261 if (*AnglePad > 0.5) {
1262 *AnglePad = 0.5;
1263 }
1264
1265} /* GetCPPadsForLevel */

◆ GetMatchColorFor()

ScrollView::Color tesseract::GetMatchColorFor ( float  Evidence)
Parameters
Evidence: evidence value to return color for
Returns
Color which corresponds to specified Evidence value.
Note
Globals: none

Definition at line 1272 of file intproto.cpp.

1272 {
1273 assert(Evidence >= 0.0);
1274 assert(Evidence <= 1.0);
1275
1276 if (Evidence >= 0.90) {
1277 return ScrollView::WHITE;
1278 } else if (Evidence >= 0.75) {
1279 return ScrollView::GREEN;
1280 } else if (Evidence >= 0.50) {
1281 return ScrollView::RED;
1282 } else {
1283 return ScrollView::BLUE;
1284 }
1285} /* GetMatchColorFor */

◆ GetNextFill()

void tesseract::GetNextFill ( TABLE_FILLER Filler,
FILL_SPEC Fill 
)

This routine returns (in Fill) the specification of the next line to be filled from Filler. FillerDone() should always be called before GetNextFill() to ensure that we do not run past the end of the fill table.

Parameters
Filler: filler to get next fill spec from
Fill: place to put spec for next fill

Definition at line 1295 of file intproto.cpp.

1295 {
1296 FILL_SWITCH *Next;
1297
1298 /* compute the fill assuming no switches will be encountered */
1299 Fill->AngleStart = Filler->AngleStart;
1300 Fill->AngleEnd = Filler->AngleEnd;
1301 Fill->X = Filler->X;
1302 Fill->YStart = Filler->YStart >> 8;
1303 Fill->YEnd = Filler->YEnd >> 8;
1304
1305 /* update the fill info and the filler for ALL switches at this X value */
1306 Next = &(Filler->Switch[Filler->NextSwitch]);
1307 while (Filler->X >= Next->X) {
1308 Fill->X = Filler->X = Next->X;
1309 if (Next->Type == StartSwitch) {
1310 Fill->YStart = Next->Y;
1311 Filler->StartDelta = Next->Delta;
1312 Filler->YStart = Next->YInit;
1313 } else if (Next->Type == EndSwitch) {
1314 Fill->YEnd = Next->Y;
1315 Filler->EndDelta = Next->Delta;
1316 Filler->YEnd = Next->YInit;
1317 } else { /* Type must be LastSwitch */
1318 break;
1319 }
1320 Filler->NextSwitch++;
1321 Next = &(Filler->Switch[Filler->NextSwitch]);
1322 }
1323
1324 /* prepare the filler for the next call to this routine */
1325 Filler->X++;
1326 Filler->YStart += Filler->StartDelta;
1327 Filler->YEnd += Filler->EndDelta;
1328
1329} /* GetNextFill */

◆ GetXheightString()

std::string tesseract::GetXheightString ( const std::string &  script_dir,
const UNICHARSET unicharset 
)

Definition at line 166 of file unicharset_training_utils.cpp.

166 {
167 std::string xheights_str;
168 for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
169 // Load the xheights for the script if available.
170 std::string filename = script_dir + "/" + unicharset.get_script_from_script_id(s) + ".xheights";
171 std::string script_heights;
172 if (File::ReadFileToString(filename, &script_heights)) {
173 xheights_str += script_heights;
174 }
175 }
176 return xheights_str;
177}
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:886
int get_script_table_size() const
Definition: unicharset.h:881

◆ GlobalParams()

TESS_API ParamsVectors * tesseract::GlobalParams ( )

Definition at line 36 of file params.cpp.

36 {
38 return &global_params;
39}

◆ HistogramRect()

void tesseract::HistogramRect ( Image  src_pix,
int  channel,
int  left,
int  top,
int  width,
int  height,
int *  histogram 
)

Definition at line 146 of file otsuthr.cpp.

147 {
148 int num_channels = pixGetDepth(src_pix) / 8;
149 channel = ClipToRange(channel, 0, num_channels - 1);
150 int bottom = top + height;
151 memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
152 int src_wpl = pixGetWpl(src_pix);
153 l_uint32 *srcdata = pixGetData(src_pix);
154 for (int y = top; y < bottom; ++y) {
155 const l_uint32 *linedata = srcdata + y * src_wpl;
156 for (int x = 0; x < width; ++x) {
157 int pixel = GET_DATA_BYTE(linedata, (x + left) * num_channels + channel);
158 ++histogram[pixel];
159 }
160 }
161}
const int kHistogramSize
Definition: otsuthr.h:30

◆ HOcrEscape()

std::string tesseract::HOcrEscape ( const char *  text)

Escape a char string - replace each of the characters < > & " ' with its HTML code (&lt; &gt; &amp; &quot; &#39;).

Definition at line 2378 of file baseapi.cpp.

2378 {
2379 std::string ret;
2380 const char *ptr;
2381 for (ptr = text; *ptr; ptr++) {
2382 switch (*ptr) {
2383 case '<':
2384 ret += "&lt;";
2385 break;
2386 case '>':
2387 ret += "&gt;";
2388 break;
2389 case '&':
2390 ret += "&amp;";
2391 break;
2392 case '"':
2393 ret += "&quot;";
2394 break;
2395 case '\'':
2396 ret += "&#39;";
2397 break;
2398 default:
2399 ret += *ptr;
2400 }
2401 }
2402 return ret;
2403}

◆ InitFeatureDefs()

TESS_API void tesseract::InitFeatureDefs ( FEATURE_DEFS_STRUCT featuredefs)

Definition at line 87 of file featdefs.cpp.

87 {
89 for (int i = 0; i < NUM_FEATURE_TYPES; ++i) {
90 featuredefs->FeatureDesc[i] = DescDefs[i];
91 }
92}
#define NUM_FEATURE_TYPES
Definition: featdefs.h:29
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
Definition: featdefs.h:43

◆ InitFeatureDisplayWindowIfReqd()

void tesseract::InitFeatureDisplayWindowIfReqd ( )

Initializes the feature display window if it is not already initialized.

Definition at line 1614 of file intproto.cpp.

1614 {
1615 if (FeatureDisplayWindow == nullptr) {
1616 FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", 50, 700);
1617 }
1618}
ScrollView * CreateFeatureSpaceWindow(const char *name, int xpos, int ypos)
Definition: intproto.cpp:1622

◆ initialise_search()

void tesseract::initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 201 of file fixspace.cpp.

201 {
202 WERD_RES_IT src_it(&src_list);
203 WERD_RES_IT new_it(&new_list);
204 WERD_RES *src_wd;
205 WERD_RES *new_wd;
206
207 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
208 src_wd = src_it.data();
209 if (!src_wd->combination) {
210 new_wd = WERD_RES::deep_copy(src_wd);
211 new_wd->combination = false;
212 new_wd->part_of_combo = false;
213 new_it.add_after_then_move(new_wd);
214 }
215 }
216}

◆ InitIntegerFX()

TESS_API void tesseract::InitIntegerFX ( )

Public Code


Public Function Prototypes

Definition at line 54 of file intfx.cpp.

54 {
55 // Guards write access to AtanTable so we don't create it more than once.
56 static std::mutex atan_table_mutex;
57 static bool atan_table_init = false;
58 std::lock_guard<std::mutex> guard(atan_table_mutex);
59 if (!atan_table_init) {
60 for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) {
61 cos_table[i] = cos(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI);
62 sin_table[i] = sin(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI);
63 }
64 atan_table_init = true;
65 }
66}
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:117

◆ InitIntMatchWindowIfReqd()

void tesseract::InitIntMatchWindowIfReqd ( )

Initializes the int matcher window if it is not already initialized.

Definition at line 1587 of file intproto.cpp.

1587 {
1588 if (IntMatchWindow == nullptr) {
1589 IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200);
1590 auto *popup_menu = new SVMenuNode();
1591
1592 popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, "x", "Class to debug");
1593 popup_menu->AddChild("Debug Static classes", IDA_STATIC, "x", "Class to debug");
1594 popup_menu->AddChild("Debug Both", IDA_BOTH, "x", "Class to debug");
1595 popup_menu->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, "0", "Index to debug");
1596 popup_menu->BuildMenu(IntMatchWindow, false);
1597 }
1598}

◆ InitMatcherRatings()

void tesseract::InitMatcherRatings ( float *  Rating)

◆ InitProtoDisplayWindowIfReqd()

void tesseract::InitProtoDisplayWindowIfReqd ( )

Initializes the proto display window if it is not already initialized.

Definition at line 1604 of file intproto.cpp.

1604 {
1605 if (ProtoDisplayWindow == nullptr) {
1606 ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", 550, 200);
1607 }
1608}

◆ InitPrototypes()

void tesseract::InitPrototypes ( )

◆ InitTableFiller()

void tesseract::InitTableFiller ( float  EndPad,
float  SidePad,
float  AnglePad,
PROTO_STRUCT Proto,
TABLE_FILLER Filler 
)

This routine computes a data structure (Filler) which can be used to fill in a rectangle surrounding the specified Proto. Results are returned in Filler.

Parameters
EndPad, SidePad, AnglePad: padding to add to proto
Proto: proto to create a filler for
Filler: place to put table filler

Definition at line 1340 of file intproto.cpp.

1345{
1346 float Angle;
1347 float X, Y, HalfLength;
1348 float Cos, Sin;
1349 float XAdjust, YAdjust;
1350 FPOINT Start, Switch1, Switch2, End;
1351 int S1 = 0;
1352 int S2 = 1;
1353
1354 Angle = Proto->Angle;
1355 X = Proto->X;
1356 Y = Proto->Y;
1357 HalfLength = Proto->Length / 2.0;
1358
1359 Filler->AngleStart = CircBucketFor(Angle - AnglePad, AS, NB);
1360 Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB);
1361 Filler->NextSwitch = 0;
1362
1363 if (fabs(Angle - 0.0) < HV_TOLERANCE || fabs(Angle - 0.5) < HV_TOLERANCE) {
1364 /* horizontal proto - handle as special case */
1365 Filler->X = Bucket8For(X - HalfLength - EndPad, XS, NB);
1366 Filler->YStart = Bucket16For(Y - SidePad, YS, NB * 256);
1367 Filler->YEnd = Bucket16For(Y + SidePad, YS, NB * 256);
1368 Filler->StartDelta = 0;
1369 Filler->EndDelta = 0;
1370 Filler->Switch[0].Type = LastSwitch;
1371 Filler->Switch[0].X = Bucket8For(X + HalfLength + EndPad, XS, NB);
1372 } else if (fabs(Angle - 0.25) < HV_TOLERANCE || fabs(Angle - 0.75) < HV_TOLERANCE) {
1373 /* vertical proto - handle as special case */
1374 Filler->X = Bucket8For(X - SidePad, XS, NB);
1375 Filler->YStart = Bucket16For(Y - HalfLength - EndPad, YS, NB * 256);
1376 Filler->YEnd = Bucket16For(Y + HalfLength + EndPad, YS, NB * 256);
1377 Filler->StartDelta = 0;
1378 Filler->EndDelta = 0;
1379 Filler->Switch[0].Type = LastSwitch;
1380 Filler->Switch[0].X = Bucket8For(X + SidePad, XS, NB);
1381 } else {
1382 /* diagonal proto */
1383
1384 if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) {
1385 /* rising diagonal proto */
1386 Angle *= 2.0 * M_PI;
1387 Cos = fabs(std::cos(Angle));
1388 Sin = fabs(std::sin(Angle));
1389
1390 /* compute the positions of the corners of the acceptance region */
1391 Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
1392 Start.y = Y - (HalfLength + EndPad) * Sin + SidePad * Cos;
1393 End.x = 2.0 * X - Start.x;
1394 End.y = 2.0 * Y - Start.y;
1395 Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin;
1396 Switch1.y = Y - (HalfLength + EndPad) * Sin - SidePad * Cos;
1397 Switch2.x = 2.0 * X - Switch1.x;
1398 Switch2.y = 2.0 * Y - Switch1.y;
1399
1400 if (Switch1.x > Switch2.x) {
1401 S1 = 1;
1402 S2 = 0;
1403 }
1404
1405 /* translate into bucket positions and deltas */
1406 Filler->X = Bucket8For(Start.x, XS, NB);
1407 Filler->StartDelta = -static_cast<int16_t>((Cos / Sin) * 256);
1408 Filler->EndDelta = static_cast<int16_t>((Sin / Cos) * 256);
1409
1410 XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x;
1411 YAdjust = XAdjust * Cos / Sin;
1412 Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256);
1413 YAdjust = XAdjust * Sin / Cos;
1414 Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256);
1415
1416 Filler->Switch[S1].Type = StartSwitch;
1417 Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB);
1418 Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB);
1419 XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB);
1420 YAdjust = XAdjust * Sin / Cos;
1421 Filler->Switch[S1].YInit = Bucket16For(Switch1.y - YAdjust, YS, NB * 256);
1422 Filler->Switch[S1].Delta = Filler->EndDelta;
1423
1424 Filler->Switch[S2].Type = EndSwitch;
1425 Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB);
1426 Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB);
1427 XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB);
1428 YAdjust = XAdjust * Cos / Sin;
1429 Filler->Switch[S2].YInit = Bucket16For(Switch2.y + YAdjust, YS, NB * 256);
1430 Filler->Switch[S2].Delta = Filler->StartDelta;
1431
1432 Filler->Switch[2].Type = LastSwitch;
1433 Filler->Switch[2].X = Bucket8For(End.x, XS, NB);
1434 } else {
1435 /* falling diagonal proto */
1436 Angle *= 2.0 * M_PI;
1437 Cos = fabs(std::cos(Angle));
1438 Sin = fabs(std::sin(Angle));
1439
1440 /* compute the positions of the corners of the acceptance region */
1441 Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
1442 Start.y = Y + (HalfLength + EndPad) * Sin - SidePad * Cos;
1443 End.x = 2.0 * X - Start.x;
1444 End.y = 2.0 * Y - Start.y;
1445 Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin;
1446 Switch1.y = Y + (HalfLength + EndPad) * Sin + SidePad * Cos;
1447 Switch2.x = 2.0 * X - Switch1.x;
1448 Switch2.y = 2.0 * Y - Switch1.y;
1449
1450 if (Switch1.x > Switch2.x) {
1451 S1 = 1;
1452 S2 = 0;
1453 }
1454
1455 /* translate into bucket positions and deltas */
1456 Filler->X = Bucket8For(Start.x, XS, NB);
1457 Filler->StartDelta = static_cast<int16_t>(
1458 ClipToRange<int>(-IntCastRounded((Sin / Cos) * 256), INT16_MIN, INT16_MAX));
1459 Filler->EndDelta = static_cast<int16_t>(
1460 ClipToRange<int>(IntCastRounded((Cos / Sin) * 256), INT16_MIN, INT16_MAX));
1461
1462 XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x;
1463 YAdjust = XAdjust * Sin / Cos;
1464 Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256);
1465 YAdjust = XAdjust * Cos / Sin;
1466 Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256);
1467
1468 Filler->Switch[S1].Type = EndSwitch;
1469 Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB);
1470 Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB);
1471 XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB);
1472 YAdjust = XAdjust * Sin / Cos;
1473 Filler->Switch[S1].YInit = Bucket16For(Switch1.y + YAdjust, YS, NB * 256);
1474 Filler->Switch[S1].Delta = Filler->StartDelta;
1475
1476 Filler->Switch[S2].Type = StartSwitch;
1477 Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB);
1478 Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB);
1479 XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB);
1480 YAdjust = XAdjust * Cos / Sin;
1481 Filler->Switch[S2].YInit = Bucket16For(Switch2.y - YAdjust, YS, NB * 256);
1482 Filler->Switch[S2].Delta = Filler->EndDelta;
1483
1484 Filler->Switch[2].Type = LastSwitch;
1485 Filler->Switch[2].X = Bucket8For(End.x, XS, NB);
1486 }
1487 }
1488} /* InitTableFiller */
#define XS
#define NB
#define HV_TOLERANCE
Definition: intproto.cpp:67
#define AS
#define YS
uint8_t Bucket8For(float param, float offset, int num_buckets)
Definition: intproto.cpp:385
float BucketEnd(int Bucket, float Offset, int NumBuckets)
Definition: intproto.cpp:1007
uint8_t CircBucketFor(float param, float offset, int num_buckets)
Definition: intproto.cpp:399
float BucketStart(int Bucket, float Offset, int NumBuckets)
Definition: intproto.cpp:991
uint16_t Bucket16For(float param, float offset, int num_buckets)
Definition: intproto.cpp:389

◆ insert_spline_point()

void tesseract::insert_spline_point ( int  xstarts[],
int  segment,
int  coord1,
int  coord2,
int &  segments 
)

Definition at line 1239 of file oldbasel.cpp.

1244 {
1245 int index; // for shuffling
1246
1247 for (index = segments; index > segment; index--) {
1248 xstarts[index + 1] = xstarts[index];
1249 }
1250 segments++;
1251 xstarts[segment] = coord1;
1252 xstarts[segment + 1] = coord2;
1253}

◆ InsertNodes()

void tesseract::InsertNodes ( KDTREE tree,
KDNODE nodes 
)

Given a subtree nodes, insert all of its elements into tree.

Definition at line 477 of file kdtree.cpp.

477 {
478 if (nodes == nullptr) {
479 return;
480 }
481
482 KDStore(tree, nodes->Key, nodes->Data);
483 InsertNodes(tree, nodes->Left);
484 InsertNodes(tree, nodes->Right);
485}
void InsertNodes(KDTREE *tree, KDNODE *nodes)
Definition: kdtree.cpp:477
void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data)
Definition: kdtree.cpp:215
float * Key
Definition: kdtree.h:57
KDNODE * Right
Definition: kdtree.h:63
KDNODE * Left
Definition: kdtree.h:62
CLUSTER * Data
Definition: kdtree.h:58

◆ INSTANTIATE_TEST_SUITE_P() [1/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Arabic  ,
MatchGroundTruth  ,
::testing::Values("script/Arabic")   
)

◆ INSTANTIATE_TEST_SUITE_P() [2/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Deva  ,
MatchGroundTruth  ,
::testing::Values("script/Devanagari")   
)

◆ INSTANTIATE_TEST_SUITE_P() [3/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Latin  ,
MatchGroundTruth  ,
::testing::Values("script/Latin")   
)

◆ INSTANTIATE_TEST_SUITE_P() [4/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Tessdata  ,
LoadLanguage  ,
::testing::Values(TESSDATA_DIR)   
)

◆ INSTANTIATE_TEST_SUITE_P() [5/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Tessdata  ,
LoadScript  ,
::testing::Values(TESSDATA_DIR)   
)

◆ INSTANTIATE_TEST_SUITE_P() [6/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Tessdata_best  ,
LoadLanguage  ,
::testing::Values(TESSDATA_DIR "_best")   
)

◆ INSTANTIATE_TEST_SUITE_P() [7/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Tessdata_best  ,
LoadScript  ,
::testing::Values(TESSDATA_DIR "_best")   
)

◆ INSTANTIATE_TEST_SUITE_P() [8/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Tessdata_fast  ,
LoadLanguage  ,
::testing::Values(TESSDATA_DIR "_fast")   
)

◆ INSTANTIATE_TEST_SUITE_P() [9/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( DISABLED_Tessdata_fast  ,
LoadScript  ,
::testing::Values(TESSDATA_DIR "_fast")   
)

◆ INSTANTIATE_TEST_SUITE_P() [10/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( Eng  ,
MatchGroundTruth  ,
::testing::Values("eng")   
)

◆ INSTANTIATE_TEST_SUITE_P() [11/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( RangeTest  ,
QRSequenceGeneratorTest  ,
::testing::Values(2, 7, 8, 9, 16, 1e2, 1e4, 1e6)   
)

◆ INSTANTIATE_TEST_SUITE_P() [12/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataBestEngEuroHebrew  ,
OSDTest  ,
::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_best"))   
)

◆ INSTANTIATE_TEST_SUITE_P() [13/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataEngEuroHebrew  ,
OSDTest  ,
::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR))   
)

◆ INSTANTIATE_TEST_SUITE_P() [14/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataFastDeva  ,
OSDTest  ,
::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/devatest.png"), ::testing::Values(TESSDATA_DIR "_fast"))   
)

◆ INSTANTIATE_TEST_SUITE_P() [15/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataFastDevaRotated270  ,
OSDTest  ,
::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/devatest-rotated-270.png"), ::testing::Values(TESSDATA_DIR "_fast"))   
)

◆ INSTANTIATE_TEST_SUITE_P() [16/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataFastEngEuroHebrew  ,
OSDTest  ,
::testing::Combine(::testing::Values(0), ::testing::Values(TESTING_DIR "/phototest.tif", TESTING_DIR "/eurotext.tif", TESTING_DIR "/hebrew.png"), ::testing::Values(TESSDATA_DIR "_fast"))   
)

◆ INSTANTIATE_TEST_SUITE_P() [17/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataFastRotated180  ,
OSDTest  ,
::testing::Combine(::testing::Values(180), ::testing::Values(TESTING_DIR "/phototest-rotated-180.png"), ::testing::Values(TESSDATA_DIR "_fast"))   
)

◆ INSTANTIATE_TEST_SUITE_P() [18/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataFastRotated270  ,
OSDTest  ,
::testing::Combine(::testing::Values(270), ::testing::Values(TESTING_DIR "/phototest-rotated-L.png"), ::testing::Values(TESSDATA_DIR "_fast"))   
)

◆ INSTANTIATE_TEST_SUITE_P() [19/19]

tesseract::INSTANTIATE_TEST_SUITE_P ( TessdataFastRotated90  ,
OSDTest  ,
::testing::Combine(::testing::Values(90), ::testing::Values(TESTING_DIR "/phototest-rotated-R.png"), ::testing::Values(TESSDATA_DIR "_fast"))   
)

◆ INT_PARAM_FLAG()

tesseract::INT_PARAM_FLAG ( debug_level  ,
,
"Level of Trainer debugging"   
)

◆ INT_VAR_H() [1/26]

tesseract::INT_VAR_H ( classify_integer_matcher_multiplier  )

◆ INT_VAR_H() [2/26]

tesseract::INT_VAR_H ( devanagari_split_debuglevel  )

◆ INT_VAR_H() [3/26]

tesseract::INT_VAR_H ( editor_image_blob_bb_color  )

◆ INT_VAR_H() [4/26]

tesseract::INT_VAR_H ( editor_image_word_bb_color  )

◆ INT_VAR_H() [5/26]

tesseract::INT_VAR_H ( editor_image_xpos  )

◆ INT_VAR_H() [6/26]

tesseract::INT_VAR_H ( editor_image_ypos  )

◆ INT_VAR_H() [7/26]

tesseract::INT_VAR_H ( editor_word_height  )

◆ INT_VAR_H() [8/26]

tesseract::INT_VAR_H ( editor_word_width  )

◆ INT_VAR_H() [9/26]

tesseract::INT_VAR_H ( editor_word_xpos  )

◆ INT_VAR_H() [10/26]

tesseract::INT_VAR_H ( editor_word_ypos  )

◆ INT_VAR_H() [11/26]

TESS_API tesseract::INT_VAR_H ( log_level  )

◆ INT_VAR_H() [12/26]

tesseract::INT_VAR_H ( pitsync_linear_version  )

◆ INT_VAR_H() [13/26]

tesseract::INT_VAR_H ( textord_debug_block  )

◆ INT_VAR_H() [14/26]

tesseract::INT_VAR_H ( textord_debug_bugs  )

◆ INT_VAR_H() [15/26]

tesseract::INT_VAR_H ( textord_debug_tabfind  )

◆ INT_VAR_H() [16/26]

tesseract::INT_VAR_H ( textord_dotmatrix_gap  )

◆ INT_VAR_H() [17/26]

tesseract::INT_VAR_H ( textord_fp_chop_error  )

◆ INT_VAR_H() [18/26]

tesseract::INT_VAR_H ( textord_lms_line_trials  )

◆ INT_VAR_H() [19/26]

tesseract::INT_VAR_H ( textord_min_blobs_in_row  )

◆ INT_VAR_H() [20/26]

tesseract::INT_VAR_H ( textord_min_xheight  )

◆ INT_VAR_H() [21/26]

tesseract::INT_VAR_H ( textord_pitch_range  )

◆ INT_VAR_H() [22/26]

tesseract::INT_VAR_H ( textord_spline_medianwin  )

◆ INT_VAR_H() [23/26]

tesseract::INT_VAR_H ( textord_spline_minblobs  )

◆ INT_VAR_H() [24/26]

tesseract::INT_VAR_H ( textord_test_x  )

◆ INT_VAR_H() [25/26]

tesseract::INT_VAR_H ( textord_test_y  )

◆ INT_VAR_H() [26/26]

tesseract::INT_VAR_H ( textord_words_veto_power  )

◆ IntCastRounded() [1/2]

int tesseract::IntCastRounded ( double  x)
inline

Definition at line 170 of file helpers.h.

170 {
171 assert(std::isfinite(x));
172 assert(x < INT_MAX);
173 assert(x > INT_MIN);
174 return x >= 0.0 ? static_cast<int>(x + 0.5) : -static_cast<int>(-x + 0.5);
175}

◆ IntCastRounded() [2/2]

int tesseract::IntCastRounded ( float  x)
inline

Definition at line 178 of file helpers.h.

178 {
179 assert(std::isfinite(x));
180 return x >= 0.0F ? static_cast<int>(x + 0.5F) : -static_cast<int>(-x + 0.5F);
181}

◆ IntersectRange()

template<typename T >
void tesseract::IntersectRange ( const T &  lower1,
const T &  upper1,
T *  lower2,
T *  upper2 
)
inline

Definition at line 141 of file helpers.h.

141 {
142 if (lower1 > *lower2) {
143 *lower2 = lower1;
144 }
145 if (upper1 < *upper2) {
146 *upper2 = upper1;
147 }
148}

◆ InterwordSpace()

int tesseract::InterwordSpace ( const std::vector< RowScratchRegisters > &  rows,
int  row_start,
int  row_end 
)

Definition at line 1654 of file paragraphs.cpp.

1654 {
1655 if (row_end < row_start + 1) {
1656 return 1;
1657 }
1658 int word_height =
1659 (rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
1660 int word_width =
1661 (rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
1662 STATS spacing_widths(0, 4 + word_width);
1663 for (int i = row_start; i < row_end; i++) {
1664 if (rows[i].ri_->num_words > 1) {
1665 spacing_widths.add(rows[i].ri_->average_interword_space, 1);
1666 }
1667 }
1668 int minimum_reasonable_space = word_height / 3;
1669 if (minimum_reasonable_space < 2) {
1670 minimum_reasonable_space = 2;
1671 }
1672 int median = spacing_widths.median();
1673 return (median > minimum_reasonable_space) ? median : minimum_reasonable_space;
1674}

◆ IsInterchangeValid()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsInterchangeValid ( const char32  ch)

Definition at line 261 of file normstrngs.cpp.

261 {
262 return IsValidCodepoint(ch) && !(ch >= 0xFDD0 && ch <= 0xFDEF) && // Noncharacters.
263 !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
264 !(ch >= 0x2FFFE && ch <= 0x2FFFF) && !(ch >= 0x3FFFE && ch <= 0x3FFFF) &&
265 !(ch >= 0x4FFFE && ch <= 0x4FFFF) && !(ch >= 0x5FFFE && ch <= 0x5FFFF) &&
266 !(ch >= 0x6FFFE && ch <= 0x6FFFF) && !(ch >= 0x7FFFE && ch <= 0x7FFFF) &&
267 !(ch >= 0x8FFFE && ch <= 0x8FFFF) && !(ch >= 0x9FFFE && ch <= 0x9FFFF) &&
268 !(ch >= 0xAFFFE && ch <= 0xAFFFF) && !(ch >= 0xBFFFE && ch <= 0xBFFFF) &&
269 !(ch >= 0xCFFFE && ch <= 0xCFFFF) && !(ch >= 0xDFFFE && ch <= 0xDFFFF) &&
270 !(ch >= 0xEFFFE && ch <= 0xEFFFF) && !(ch >= 0xFFFFE && ch <= 0xFFFFF) &&
271 !(ch >= 0x10FFFE && ch <= 0x10FFFF) &&
272 (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' || ch == '\f' || ch == '\t' ||
273 ch == '\r');
274}

◆ IsInterchangeValid7BitAscii()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsInterchangeValid7BitAscii ( const char32  ch)

Definition at line 276 of file normstrngs.cpp.

276 {
277 return IsValidCodepoint(ch) && ch <= 128 &&
278 (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' || ch == '\f' || ch == '\t' ||
279 ch == '\r');
280}

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType  type)
inline

Definition at line 90 of file equationdetect.cpp.

90 {
91 return type == EquationDetect::LEFT_INDENT || type == EquationDetect::BOTH_INDENT;
92}
type
Definition: upload.py:458

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent ( char32  ch1,
char32  ch2 
)

Definition at line 219 of file normstrngs.cpp.

219 {
220 return OCRNormalize(ch1) == OCRNormalize(ch2);
221}
char32 OCRNormalize(char32 ch)
Definition: normstrngs.cpp:208

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType  type)
inline

Definition at line 94 of file equationdetect.cpp.

94 {
95 return type == EquationDetect::RIGHT_INDENT || type == EquationDetect::BOTH_INDENT;
96}

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType  type)
inline

Definition at line 86 of file equationdetect.cpp.

86 {
87 return PTIsTextType(type) || type == PT_EQUATION;
88}
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:80

◆ IsUTF8Whitespace()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsUTF8Whitespace ( const char *  text)

Definition at line 233 of file normstrngs.cpp.

233 {
234 return SpanUTF8Whitespace(text) == strlen(text);
235}
unsigned int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:237

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32  ch)

Definition at line 223 of file normstrngs.cpp.

223 {
224 // In the range [0, 0xD800) or [0xE000, 0x10FFFF]
225 return (static_cast<uint32_t>(ch) < 0xD800) || (ch >= 0xE000 && ch <= 0x10FFFF);
226}

◆ IsWhitespace()

TESS_UNICHARSET_TRAINING_API bool tesseract::IsWhitespace ( const char32  ch)

Definition at line 228 of file normstrngs.cpp.

228 {
229 ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n", ch);
230 return u_isUWhiteSpace(static_cast<UChar32>(ch));
231}
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:57

◆ KDDelete()

void tesseract::KDDelete ( KDTREE Tree,
float  Key[],
void *  Data 
)

This routine deletes a node from Tree. The node to be deleted is specified by the Key for the node and the Data contents of the node. These two pointers must be identical to the pointers that were used for the node when it was originally stored in the tree. A node will be deleted from the tree only if its key and data pointers are identical to Key and Data respectively. The tree is re-formed by removing the affected subtree and inserting all elements but the root.

Parameters
Tree: K-D tree to delete node from
Key: key of node to be deleted
Data: data contents of node to be deleted

Definition at line 252 of file kdtree.cpp.

252 {
253 int Level;
254 KDNODE *Current;
255 KDNODE *Father;
256
257 /* initialize search at root of tree */
258 Father = &(Tree->Root);
259 Current = Father->Left;
260 Level = NextLevel(Tree, -1);
261
262 /* search tree for node to be deleted */
263 while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
264 Father = Current;
265 if (Key[Level] < Current->BranchPoint) {
266 Current = Current->Left;
267 } else {
268 Current = Current->Right;
269 }
270
271 Level = NextLevel(Tree, Level);
272 }
273
274 if (Current != nullptr) { /* if node to be deleted was found */
275 if (Current == Father->Left) {
276 Father->Left = nullptr;
277 Father->LeftBranch = Tree->KeyDesc[Level].Min;
278 } else {
279 Father->Right = nullptr;
280 Father->RightBranch = Tree->KeyDesc[Level].Max;
281 }
282
283 InsertNodes(Tree, Current->Left);
284 InsertNodes(Tree, Current->Right);
285 delete Current;
286 }
287} /* KDDelete */
#define NodeFound(N, K, D)
Definition: kdtree.cpp:31
float LeftBranch
Definition: kdtree.h:60
float RightBranch
Definition: kdtree.h:61
float BranchPoint
Definition: kdtree.h:59
std::vector< PARAM_DESC > KeyDesc
Definition: kdtree.h:82
KDNODE Root
Definition: kdtree.h:81

◆ KDNearestNeighborSearch()

void tesseract::KDNearestNeighborSearch ( KDTREE Tree,
float  Query[],
int  QuerySize,
float  MaxDistance,
int *  NumberOfResults,
void **  NBuffer,
float  DBuffer[] 
)

This routine searches the K-D tree specified by Tree and finds the QuerySize nearest neighbors of Query. All neighbors must be within MaxDistance of Query. The data contents of the nearest neighbors are placed in NBuffer and their distances from Query are placed in DBuffer.

Parameters
Tree: pointer to K-D tree to be searched
Query: pointer to query key (point in D-space)
QuerySize: number of nearest neighbors to be found
MaxDistance: all neighbors must be within this distance
NBuffer: pointer to a buffer of QuerySize entries to hold nearest neighbors
DBuffer: pointer to a buffer of QuerySize entries to hold distances from nearest neighbor to query point
NumberOfResults: [out] number of nearest neighbors actually found

Definition at line 305 of file kdtree.cpp.

306 {
307 KDTreeSearch search(Tree, Query, QuerySize);
308 search.Search(NumberOfResults, DBuffer, NBuffer);
309}
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:211

◆ KDStore()

void tesseract::KDStore ( KDTREE Tree,
float *  Key,
CLUSTER Data 
)

This routine stores Data in the K-D tree specified by Tree using Key as an access key.

Parameters
Tree: K-D tree in which data is to be stored
Key: ptr to key by which data can be retrieved
Data: ptr to data to be stored in the tree

Definition at line 215 of file kdtree.cpp.

215 {
216 auto PtrToNode = &(Tree->Root.Left);
217 auto Node = *PtrToNode;
218 auto Level = NextLevel(Tree, -1);
219 while (Node != nullptr) {
220 if (Key[Level] < Node->BranchPoint) {
221 PtrToNode = &(Node->Left);
222 if (Key[Level] > Node->LeftBranch) {
223 Node->LeftBranch = Key[Level];
224 }
225 } else {
226 PtrToNode = &(Node->Right);
227 if (Key[Level] < Node->RightBranch) {
228 Node->RightBranch = Key[Level];
229 }
230 }
231 Level = NextLevel(Tree, Level);
232 Node = *PtrToNode;
233 }
234
235 *PtrToNode = new KDNODE(Tree, Key, Data, Level);
236} /* KDStore */

◆ KDWalk()

void tesseract::KDWalk ( KDTREE Tree,
kdwalk_proc  action,
ClusteringContext context 
)

Walk a given Tree with action.

Definition at line 313 of file kdtree.cpp.

313 {
314 if (Tree->Root.Left != nullptr) {
315 Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
316 }
317}
void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level)
Definition: kdtree.cpp:466
action
Definition: upload.py:408

◆ LangLoader()

void tesseract::LangLoader ( const char *  lang,
const char *  tessdatadir 
)

Definition at line 39 of file loadlang_test.cc.

39 {
40 auto api = std::make_unique<tesseract::TessBaseAPI>();
41 ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract for $lang.";
42 api->End();
43}

◆ last()

LIST tesseract::last ( LIST  var_list)

Definition at line 153 of file oldlist.cpp.

153 {
154 while (var_list->list_rest() != NIL_LIST) {
155 var_list = var_list->list_rest();
156 }
157 return var_list;
158}

◆ LeftWordAttributes()

TESS_API void tesseract::LeftWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const std::string &  utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

Definition at line 431 of file paragraphs.cpp.

432 {
433 *is_list = false;
434 *starts_idea = false;
435 *ends_idea = false;
436 if (utf8.empty() || (werd != nullptr && werd->empty())) { // Empty
437 *ends_idea = true;
438 return;
439 }
440
441 if (unicharset && werd) { // We have a proper werd and unicharset so use it.
442 if (UniLikelyListItem(unicharset, werd)) {
443 *is_list = true;
444 *starts_idea = true;
445 *ends_idea = true;
446 }
447 if (unicharset->get_isupper(werd->unichar_id(0))) {
448 *starts_idea = true;
449 }
450 if (unicharset->get_ispunctuation(werd->unichar_id(0))) {
451 *starts_idea = true;
452 *ends_idea = true;
453 }
454 } else { // Assume utf8 is mostly ASCII
455 if (AsciiLikelyListItem(utf8)) {
456 *is_list = true;
457 *starts_idea = true;
458 }
459 int start_letter = utf8[0];
460 if (IsOpeningPunct(start_letter)) {
461 *starts_idea = true;
462 }
463 if (IsTerminalPunct(start_letter)) {
464 *ends_idea = true;
465 }
466 if (start_letter >= 'A' && start_letter <= 'Z') {
467 *starts_idea = true;
468 }
469 }
470}
bool AsciiLikelyListItem(const std::string &word)
Definition: paragraphs.cpp:282
bool empty() const
Definition: ratngs.h:284
bool get_isupper(UNICHAR_ID unichar_id) const
Definition: unicharset.h:515
bool get_ispunctuation(UNICHAR_ID unichar_id) const
Definition: unicharset.h:533

◆ lessthan()

int tesseract::lessthan ( const void *  first,
const void *  second 
)

Definition at line 374 of file polyblk.cpp.

374 {
375 const ICOORDELT *p1 = *reinterpret_cast<const ICOORDELT *const *>(first);
376 const ICOORDELT *p2 = *reinterpret_cast<const ICOORDELT *const *>(second);
377
378 if (p1->x() < p2->x()) {
379 return (-1);
380 } else if (p1->x() > p2->x()) {
381 return (1);
382 } else {
383 return (0);
384 }
385}

◆ linear_spline_baseline()

double * tesseract::linear_spline_baseline ( TO_ROW row,
TO_BLOCK block,
int32_t &  segments,
int32_t  xstarts[] 
)

Definition at line 2180 of file makerow.cpp.

2185 {
2186 int blobcount; // no of blobs
2187 int blobindex; // current blob
2188 int index1, index2; // blob numbers
2189 int blobs_per_segment; // blobs in each
2190 TBOX box; // blob box
2191 TBOX new_box; // new_it box
2192 // blobs
2193 BLOBNBOX_IT blob_it = row->blob_list();
2194 BLOBNBOX_IT new_it = blob_it; // front end
2195 float b, c; // fitted curve
2197 int32_t segment; // current segment
2198
2199 box = box_next_pre_chopped(&blob_it);
2200 xstarts[0] = box.left();
2201 blobcount = 1;
2202 while (!blob_it.at_first()) {
2203 blobcount++;
2204 box = box_next_pre_chopped(&blob_it);
2205 }
2206 segments = blobcount / textord_spline_medianwin;
2207 if (segments < 1) {
2208 segments = 1;
2209 }
2210 blobs_per_segment = blobcount / segments;
2211 // quadratic coeffs
2212 auto *coeffs = new double[segments * 3];
2213 if (textord_oldbl_debug) {
2214 tprintf(
2215 "Linear splining baseline of %d blobs at (%d,%d), into %d segments of "
2216 "%d blobs\n",
2217 blobcount, box.left(), box.bottom(), segments, blobs_per_segment);
2218 }
2219 segment = 1;
2220 for (index2 = 0; index2 < blobs_per_segment / 2; index2++) {
2221 box_next_pre_chopped(&new_it);
2222 }
2223 index1 = 0;
2224 blobindex = index2;
2225 do {
2226 blobindex += blobs_per_segment;
2227 lms.Clear();
2228 while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2229 box = box_next_pre_chopped(&blob_it);
2230 int middle = (box.left() + box.right()) / 2;
2231 lms.Add(ICOORD(middle, box.bottom()));
2232 index1++;
2233 if (index1 == blobindex - blobs_per_segment / 2 || index1 == blobcount - 1) {
2234 xstarts[segment] = box.left();
2235 }
2236 }
2237 lms.Fit(&b, &c);
2238 coeffs[segment * 3 - 3] = 0;
2239 coeffs[segment * 3 - 2] = b;
2240 coeffs[segment * 3 - 1] = c;
2241 segment++;
2242 if (segment > segments) {
2243 break;
2244 }
2245
2246 blobindex += blobs_per_segment;
2247 lms.Clear();
2248 while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2249 new_box = box_next_pre_chopped(&new_it);
2250 int middle = (new_box.left() + new_box.right()) / 2;
2251 lms.Add(ICOORD(middle, new_box.bottom()));
2252 index2++;
2253 if (index2 == blobindex - blobs_per_segment / 2 || index2 == blobcount - 1) {
2254 xstarts[segment] = new_box.left();
2255 }
2256 }
2257 lms.Fit(&b, &c);
2258 coeffs[segment * 3 - 3] = 0;
2259 coeffs[segment * 3 - 2] = b;
2260 coeffs[segment * 3 - 1] = c;
2261 segment++;
2262 } while (segment <= segments);
2263 return coeffs;
2264}
int textord_spline_medianwin
Definition: makerow.cpp:68

◆ LoadDataFromFile() [1/2]

bool tesseract::LoadDataFromFile ( const char *  filename,
GenericVector< char > *  data 
)
inline

Definition at line 233 of file genericvector.h.

233 {
234 bool result = false;
235 FILE *fp = fopen(filename, "rb");
236 if (fp != nullptr) {
237 fseek(fp, 0, SEEK_END);
238 auto size = std::ftell(fp);
239 fseek(fp, 0, SEEK_SET);
240 // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
241 if (size > 0 && size < LONG_MAX) {
242 // reserve an extra byte in case caller wants to append a '\0' character
243 data->reserve(size + 1);
244 data->resize_no_init(size);
245 result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
246 }
247 fclose(fp);
248 }
249 return result;
250}
void resize_no_init(int size)
Definition: genericvector.h:64

◆ LoadDataFromFile() [2/2]

TESS_API bool tesseract::LoadDataFromFile ( const char *  filename,
std::vector< char > *  data 
)

Definition at line 32 of file serialis.cpp.

32 {
33 bool result = false;
34 FILE *fp = fopen(filename, "rb");
35 if (fp != nullptr) {
36 fseek(fp, 0, SEEK_END);
37 auto size = std::ftell(fp);
38 fseek(fp, 0, SEEK_SET);
39 // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
40 if (size > 0 && size < LONG_MAX) {
41 // reserve an extra byte in case caller wants to append a '\0' character
42 data->reserve(size + 1);
43 data->resize(size); // TODO: optimize no init
44 result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
45 }
46 fclose(fp);
47 }
48 return result;
49}

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings ( const char *  filename,
std::vector< std::string > *  lines 
)
inline

Definition at line 32 of file fileio.h.

32 {
33 std::vector<char> data;
34 if (!LoadDataFromFile(filename, &data)) {
35 return false;
36 }
37 // TODO: optimize.
38 std::string lines_str(&data[0], data.size());
39 *lines = split(lines_str, '\n');
40 return true;
41}
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const std::string &  file_prefix)

Definition at line 148 of file commontraining.cpp.

148 {
149 ShapeTable *shape_table = nullptr;
150 std::string shape_table_file = file_prefix;
151 shape_table_file += kShapeTableFileSuffix;
152 TFile shape_fp;
153 if (shape_fp.Open(shape_table_file.c_str(), nullptr)) {
154 shape_table = new ShapeTable;
155 if (!shape_table->DeSerialize(&shape_fp)) {
156 delete shape_table;
157 shape_table = nullptr;
158 tprintf("Error: Failed to read shape table %s\n", shape_table_file.c_str());
159 } else {
160 int num_shapes = shape_table->NumShapes();
161 tprintf("Read shape table %s of %d shapes\n", shape_table_file.c_str(), num_shapes);
162 }
163 } else {
164 tprintf("Warning: No shape table file present: %s\n", shape_table_file.c_str());
165 }
166 return shape_table;
167}
bool Open(const char *filename, FileReader reader)
Definition: serialis.cpp:140
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:255
unsigned NumShapes() const
Definition: shapetable.h:248

◆ LoadTrainingData()

TESS_COMMON_TRAINING_API std::unique_ptr< MasterTrainer > tesseract::LoadTrainingData ( const char *const *  filelist,
bool  replication,
ShapeTable **  shape_table,
std::string &  file_prefix 
)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

  • Loads font info from -F option.
  • Loads xheights from -X option.
  • Loads samples from .tr files in remaining command-line args.
  • Deletes outliers and computes canonical samples.
  • If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it.

Computes canonical and cloud features. If shape_table is not nullptr, but failed to load, make a fake flat one, as shape clustering was not run.

Definition at line 200 of file commontraining.cpp.

201 {
204 file_prefix = "";
205 if (!FLAGS_D.empty()) {
206 file_prefix += FLAGS_D.c_str();
207 file_prefix += "/";
208 }
209 // If we are shape clustering (nullptr shape_table) or we successfully load
210 // a shape_table written by a previous shape clustering, then
211 // shape_analysis will be true, meaning that the MasterTrainer will replace
212 // some members of the unicharset with their fragments.
213 bool shape_analysis = false;
214 if (shape_table != nullptr) {
215 *shape_table = LoadShapeTable(file_prefix);
216 if (*shape_table != nullptr) {
217 shape_analysis = true;
218 }
219 } else {
220 shape_analysis = true;
221 }
222 auto trainer = std::make_unique<MasterTrainer>(NM_CHAR_ANISOTROPIC, shape_analysis, replication,
223 FLAGS_debug_level);
224 IntFeatureSpace fs;
226 trainer->LoadUnicharset(FLAGS_U.c_str());
227 // Get basic font information from font_properties.
228 if (!FLAGS_F.empty()) {
229 if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
230 return {};
231 }
232 }
233 if (!FLAGS_X.empty()) {
234 if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
235 return {};
236 }
237 }
238 trainer->SetFeatureSpace(fs);
239 // Load training data from .tr files in filelist (terminated by nullptr).
240 for (const char *page_name = *filelist++; page_name != nullptr; page_name = *filelist++) {
241 tprintf("Reading %s ...\n", page_name);
242 trainer->ReadTrainingSamples(page_name, feature_defs, false);
243
244 // If there is a file with [lang].[fontname].exp[num].fontinfo present,
245 // read font spacing information in to fontinfo_table.
246 int pagename_len = strlen(page_name);
247 char *fontinfo_file_name = new char[pagename_len + 7];
248 strncpy(fontinfo_file_name, page_name, pagename_len - 2); // remove "tr"
249 strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo"); // +"fontinfo"
250 trainer->AddSpacingInfo(fontinfo_file_name);
251 delete[] fontinfo_file_name;
252
253 // Load the images into memory if required by the classifier.
254 if (FLAGS_load_images) {
255 std::string image_name = page_name;
256 // Chop off the tr and replace with tif. Extension must be tif!
257 image_name.resize(image_name.length() - 2);
258 image_name += "tif";
259 trainer->LoadPageImages(image_name.c_str());
260 }
261 }
262 trainer->PostLoadCleanup();
263 // Write the master trainer if required.
264 if (!FLAGS_output_trainer.empty()) {
265 FILE *fp = fopen(FLAGS_output_trainer.c_str(), "wb");
266 if (fp == nullptr) {
267 tprintf("Can't create saved trainer data!\n");
268 } else {
269 trainer->Serialize(fp);
270 fclose(fp);
271 }
272 }
273 trainer->PreTrainingSetup();
274 if (!FLAGS_O.empty() && !trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
275 fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
276 return {};
277 }
278
279 if (shape_table != nullptr) {
280 // If we previously failed to load a shapetable, then shape clustering
281 // wasn't run so make a flat one now.
282 if (*shape_table == nullptr) {
283 *shape_table = new ShapeTable;
284 trainer->SetupFlatShapeTable(*shape_table);
285 tprintf("Flat shape table summary: %s\n", (*shape_table)->SummaryStr().c_str());
286 }
287 (*shape_table)->set_unicharset(trainer->unicharset());
288 }
289 return trainer;
290}
const int kBoostXYBuckets
const int kBoostDirBuckets
ShapeTable * LoadShapeTable(const std::string &file_prefix)
void InitIntegerFX()
Definition: intfx.cpp:54
FEATURE_DEFS_STRUCT feature_defs
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:87

◆ Logistic()

TFloat tesseract::Logistic ( TFloat  x)
inline

Definition at line 59 of file functions.h.

59 {
60 if (x < 0) {
61 return 1 - Logistic(-x);
62 }
63 x *= kScaleFactor;
64 auto index = static_cast<unsigned>(x);
65 if (index >= (kTableSize - 1)) {
66 return 1;
67 }
68 TFloat l0 = LogisticTable[index];
69 TFloat l1 = LogisticTable[index + 1];
70 // Linear interpolation.
71 return l0 + (l1 - l0) * (x - index);
72}
TFloat Logistic(TFloat x)
Definition: functions.h:59
constexpr TFloat kScaleFactor
Definition: functions.h:37
const TFloat LogisticTable[]
Definition: functions.cpp:4102
constexpr int kTableSize
Definition: functions.h:35

◆ loop_bounding_box()

int16_t tesseract::loop_bounding_box ( CRACKEDGE *&  start,
ICOORD botleft,
ICOORD topright 
)

Definition at line 117 of file edgloop.cpp.

120 {
121 int16_t length; // length of loop
122 int16_t leftmost; // on top row
123 CRACKEDGE *edgept; // current point
124 CRACKEDGE *realstart; // topleft start
125
126 edgept = start;
127 realstart = start;
128 botleft = topright = ICOORD(edgept->pos.x(), edgept->pos.y());
129 leftmost = edgept->pos.x();
130 length = 0; // count length
131 do {
132 edgept = edgept->next;
133 if (edgept->pos.x() < botleft.x()) {
134 // get bounding box
135 botleft.set_x(edgept->pos.x());
136 } else if (edgept->pos.x() > topright.x()) {
137 topright.set_x(edgept->pos.x());
138 }
139 if (edgept->pos.y() < botleft.y()) {
140 // get bounding box
141 botleft.set_y(edgept->pos.y());
142 } else if (edgept->pos.y() > topright.y()) {
143 realstart = edgept;
144 leftmost = edgept->pos.x();
145 topright.set_y(edgept->pos.y());
146 } else if (edgept->pos.y() == topright.y() && edgept->pos.x() < leftmost) {
147 // leftmost on line
148 leftmost = edgept->pos.x();
149 realstart = edgept;
150 }
151 length++; // count elements
152 } while (edgept != start);
153 start = realstart; // shift it to topleft
154 return length;
155}
void set_y(TDimension yin)
rewrite function
Definition: points.h:71

◆ LOSTBLOCKLINE()

constexpr ERRCODE tesseract::LOSTBLOCKLINE ( "Can't find rectangle for line"  )
constexpr

◆ make_baseline_spline()

void tesseract::make_baseline_spline ( TO_ROW row,
TO_BLOCK block 
)

Definition at line 2053 of file makerow.cpp.

2054 {
2055 double *coeffs; // quadratic coeffs
2056 int32_t segments; // no of segments
2057
2058 // spline boundaries
2059 auto *xstarts = new int32_t[row->blob_list()->length() + 1];
2060 if (segment_baseline(row, block, segments, xstarts) && !textord_straight_baselines &&
2062 coeffs = linear_spline_baseline(row, block, segments, xstarts);
2063 } else {
2064 xstarts[1] = xstarts[segments];
2065 segments = 1;
2066 coeffs = new double[3];
2067 coeffs[0] = 0;
2068 coeffs[1] = row->line_m();
2069 coeffs[2] = row->line_c();
2070 }
2071 row->baseline = QSPLINE(segments, xstarts, coeffs);
2072 delete[] coeffs;
2073 delete[] xstarts;
2074}
bool textord_parallel_baselines
Definition: makerow.cpp:53
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
Definition: makerow.cpp:2180
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition: makerow.cpp:2083
float line_c() const
Definition: blobbox.h:583

◆ make_edgept()

EDGEPT * tesseract::make_edgept ( TDimension  x,
TDimension  y,
EDGEPT next,
EDGEPT prev 
)

Definition at line 138 of file split.cpp.

138 {
139 EDGEPT *this_edgept;
140 /* Create point */
141 this_edgept = new EDGEPT;
142 this_edgept->pos.x = x;
143 this_edgept->pos.y = y;
144 // Now deal with the src_outline steps.
145 C_OUTLINE *prev_ol = prev->src_outline;
146 if (prev_ol != nullptr && prev->next == next) {
147 // Compute the fraction of the segment that is being cut.
148 FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
149 FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
150 double cut_fraction = target_vec.length() / segment_vec.length();
151 // Get the start and end at the step level.
152 ICOORD step_start = prev_ol->position_at_index(prev->start_step);
153 int end_step = prev->start_step + prev->step_count;
154 int step_length = prev_ol->pathlength();
155 ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
156 ICOORD step_vec = step_end - step_start;
157 double target_length = step_vec.length() * cut_fraction;
158 // Find the point on the segment that gives the length nearest to target.
159 int best_step = prev->start_step;
160 ICOORD total_step(0, 0);
161 double best_dist = target_length;
162 for (int s = prev->start_step; s < end_step; ++s) {
163 total_step += prev_ol->step(s % step_length);
164 double dist = fabs(target_length - total_step.length());
165 if (dist < best_dist) {
166 best_dist = dist;
167 best_step = s + 1;
168 }
169 }
170 // The new point is an intermediate point.
171 this_edgept->src_outline = prev_ol;
172 this_edgept->step_count = end_step - best_step;
173 this_edgept->start_step = best_step % step_length;
174 prev->step_count = best_step - prev->start_step;
175 } else {
176 // The new point is poly only.
177 this_edgept->src_outline = nullptr;
178 this_edgept->step_count = 0;
179 this_edgept->start_step = 0;
180 }
181 /* Hook it up */
182 this_edgept->next = next;
183 this_edgept->prev = prev;
184 prev->next = this_edgept;
185 next->prev = this_edgept;
186 /* Set up vec entries */
187 this_edgept->vec.x = this_edgept->next->pos.x - x;
188 this_edgept->vec.y = this_edgept->next->pos.y - y;
189 this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
190 this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
191 return this_edgept;
192}
EDGEPT * prev
Definition: blobs.h:201
TPOINT pos
Definition: blobs.h:194
VECTOR vec
Definition: blobs.h:195
C_OUTLINE * src_outline
Definition: blobs.h:202
ICOORD position_at_index(int index) const
Definition: coutln.h:152
float length() const
find length
Definition: points.h:84

◆ make_first_baseline()

void tesseract::make_first_baseline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
QSPLINE spline,
QSPLINE baseline,
float  jumplimit 
)

Definition at line 482 of file oldbasel.cpp.

490 {
491 int leftedge; /*left edge of line */
492 int rightedge; /*right edge of line */
493 int blobindex; /*current blob */
494 int segment; /*current segment */
495 float prevy, thisy, nexty; /*3 y coords */
496 float y1, y2, y3; /*3 smooth blobs */
497 float maxmax, minmin; /*absolute limits */
498 int x2 = 0; /*right edge of old y3 */
499 int ycount; /*no of ycoords in use */
500 float yturns[SPLINESIZE]; /*y coords of turn pts */
501 int xturns[SPLINESIZE]; /*xcoords of turn pts */
502 int xstarts[SPLINESIZE + 1];
503 int segments; // no of segments
504 ICOORD shift; // shift of spline
505
506 prevy = 0;
507 /*left edge of row */
508 leftedge = blobcoords[0].left();
509 /*right edge of line */
510 rightedge = blobcoords[blobcount - 1].right();
511 if (spline == nullptr /*no given spline */
512 || spline->segments < 3 /*or trivial */
513 /*or too non-overlap */
514 || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) ||
515 spline->xcoords[spline->segments - 1] < rightedge - MAXOVERLAP * (rightedge - leftedge)) {
516 if (textord_oldbl_paradef) {
517 return; // use default
518 }
519 xstarts[0] = blobcoords[0].left() - 1;
520 for (blobindex = 0; blobindex < blobcount; blobindex++) {
521 xcoords[blobindex] = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
522 ycoords[blobindex] = blobcoords[blobindex].bottom();
523 }
524 xstarts[1] = blobcoords[blobcount - 1].right() + 1;
525 segments = 1; /*no of segments */
526
527 /*linear */
528 *baseline = QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
529
530 if (blobcount >= 3) {
531 y1 = y2 = y3 = 0.0f;
532 ycount = 0;
533 segment = 0; /*no of segments */
534 maxmax = minmin = 0.0f;
535 thisy = ycoords[0] - baseline->y(xcoords[0]);
536 nexty = ycoords[1] - baseline->y(xcoords[1]);
537 for (blobindex = 2; blobindex < blobcount; blobindex++) {
538 prevy = thisy; /*shift ycoords */
539 thisy = nexty;
540 nexty = ycoords[blobindex] - baseline->y(xcoords[blobindex]);
541 /*middle of smooth y */
542 if (ABS(thisy - prevy) < jumplimit && ABS(thisy - nexty) < jumplimit) {
543 y1 = y2; /*shift window */
544 y2 = y3;
545 y3 = thisy; /*middle point */
546 ycount++;
547 /*local max */
548 if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
549 /*local min */
550 || (y1 > y2 && y2 <= y3))) {
551 if (segment < SPLINESIZE - 2) {
552 /*turning pt */
553 xturns[segment] = x2;
554 yturns[segment] = y2;
555 segment++; /*no of spline segs */
556 }
557 }
558 if (ycount == 1) {
559 maxmax = minmin = y3; /*initialise limits */
560 } else {
561 if (y3 > maxmax) {
562 maxmax = y3; /*biggest max */
563 }
564 if (y3 < minmin) {
565 minmin = y3; /*smallest min */
566 }
567 }
568 /*possible turning pt */
569 x2 = blobcoords[blobindex - 1].right();
570 }
571 }
572
573 jumplimit *= 1.2f;
574 /*must be wavy */
575 if (maxmax - minmin > jumplimit) {
576 ycount = segment; /*no of segments */
577 for (blobindex = 0, segment = 1; blobindex < ycount; blobindex++) {
578 if (yturns[blobindex] > minmin + jumplimit || yturns[blobindex] < maxmax - jumplimit) {
579 /*significant peak */
580 if (segment == 1 || yturns[blobindex] > prevy + jumplimit ||
581 yturns[blobindex] < prevy - jumplimit) {
582 /*different to previous */
583 xstarts[segment] = xturns[blobindex];
584 segment++;
585 prevy = yturns[blobindex];
586 }
587 /*bigger max */
588 else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
589 /*smaller min */
590 || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
591 xstarts[segment - 1] = xturns[blobindex];
592 /*improved previous */
593 prevy = yturns[blobindex];
594 }
595 }
596 }
597 xstarts[segment] = blobcoords[blobcount - 1].right() + 1;
598 segments = segment; /*no of segments */
599 /*linear */
600 *baseline = QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
601 }
602 }
603 } else {
604 *baseline = *spline; /*copy it */
605 shift =
606 ICOORD(0, static_cast<int16_t>(blobcoords[0].bottom() - spline->y(blobcoords[0].right())));
607 baseline->move(shift);
608 }
609}
#define MAXOVERLAP
Definition: oldbasel.cpp:64
#define SPLINESIZE
Definition: oldbasel.cpp:69

◆ make_first_xheight()

void tesseract::make_first_xheight ( TO_ROW row,
TBOX  blobcoords[],
int  lineheight,
int  init_lineheight,
int  blobcount,
QSPLINE baseline,
float  jumplimit 
)

Definition at line 1421 of file oldbasel.cpp.

1429 {
1430 STATS heightstat(0, HEIGHTBUCKETS - 1);
1431 int lefts[HEIGHTBUCKETS];
1432 int rights[HEIGHTBUCKETS];
1433 int modelist[MODENUM];
1434 int blobindex;
1435 int mode_count; // blobs to count in thr
1436 int sign_bit;
1437 int mode_threshold;
1438 const int kBaselineTouch = 2; // This really should change with resolution.
1439 const int kGoodStrength = 8; // Strength of baseline-touching heights.
1440 const float kMinHeight = 0.25; // Min fraction of lineheight to use.
1441
1442 sign_bit = row->xheight > 0 ? 1 : -1;
1443
1444 memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
1445 memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
1446 mode_count = 0;
1447 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1448 int xcenter = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
1449 float base = baseline->y(xcenter);
1450 float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom());
1451 int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1452 int height = static_cast<int>(blobcoords[blobindex].top() - base + 0.5);
1453 if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) {
1454 if (height > lineheight * oldbl_xhfract && height > textord_min_xheight) {
1455 heightstat.add(height, strength);
1456 if (height < HEIGHTBUCKETS) {
1457 if (xcenter > rights[height]) {
1458 rights[height] = xcenter;
1459 }
1460 if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) {
1461 lefts[height] = xcenter;
1462 }
1463 }
1464 }
1465 mode_count += strength;
1466 }
1467 }
1468
1469 mode_threshold = static_cast<int>(blobcount * 0.1);
1470 if (oldbl_dot_error_size > 1 || oldbl_xhfix) {
1471 mode_threshold = static_cast<int>(mode_count * 0.1);
1472 }
1473
1474 if (textord_oldbl_debug) {
1475 tprintf("blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, mode_threshold);
1476 }
1477 find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
1478 if (textord_oldbl_debug) {
1479 for (blobindex = 0; blobindex < MODENUM; blobindex++) {
1480 tprintf("mode[%d]=%d ", blobindex, modelist[blobindex]);
1481 }
1482 tprintf("\n");
1483 }
1484 pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
1485
1486 if (textord_oldbl_debug) {
1487 tprintf("Output xheight=%g\n", row->xheight);
1488 }
1489 if (row->xheight < 0 && textord_oldbl_debug) {
1490 tprintf("warning: Row Line height < 0; %4.2f\n", row->xheight);
1491 }
1492
1493 if (sign_bit < 0) {
1494 row->xheight = -row->xheight;
1495 }
1496}
#define HEIGHTBUCKETS
Definition: oldbasel.cpp:66
#define MODENUM
Definition: oldbasel.cpp:67
void find_top_modes(STATS *stats, int statnum, int modelist[], int modenum)
Definition: oldbasel.cpp:1508
void pick_x_height(TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)
Definition: oldbasel.cpp:1547

◆ make_height_array()

int * tesseract::make_height_array ( TBOX  blobcoords[],
int  blobcount,
QSPLINE baseline 
)

◆ make_holed_baseline()

void tesseract::make_holed_baseline ( TBOX  blobcoords[],
int  blobcount,
QSPLINE spline,
QSPLINE baseline,
float  gradient 
)

Definition at line 619 of file oldbasel.cpp.

625 {
626 int leftedge; /*left edge of line */
627 int rightedge; /*right edge of line */
628 int blobindex; /*current blob */
629 float x; // centre of row
630 ICOORD shift; // shift of spline
631
632 tesseract::DetLineFit lms; // straight baseline
633 int32_t xstarts[2]; // straight line
634 double coeffs[3];
635 float c; // line parameter
636
637 /*left edge of row */
638 leftedge = blobcoords[0].left();
639 /*right edge of line */
640 rightedge = blobcoords[blobcount - 1].right();
641 for (blobindex = 0; blobindex < blobcount; blobindex++) {
642 lms.Add(ICOORD((blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2,
643 blobcoords[blobindex].bottom()));
644 }
645 lms.ConstrainedFit(gradient, &c);
646 xstarts[0] = leftedge;
647 xstarts[1] = rightedge;
648 coeffs[0] = 0;
649 coeffs[1] = gradient;
650 coeffs[2] = c;
651 *baseline = QSPLINE(1, xstarts, coeffs);
652 if (spline != nullptr /*a spline was given */
653 && spline->segments >= 3 /*and it is non-trivial */
654 /*and it overlaps enough */
655 && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) &&
656 spline->xcoords[spline->segments - 1] >= rightedge - MAXOVERLAP * (rightedge - leftedge)) {
657 *baseline = *spline; /*copy it */
658 x = (leftedge + rightedge) / 2.0;
659 shift = ICOORD(0, static_cast<int16_t>(gradient * x + c - spline->y(x)));
660 baseline->move(shift);
661 }
662}

◆ make_illegal_segment()

void tesseract::make_illegal_segment ( FPSEGPT_LIST *  prev_list,
TBOX  blob_box,
BLOBNBOX_IT  blob_it,
int16_t  region_index,
int16_t  pitch,
int16_t  pitch_error,
FPSEGPT_LIST *  seg_list 
)

Definition at line 353 of file pitsync1.cpp.

361 {
362 int16_t x; // current coord
363 int16_t min_x = 0; // in this region
364 int16_t max_x = 0;
365 int16_t offset; // dist to edge
366 FPSEGPT *segpt; // segment point
367 FPSEGPT *prevpt; // previous point
368 float best_cost; // best path
369 FPSEGPT_IT segpt_it = seg_list; // iterator
370 // previous points
371 FPSEGPT_IT prevpt_it = prev_list;
372
373 best_cost = FLT_MAX;
374 for (prevpt_it.mark_cycle_pt(); !prevpt_it.cycled_list(); prevpt_it.forward()) {
375 prevpt = prevpt_it.data();
376 if (prevpt->cost_function() < best_cost) {
377 // find least
378 best_cost = prevpt->cost_function();
379 min_x = prevpt->position();
380 max_x = min_x; // limits on coords
381 } else if (prevpt->cost_function() == best_cost) {
382 max_x = prevpt->position();
383 }
384 }
385 min_x += pitch - pitch_error;
386 max_x += pitch + pitch_error;
387 for (x = min_x; x <= max_x; x++) {
388 while (x > blob_box.right()) {
389 blob_box = box_next(&blob_it);
390 }
391 offset = x - blob_box.left();
392 if (blob_box.right() - x < offset) {
393 offset = blob_box.right() - x;
394 }
395 segpt = new FPSEGPT(x, false, offset, region_index, pitch, pitch_error, prev_list);
396 if (segpt->previous() != nullptr) {
397 ASSERT_HOST(offset >= 0);
398 fprintf(stderr, "made fake at %d\n", x);
399 // make one up
400 segpt_it.add_after_then_move(segpt);
401 segpt->faked = true;
402 segpt->fake_count++;
403 } else {
404 delete segpt;
405 }
406 }
407}
int16_t fake_count
Definition: pitsync1.h:70
int32_t position()
Definition: pitsync1.h:49

◆ make_initial_textrows()

void tesseract::make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 254 of file makerow.cpp.

259 {
260 TO_ROW_IT row_it = block->get_rows();
261
262#ifndef GRAPHICS_DISABLED
263 ScrollView::Color colour; // of row
264
265 if (textord_show_initial_rows && testing_on) {
266 if (to_win == nullptr) {
267 create_to_win(page_tr);
268 }
269 }
270#endif
271 // guess skew
272 assign_blobs_to_rows(block, nullptr, 0, true, true, textord_show_initial_rows && testing_on);
273 row_it.move_to_first();
274 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
275 fit_lms_line(row_it.data());
276 }
277#ifndef GRAPHICS_DISABLED
278 if (textord_show_initial_rows && testing_on) {
279 colour = ScrollView::RED;
280 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
281 plot_to_row(row_it.data(), colour, rotation);
282 colour = static_cast<ScrollView::Color>(colour + 1);
283 if (colour > ScrollView::MAGENTA) {
284 colour = ScrollView::RED;
285 }
286 }
287 }
288#endif
289}
bool textord_show_initial_rows
Definition: makerow.cpp:47
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:296
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:89

◆ make_pseudo_word()

PAGE_RES_IT * tesseract::make_pseudo_word ( PAGE_RES *  page_res,
const TBOX &  selection_box 
)

Definition at line 38 of file werdit.cpp.

38 {
39 PAGE_RES_IT pr_it(page_res);
40 C_BLOB_LIST new_blobs; // list of gathered blobs
41 C_BLOB_IT new_blob_it = &new_blobs; // iterator
42
43 for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
44 WERD *word = word_res->word;
45 if (word->bounding_box().overlap(selection_box)) {
46 C_BLOB_IT blob_it(word->cblob_list());
47 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
48 C_BLOB *blob = blob_it.data();
49 if (blob->bounding_box().overlap(selection_box)) {
50 new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
51 }
52 }
53 if (!new_blobs.empty()) {
54 WERD *pseudo_word = new WERD(&new_blobs, 1, nullptr);
55 word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
56 auto *it = new PAGE_RES_IT(page_res);
57 while (it->word() != word_res && it->word() != nullptr) {
58 it->forward();
59 }
60 ASSERT_HOST(it->word() == word_res);
61 return it;
62 }
63 }
64 }
65 return nullptr;
66}
bool overlap(const TBOX &box) const
Definition: rect.h:363
TBOX bounding_box() const
Definition: stepblob.cpp:250

◆ make_real_word()

WERD * tesseract::make_real_word ( BLOBNBOX_IT *  box_it,
int32_t  blobcount,
bool  bol,
uint8_t  blanks 
)

Definition at line 559 of file wordseg.cpp.

563 {
564 C_OUTLINE_IT cout_it;
565 C_BLOB_LIST cblobs;
566 C_BLOB_IT cblob_it = &cblobs;
567
568 for (int blobindex = 0; blobindex < blobcount; blobindex++) {
569 auto bblob = box_it->extract();
570 if (bblob->joined_to_prev()) {
571 auto cblob = bblob->remove_cblob();
572 if (cblob != nullptr) {
573 cout_it.set_to_list(cblob_it.data()->out_list());
574 cout_it.move_to_last();
575 cout_it.add_list_after(cblob->out_list());
576 delete cblob;
577 }
578 } else {
579 auto cblob = bblob->remove_cblob();
580 if (cblob != nullptr) {
581 cblob_it.add_after_then_move(cblob);
582 }
583 }
584 delete bblob;
585 box_it->forward(); // next one
586 }
587
588 if (blanks < 1) {
589 blanks = 1;
590 }
591
592 auto word = new WERD(&cblobs, blanks, nullptr);
593
594 if (bol) {
595 word->set_flag(W_BOL, true);
596 }
597 if (box_it->at_first()) {
598 word->set_flag(W_EOL, true); // at end of line
599 }
600
601 return word;
602}

◆ make_real_words()

void tesseract::make_real_words ( tesseract::Textord *  textord,
TO_BLOCK *  block,
FCOORD  rotation 
)

Definition at line 473 of file wordseg.cpp.

476 {
477 TO_ROW *row; // current row
478 TO_ROW_IT row_it = block->get_rows();
479 ROW *real_row = nullptr; // output row
480 ROW_IT real_row_it = block->block->row_list();
481
482 if (row_it.empty()) {
483 return; // empty block
484 }
485 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
486 row = row_it.data();
487 if (row->blob_list()->empty() && !row->rep_words.empty()) {
488 real_row = make_rep_words(row, block);
489 } else if (!row->blob_list()->empty()) {
490 // In a fixed pitch document, some lines may be detected as fixed pitch
491 // while others don't, and will go through different path.
492 // For non-space delimited language like CJK, fixed pitch chop always
493 // leave the entire line as one word. We can force consistent chopping
494 // with force_make_prop_words flag.
495 POLY_BLOCK *pb = block->block->pdblk.poly_block();
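496 if (textord_chopper_test) {
497 real_row = textord->make_blob_words(row, rotation);
498 } else if (textord_force_make_prop_words || (pb != nullptr && !pb->IsText()) ||
499 row->pitch_decision == PITCH_DEF_PROP || row->pitch_decision == PITCH_CORR_PROP) {
500 real_row = textord->make_prop_words(row, rotation);
501 } else if (row->pitch_decision == PITCH_DEF_FIXED ||
502 row->pitch_decision == PITCH_CORR_FIXED) {
503 real_row = fixed_pitch_words(row, rotation);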
504 } else {
505 ASSERT_HOST(false);
506 }
507 }
508 if (real_row != nullptr) {
509 // put row in block
510 real_row_it.add_after_then_move(real_row);
511 }
512 }
513 block->block->set_stats(block->fixed_pitch == 0, static_cast<int16_t>(block->kern_size),
514 static_cast<int16_t>(block->space_size),
515 static_cast<int16_t>(block->fixed_pitch));
516 block->block->check_pitch();
517}
bool textord_force_make_prop_words
Definition: wordseg.cpp:41
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
Definition: fpchop.cpp:65
bool textord_chopper_test
Definition: wordseg.cpp:42
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
Definition: wordseg.cpp:526
void check_pitch()
check proportional
Definition: ocrblock.cpp:164
void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
Definition: ocrblock.h:56
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:844
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:1118

◆ make_rep_words()

ROW * tesseract::make_rep_words ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 526 of file wordseg.cpp.

529 {
530 ROW *real_row; // output row
531 TBOX word_box; // bounding box
532 // iterator
533 WERD_IT word_it = &row->rep_words;
534
535 if (word_it.empty()) {
536 return nullptr;
537 }
538 word_box = word_it.data()->bounding_box();
539 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
540 word_box += word_it.data()->bounding_box();
541 }
542 row->xheight = block->xheight;
543 real_row =
544 new ROW(row, static_cast<int16_t>(block->kern_size), static_cast<int16_t>(block->space_size));
545 word_it.set_to_list(real_row->word_list());
546 // put words in row
547 word_it.add_list_after(&row->rep_words);
548 real_row->recalc_bounding_box();
549 return real_row;
550}

◆ make_rows()

float tesseract::make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 229 of file makerow.cpp.

229 {
230 float port_m; // global skew
231 float port_err; // global noise
232 TO_BLOCK_IT block_it; // iterator
233
234 block_it.set_to_list(port_blocks);
235 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
236 make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), !textord_test_landscape);
237 }
238 // compute globally
239 compute_page_skew(port_blocks, port_m, port_err);
240 block_it.set_to_list(port_blocks);
241 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
242 cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
243 block_it.data()->block->pdblk.bounding_box().left(),
245 }
246 return port_m; // global skew
247}
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:563
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:254
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:315
bool textord_test_landscape
Definition: makerow.cpp:52

◆ make_single_row()

float tesseract::make_single_row ( ICOORD  page_tr,
bool  allow_sub_blobs,
TO_BLOCK *  block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 190 of file makerow.cpp.

191 {
192 BLOBNBOX_IT blob_it = &block->blobs;
193 TO_ROW_IT row_it = block->get_rows();
194
195 // Include all the small blobs and large blobs.
196 blob_it.add_list_after(&block->small_blobs);
197 blob_it.add_list_after(&block->noise_blobs);
198 blob_it.add_list_after(&block->large_blobs);
199 if (block->blobs.singleton() && allow_sub_blobs) {
200 blob_it.move_to_first();
201 float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
202 if (size > block->line_size) {
203 block->line_size = size;
204 }
205 } else if (block->blobs.empty()) {
206 // Make a fake blob.
207 C_BLOB *blob = C_BLOB::FakeBlob(block->block->pdblk.bounding_box());
208 // The blobnbox owns the blob.
209 auto *bblob = new BLOBNBOX(blob);
210 blob_it.add_after_then_move(bblob);
211 }
212 MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
213 // Fit an LMS line to the rows.
214 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
215 fit_lms_line(row_it.data());
216 }
217 float gradient;
218 float fit_error;
219 // Compute the skew based on the fitted line.
220 compute_page_skew(blocks, gradient, fit_error);
221 return gradient;
222}

◆ make_single_word()

void tesseract::make_single_word ( bool  one_blob,
TO_ROW_LIST *  rows,
ROW_LIST *  real_rows 
)

Definition at line 53 of file wordseg.cpp.

53 {
54 TO_ROW_IT to_row_it(rows);
55 ROW_IT row_it(real_rows);
56 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); to_row_it.forward()) {
57 TO_ROW *row = to_row_it.data();
58 // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
59 // to create the word.
60 C_BLOB_LIST cblobs;
61 C_BLOB_IT cblob_it(&cblobs);
62 BLOBNBOX_IT box_it(row->blob_list());
63 for (; !box_it.empty(); box_it.forward()) {
64 BLOBNBOX *bblob = box_it.extract();
65 if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
66 auto cblob = bblob->remove_cblob();
67 if (cblob != nullptr) {
68 C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
69 cout_it.move_to_last();
70 cout_it.add_list_after(cblob->out_list());
71 delete cblob;
72 }
73 } else {
74 auto cblob = bblob->remove_cblob();
75 if (cblob != nullptr) {
76 cblob_it.add_after_then_move(cblob);
77 }
78 }
79 delete bblob;
80 }
81 // Convert the TO_ROW to a ROW.
82 ROW *real_row =
83 new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
84 WERD_IT word_it(real_row->word_list());
85 WERD *word = new WERD(&cblobs, 0, nullptr);
86 word->set_flag(W_BOL, true);
87 word->set_flag(W_EOL, true);
88 word->set_flag(W_DONT_CHOP, one_blob);
89 word_it.add_after_then_move(word);
90 row_it.add_after_then_move(real_row);
91 }
92}
C_BLOB * remove_cblob()
Definition: blobbox.h:280

◆ make_words()

void tesseract::make_words ( tesseract::Textord *  textord,
ICOORD  page_tr,
float  gradient,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

make_words

Arrange the blobs into words.

Definition at line 99 of file wordseg.cpp.

103 { // output list
104 TO_BLOCK_IT block_it; // iterator
105 TO_BLOCK *block; // current block
106
107 if (textord->use_cjk_fp_model()) {
108 compute_fixed_pitch_cjk(page_tr, port_blocks);
109 } else {
110 compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
112 }
113 textord->to_spacing(page_tr, port_blocks);
114 block_it.set_to_list(port_blocks);
115 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
116 block = block_it.data();
117 make_real_words(textord, block, FCOORD(1.0f, 0.0f));
118 }
119}
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition: topitch.cpp:75
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
Definition: wordseg.cpp:473
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1103
bool use_cjk_fp_model() const
Definition: textord.h:98
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:45

◆ MakeAsciiRowInfos()

void tesseract::MakeAsciiRowInfos ( const TextAndModel *  row_infos,
int  n,
std::vector< RowInfo > *  output 
)

Definition at line 94 of file paragraphs_test.cc.

94 {
95 output->clear();
96 RowInfo info;
97 for (int i = 0; i < n; i++) {
98 AsciiToRowInfo(row_infos[i].ascii, i, &info);
99 output->push_back(info);
100 }
101}
void AsciiToRowInfo(const char *text, int row_number, RowInfo *info)

◆ MakeBoxFileStr()

TESS_API void tesseract::MakeBoxFileStr ( const char *  unichar_str,
const TBOX &  box,
int  page_num,
std::string &  box_str 
)

Definition at line 280 of file boxread.cpp.

280 {
281 box_str = unichar_str;
282 box_str += " " + std::to_string(box.left());
283 box_str += " " + std::to_string(box.bottom());
284 box_str += " " + std::to_string(box.right());
285 box_str += " " + std::to_string(box.top());
286 box_str += " " + std::to_string(page_num);
287}

◆ MakeClusterer()

TESS_API CLUSTERER * tesseract::MakeClusterer ( int16_t  SampleSize,
const PARAM_DESC  ParamDesc[] 
)

This routine creates a new clusterer data structure, initializes it, and returns a pointer to it.

Parameters
SampleSize  number of dimensions in feature space
ParamDesc  description of each dimension
Returns
pointer to the new clusterer data structure

Definition at line 1440 of file cluster.cpp.

1440 {
1441 int i;
1442
1443 // allocate main clusterer data structure and init simple fields
1444 auto Clusterer = new CLUSTERER;
1445 Clusterer->SampleSize = SampleSize;
1446 Clusterer->NumberOfSamples = 0;
1447 Clusterer->NumChar = 0;
1448
1449 // init fields which will not be used initially
1450 Clusterer->Root = nullptr;
1451 Clusterer->ProtoList = NIL_LIST;
1452
1453 // maintain a copy of param descriptors in the clusterer data structure
1454 Clusterer->ParamDesc = new PARAM_DESC[SampleSize];
1455 for (i = 0; i < SampleSize; i++) {
1456 Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular;
1457 Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential;
1458 Clusterer->ParamDesc[i].Min = ParamDesc[i].Min;
1459 Clusterer->ParamDesc[i].Max = ParamDesc[i].Max;
1460 Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
1461 Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2;
1462 Clusterer->ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
1463 }
1464
1465 // allocate a kd tree to hold the samples
1466 Clusterer->KDTree = MakeKDTree(SampleSize, ParamDesc);
1467
1468 // Initialize cache of histogram buckets to minimize recomputing them.
1469 for (auto &d : Clusterer->bucket_cache) {
1470 for (auto &c : d) {
1471 c = nullptr;
1472 }
1473 }
1474
1475 return Clusterer;
1476} // MakeClusterer
KDTREE * MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[])
Definition: kdtree.cpp:186
int16_t SampleSize
Definition: cluster.h:92

◆ MakeKDTree()

KDTREE * tesseract::MakeKDTree ( int16_t  KeySize,
const PARAM_DESC  KeyDesc[] 
)
Returns
a new KDTREE based on the specified parameters.
Parameters
KeySize  number of dimensions in the K-D tree
KeyDesc  array of params to describe key dimensions

Definition at line 186 of file kdtree.cpp.

186 {
187 auto *KDTree = new KDTREE(KeySize);
188 for (int i = 0; i < KeySize; i++) {
189 KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
190 KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
191 if (KeyDesc[i].Circular) {
192 KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
193 KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
194 KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
195 KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
196 KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
197 } else {
198 KDTree->KeyDesc[i].Min = MINSEARCH;
199 KDTree->KeyDesc[i].Max = MAXSEARCH;
200 }
201 }
202 KDTree->Root.Left = nullptr;
203 KDTree->Root.Right = nullptr;
204 return KDTree;
205}
#define MAXSEARCH
Definition: kdtree.cpp:37
#define MINSEARCH
Definition: kdtree.cpp:36

◆ MakeSample()

TESS_API SAMPLE * tesseract::MakeSample ( CLUSTERER *  Clusterer,
const float *  Feature,
uint32_t  CharID 
)

This routine creates a new sample data structure to hold the specified feature. This sample is added to the clusterer data structure (so that it knows which samples are to be clustered later), and a pointer to the sample is returned to the caller.

Parameters
Clusterer  clusterer data structure to add sample to
Feature  feature to be added to clusterer
CharID  unique ident. of char that sample came from
Returns
Pointer to the new sample data structure

Definition at line 1491 of file cluster.cpp.

1491 {
1492 int i;
1493
1494 // see if the samples have already been clustered - if so trap an error
1495 // Can't add samples after they have been clustered.
1496 ASSERT_HOST(Clusterer->Root == nullptr);
1497
1498 // allocate the new sample and initialize it
1499 auto Sample = new SAMPLE(Clusterer->SampleSize);
1500 Sample->Clustered = false;
1501 Sample->Prototype = false;
1502 Sample->SampleCount = 1;
1503 Sample->Left = nullptr;
1504 Sample->Right = nullptr;
1505 Sample->CharID = CharID;
1506
1507 for (i = 0; i < Clusterer->SampleSize; i++) {
1508 Sample->Mean[i] = Feature[i];
1509 }
1510
1511 // add the sample to the KD tree - keep track of the total # of samples
1512 Clusterer->NumberOfSamples++;
1513 KDStore(Clusterer->KDTree, &Sample->Mean[0], Sample);
1514 if (CharID >= Clusterer->NumChar) {
1515 Clusterer->NumChar = CharID + 1;
1516 }
1517
1518 // execute hook for monitoring clustering operation
1519 // (*SampleCreationHook)(Sample);
1520
1521 return (Sample);
1522} // MakeSample
CLUSTER SAMPLE
Definition: cluster.h:51
uint32_t NumChar
Definition: cluster.h:98
int32_t NumberOfSamples
Definition: cluster.h:94

◆ MakeTempProtoPerm()

int tesseract::MakeTempProtoPerm ( void *  item1,
void *  item2 
)

This routine converts TempProto to be permanent if its proto id is used by the configuration specified in ProtoKey.

Parameters
item1  (TEMP_PROTO) temporary proto to compare to key
item2  (PROTO_KEY) defines which protos to make permanent

Globals: none

Returns
true if TempProto is converted, false otherwise

Definition at line 1896 of file adaptmatch.cpp.

1896 {
1897 auto TempProto = static_cast<TEMP_PROTO_STRUCT *>(item1);
1898 auto ProtoKey = static_cast<PROTO_KEY *>(item2);
1899
1900 auto Class = ProtoKey->Templates->Class[ProtoKey->ClassId];
1901 auto Config = TempConfigFor(Class, ProtoKey->ConfigId);
1902
1903 if (TempProto->ProtoId > Config->MaxProtoId || !test_bit(Config->Protos, TempProto->ProtoId)) {
1904 return false;
1905 }
1906
1907 MakeProtoPermanent(Class, TempProto->ProtoId);
1908 AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId, ProtoKey->Templates->Templates);
1909 delete TempProto;
1910
1911 return true;
1912} /* MakeTempProtoPerm */
#define MakeProtoPermanent(Class, ProtoId)
Definition: adaptive.h:89
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:91
void AddProtoToClassPruner(PROTO_STRUCT *Proto, CLASS_ID ClassId, INT_TEMPLATES_STRUCT *Templates)
Definition: intproto.cpp:306
ADAPT_TEMPLATES_STRUCT * Templates
Definition: adaptmatch.cpp:125

◆ MarginalMatch()

bool tesseract::MarginalMatch ( float  confidence,
float  matcher_great_threshold 
)
inline

Definition at line 142 of file adaptmatch.cpp.

142 {
143 return (1.0f - confidence) > matcher_great_threshold;
144}

◆ mark_outline()

void tesseract::mark_outline ( EDGEPT *  edgept)

Definition at line 83 of file plotedges.cpp.

83 { /* Start of point list */
84 auto window = edge_window;
85 float x = edgept->pos.x;
86 float y = edgept->pos.y;
87
88 window->Pen(ScrollView::RED);
89 window->SetCursor(x, y);
90
91 x -= 4;
92 y -= 12;
93 window->DrawTo(x, y);
94
95 x -= 2;
96 y += 4;
97 window->DrawTo(x, y);
98
99 x -= 4;
100 y += 2;
101 window->DrawTo(x, y);
102
103 x += 10;
104 y += 6;
105 window->DrawTo(x, y);
106
107 window->Update();
108}

◆ mark_repeated_chars()

void tesseract::mark_repeated_chars ( TO_ROW *  row)

Definition at line 2565 of file makerow.cpp.

2565 {
2566 BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2567 int num_repeated_sets = 0;
2568 if (!box_it.empty()) {
2569 do {
2570 BLOBNBOX *bblob = box_it.data();
2571 int repeat_length = 1;
2572 if (bblob->flow() == BTFT_LEADER && !bblob->joined_to_prev() && bblob->cblob() != nullptr) {
2573 BLOBNBOX_IT test_it(box_it);
2574 for (test_it.forward(); !test_it.at_first();) {
2575 bblob = test_it.data();
2576 if (bblob->flow() != BTFT_LEADER) {
2577 break;
2578 }
2579 test_it.forward();
2580 bblob = test_it.data();
2581 if (bblob->joined_to_prev() || bblob->cblob() == nullptr) {
2582 repeat_length = 0;
2583 break;
2584 }
2585 ++repeat_length;
2586 }
2587 }
2588 if (repeat_length >= kMinLeaderCount) {
2589 num_repeated_sets++;
2590 for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2591 bblob = box_it.data();
2592 bblob->set_repeated_set(num_repeated_sets);
2593 }
2594 } else {
2595 bblob->set_repeated_set(0);
2596 box_it.forward();
2597 }
2598 } while (!box_it.at_first()); // until all done
2599 }
2600 row->set_num_repeated_sets(num_repeated_sets);
2601}
const int kMinLeaderCount
void set_repeated_set(int set_id)
Definition: blobbox.h:274
BlobTextFlowType flow() const
Definition: blobbox.h:310
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:646

◆ MarkDirectionChanges()

void tesseract::MarkDirectionChanges ( MFOUTLINE  Outline)

This routine searches through the specified outline and finds the points at which the outline changes direction. These points are then marked as "extremities". This routine is used as an alternative to FindExtremities(). It forces the endpoints of the microfeatures to be at the direction changes rather than at the midpoint between direction changes.

Parameters
Outline  micro-feature outline to analyze

Definition at line 166 of file mfoutline.cpp.

166 {
167 MFOUTLINE Current;
168 MFOUTLINE Last;
169 MFOUTLINE First;
170
171 if (DegenerateOutline(Outline)) {
172 return;
173 }
174
175 First = NextDirectionChange(Outline);
176 Last = First;
177 do {
178 Current = NextDirectionChange(Last);
179 PointAt(Current)->MarkPoint();
180 Last = Current;
181 } while (Last != First);
182
183} /* MarkDirectionChanges */
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint)
Definition: mfoutline.cpp:402

◆ Mean()

float tesseract::Mean ( PROTOTYPE *  Proto,
uint16_t  Dimension 
)

This routine returns the mean of the specified prototype in the indicated dimension.

Parameters
Proto  prototype to return mean of
Dimension  dimension whose mean is to be returned
Returns
Mean of Prototype in Dimension

Definition at line 1662 of file cluster.cpp.

1662 {
1663 return (Proto->Mean[Dimension]);
1664} // Mean
std::vector< float > Mean
Definition: cluster.h:83

◆ median_block_xheight()

float tesseract::median_block_xheight ( TO_BLOCK *  block,
float  gradient 
)

◆ MedianOfCircularValues()

template<typename T >
T tesseract::MedianOfCircularValues ( T  modulus,
std::vector< T > &  v 
)

Definition at line 117 of file linlsq.h.

117 {
118 LLSQ stats;
119 T halfrange = static_cast<T>(modulus / 2);
120 auto num_elements = v.size();
121 for (auto i : v) {
122 stats.add(i, i + halfrange);
123 }
124 bool offset_needed = stats.y_variance() < stats.x_variance();
125 if (offset_needed) {
126 for (auto i : v) {
127 i += halfrange;
128 }
129 }
130 auto median_index = num_elements / 2;
131 std::nth_element(v.begin(), v.begin() + median_index, v.end());
132 if (offset_needed) {
133 for (auto i : v) {
134 i -= halfrange;
135 }
136 }
137 return v[median_index];
138}
void add(double x, double y)
Definition: linlsq.cpp:49
double x_variance() const
Definition: linlsq.h:83
double y_variance() const
Definition: linlsq.h:90

◆ merge_oldbl_parts()

void tesseract::merge_oldbl_parts ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  biggestpart,
float  jumplimit 
)

Definition at line 749 of file oldbasel.cpp.

756 {
757 bool found_one; // found a bestpart blob
758 bool close_one; // found was close enough
759 int blobindex; /*no along text line */
760 int prevpart; // previous iteration
761 int runlength; // no in this part
762 float diff; /*difference from line */
763 int startx; /*index of start blob */
764 int test_blob; // another index
765 FCOORD coord; // blob coordinate
766 float m, c; // fitted line
767 QLSQ stats; // line stuff
768
769 prevpart = biggestpart;
770 runlength = 0;
771 startx = 0;
772 for (blobindex = 0; blobindex < blobcount; blobindex++) {
773 if (partids[blobindex] != prevpart) {
774 // tprintf("Partition change at (%d,%d) from %d to %d
775 // after run of %d\n",
776 // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
777 // prevpart,partids[blobindex],runlength);
778 if (prevpart != biggestpart && runlength > MAXBADRUN) {
779 stats.clear();
780 for (test_blob = startx; test_blob < blobindex; test_blob++) {
781 coord = FCOORD((blobcoords[test_blob].left() + blobcoords[test_blob].right()) / 2.0,
782 blobcoords[test_blob].bottom());
783 stats.add(coord.x(), coord.y());
784 }
785 stats.fit(1);
786 m = stats.get_b();
787 c = stats.get_c();
789 tprintf("Fitted line y=%g x + %g\n", m, c);
790 }
791 found_one = false;
792 close_one = false;
793 for (test_blob = 1;
794 !found_one && (startx - test_blob >= 0 || blobindex + test_blob <= blobcount);
795 test_blob++) {
796 if (startx - test_blob >= 0 && partids[startx - test_blob] == biggestpart) {
797 found_one = true;
798 coord = FCOORD(
799 (blobcoords[startx - test_blob].left() + blobcoords[startx - test_blob].right()) /
800 2.0,
801 blobcoords[startx - test_blob].bottom());
802 diff = m * coord.x() + c - coord.y();
804 tprintf("Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.x(),
805 coord.y());
806 }
807 if (diff < jumplimit && -diff < jumplimit) {
808 close_one = true;
809 }
810 }
811 if (blobindex + test_blob <= blobcount &&
812 partids[blobindex + test_blob - 1] == biggestpart) {
813 found_one = true;
814 coord = FCOORD((blobcoords[blobindex + test_blob - 1].left() +
815 blobcoords[blobindex + test_blob - 1].right()) /
816 2.0,
817 blobcoords[blobindex + test_blob - 1].bottom());
818 diff = m * coord.x() + c - coord.y();
820 tprintf("Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.x(),
821 coord.y());
822 }
823 if (diff < jumplimit && -diff < jumplimit) {
824 close_one = true;
825 }
826 }
827 }
828 if (close_one) {
830 tprintf(
831 "Merged %d blobs back into part %d from %d starting at "
832 "(%d,%d)\n",
833 runlength, biggestpart, prevpart, blobcoords[startx].left(),
834 blobcoords[startx].bottom());
835 }
836 // switch sides
837 partsizes[prevpart] -= runlength;
838 for (test_blob = startx; test_blob < blobindex; test_blob++) {
839 partids[test_blob] = biggestpart;
840 }
841 }
842 }
843 prevpart = partids[blobindex];
844 runlength = 1;
845 startx = blobindex;
846 } else {
847 runlength++;
848 }
849 }
850}
void fit(int degree)
Definition: quadlsq.cpp:100
void clear()
Definition: quadlsq.cpp:37
double get_b() const
Definition: quadlsq.h:48
double get_c() const
Definition: quadlsq.h:51
void add(double x, double y)
Definition: quadlsq.cpp:58

◆ MergeClusters()

TESS_API int32_t tesseract::MergeClusters ( int16_t  N,
PARAM_DESC  ParamDesc[],
int32_t  n1,
int32_t  n2,
float  m[],
float  m1[],
float  m2[] 
)

This routine merges two clusters into one larger cluster. To do this it computes the number of samples in the new cluster and the mean of the new cluster. The ParamDesc information is used to ensure that circular dimensions are handled correctly.

Parameters
N  number of dimensions (size of arrays)
ParamDesc  array of dimension descriptions
n1,n2  number of samples in each old cluster
m  array to hold mean of new cluster
m1,m2  arrays containing means of old clusters
Returns
The number of samples in the new cluster.

Definition at line 1870 of file cluster.cpp.

1871 {
1872 int32_t i, n;
1873
1874 n = n1 + n2;
1875 for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) {
1876 if (ParamDesc->Circular) {
1877 // if distance between means is greater than allowed
1878 // reduce upper point by one "rotation" to compute mean
1879 // then normalize the mean back into the accepted range
1880 if ((*m2 - *m1) > ParamDesc->HalfRange) {
1881 *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n;
1882 if (*m < ParamDesc->Min) {
1883 *m += ParamDesc->Range;
1884 }
1885 } else if ((*m1 - *m2) > ParamDesc->HalfRange) {
1886 *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n;
1887 if (*m < ParamDesc->Min) {
1888 *m += ParamDesc->Range;
1889 }
1890 } else {
1891 *m = (n1 * *m1 + n2 * *m2) / n;
1892 }
1893 } else {
1894 *m = (n1 * *m1 + n2 * *m2) / n;
1895 }
1896 }
1897 return n;
1898} // MergeClusters

◆ MergeInsignificantProtos()

TESS_COMMON_TRAINING_API void tesseract::MergeInsignificantProtos ( LIST  ProtoList,
const char *  label,
CLUSTERER *  Clusterer,
CLUSTERCONFIG *  clusterconfig 
)

Definition at line 466 of file commontraining.cpp.

467 {
468 PROTOTYPE *Prototype;
469 bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
470
471 LIST pProtoList = ProtoList;
472 iterate(pProtoList) {
473 Prototype = reinterpret_cast<PROTOTYPE *>(pProtoList->first_node());
474 if (Prototype->Significant || Prototype->Merged) {
475 continue;
476 }
477 float best_dist = 0.125;
478 PROTOTYPE *best_match = nullptr;
479 // Find the nearest alive prototype.
480 LIST list_it = ProtoList;
481 iterate(list_it) {
482 auto *test_p = reinterpret_cast<PROTOTYPE *>(list_it->first_node());
483 if (test_p != Prototype && !test_p->Merged) {
484 float dist = ComputeDistance(Clusterer->SampleSize, Clusterer->ParamDesc, &Prototype->Mean[0],
485 &test_p->Mean[0]);
486 if (dist < best_dist) {
487 best_match = test_p;
488 best_dist = dist;
489 }
490 }
491 }
492 if (best_match != nullptr && !best_match->Significant) {
493 if (debug) {
494 auto bestMatchNumSamples = best_match->NumSamples;
495 auto prototypeNumSamples = Prototype->NumSamples;
496 tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n", bestMatchNumSamples,
497 prototypeNumSamples, best_match->Mean[0], best_match->Mean[1], Prototype->Mean[0],
498 Prototype->Mean[1]);
499 }
500 best_match->NumSamples =
501 MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, best_match->NumSamples,
502 Prototype->NumSamples, &best_match->Mean[0], &best_match->Mean[0], &Prototype->Mean[0]);
503 Prototype->NumSamples = 0;
504 Prototype->Merged = true;
505 } else if (best_match != nullptr) {
506 if (debug) {
507 tprintf("Red proto at %g,%g matched a green one at %g,%g\n", Prototype->Mean[0],
508 Prototype->Mean[1], best_match->Mean[0], best_match->Mean[1]);
509 }
510 Prototype->Merged = true;
511 }
512 }
513 // Mark significant those that now have enough samples.
514 int min_samples = static_cast<int32_t>(clusterconfig->MinSamples * Clusterer->NumChar);
515 pProtoList = ProtoList;
516 iterate(pProtoList) {
517 Prototype = reinterpret_cast<PROTOTYPE *>(pProtoList->first_node());
518 // Process insignificant protos that do not match a green one
519 if (!Prototype->Significant && Prototype->NumSamples >= min_samples && !Prototype->Merged) {
520 if (debug) {
521 tprintf("Red proto at %g,%g becoming green\n", Prototype->Mean[0], Prototype->Mean[1]);
522 }
523 Prototype->Significant = true;
524 }
525 }
526} /* MergeInsignificantProtos */
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:400
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])
Definition: cluster.cpp:1870
unsigned NumSamples
Definition: cluster.h:80

◆ Modulo()

int tesseract::Modulo ( int  a,
int  b 
)
inline

Definition at line 153 of file helpers.h.

153 {
154 return (a % b + b) % b;
155}

◆ most_overlapping_row() [1/2]

OVERLAP_STATE tesseract::most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
bool  testing_blob 
)

Definition at line 2451 of file makerow.cpp.

2458 {
2459 OVERLAP_STATE result; // result of tests
2460 float overlap; // of blob & row
2461 float bestover; // nearest row
2462 float merge_top, merge_bottom; // size of merged row
2463 ICOORD testpt; // testing only
2464 TO_ROW *row; // current row
2465 TO_ROW *test_row; // for multiple overlaps
2466 BLOBNBOX_IT blob_it; // for merging rows
2467
2468 result = ASSIGN;
2469 row = row_it->data();
2470 bestover = top - bottom;
2471 if (top > row->max_y()) {
2472 bestover -= top - row->max_y();
2473 }
2474 if (bottom < row->min_y()) {
2475 // compute overlap
2476 bestover -= row->min_y() - bottom;
2477 }
2478 if (testing_blob && textord_debug_blob) {
2479 tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n", bottom, top, row->min_y(),
2480 row->max_y(), rowsize, bestover);
2481 }
2482 test_row = row;
2483 do {
2484 if (!row_it->at_last()) {
2485 row_it->forward();
2486 test_row = row_it->data();
2487 if (test_row->min_y() <= top && test_row->max_y() >= bottom) {
2488 merge_top = test_row->max_y() > row->max_y() ? test_row->max_y() : row->max_y();
2489 merge_bottom = test_row->min_y() < row->min_y() ? test_row->min_y() : row->min_y();
2490 if (merge_top - merge_bottom <= rowsize) {
2491 if (testing_blob && textord_debug_blob) {
2492 tprintf("Merging rows at (%g,%g), (%g,%g)\n", row->min_y(), row->max_y(),
2493 test_row->min_y(), test_row->max_y());
2494 }
2495 test_row->set_limits(merge_bottom, merge_top);
2496 blob_it.set_to_list(test_row->blob_list());
2497 blob_it.add_list_after(row->blob_list());
2498 blob_it.sort(blob_x_order);
2499 row_it->backward();
2500 delete row_it->extract();
2501 row_it->forward();
2502 bestover = -1.0f; // force replacement
2503 }
2504 overlap = top - bottom;
2505 if (top > test_row->max_y()) {
2506 overlap -= top - test_row->max_y();
2507 }
2508 if (bottom < test_row->min_y()) {
2509 overlap -= test_row->min_y() - bottom;
2510 }
2511 if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2512 result = REJECT;
2513 }
2514 if (overlap > bestover) {
2515 bestover = overlap; // find biggest overlap
2516 row = test_row;
2517 }
2518 if (testing_blob && textord_debug_blob) {
2519 tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n", bottom, top,
2520 test_row->min_y(), test_row->max_y(), rowsize, overlap, bestover);
2521 }
2522 }
2523 }
2524 } while (!row_it->at_last() && test_row->min_y() <= top && test_row->max_y() >= bottom);
2525 while (row_it->data() != row) {
2526 row_it->backward(); // make it point to row
2527 }
2528 // doesn't overlap much
2529 if (top - bottom - bestover > rowsize * textord_overlap_x &&
2530 (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x) && result == ASSIGN) {
2531 result = NEW_ROW; // doesn't overlap enough
2532 }
2533 best_row = row;
2534 return result;
2535}
bool textord_fix_makerow_bug
Definition: makerow.cpp:58

◆ most_overlapping_row() [2/2]

TO_ROW * tesseract::most_overlapping_row ( TO_ROW_LIST *  rows,
BLOBNBOX *  blob 
)

Definition at line 103 of file underlin.cpp.

106 {
107 int16_t x = (blob->bounding_box().left() + blob->bounding_box().right()) / 2;
108 TO_ROW_IT row_it = rows; // row iterator
109 TO_ROW *row; // current row
110 TO_ROW *best_row; // output row
111 float overlap; // of blob & row
112 float bestover; // best overlap
113
114 best_row = nullptr;
115 bestover = static_cast<float>(-INT32_MAX);
116 if (row_it.empty()) {
117 return nullptr;
118 }
119 row = row_it.data();
120 row_it.mark_cycle_pt();
121 while (row->baseline.y(x) + row->descdrop > blob->bounding_box().top() && !row_it.cycled_list()) {
122 best_row = row;
123 bestover = blob->bounding_box().top() - row->baseline.y(x) + row->descdrop;
124 row_it.forward();
125 row = row_it.data();
126 }
127 while (row->baseline.y(x) + row->xheight + row->ascrise >= blob->bounding_box().bottom() &&
128 !row_it.cycled_list()) {
129 overlap = row->baseline.y(x) + row->xheight + row->ascrise;
130 if (blob->bounding_box().top() < overlap) {
131 overlap = blob->bounding_box().top();
132 }
133 if (blob->bounding_box().bottom() > row->baseline.y(x) + row->descdrop) {
134 overlap -= blob->bounding_box().bottom();
135 } else {
136 overlap -= row->baseline.y(x) + row->descdrop;
137 }
138 if (overlap > bestover) {
139 bestover = overlap;
140 best_row = row;
141 }
142 row_it.forward();
143 row = row_it.data();
144 }
145 if (bestover < 0 &&
146 row->baseline.y(x) + row->xheight + row->ascrise - blob->bounding_box().bottom() > bestover) {
147 best_row = row;
148 }
149 return best_row;
150}

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate ( int  n,
const TFloat *  u,
const TFloat *  v,
TFloat *  out 
)
inline

Definition at line 229 of file functions.h.

229 {
230 for (int i = 0; i < n; i++) {
231 out[i] += u[i] * v[i];
232 }
233}

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace ( int  n,
const TFloat *  src,
TFloat *  inout 
)
inline

Definition at line 222 of file functions.h.

222 {
223 for (int i = 0; i < n; ++i) {
224 inout[i] *= src[i];
225 }
226}

◆ NearlyEqual()

template<class T >
bool tesseract::NearlyEqual ( T  x,
T  y,
T  tolerance 
)

Definition at line 51 of file host.h.

51 {
52 T diff = x - y;
53 return diff <= tolerance && -diff <= tolerance;
54}

◆ NewClass()

TESS_API CLASS_TYPE tesseract::NewClass ( int  NumProtos,
int  NumConfigs 
)

Definition at line 145 of file protos.cpp.

145 {
146 CLASS_TYPE Class;
147
148 Class = new CLASS_STRUCT;
149
150 Class->Prototypes.resize(NumProtos);
151 Class->Configurations.resize(NumConfigs);
152 Class->MaxNumProtos = NumProtos;
153 Class->MaxNumConfigs = NumConfigs;
154 Class->NumProtos = 0;
155 Class->NumConfigs = 0;
156 return (Class);
157}

◆ NewProgressTester()

void tesseract::NewProgressTester ( const char *  imgname,
const char *  tessdatadir,
const char *  lang 
)

Definition at line 116 of file progress_test.cc.

116 {
117 using ::testing::_;
118 using ::testing::AllOf;
121 using ::testing::Gt;
122 using ::testing::Le;
125
126 auto api = std::make_unique<tesseract::TessBaseAPI>();
127 ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
128 Image image = pixRead(imgname);
129 ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
130 api->SetImage(image);
131
132 NewMockProgressSink progressSink;
133
134 int currentProgress = -1;
135 EXPECT_CALL(progressSink, classicProgress(_)).Times(0);
136 EXPECT_CALL(progressSink, progress(AllOf(Gt<int &>(currentProgress), Le(100))))
137 .Times(AtLeast(5))
138 .WillRepeatedly(DoAll(SaveArg<0>(&currentProgress), Return(false)));
139 EXPECT_CALL(progressSink, cancel(_)).Times(AtLeast(5)).WillRepeatedly(Return(false));
140
141 EXPECT_EQ(api->Recognize(&progressSink.monitor), false);
142 EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
143
144 api->End();
145 image.destroy();
146}

◆ NextDirectionChange()

MFOUTLINE tesseract::NextDirectionChange ( MFOUTLINE  EdgePoint)

This routine returns the next point in the micro-feature outline whose direction differs from that of EdgePoint. The routine assumes that the outline being searched is not a degenerate outline (i.e. it must have 2 or more edge points).

Parameters
EdgePoint — start search from this point
Returns
Point of next direction change in micro-feature outline.
Note
Globals: none

Definition at line 402 of file mfoutline.cpp.

402 {
403 DIRECTION InitialDirection;
404
405 InitialDirection = PointAt(EdgePoint)->Direction;
406
407 MFOUTLINE next_pt = nullptr;
408 do {
409 EdgePoint = NextPointAfter(EdgePoint);
410 next_pt = NextPointAfter(EdgePoint);
411 } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden &&
412 next_pt != nullptr && !PointAt(next_pt)->Hidden);
413
414 return (EdgePoint);
415}

◆ NextExtremity()

MFOUTLINE tesseract::NextExtremity ( MFOUTLINE  EdgePoint)

This routine returns the next point in the micro-feature outline that is an extremity. The search starts after EdgePoint. The routine assumes that the outline being searched is not a degenerate outline (i.e. it must have 2 or more edge points).

Parameters
EdgePoint — start search from this point
Returns
Next extremity in the outline after EdgePoint.
Note
Globals: none

Definition at line 196 of file mfoutline.cpp.

196 {
197 EdgePoint = NextPointAfter(EdgePoint);
198 while (!PointAt(EdgePoint)->ExtremityMark) {
199 EdgePoint = NextPointAfter(EdgePoint);
200 }
201
202 return (EdgePoint);
203
204} /* NextExtremity */

◆ NextSample()

CLUSTER * tesseract::NextSample ( LIST *  SearchState)

This routine is used to find all of the samples which belong to a cluster. It starts by removing the top cluster on the cluster list (SearchState). If this cluster is a leaf it is returned. Otherwise, the right subcluster is pushed on the list and we continue the search in the left subcluster. This continues until a leaf is found. If all samples have been found, nullptr is returned. InitSampleSearch() must be called before NextSample() to initialize the search.

Parameters
SearchState — pointer to the list containing clusters to be searched
Returns
Pointer to the next leaf cluster (sample) or nullptr.

Definition at line 1638 of file cluster.cpp.

1638 {
1640
1641 if (*SearchState == NIL_LIST) {
1642 return (nullptr);
1643 }
1644 Cluster = reinterpret_cast<CLUSTER *>((*SearchState)->first_node());
1645 *SearchState = pop(*SearchState);
1646 for (;;) {
1647 if (Cluster->Left == nullptr) {
1648 return (Cluster);
1649 }
1650 *SearchState = push(*SearchState, Cluster->Right);
1651 Cluster = Cluster->Left;
1652 }
1653} // NextSample
CLUSTER * Right
Definition: cluster.h:47
CLUSTER * Left
Definition: cluster.h:46

◆ NO_LIST()

constexpr ERRCODE tesseract::NO_LIST ( "Iterator not set to a list"  )
constexpr

◆ non_0_digit()

bool tesseract::non_0_digit ( const char *  str,
int  length 
)

◆ Normalize()

void tesseract::Normalize ( float *  Values)

Definition at line 691 of file commontraining.cpp.

691 {
692 float Slope;
693 float Intercept;
694 float Normalizer;
695
696 Slope = tan(Values[2] * 2 * M_PI);
697 Intercept = Values[1] - Slope * Values[0];
698 Normalizer = 1 / sqrt(Slope * Slope + 1.0);
699
700 Values[0] = Slope * Normalizer;
701 Values[1] = -Normalizer;
702 Values[2] = Intercept * Normalizer;
703} // Normalize
internal::ValueArray< T... > Values(T... v)

◆ NormalizeCleanAndSegmentUTF8()

TESS_UNICHARSET_TRAINING_API bool tesseract::NormalizeCleanAndSegmentUTF8 ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNormMode  g_mode,
bool  report_errors,
const char *  str8,
std::vector< std::string > *  graphemes 
)

Definition at line 179 of file normstrngs.cpp.

181 {
182 std::vector<char32> normed32;
183 NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32);
184 StripJoiners(&normed32);
185 std::vector<std::vector<char32>> graphemes32;
186 bool success = Validator::ValidateCleanAndSegment(g_mode, report_errors, normed32, &graphemes32);
187 if (g_mode != GraphemeNormMode::kSingleString && success) {
188 // If we modified the string to clean it up, the segmentation may not be
189 // correct, so check for changes and do it again.
190 std::vector<char32> cleaned32;
191 for (const auto &g : graphemes32) {
192 cleaned32.insert(cleaned32.end(), g.begin(), g.end());
193 }
194 if (cleaned32 != normed32) {
195 graphemes32.clear();
196 success = Validator::ValidateCleanAndSegment(g_mode, report_errors, cleaned32, &graphemes32);
197 }
198 }
199 graphemes->clear();
200 graphemes->reserve(graphemes32.size());
201 for (const auto &grapheme : graphemes32) {
202 graphemes->push_back(UNICHAR::UTF32ToUTF8(grapheme));
203 }
204 return success;
205}

◆ NormalizeOutline()

void tesseract::NormalizeOutline ( MFOUTLINE  Outline,
float  XOrigin 
)

This routine normalizes the coordinates of the specified outline so that the outline is deskewed down to the baseline, translated so that x=0 is at XOrigin, and scaled so that the height of a character cell from descender to ascender is 1. Of this height, 0.25 is for the descender, 0.25 for the ascender, and 0.5 for the x-height. The y coordinate of the baseline is 0.

Parameters
Outline — outline to be normalized
XOrigin — x-origin of text

Definition at line 218 of file mfoutline.cpp.

218 {
219 if (Outline == NIL_LIST) {
220 return;
221 }
222
223 MFOUTLINE EdgePoint = Outline;
224 do {
225 MFEDGEPT *Current = PointAt(EdgePoint);
226 Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset);
227 Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
228 EdgePoint = NextPointAfter(EdgePoint);
229 } while (EdgePoint != Outline);
230} /* NormalizeOutline */

◆ NormalizeOutlineX()

void tesseract::NormalizeOutlineX ( FEATURE_SET  FeatureSet)

This routine computes the weighted average x position over all of the outline-features in FeatureSet and then renormalizes the outline-features to force this average to be the x origin (i.e. x=0). FeatureSet is changed.

Parameters
FeatureSet — outline-features to be normalized

Definition at line 134 of file outfeat.cpp.

134 {
135 int i;
136 FEATURE Feature;
137 float Length;
138 float TotalX = 0.0;
139 float TotalWeight = 0.0;
140 float Origin;
141
142 if (FeatureSet->NumFeatures <= 0) {
143 return;
144 }
145
146 for (i = 0; i < FeatureSet->NumFeatures; i++) {
147 Feature = FeatureSet->Features[i];
148 Length = Feature->Params[OutlineFeatLength];
149 TotalX += Feature->Params[OutlineFeatX] * Length;
150 TotalWeight += Length;
151 }
152 Origin = TotalX / TotalWeight;
153
154 for (i = 0; i < FeatureSet->NumFeatures; i++) {
155 Feature = FeatureSet->Features[i];
156 Feature->Params[OutlineFeatX] -= Origin;
157 }
158} /* NormalizeOutlineX */

◆ NormalizePicoX()

void tesseract::NormalizePicoX ( FEATURE_SET  FeatureSet)

This routine computes the average x position over all of the pico-features in FeatureSet and then renormalizes the pico-features to force this average to be the x origin (i.e. x=0). FeatureSet is changed.

Parameters
FeatureSet — pico-features to be normalized

Definition at line 181 of file picofeat.cpp.

181 {
182 int i;
183 FEATURE Feature;
184 float Origin = 0.0;
185
186 for (i = 0; i < FeatureSet->NumFeatures; i++) {
187 Feature = FeatureSet->Features[i];
188 Origin += Feature->Params[PicoFeatX];
189 }
190 Origin /= FeatureSet->NumFeatures;
191
192 for (i = 0; i < FeatureSet->NumFeatures; i++) {
193 Feature = FeatureSet->Features[i];
194 Feature->Params[PicoFeatX] -= Origin;
195 }
196} /* NormalizePicoX */

◆ NormalizeUTF8String()

TESS_UNICHARSET_TRAINING_API bool tesseract::NormalizeUTF8String ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNorm  grapheme_normalize,
const char *  str8,
std::string *  normalized 
)

Definition at line 152 of file normstrngs.cpp.

154 {
155 std::vector<char32> normed32;
156 NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32);
157 if (grapheme_normalize == GraphemeNorm::kNormalize) {
158 StripJoiners(&normed32);
159 std::vector<std::vector<char32>> graphemes;
160 bool success = Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, false,
161 normed32, &graphemes);
162 if (graphemes.empty() || graphemes[0].empty()) {
163 success = false;
164 } else if (normalized != nullptr) {
165 *normalized = UNICHAR::UTF32ToUTF8(graphemes[0]);
166 }
167 return success;
168 }
169 if (normalized != nullptr) {
170 *normalized = UNICHAR::UTF32ToUTF8(normed32);
171 }
172 return true;
173}

◆ NULL_CURRENT()

constexpr ERRCODE tesseract::NULL_CURRENT ( "List current position is nullptr"  )
constexpr

◆ NULL_DATA()

constexpr ERRCODE tesseract::NULL_DATA ( "List would have returned a nullptr data pointer"  )
constexpr

◆ NULL_NEXT()

constexpr ERRCODE tesseract::NULL_NEXT ( "Next element on the list is nullptr"  )
constexpr

◆ NULL_PREV()

constexpr ERRCODE tesseract::NULL_PREV ( "Previous element on the list is nullptr"  )
constexpr

◆ NumberOfProtos()

TESS_COMMON_TRAINING_API int tesseract::NumberOfProtos ( LIST  ProtoList,
bool  CountSigProtos,
bool  CountInsigProtos 
)

Definition at line 732 of file commontraining.cpp.

732 {
733 int N = 0;
734 iterate(ProtoList) {
735 auto *Proto = reinterpret_cast<PROTOTYPE *>(ProtoList->first_node());
736 if ((Proto->Significant && CountSigProtos) || (!Proto->Significant && CountInsigProtos)) {
737 N++;
738 }
739 }
740 return (N);
741}

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32  ch)

Definition at line 208 of file normstrngs.cpp.

208 {
209 if (is_hyphen_punc(ch)) {
210 return '-';
211 } else if (is_single_quote(ch)) {
212 return '\'';
213 } else if (is_double_quote(ch)) {
214 return '"';
215 }
216 return ch;
217}

◆ OCRTester()

void tesseract::OCRTester ( const char *  imgname,
const char *  groundtruth,
const char *  tessdatadir,
const char *  lang 
)

Definition at line 60 of file apiexample_test.cc.

61 {
62 // log.info() << tessdatadir << " for language: " << lang << std::endl;
63 char *outText;
64 std::locale loc("C"); // You can also use "" for the default system locale
65 std::ifstream file(groundtruth);
66 file.imbue(loc); // Use it for file input
67 std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
68 auto api = std::make_unique<tesseract::TessBaseAPI>();
69 ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
70 Image image = pixRead(imgname);
71 ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
72 api->SetImage(image);
73 outText = api->GetUTF8Text();
74 EXPECT_EQ(gtText, outText) << "Phototest.tif OCR does not match ground truth for "
76 api->End();
77 api->ClearPersistentCache();
78 delete[] outText;
79 image.destroy();
80}
::std::string PrintToString(const T &value)

◆ old_first_xheight()

void tesseract::old_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  initialheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1340 of file oldbasel.cpp.

1347 {
1348 int blobindex; /*current blob */
1349 /*height statistics */
1350 STATS heightstat(0, MAXHEIGHT - 1);
1351 int height; /*height of blob */
1352 int xcentre; /*centre of blob */
1353 int lineheight; /*approx xheight */
1354 float ascenders; /*ascender sum */
1355 int asccount; /*no of ascenders */
1356 float xsum; /*xheight sum */
1357 int xcount; /*xheight count */
1358 float diff; /*height difference */
1359
1360 if (blobcount > 1) {
1361 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1362 xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
1363 /*height of blob */
1364 height = static_cast<int>(blobcoords[blobindex].top() - baseline->y(xcentre) + 0.5);
1365 if (height > initialheight * oldbl_xhfract && height > textord_min_xheight) {
1366 heightstat.add(height, 1);
1367 }
1368 }
1369 if (heightstat.get_total() > 3) {
1370 lineheight = static_cast<int>(heightstat.ile(0.25));
1371 if (lineheight <= 0) {
1372 lineheight = static_cast<int>(heightstat.ile(0.5));
1373 }
1374 } else {
1375 lineheight = initialheight;
1376 }
1377 } else {
1378 lineheight =
1379 static_cast<int>(blobcoords[0].top() -
1380 baseline->y((blobcoords[0].left() + blobcoords[0].right()) / 2) + 0.5);
1381 }
1382
1383 xsum = 0.0f;
1384 xcount = 0;
1385 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1386 xcentre = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
1387 diff = blobcoords[blobindex].top() - baseline->y(xcentre);
1388 /*is it ascender */
1389 if (diff > lineheight + jumplimit) {
1390 ascenders += diff;
1391 asccount++; /*count ascenders */
1392 } else if (diff > lineheight - jumplimit) {
1393 xsum += diff; /*mean xheight */
1394 xcount++;
1395 }
1396 }
1397 if (xcount > 0) {
1398 xsum /= xcount; /*average xheight */
1399 } else {
1400 xsum = static_cast<float>(lineheight); /*guess it */
1401 }
1402 row->xheight *= xsum;
1403 if (asccount > 0) {
1404 row->ascrise = ascenders / asccount - xsum;
1405 } else {
1406 row->ascrise = 0.0f; /*had none */
1407 }
1408 if (row->xheight == 0) {
1409 row->xheight = -1.0f;
1410 }
1411}

◆ OpenBoxFile()

TESS_API FILE * tesseract::OpenBoxFile ( const char *  fname)

Definition at line 59 of file boxread.cpp.

59 {
60 std::string filename = BoxFileName(fname);
61 FILE *box_file = nullptr;
62 if (!(box_file = fopen(filename.c_str(), "rb"))) {
63 CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", filename.c_str());
64 tprintf("Can't open box file %s", filename.c_str());
65 }
66 return box_file;
67}
constexpr ERRCODE CANTOPENFILE("Can't open file")
void error(const char *caller, TessErrorLogCode action, const char *format,...) const __attribute__((format(gnu_printf
Definition: errcode.cpp:40

◆ operator!() [1/2]

FCOORD tesseract::operator! ( const FCOORD src)
inline

Definition at line 524 of file points.h.

526 {
527 FCOORD result; // output
528
529 result.xcoord = -src.ycoord;
530 result.ycoord = src.xcoord;
531 return result;
532}

◆ operator!() [2/2]

ICOORD tesseract::operator! ( const ICOORD src)
inline

Definition at line 324 of file points.h.

326 {
327 ICOORD result; // output
328
329 result.xcoord = -src.ycoord;
330 result.ycoord = src.xcoord;
331 return result;
332}
TDimension ycoord
y value
Definition: points.h:160
TDimension xcoord
x value
Definition: points.h:159

◆ operator%() [1/2]

float tesseract::operator% ( const FCOORD op1,
const FCOORD op2 
)
inline

Definition at line 616 of file points.h.

618 {
619 return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord;
620}

◆ operator%() [2/2]

int32_t tesseract::operator% ( const ICOORD op1,
const ICOORD op2 
)
inline

Definition at line 416 of file points.h.

418 {
419 return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord;
420}

◆ operator&=()

TBOX & tesseract::operator&= ( TBOX &  op1,
const TBOX &  op2 
)

Definition at line 242 of file rect.cpp.

242 {
243 if (op1.overlap(op2)) {
244 if (op2.bot_left.x() > op1.bot_left.x()) {
245 op1.bot_left.set_x(op2.bot_left.x());
246 }
247
248 if (op2.top_right.x() < op1.top_right.x()) {
249 op1.top_right.set_x(op2.top_right.x());
250 }
251
252 if (op2.bot_left.y() > op1.bot_left.y()) {
253 op1.bot_left.set_y(op2.bot_left.y());
254 }
255
256 if (op2.top_right.y() < op1.top_right.y()) {
257 op1.top_right.set_y(op2.top_right.y());
258 }
259 } else {
260 op1.bot_left.set_x(INT16_MAX);
261 op1.bot_left.set_y(INT16_MAX);
262 op1.top_right.set_x(-INT16_MAX);
263 op1.top_right.set_y(-INT16_MAX);
264 }
265 return op1;
266}

◆ operator*() [1/6]

float tesseract::operator* ( const FCOORD op1,
const FCOORD op2 
)
inline

Definition at line 628 of file points.h.

630 {
631 return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord;
632}

◆ operator*() [2/6]

FCOORD tesseract::operator* ( const FCOORD op1,
float  scale 
)
inline

Definition at line 640 of file points.h.

642 {
643 FCOORD result; // output
644
645 result.xcoord = op1.xcoord * scale;
646 result.ycoord = op1.ycoord * scale;
647 return result;
648}

◆ operator*() [3/6]

int32_t tesseract::operator* ( const ICOORD op1,
const ICOORD op2 
)
inline

Definition at line 428 of file points.h.

430 {
431 return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord;
432}

◆ operator*() [4/6]

ICOORD tesseract::operator* ( const ICOORD op1,
TDimension  scale 
)
inline

Definition at line 440 of file points.h.

442 {
443 ICOORD result; // output
444
445 result.xcoord = op1.xcoord * scale;
446 result.ycoord = op1.ycoord * scale;
447 return result;
448}

◆ operator*() [5/6]

FCOORD tesseract::operator* ( float  scale,
const FCOORD op1 
)
inline

Definition at line 650 of file points.h.

653 {
654 FCOORD result; // output
655
656 result.xcoord = op1.xcoord * scale;
657 result.ycoord = op1.ycoord * scale;
658 return result;
659}

◆ operator*() [6/6]

ICOORD tesseract::operator* ( TDimension  scale,
const ICOORD op1 
)
inline

Definition at line 450 of file points.h.

453 {
454 ICOORD result; // output
455
456 result.xcoord = op1.xcoord * scale;
457 result.ycoord = op1.ycoord * scale;
458 return result;
459}

◆ operator*=() [1/2]

FCOORD & tesseract::operator*= ( FCOORD &  op1,
float  scale 
)
inline

Definition at line 667 of file points.h.

669 {
670 op1.xcoord *= scale;
671 op1.ycoord *= scale;
672 return op1;
673}

◆ operator*=() [2/2]

ICOORD & tesseract::operator*= ( ICOORD &  op1,
TDimension  scale 
)
inline

Definition at line 467 of file points.h.

469 {
470 op1.xcoord *= scale;
471 op1.ycoord *= scale;
472 return op1;
473}

◆ operator+() [1/2]

FCOORD tesseract::operator+ ( const FCOORD op1,
const FCOORD op2 
)
inline

Definition at line 556 of file points.h.

558 {
559 FCOORD sum; // result
560
561 sum.xcoord = op1.xcoord + op2.xcoord;
562 sum.ycoord = op1.ycoord + op2.ycoord;
563 return sum;
564}

◆ operator+() [2/2]

ICOORD tesseract::operator+ ( const ICOORD op1,
const ICOORD op2 
)
inline

Definition at line 356 of file points.h.

358 {
359 ICOORD sum; // result
360
361 sum.xcoord = op1.xcoord + op2.xcoord;
362 sum.ycoord = op1.ycoord + op2.ycoord;
363 return sum;
364}

◆ operator+=() [1/3]

FCOORD & tesseract::operator+= ( FCOORD &  op1,
const FCOORD &  op2 
)
inline

Definition at line 572 of file points.h.

574 {
575 op1.xcoord += op2.xcoord;
576 op1.ycoord += op2.ycoord;
577 return op1;
578}

◆ operator+=() [2/3]

ICOORD & tesseract::operator+= ( ICOORD &  op1,
const ICOORD &  op2 
)
inline

Definition at line 372 of file points.h.

374 {
375 op1.xcoord += op2.xcoord;
376 op1.ycoord += op2.ycoord;
377 return op1;
378}

◆ operator+=() [3/3]

TBOX & tesseract::operator+= ( TBOX &  op1,
const TBOX &  op2 
)

Definition at line 214 of file rect.cpp.

216 {
217 if (op2.bot_left.x() < op1.bot_left.x()) {
218 op1.bot_left.set_x(op2.bot_left.x());
219 }
220
221 if (op2.top_right.x() > op1.top_right.x()) {
222 op1.top_right.set_x(op2.top_right.x());
223 }
224
225 if (op2.bot_left.y() < op1.bot_left.y()) {
226 op1.bot_left.set_y(op2.bot_left.y());
227 }
228
229 if (op2.top_right.y() > op1.top_right.y()) {
230 op1.top_right.set_y(op2.top_right.y());
231 }
232
233 return op1;
234}

◆ operator-() [1/4]

FCOORD tesseract::operator- ( const FCOORD op1,
const FCOORD op2 
)
inline

Definition at line 586 of file points.h.

588 {
589 FCOORD sum; // result
590
591 sum.xcoord = op1.xcoord - op2.xcoord;
592 sum.ycoord = op1.ycoord - op2.ycoord;
593 return sum;
594}

◆ operator-() [2/4]

FCOORD tesseract::operator- ( const FCOORD src)
inline

Definition at line 540 of file points.h.

542 {
543 FCOORD result; // output
544
545 result.xcoord = -src.xcoord;
546 result.ycoord = -src.ycoord;
547 return result;
548}

◆ operator-() [3/4]

ICOORD tesseract::operator- ( const ICOORD op1,
const ICOORD op2 
)
inline

Definition at line 386 of file points.h.

388 {
389 ICOORD sum; // result
390
391 sum.xcoord = op1.xcoord - op2.xcoord;
392 sum.ycoord = op1.ycoord - op2.ycoord;
393 return sum;
394}

◆ operator-() [4/4]

ICOORD tesseract::operator- ( const ICOORD src)
inline

Definition at line 340 of file points.h.

342 {
343 ICOORD result; // output
344
345 result.xcoord = -src.xcoord;
346 result.ycoord = -src.ycoord;
347 return result;
348}

◆ operator-=() [1/2]

FCOORD & tesseract::operator-= ( FCOORD &  op1,
const FCOORD &  op2 
)
inline

Definition at line 602 of file points.h.

604 {
605 op1.xcoord -= op2.xcoord;
606 op1.ycoord -= op2.ycoord;
607 return op1;
608}

◆ operator-=() [2/2]

ICOORD & tesseract::operator-= ( ICOORD &  op1,
const ICOORD &  op2 
)
inline

Definition at line 402 of file points.h.

404 {
405 op1.xcoord -= op2.xcoord;
406 op1.ycoord -= op2.ycoord;
407 return op1;
408}

◆ operator/() [1/2]

FCOORD tesseract::operator/ ( const FCOORD op1,
float  scale 
)
inline

Definition at line 681 of file points.h.

683 {
684 FCOORD result; // output
685 ASSERT_HOST(scale != 0.0f);
686 result.xcoord = op1.xcoord / scale;
687 result.ycoord = op1.ycoord / scale;
688 return result;
689}

◆ operator/() [2/2]

ICOORD tesseract::operator/ ( const ICOORD op1,
TDimension  scale 
)
inline

Definition at line 481 of file points.h.

483 {
484 ICOORD result; // output
485
486 result.xcoord = op1.xcoord / scale;
487 result.ycoord = op1.ycoord / scale;
488 return result;
489}

◆ operator/=() [1/2]

FCOORD & tesseract::operator/= ( FCOORD &  op1,
float  scale 
)
inline

Definition at line 697 of file points.h.

699 {
700 ASSERT_HOST(scale != 0.0f);
701 op1.xcoord /= scale;
702 op1.ycoord /= scale;
703 return op1;
704}

◆ operator/=() [2/2]

ICOORD & tesseract::operator/= ( ICOORD &  op1,
TDimension  scale 
)
inline

Definition at line 497 of file points.h.

499 {
500 op1.xcoord /= scale;
501 op1.ycoord /= scale;
502 return op1;
503}

◆ orientation_and_script_detection()

int tesseract::orientation_and_script_detection ( const char *  filename,
OSResults *  osr,
tesseract::Tesseract *  tess 
)

Definition at line 188 of file osdetect.cpp.

189 {
190 std::string name = filename; // truncated name
191
192 const char *lastdot = strrchr(name.c_str(), '.');
193 if (lastdot != nullptr) {
194 name[lastdot - name.c_str()] = '\0';
195 }
196
197 ASSERT_HOST(tess->pix_binary() != nullptr);
198 int width = pixGetWidth(tess->pix_binary());
199 int height = pixGetHeight(tess->pix_binary());
200
201 BLOCK_LIST blocks;
202 if (!read_unlv_file(name, width, height, &blocks)) {
203 FullPageBlock(width, height, &blocks);
204 }
205
206 // Try to remove non-text regions from consideration.
207 TO_BLOCK_LIST land_blocks, port_blocks;
208 remove_nontext_regions(tess, &blocks, &port_blocks);
209
210 if (port_blocks.empty()) {
211 // page segmentation did not succeed, so we need to find_components first.
212 tess->mutable_textord()->find_components(tess->pix_binary(), &blocks, &port_blocks);
213 } else {
214 TBOX page_box(0, 0, width, height);
215 // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
216 tess->mutable_textord()->filter_blobs(page_box.topright(), &port_blocks, true);
217 }
218
219 return os_detect(&port_blocks, osr, tess);
220}
bool read_unlv_file(std::string &name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)
Definition: blread.cpp:36
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:225
void FullPageBlock(int width, int height, BLOCK_LIST *blocks)
Definition: blread.cpp:68
Textord * mutable_textord()
Image pix_binary() const
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:238
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:211

◆ OrientationIdToValue()

int tesseract::OrientationIdToValue ( const int &  id)

Definition at line 566 of file osdetect.cpp.

566 {
567 switch (id) {
568 case 0:
569 return 0;
570 case 1:
571 return 270;
572 case 2:
573 return 180;
574 case 3:
575 return 90;
576 default:
577 return -1;
578 }
579}

◆ os_detect()

int tesseract::os_detect ( TO_BLOCK_LIST *  port_blocks,
OSResults *  osr,
tesseract::Tesseract *  tess 
)

Definition at line 225 of file osdetect.cpp.

225 {
226 int blobs_total = 0;
227 TO_BLOCK_IT block_it;
228 block_it.set_to_list(port_blocks);
229
230 BLOBNBOX_CLIST filtered_list;
231 BLOBNBOX_C_IT filtered_it(&filtered_list);
232
233 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
234 TO_BLOCK *to_block = block_it.data();
235 if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText()) {
236 continue;
237 }
238 BLOBNBOX_IT bbox_it;
239 bbox_it.set_to_list(&to_block->blobs);
240 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
241 BLOBNBOX *bbox = bbox_it.data();
242 C_BLOB *blob = bbox->cblob();
243 TBOX box = blob->bounding_box();
244 ++blobs_total;
245
246 // Catch illegal value of box width and avoid division by zero.
247 if (box.width() == 0) {
248 continue;
249 }
250 // TODO: Can height and width be negative? If not, remove fabs.
251 float y_x = std::fabs((box.height() * 1.0f) / box.width());
252 float x_y = 1.0f / y_x;
253 // Select a >= 1.0 ratio
254 float ratio = x_y > y_x ? x_y : y_x;
255 // Blob is ambiguous
256 if (ratio > kSizeRatioToReject) {
257 continue;
258 }
259 if (box.height() < kMinAcceptableBlobHeight) {
260 continue;
261 }
262 filtered_it.add_to_end(bbox);
263 }
264 }
265 return os_detect_blobs(nullptr, &filtered_list, osr, tess);
266}
const int kMinAcceptableBlobHeight
Definition: osdetect.cpp:42
int os_detect_blobs(const std::vector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:274
const float kSizeRatioToReject
Definition: osdetect.cpp:41

◆ os_detect_blob()

bool tesseract::os_detect_blob ( BLOBNBOX *  bbox,
OrientationDetector *  o,
ScriptDetector *  s,
OSResults *  osr,
tesseract::Tesseract *  tess 
)

Definition at line 323 of file osdetect.cpp.

324 {
325 tess->tess_cn_matching.set_value(true); // turn it on
326 tess->tess_bn_matching.set_value(false);
327 C_BLOB *blob = bbox->cblob();
328 TBLOB *tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
329 TBOX box = tblob->bounding_box();
330 FCOORD current_rotation(1.0f, 0.0f);
331 FCOORD rotation90(0.0f, 1.0f);
332 BLOB_CHOICE_LIST ratings[4];
333 // Test the 4 orientations
334 for (int i = 0; i < 4; ++i) {
335 // Normalize the blob. Set the origin to the place we want to be the
336 // bottom-middle after rotation.
337 // Scaling is to make the rotated height the x-height.
338 float scaling = static_cast<float>(kBlnXHeight) / box.height();
339 float x_origin = (box.left() + box.right()) / 2.0f;
340 float y_origin = (box.bottom() + box.top()) / 2.0f;
341 if (i == 0 || i == 2) {
342 // Rotation is 0 or 180.
343 y_origin = i == 0 ? box.bottom() : box.top();
344 } else {
345 // Rotation is 90 or 270.
346 scaling = static_cast<float>(kBlnXHeight) / box.width();
347 x_origin = i == 1 ? box.left() : box.right();
348 }
349 std::unique_ptr<TBLOB> rotated_blob(new TBLOB(*tblob));
350 rotated_blob->Normalize(nullptr, &current_rotation, nullptr, x_origin, y_origin, scaling,
351 scaling, 0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
352 tess->AdaptiveClassifier(rotated_blob.get(), ratings + i);
353 current_rotation.rotate(rotation90);
354 }
355 delete tblob;
356
357 bool stop = o->detect_blob(ratings);
358 s->detect_blob(ratings);
359 int orientation = o->get_orientation();
360 stop = s->must_stop(orientation) && stop;
361 return stop;
362}
bool detect_blob(BLOB_CHOICE_LIST *scores)
Definition: osdetect.cpp:371
void detect_blob(BLOB_CHOICE_LIST *scores)
Definition: osdetect.cpp:461
bool must_stop(int orientation) const
Definition: osdetect.cpp:558
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:202

◆ os_detect_blobs()

int tesseract::os_detect_blobs ( const std::vector< int > *  allowed_scripts,
BLOBNBOX_CLIST *  blob_list,
OSResults *  osr,
tesseract::Tesseract *  tess 
)

Definition at line 274 of file osdetect.cpp.

275 {
276 OSResults osr_;
277 int minCharactersToTry = tess->min_characters_to_try;
278 int maxCharactersToTry = 5 * minCharactersToTry;
279 if (osr == nullptr) {
280 osr = &osr_;
281 }
282
283 osr->unicharset = &tess->unicharset;
284 OrientationDetector o(allowed_scripts, osr);
285 ScriptDetector s(allowed_scripts, osr, tess);
286
287 BLOBNBOX_C_IT filtered_it(blob_list);
288 int real_max = std::min(filtered_it.length(), maxCharactersToTry);
289 // tprintf("Total blobs found = %d\n", blobs_total);
290 // tprintf("Number of blobs post-filtering = %d\n", filtered_it.length());
291 // tprintf("Number of blobs to try = %d\n", real_max);
292
293 // If there are too few characters, skip this page entirely.
294 if (real_max < minCharactersToTry / 2) {
295 tprintf("Too few characters. Skipping this page\n");
296 return 0;
297 }
298
299 auto **blobs = new BLOBNBOX *[filtered_it.length()];
300 int number_of_blobs = 0;
301 for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list(); filtered_it.forward()) {
302 blobs[number_of_blobs++] = filtered_it.data();
303 }
304 QRSequenceGenerator sequence(number_of_blobs);
305 int num_blobs_evaluated = 0;
306 for (int i = 0; i < real_max; ++i) {
307 if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && i > minCharactersToTry) {
308 break;
309 }
310 ++num_blobs_evaluated;
311 }
312 delete[] blobs;
313
314 // Make sure the best_result is up-to-date
315 int orientation = o.get_orientation();
316 osr->update_best_script(orientation);
317 return num_blobs_evaluated;
318}
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *, tesseract::Tesseract *tess)
Definition: osdetect.cpp:323
UNICHARSET * unicharset
Definition: osdetect.h:79
void update_best_script(int orientation_id)
Definition: osdetect.cpp:90
UNICHARSET unicharset
Definition: ccutil.h:61

◆ OtsuStats()

int tesseract::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
)

Definition at line 166 of file otsuthr.cpp.

166 {
167 int H = 0;
168 double mu_T = 0.0;
169 for (int i = 0; i < kHistogramSize; ++i) {
170 H += histogram[i];
171 mu_T += static_cast<double>(i) * histogram[i];
172 }
173
174 // Now maximize sig_sq_B over t.
175 // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
176 int best_t = -1;
177 int omega_0, omega_1;
178 int best_omega_0 = 0;
179 double best_sig_sq_B = 0.0;
180 double mu_0, mu_1, mu_t;
181 omega_0 = 0;
182 mu_t = 0.0;
183 for (int t = 0; t < kHistogramSize - 1; ++t) {
184 omega_0 += histogram[t];
185 mu_t += t * static_cast<double>(histogram[t]);
186 if (omega_0 == 0) {
187 continue;
188 }
189 omega_1 = H - omega_0;
190 if (omega_1 == 0) {
191 break;
192 }
193 mu_0 = mu_t / omega_0;
194 mu_1 = (mu_T - mu_t) / omega_1;
195 double sig_sq_B = mu_1 - mu_0;
196 sig_sq_B *= sig_sq_B * omega_0 * omega_1;
197 if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
198 best_sig_sq_B = sig_sq_B;
199 best_t = t;
200 best_omega_0 = omega_0;
201 }
202 }
203 if (H_out != nullptr) {
204 *H_out = H;
205 }
206 if (omega0_out != nullptr) {
207 *omega0_out = best_omega_0;
208 }
209 return best_t;
210}

◆ OtsuThreshold()

int tesseract::OtsuThreshold ( Image  src_pix,
int  left,
int  top,
int  width,
int  height,
std::vector< int > &  thresholds,
std::vector< int > &  hi_values 
)

Definition at line 38 of file otsuthr.cpp.

39 {
40 int num_channels = pixGetDepth(src_pix) / 8;
41 // Of all channels with no good hi_value, keep the best so we can always
42 // produce at least one answer.
43 int best_hi_value = 1;
44 int best_hi_index = 0;
45 bool any_good_hivalue = false;
46 double best_hi_dist = 0.0;
47 thresholds.resize(num_channels);
48 hi_values.resize(num_channels);
49
50 // only use opencl if compiled w/ OpenCL and selected device is opencl
51#ifdef USE_OPENCL
52 // all of channel 0 then all of channel 1...
53 std::vector<int> histogramAllChannels(kHistogramSize * num_channels);
54
55 // Calculate Histogram on GPU
56 OpenclDevice od;
57 if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
58 left == 0) {
59 od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
60 height, kHistogramSize, &histogramAllChannels[0]);
61
62 // Calculate Threshold from Histogram on cpu
63 for (int ch = 0; ch < num_channels; ++ch) {
64 thresholds[ch] = -1;
65 hi_values[ch] = -1;
66 int *histogram = &histogramAllChannels[kHistogramSize * ch];
67 int H;
68 int best_omega_0;
69 int best_t = OtsuStats(histogram, &H, &best_omega_0);
70 if (best_omega_0 == 0 || best_omega_0 == H) {
71 // This channel is empty.
72 continue;
73 }
74 // To be a convincing foreground we must have a small fraction of H
75 // or to be a convincing background we must have a large fraction of H.
76 // In between we assume this channel contains no thresholding information.
77 int hi_value = best_omega_0 < H * 0.5;
78 thresholds[ch] = best_t;
79 if (best_omega_0 > H * 0.75) {
80 any_good_hivalue = true;
81 hi_values[ch] = 0;
82 } else if (best_omega_0 < H * 0.25) {
83 any_good_hivalue = true;
84 hi_values[ch] = 1;
85 } else {
86 // In case all channels are like this, keep the best of the bad lot.
87 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
88 if (hi_dist > best_hi_dist) {
89 best_hi_dist = hi_dist;
90 best_hi_value = hi_value;
91 best_hi_index = ch;
92 }
93 }
94 }
95 } else {
96#endif
97 for (int ch = 0; ch < num_channels; ++ch) {
98 thresholds[ch] = -1;
99 hi_values[ch] = -1;
100 // Compute the histogram of the image rectangle.
101 int histogram[kHistogramSize];
102 HistogramRect(src_pix, ch, left, top, width, height, histogram);
103 int H;
104 int best_omega_0;
105 int best_t = OtsuStats(histogram, &H, &best_omega_0);
106 if (best_omega_0 == 0 || best_omega_0 == H) {
107 // This channel is empty.
108 continue;
109 }
110 // To be a convincing foreground we must have a small fraction of H
111 // or to be a convincing background we must have a large fraction of H.
112 // In between we assume this channel contains no thresholding information.
113 int hi_value = best_omega_0 < H * 0.5;
114 thresholds[ch] = best_t;
115 if (best_omega_0 > H * 0.75) {
116 any_good_hivalue = true;
117 hi_values[ch] = 0;
118 } else if (best_omega_0 < H * 0.25) {
119 any_good_hivalue = true;
120 hi_values[ch] = 1;
121 } else {
122 // In case all channels are like this, keep the best of the bad lot.
123 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
124 if (hi_dist > best_hi_dist) {
125 best_hi_dist = hi_dist;
126 best_hi_value = hi_value;
127 best_hi_index = ch;
128 }
129 }
130 }
131#ifdef USE_OPENCL
132 }
133#endif // USE_OPENCL
134
135 if (!any_good_hivalue) {
136 // Use the best of the ones that were not good enough.
137 hi_values[best_hi_index] = best_hi_value;
138 }
139 return num_channels;
140}
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height, int *histogram)
Definition: otsuthr.cpp:146
int OtsuStats(const int *histogram, int *H_out, int *omega0_out)
Definition: otsuthr.cpp:166

◆ outlines_to_blobs()

void tesseract::outlines_to_blobs ( BLOCK block,
ICOORD  bleft,
ICOORD  tright,
C_OUTLINE_LIST *  outlines 
)

Definition at line 460 of file edgblob.cpp.

462 {
463 // make buckets
464 OL_BUCKETS buckets(bleft, tright);
465
466 fill_buckets(outlines, &buckets);
467 empty_buckets(block, &buckets);
468}

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char *  name)

Definition at line 26 of file params_training_featdef.cpp.

26 {
27 if (name == nullptr) {
28 return -1;
29 }
30 int array_size =
31 sizeof(kParamsTrainingFeatureTypeName) / sizeof(kParamsTrainingFeatureTypeName[0]);
32 for (int i = 0; i < array_size; i++) {
33 if (kParamsTrainingFeatureTypeName[i] == nullptr) {
34 continue;
35 }
36 if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0) {
37 return i;
38 }
39 }
40 return -1;
41}

◆ ParseArguments()

TESS_COMMON_TRAINING_API void tesseract::ParseArguments ( int *  argc,
char ***  argv 
)

This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.

Globals:

  • Config current clustering parameters
    Parameters
    argc: number of command line arguments to parse
    argv: command line arguments

Definition at line 125 of file commontraining.cpp.

125 {
126 std::string usage;
127 if (*argc) {
128 usage += (*argv)[0];
129 usage += " -v | --version | ";
130 usage += (*argv)[0];
131 }
132 usage += " [.tr files ...]";
133 tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);
134 // Set some global values based on the flags.
135 Config.MinSamples =
136 std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_min_samples_fraction)));
137 Config.MaxIllegal = std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_max_illegal)));
138 Config.Independence = std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_independence)));
139 Config.Confidence = std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_confidence)));
140 // Set additional parameters from config file if specified.
141 if (!FLAGS_configfile.empty()) {
142 tesseract::ParamUtils::ReadParamsFile(
143 FLAGS_configfile.c_str(), tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, ccutil.params());
144 }
145}
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
ParamsVectors * params()
Definition: ccutil.h:53
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:41

◆ ParseBoxFileStr()

TESS_API bool tesseract::ParseBoxFileStr ( const char *  boxfile_str,
int *  page_number,
std::string &  utf8_str,
TBOX *  bounding_box 
)

Definition at line 205 of file boxread.cpp.

206 {
207 *bounding_box = TBOX(); // Initialize it to empty.
208 utf8_str = "";
209 char uch[kBoxReadBufSize];
210 const char *buffptr = boxfile_str;
211 // Read the unichar without messing up on Tibetan.
212 // According to issue 253 the utf-8 surrogates 85 and A0 are treated
213 // as whitespace by sscanf, so it is more reliable to just find
214 // ascii space and tab.
215 int uch_len = 0;
216 // Skip unicode file designation, if present.
217 const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
218 if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
219 buffptr += 3;
220 }
221 // Allow a single blank as the UTF-8 string. Check for empty string and
222 // then blindly eat the first character.
223 if (*buffptr == '\0') {
224 return false;
225 }
226 do {
227 uch[uch_len++] = *buffptr++;
228 } while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
229 uch_len < kBoxReadBufSize - 1);
230 uch[uch_len] = '\0';
231 if (*buffptr != '\0') {
232 ++buffptr;
233 }
234 int x_min = INT_MAX;
235 int y_min = INT_MAX;
236 int x_max = INT_MIN;
237 int y_max = INT_MIN;
238 *page_number = 0;
239 std::stringstream stream(buffptr);
240 stream.imbue(std::locale::classic());
241 stream >> x_min;
242 stream >> y_min;
243 stream >> x_max;
244 stream >> y_max;
245 stream >> *page_number;
246 if (x_max < x_min || y_max < y_min) {
247 tprintf("Bad box coordinates in boxfile string! %s\n", ubuf);
248 return false;
249 }
250 // Test for long space-delimited string label.
251 if (strcmp(uch, kMultiBlobLabelCode) == 0 && (buffptr = strchr(buffptr, '#')) != nullptr) {
252 strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
253 uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
254 chomp_string(uch);
255 uch_len = strlen(uch);
256 }
257 // Validate UTF8 by making unichars with it.
258 int used = 0;
259 while (used < uch_len) {
260 tesseract::UNICHAR ch(uch + used, uch_len - used);
261 int new_used = ch.utf8_len();
262 if (new_used == 0) {
263 tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", uch + used, uch[used], used + 1);
264 return false;
265 }
266 used += new_used;
267 }
268 utf8_str = uch;
269 if (x_min > x_max) {
270 std::swap(x_min, x_max);
271 }
272 if (y_min > y_max) {
273 std::swap(y_min, y_max);
274 }
275 bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
276 return true; // Successfully read a box.
277}
void chomp_string(char *str)
Definition: helpers.h:91
const int kBoxReadBufSize
Definition: boxread.h:33
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:282

◆ ParseCommandLineFlags()

TESS_COMMON_TRAINING_API void tesseract::ParseCommandLineFlags ( const char *  usage,
int *  argc,
char ***  argv,
const bool  remove_flags 
)

Definition at line 168 of file commandlineflags.cpp.

168 {
169 if (*argc == 1) {
170 printf("USAGE: %s\n", usage);
171 PrintCommandLineFlags();
172 exit(0);
173 }
174
175 if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) {
176 printf("%s\n", TessBaseAPI::Version());
177 exit(0);
178 }
179
180 int i;
181 for (i = 1; i < *argc; ++i) {
182 const char *current_arg = (*argv)[i];
183 // If argument does not start with a hyphen then break.
184 if (current_arg[0] != '-') {
185 break;
186 }
187 // Position current_arg after startings hyphens. We treat a sequence of
188 // one or two consecutive hyphens identically.
189 ++current_arg;
190 if (current_arg[0] == '-') {
191 ++current_arg;
192 }
193 // If this is asking for usage, print the help message and abort.
194 if (!strcmp(current_arg, "help")) {
195 printf("Usage:\n %s [OPTION ...]\n\n", usage);
196 PrintCommandLineFlags();
197 exit(0);
198 }
199 // Find the starting position of the value if it was specified in this
200 // string.
201 const char *equals_position = strchr(current_arg, '=');
202 const char *rhs = nullptr;
203 if (equals_position != nullptr) {
204 rhs = equals_position + 1;
205 }
206 // Extract the flag name.
207 std::string lhs;
208 if (equals_position == nullptr) {
209 lhs = current_arg;
210 } else {
211 lhs.assign(current_arg, equals_position - current_arg);
212 }
213 if (!lhs.length()) {
214 tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
215 exit(1);
216 }
217
218 // Find the flag name in the list of global flags.
219 // int32_t flag
220 int32_t int_val;
221 if (IntFlagExists(lhs.c_str(), &int_val)) {
222 if (rhs != nullptr) {
223 if (!strlen(rhs)) {
224 // Bad input of the format --int_flag=
225 tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
226 exit(1);
227 }
228 if (!SafeAtoi(rhs, &int_val)) {
229 tprintf("ERROR: Could not parse int from %s in flag %s\n", rhs, (*argv)[i]);
230 exit(1);
231 }
232 } else {
233 // We need to parse the next argument
234 if (i + 1 >= *argc) {
235 tprintf("ERROR: Could not find value argument for flag %s\n", lhs.c_str());
236 exit(1);
237 } else {
238 ++i;
239 if (!SafeAtoi((*argv)[i], &int_val)) {
240 tprintf("ERROR: Could not parse int32_t from %s\n", (*argv)[i]);
241 exit(1);
242 }
243 }
244 }
245 SetIntFlagValue(lhs.c_str(), int_val);
246 continue;
247 }
248
249 // double flag
250 double double_val;
251 if (DoubleFlagExists(lhs.c_str(), &double_val)) {
252 if (rhs != nullptr) {
253 if (!strlen(rhs)) {
254 // Bad input of the format --double_flag=
255 tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
256 exit(1);
257 }
258 if (!SafeAtod(rhs, &double_val)) {
259 tprintf("ERROR: Could not parse double from %s in flag %s\n", rhs, (*argv)[i]);
260 exit(1);
261 }
262 } else {
263 // We need to parse the next argument
264 if (i + 1 >= *argc) {
265 tprintf("ERROR: Could not find value argument for flag %s\n", lhs.c_str());
266 exit(1);
267 } else {
268 ++i;
269 if (!SafeAtod((*argv)[i], &double_val)) {
270 tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]);
271 exit(1);
272 }
273 }
274 }
275 SetDoubleFlagValue(lhs.c_str(), double_val);
276 continue;
277 }
278
279 // Bool flag. Allow input forms --flag (equivalent to --flag=true),
280 // --flag=false, --flag=true, --flag=0 and --flag=1
281 bool bool_val;
282 if (BoolFlagExists(lhs.c_str(), &bool_val)) {
283 if (rhs == nullptr) {
284 // --flag form
285 bool_val = true;
286 } else {
287 if (!strlen(rhs)) {
288 // Bad input of the format --bool_flag=
289 tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
290 exit(1);
291 }
292 if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) {
293 bool_val = false;
294 } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) {
295 bool_val = true;
296 } else {
297 tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]);
298 exit(1);
299 }
300 }
301 SetBoolFlagValue(lhs.c_str(), bool_val);
302 continue;
303 }
304
305 // string flag
306 const char *string_val;
307 if (StringFlagExists(lhs.c_str(), &string_val)) {
308 if (rhs != nullptr) {
309 string_val = rhs;
310 } else {
311 // Pick the next argument
312 if (i + 1 >= *argc) {
313 tprintf("ERROR: Could not find string value for flag %s\n", lhs.c_str());
314 exit(1);
315 } else {
316 string_val = (*argv)[++i];
317 }
318 }
319 SetStringFlagValue(lhs.c_str(), string_val);
320 continue;
321 }
322
323 // Flag was not found. Exit with an error message.
324 tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]);
325 exit(1);
326 } // for each argv
327 if (remove_flags) {
328 (*argv)[i - 1] = (*argv)[0];
329 (*argv) += (i - 1);
330 (*argc) -= (i - 1);
331 }
332}

◆ partition_coords()

int tesseract::partition_coords ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  bestpart,
int  xcoords[],
int  ycoords[] 
)

Definition at line 977 of file oldbasel.cpp.

984 {
985 int blobindex; /*no along text line */
986 int pointcount; /*no of points */
987
988 pointcount = 0;
989 for (blobindex = 0; blobindex < blobcount; blobindex++) {
990 if (partids[blobindex] == bestpart) {
991 /*centre of blob */
992 xcoords[pointcount] = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1;
993 ycoords[pointcount++] = blobcoords[blobindex].bottom();
994 }
995 }
996 return pointcount; /*no of points found */
997}

◆ partition_line()

int tesseract::partition_line ( TBOX  blobcoords[],
int  blobcount,
int *  numparts,
char  partids[],
int  partsizes[],
QSPLINE *  spline,
float  jumplimit,
float  ydiffs[] 
)

Definition at line 673 of file oldbasel.cpp.

682 {
683 int blobindex; /*no along text line */
684 int bestpart; /*best new partition */
685 int biggestpart; /*part with most members */
686 float diff; /*difference from line */
687 int startx; /*index of start blob */
688 float partdiffs[MAXPARTS]; /*step between parts */
689
690 for (bestpart = 0; bestpart < MAXPARTS; bestpart++) {
691 partsizes[bestpart] = 0; /*zero them all */
692 }
693
694 startx = get_ydiffs(blobcoords, blobcount, spline, ydiffs);
695 *numparts = 1; /*1 partition */
696 bestpart = -1; /*first point */
697 float drift = 0.0f;
698 float last_delta = 0.0f;
699 for (blobindex = startx; blobindex < blobcount; blobindex++) {
700 /*do each blob in row */
701 diff = ydiffs[blobindex]; /*diff from line */
702 if (textord_oldbl_debug) {
703 tprintf("%d(%d,%d), ", blobindex, blobcoords[blobindex].left(),
704 blobcoords[blobindex].bottom());
705 }
706 bestpart =
707 choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts);
708 /*record partition */
709 partids[blobindex] = bestpart;
710 partsizes[bestpart]++; /*another in it */
711 }
712
713 bestpart = -1; /*first point */
714 drift = 0.0f;
715 last_delta = 0.0f;
716 partsizes[0]--; /*doing 1st pt again */
717 /*do each blob in row */
718 for (blobindex = startx; blobindex >= 0; blobindex--) {
719 diff = ydiffs[blobindex]; /*diff from line */
720 if (textord_oldbl_debug) {
721 tprintf("%d(%d,%d), ", blobindex, blobcoords[blobindex].left(),
722 blobcoords[blobindex].bottom());
723 }
724 bestpart =
725 choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts);
726 /*record partition */
727 partids[blobindex] = bestpart;
728 partsizes[bestpart]++; /*another in it */
729 }
730
731 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) {
732 if (partsizes[bestpart] >= partsizes[biggestpart]) {
733 biggestpart = bestpart; /*new biggest */
734 }
735 }
736 if (textord_oldbl_merge_parts) {
737 merge_oldbl_parts(blobcoords, blobcount, partids, partsizes, biggestpart, jumplimit);
738 }
739 return biggestpart; /*biggest partition */
740}
int get_ydiffs(TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
Definition: oldbasel.cpp:860
int choose_partition(float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
Definition: oldbasel.cpp:910
void merge_oldbl_parts(TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
Definition: oldbasel.cpp:749

◆ pick_x_height()

void tesseract::pick_x_height ( TO_ROW *  row,
int  modelist[],
int  lefts[],
int  rights[],
STATS *  heightstat,
int  mode_threshold 
)

Definition at line 1547 of file oldbasel.cpp.

1549 {
1550 int x;
1551 int y;
1552 int z;
1553 float ratio;
1554 int found_one_bigger = false;
1555 int best_x_height = 0;
1556 int best_asc = 0;
1557 int num_in_best;
1558
1559 for (x = 0; x < MODENUM; x++) {
1560 for (y = 0; y < MODENUM; y++) {
1561 /* Check for two modes */
1562 if (modelist[x] && modelist[y] && heightstat->pile_count(modelist[x]) > mode_threshold &&
1563 (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
1564 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1565 ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]);
1566 if (1.2 < ratio && ratio < 1.8) {
1567 /* Two modes found */
1568 best_x_height = modelist[x];
1569 num_in_best = heightstat->pile_count(modelist[x]);
1570
1571 /* Try to get one higher */
1572 do {
1573 found_one_bigger = false;
1574 for (z = 0; z < MODENUM; z++) {
1575 if (modelist[z] == best_x_height + 1 &&
1576 (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
1577 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1578 ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]);
1579 if ((1.2 < ratio && ratio < 1.8) &&
1580 /* Should be half of best */
1581 heightstat->pile_count(modelist[z]) > num_in_best * 0.5) {
1582 best_x_height++;
1583 found_one_bigger = true;
1584 break;
1585 }
1586 }
1587 }
1588 } while (found_one_bigger);
1589
1590 /* try to get a higher ascender */
1591
1592 best_asc = modelist[y];
1593 num_in_best = heightstat->pile_count(modelist[y]);
1594
1595 /* Try to get one higher */
1596 do {
1597 found_one_bigger = false;
1598 for (z = 0; z < MODENUM; z++) {
1599 if (modelist[z] > best_asc &&
1600 (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
1601 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1602 ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height);
1603 if ((1.2 < ratio && ratio < 1.8) &&
1604 /* Should be half of best */
1605 heightstat->pile_count(modelist[z]) > num_in_best * 0.5) {
1606 best_asc = modelist[z];
1607 found_one_bigger = true;
1608 break;
1609 }
1610 }
1611 }
1612 } while (found_one_bigger);
1613
1614 row->xheight = static_cast<float>(best_x_height);
1615 row->ascrise = static_cast<float>(best_asc) - best_x_height;
1616 return;
1617 }
1618 }
1619 }
1620 }
1621
1622 best_x_height = modelist[0]; /* Single Mode found */
1623 num_in_best = heightstat->pile_count(best_x_height);
1624 do {
1625 /* Try to get one higher */
1626 found_one_bigger = false;
1627 for (z = 1; z < MODENUM; z++) {
1628 /* Should be half of best */
1629 if ((modelist[z] == best_x_height + 1) &&
1630 (heightstat->pile_count(modelist[z]) > num_in_best * 0.5)) {
1631 best_x_height++;
1632 found_one_bigger = true;
1633 break;
1634 }
1635 }
1636 } while (found_one_bigger);
1637
1638 row->ascrise = 0.0f;
1639 row->xheight = static_cast<float>(best_x_height);
1640 if (row->xheight == 0) {
1641 row->xheight = -1.0f;
1642 }
1643}
Uncopyable z

◆ plot_blob_list()

void tesseract::plot_blob_list ( ScrollView *  win,
BLOBNBOX_LIST *  list,
ScrollView::Color  body_colour,
ScrollView::Color  child_colour 
)

Definition at line 1071 of file blobbox.cpp.

1074 { // colour of child
1075 BLOBNBOX_IT it = list;
1076 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1077 it.data()->plot(win, body_colour, child_colour);
1078 }
1079}

◆ plot_box_list()

void tesseract::plot_box_list ( ScrollView *  win,
BLOBNBOX_LIST *  list,
ScrollView::Color  body_colour 
)

Definition at line 69 of file drawtord.cpp.

73 {
74 BLOBNBOX_IT it = list; // iterator
75
76 win->Pen(body_colour);
77 win->Brush(ScrollView::NONE);
78 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
79 it.data()->bounding_box().plot(win);
80 }
81}
void Brush(Color color)
Definition: scrollview.cpp:716

◆ plot_fp_cells()

void tesseract::plot_fp_cells ( ScrollView *  win,
ScrollView::Color  colour,
BLOBNBOX_IT *  blob_it,
int16_t  pitch,
int16_t  blob_count,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  projection_scale 
)

Definition at line 309 of file drawtord.cpp.

317 {
318 int16_t occupation; // occupied cells
319 TBOX word_box; // bounding box
320 FPSEGPT_LIST seg_list; // list of cuts
321 FPSEGPT_IT seg_it;
322 FPSEGPT *segpt; // current point
323
324 if (pitsync_linear_version & 3) {
325 check_pitch_sync2(blob_it, blob_count, pitch, 2, projection, projection_left, projection_right,
326 projection_scale, occupation, &seg_list, 0, 0);
327 } else {
328 check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list);
329 }
330 word_box = blob_it->data()->bounding_box();
331 for (; blob_count > 0; blob_count--) {
332 word_box += box_next(blob_it);
333 }
334 seg_it.set_to_list(&seg_list);
335 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
336 segpt = seg_it.data();
337 if (segpt->faked) {
338 colour = ScrollView::WHITE;
339 win->Pen(colour);
340 } else {
341 win->Pen(colour);
342 }
343 win->Line(segpt->position(), word_box.bottom(), segpt->position(), word_box.top());
344 }
345}

◆ plot_fp_cells2()

void tesseract::plot_fp_cells2 ( ScrollView *  win,
ScrollView::Color  colour,
TO_ROW *  row,
FPSEGPT_LIST *  seg_list 
)

Definition at line 353 of file drawtord.cpp.

358 {
359 TBOX word_box; // bounding box
360 FPSEGPT_IT seg_it = seg_list;
361 // blobs in row
362 BLOBNBOX_IT blob_it = row->blob_list();
363 FPSEGPT *segpt; // current point
364
365 word_box = blob_it.data()->bounding_box();
366 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();) {
367 word_box += box_next(&blob_it);
368 }
369 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
370 segpt = seg_it.data();
371 if (segpt->faked) {
372 colour = ScrollView::WHITE;
373 win->Pen(colour);
374 } else {
375 win->Pen(colour);
376 }
377 win->Line(segpt->position(), word_box.bottom(), segpt->position(), word_box.top());
378 }
379}

◆ plot_fp_word()

void tesseract::plot_fp_word ( TO_BLOCK *  block,
float  pitch,
float  nonspace 
)

Definition at line 1730 of file topitch.cpp.

1734 {
1735 TO_ROW *row; // current row
1736 TO_ROW_IT row_it = block->get_rows();
1737
1738 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1739 row = row_it.data();
1740 row->min_space = static_cast<int32_t>((pitch + nonspace) / 2);
1741 row->max_nonspace = row->min_space;
1742 row->space_threshold = row->min_space;
1743 plot_word_decisions(to_win, static_cast<int16_t>(pitch), row);
1744 }
1745}
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:238

◆ plot_parallel_row()

void tesseract::plot_parallel_row ( TO_ROW *  row,
float  gradient,
int32_t  left,
ScrollView::Color  colour,
FCOORD  rotation 
)

Definition at line 122 of file drawtord.cpp.

128 {
129 FCOORD plot_pt; // point to plot
130 // blobs
131 BLOBNBOX_IT it = row->blob_list();
132 auto fleft = static_cast<float>(left); // floating version
133 float right; // end of row
134
135 // left=it.data()->bounding_box().left();
136 it.move_to_last();
137 right = it.data()->bounding_box().right();
138 plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
139 to_win->Pen(colour);
140 plot_pt = FCOORD(fleft, gradient * left + row->max_y());
141 plot_pt.rotate(rotation);
142 to_win->SetCursor(plot_pt.x(), plot_pt.y());
143 plot_pt = FCOORD(fleft, gradient * left + row->min_y());
144 plot_pt.rotate(rotation);
145 to_win->DrawTo(plot_pt.x(), plot_pt.y());
146 plot_pt = FCOORD(fleft, gradient * left + row->parallel_c());
147 plot_pt.rotate(rotation);
148 to_win->SetCursor(plot_pt.x(), plot_pt.y());
149 plot_pt = FCOORD(right, gradient * right + row->parallel_c());
150 plot_pt.rotate(rotation);
151 to_win->DrawTo(plot_pt.x(), plot_pt.y());
152}
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1071

◆ plot_row_cells()

void tesseract::plot_row_cells ( ScrollView *  win,
ScrollView::Color  colour,
TO_ROW *  row,
float  xshift,
ICOORDELT_LIST *  cells 
)

Definition at line 387 of file drawtord.cpp.

393 {
394 TBOX word_box; // bounding box
395 ICOORDELT_IT cell_it = cells;
396 // blobs in row
397 BLOBNBOX_IT blob_it = row->blob_list();
398 ICOORDELT *cell; // current cell
399
400 word_box = blob_it.data()->bounding_box();
401 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();) {
402 word_box += box_next(&blob_it);
403 }
404 win->Pen(colour);
405 for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
406 cell = cell_it.data();
407 win->Line(cell->x() + xshift, word_box.bottom(), cell->x() + xshift, word_box.top());
408 }
409}

◆ plot_to_row()

void tesseract::plot_to_row ( TO_ROW *  row,
ScrollView::Color  colour,
FCOORD  rotation 
)

Definition at line 89 of file drawtord.cpp.

93 {
94 FCOORD plot_pt; // point to plot
95 // blobs
96 BLOBNBOX_IT it = row->blob_list();
97 float left, right; // end of row
98
99 if (it.empty()) {
100 tprintf("No blobs in row at %g\n", row->parallel_c());
101 return;
102 }
103 left = it.data()->bounding_box().left();
104 it.move_to_last();
105 right = it.data()->bounding_box().right();
106 plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
107 to_win->Pen(colour);
108 plot_pt = FCOORD(left, row->line_m() * left + row->line_c());
109 plot_pt.rotate(rotation);
110 to_win->SetCursor(plot_pt.x(), plot_pt.y());
111 plot_pt = FCOORD(right, row->line_m() * right + row->line_c());
112 plot_pt.rotate(rotation);
113 to_win->DrawTo(plot_pt.x(), plot_pt.y());
114}

◆ plot_word_decisions()

void tesseract::plot_word_decisions ( ScrollView *  win,
int16_t  pitch,
TO_ROW *  row 
)

Definition at line 238 of file drawtord.cpp.

242 {
243 ScrollView::Color colour = ScrollView::MAGENTA; // current colour
244 ScrollView::Color rect_colour; // fuzzy colour
245 int32_t prev_x; // end of prev blob
246 int16_t blob_count; // blobs in word
247 BLOBNBOX *blob; // current blob
248 TBOX blob_box; // bounding box
249 // iterator
250 BLOBNBOX_IT blob_it = row->blob_list();
251 BLOBNBOX_IT start_it = blob_it; // word start
252
253 rect_colour = ScrollView::BLACK;
254 prev_x = -INT16_MAX;
255 blob_count = 0;
256 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
257 blob = blob_it.data();
258 blob_box = blob->bounding_box();
259 if (!blob->joined_to_prev() && blob_box.left() - prev_x > row->max_nonspace) {
260 if ((blob_box.left() - prev_x >= row->min_space ||
261 blob_box.left() - prev_x > row->space_threshold) &&
262 blob_count > 0) {
263 if (pitch > 0 && textord_show_fixed_cuts) {
264 plot_fp_cells(win, colour, &start_it, pitch, blob_count, &row->projection,
265 row->projection_left, row->projection_right,
266 row->xheight * textord_projection_scale);
267 }
268 blob_count = 0;
269 start_it = blob_it;
270 }
271 if (colour == ScrollView::MAGENTA) {
272 colour = ScrollView::RED;
273 } else {
274 colour = static_cast<ScrollView::Color>(colour + 1);
275 }
276 if (blob_box.left() - prev_x < row->min_space) {
277 if (blob_box.left() - prev_x > row->space_threshold) {
278 rect_colour = ScrollView::GOLDENROD;
279 } else {
280 rect_colour = ScrollView::CORAL;
281 }
282 // fill_color_index(win, rect_colour);
283 win->Brush(rect_colour);
284 win->Rectangle(prev_x, blob_box.bottom(), blob_box.left(), blob_box.top());
285 }
286 }
287 if (!blob->joined_to_prev()) {
288 prev_x = blob_box.right();
289 }
290 if (blob->cblob() != nullptr) {
291 blob->cblob()->plot(win, colour, colour);
292 }
293 if (!blob->joined_to_prev() && blob->cblob() != nullptr) {
294 blob_count++;
295 }
296 }
297 if (pitch > 0 && textord_show_fixed_cuts && blob_count > 0) {
298 plot_fp_cells(win, colour, &start_it, pitch, blob_count, &row->projection, row->projection_left,
299 row->projection_right, row->xheight * textord_projection_scale);
300 }
301}
void plot_fp_cells(ScrollView *win, ScrollView::Color colour, BLOBNBOX_IT *blob_it, int16_t pitch, int16_t blob_count, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale)
Definition: drawtord.cpp:309
void plot(ScrollView *window, ScrollView::Color blob_colour, ScrollView::Color child_colour)
Definition: stepblob.cpp:526

◆ pop()

LIST tesseract::pop ( LIST  list)

Definition at line 166 of file oldlist.cpp.

166 {
167 LIST temp = list->list_rest();
168 delete list;
169 return temp;
170}

◆ pre_associate_blobs()

void tesseract::pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 1846 of file makerow.cpp.

1851 {
1852#ifndef GRAPHICS_DISABLED
1853 ScrollView::Color colour; // of boxes
1854#endif
1855 BLOBNBOX *blob; // current blob
1856 BLOBNBOX *nextblob; // next in list
1857 TBOX blob_box;
1858 FCOORD blob_rotation; // inverse of rotation
1859 BLOBNBOX_IT blob_it; // iterator
1860 BLOBNBOX_IT start_it; // iterator
1861 TO_ROW_IT row_it = block->get_rows();
1862
1863#ifndef GRAPHICS_DISABLED
1864 colour = ScrollView::RED;
1865#endif
1866
1867 blob_rotation = FCOORD(rotation.x(), -rotation.y());
1868 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1869 // get blobs
1870 blob_it.set_to_list(row_it.data()->blob_list());
1871 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1872 blob = blob_it.data();
1873 blob_box = blob->bounding_box();
1874 start_it = blob_it; // save start point
1875 // if (testing_on && textord_show_final_blobs)
1876 // {
1877 // tprintf("Blob at (%d,%d)->(%d,%d),
1878 // addr=%x, count=%d\n",
1879 // blob_box.left(),blob_box.bottom(),
1880 // blob_box.right(),blob_box.top(),
1881 // (void*)blob,blob_it.length());
1882 // }
1883 bool overlap;
1884 do {
1885 overlap = false;
1886 if (!blob_it.at_last()) {
1887 nextblob = blob_it.data_relative(1);
1888 overlap = blob_box.major_x_overlap(nextblob->bounding_box());
1889 if (overlap) {
1890 blob->merge(nextblob); // merge new blob
1891 blob_box = blob->bounding_box(); // get bigger box
1892 blob_it.forward();
1893 }
1894 }
1895 } while (overlap);
1896 blob->chop(&start_it, &blob_it, blob_rotation,
1898 // attempt chop
1899 }
1900#ifndef GRAPHICS_DISABLED
1901 if (testing_on && textord_show_final_blobs) {
1902 if (to_win == nullptr) {
1903 create_to_win(page_tr);
1904 }
1905 to_win->Pen(colour);
1906 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1907 blob = blob_it.data();
1908 blob_box = blob->bounding_box();
1909 blob_box.rotate(rotation);
1910 if (!blob->joined_to_prev()) {
1911 to_win->Rectangle(blob_box.left(), blob_box.bottom(), blob_box.right(), blob_box.top());
1912 }
1913 }
1914 colour = static_cast<ScrollView::Color>(colour + 1);
1915 if (colour > ScrollView::MAGENTA) {
1916 colour = ScrollView::RED;
1917 }
1918 }
1919#endif
1920 }
1921}
double textord_chop_width
Definition: makerow.cpp:76
bool textord_show_final_blobs
Definition: makerow.cpp:51
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:92
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:118
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:419

◆ PrepareDistortedPix()

Image tesseract::PrepareDistortedPix ( const Image  pix,
bool  perspective,
bool  invert,
bool  white_noise,
bool  smooth_noise,
bool  blur,
int  box_reduction,
TRand randomizer,
std::vector< TBOX > *  boxes 
)

Definition at line 179 of file degradeimage.cpp.

181 {
182 Image distorted = pix.copy();
183 // Things to do to synthetic training data.
184 if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
185 // TODO(rays) Cook noise in a more thread-safe manner than rand().
186 // Attempt to make the sequences reproducible.
187 srand(randomizer->IntRand());
188 Image pixn = pixAddGaussianNoise(distorted, 8.0);
189 distorted.destroy();
190 if (smooth_noise) {
191 distorted = pixBlockconv(pixn, 1, 1);
192 pixn.destroy();
193 } else {
194 distorted = pixn;
195 }
196 }
197 if (blur && randomizer->SignedRand(1.0) > 0.0) {
198 Image blurred = pixBlockconv(distorted, 1, 1);
199 distorted.destroy();
200 distorted = blurred;
201 }
202 if (perspective) {
203 GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
204 }
205 if (boxes != nullptr) {
206 for (auto &b : *boxes) {
207 b.scale(1.0f / box_reduction);
208 if (b.width() <= 0) {
209 b.set_right(b.left() + 1);
210 }
211 }
212 }
213 if (invert && randomizer->SignedRand(1.0) < -0) {
214 pixInvert(distorted, distorted);
215 }
216 return distorted;
217}
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix, std::vector< TBOX > *boxes)
Image copy() const
Definition: image.cpp:28

◆ print_block_counts()

void tesseract::print_block_counts ( TO_BLOCK block,
int32_t  block_index 
)

Definition at line 575 of file topitch.cpp.

578 {
579 int32_t def_fixed = 0; // counters
580 int32_t def_prop = 0;
581 int32_t maybe_fixed = 0;
582 int32_t maybe_prop = 0;
583 int32_t dunno = 0;
584 int32_t corr_fixed = 0;
585 int32_t corr_prop = 0;
586
587 count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
588 dunno);
589 tprintf("Block %d has (%d,%d,%d)", block_index, def_fixed, maybe_fixed, corr_fixed);
590 if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed)) {
591 tprintf(" (Wrongly)");
592 }
593 tprintf(" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
594 if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop)) {
595 tprintf(" (Wrongly)");
596 }
597 tprintf(" prop, %d dunno\n", dunno);
598}
bool textord_blocksall_prop
Definition: tovars.cpp:27
bool textord_blocksall_fixed
Definition: tovars.cpp:26
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
Definition: topitch.cpp:606

◆ print_pitch_sd()

void tesseract::print_pitch_sd ( TO_ROW row,
STATS projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float  initial_pitch 
)

Definition at line 1535 of file topitch.cpp.

1541 {
1542 const char *res2; // pitch result
1543 int16_t occupation; // used cells
1544 float sp_sd; // space sd
1545 // blobs
1546 BLOBNBOX_IT blob_it = row->blob_list();
1547 BLOBNBOX_IT start_it; // start of word
1548 BLOBNBOX_IT row_start; // start of row
1549 int16_t blob_count; // no of blobs
1550 int16_t total_blob_count; // total blobs in line
1551 TBOX blob_box; // bounding box
1552 TBOX prev_box; // of super blob
1553 int32_t prev_right; // of word sync
1554 int scale_factor; // on scores for big words
1555 int32_t sp_count; // spaces
1556 FPSEGPT_LIST seg_list; // char cells
1557 FPSEGPT_IT seg_it; // iterator
1558 double sqsum; // sum of squares
1559 double spsum; // of spaces
1560 double sp_var; // space error
1561 double word_sync; // result for word
1562 double total_count; // total cuts
1563
1564 if (blob_it.empty()) {
1565 return;
1566 }
1567 row_start = blob_it;
1568 total_blob_count = 0;
1569
1570 total_count = 0;
1571 sqsum = 0;
1572 sp_count = 0;
1573 spsum = 0;
1574 prev_right = -1;
1575 blob_it = row_start;
1576 start_it = blob_it;
1577 blob_count = 0;
1578 blob_box = box_next(&blob_it); // first blob
1579 blob_it.mark_cycle_pt();
1580 do {
1581 for (; blob_count > 0; blob_count--) {
1582 box_next(&start_it);
1583 }
1584 do {
1585 prev_box = blob_box;
1586 blob_count++;
1587 blob_box = box_next(&blob_it);
1588 } while (!blob_it.cycled_list() && blob_box.left() - prev_box.right() < space_size);
1589 word_sync = check_pitch_sync2(
1590 &start_it, blob_count, static_cast<int16_t>(initial_pitch), 2, projection, projection_left,
1591 projection_right, row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0);
1592 total_blob_count += blob_count;
1593 seg_it.set_to_list(&seg_list);
1594 if (prev_right >= 0) {
1595 sp_var = seg_it.data()->position() - prev_right;
1596 sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
1597 sp_var *= sp_var;
1598 spsum += sp_var;
1599 sp_count++;
1600 }
1601 seg_it.move_to_last();
1602 prev_right = seg_it.data()->position();
1604 scale_factor = (seg_list.length() - 2) / 2;
1605 if (scale_factor < 1) {
1606 scale_factor = 1;
1607 }
1608 } else {
1609 scale_factor = 1;
1610 }
1611 sqsum += word_sync * scale_factor;
1612 total_count += (seg_list.length() - 1) * scale_factor;
1613 seg_list.clear();
1614 } while (!blob_it.cycled_list());
1615 sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
1616 word_sync = total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
1617 tprintf("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", word_sync, word_sync / initial_pitch, sp_sd,
1618 word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P');
1619
1620 start_it = row_start;
1621 blob_it = row_start;
1622 word_sync =
1623 check_pitch_sync2(&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
1624 projection, projection_left, projection_right,
1625 row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0);
1626 if (occupation > 1) {
1627 word_sync /= occupation;
1628 }
1629 word_sync = sqrt(word_sync);
1630
1631#ifndef GRAPHICS_DISABLED
1632 if (textord_show_row_cuts && to_win != nullptr) {
1633 plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1634 }
1635#endif
1636 seg_list.clear();
1637 if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1638 if (word_sync < textord_words_def_fixed * initial_pitch && !row->all_caps) {
1639 res2 = "DF";
1640 } else {
1641 res2 = "MF";
1642 }
1643 } else {
1644 res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1645 }
1646 tprintf(
1647 "row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, "
1648 "all_caps=%d\n",
1649 word_sync, word_sync / initial_pitch,
1650 word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', occupation, res2,
1651 initial_pitch, row->fixed_pitch, row->all_caps);
1652}
bool textord_show_row_cuts
Definition: topitch.cpp:46

◆ print_ratings_list()

void tesseract::print_ratings_list ( const char *  msg,
BLOB_CHOICE_LIST *  ratings,
const UNICHARSET current_unicharset 
)

print_ratings_list

Send all the ratings out to the logfile.

Parameters
msg: intro message
ratings: list of ratings
current_unicharset: unicharset that can be used for id-to-unichar conversion

Definition at line 804 of file ratngs.cpp.

805 {
806 if (ratings->empty()) {
807 tprintf("%s:<none>\n", msg);
808 return;
809 }
810 if (*msg != '\0') {
811 tprintf("%s\n", msg);
812 }
813 BLOB_CHOICE_IT c_it;
814 c_it.set_to_list(ratings);
815 for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
816 c_it.data()->print(&current_unicharset);
817 if (!c_it.at_last()) {
818 tprintf("\n");
819 }
820 }
821 tprintf("\n");
822 fflush(stdout);
823}

◆ PrintSegmentationStats()

void tesseract::PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 407 of file ocrblock.cpp.

407 {
408 int num_blocks = 0;
409 int num_rows = 0;
410 int num_words = 0;
411 int num_blobs = 0;
412 BLOCK_IT block_it(block_list);
413 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
414 BLOCK *block = block_it.data();
415 ++num_blocks;
416 ROW_IT row_it(block->row_list());
417 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
418 ++num_rows;
419 ROW *row = row_it.data();
420 // Iterate over all werds in the row.
421 WERD_IT werd_it(row->word_list());
422 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
423 WERD *werd = werd_it.data();
424 ++num_words;
425 num_blobs += werd->cblob_list()->length();
426 }
427 }
428 }
429 tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", num_blocks,
430 num_rows, num_words, num_blobs);
431}

◆ PrintString32WithUnicodes()

std::string tesseract::PrintString32WithUnicodes ( const std::string &  str)
inline

Definition at line 32 of file normstrngs_test.h.

32 {
33 std::vector<char32> str32 = UNICHAR::UTF8ToUTF32(str.c_str());
34 std::string s = "\"";
35 s += "\" " + CodepointList(str32);
36 return s;
37}
std::string CodepointList(const std::vector< char32 > &str32)

◆ PrintStringVectorWithUnicodes()

std::string tesseract::PrintStringVectorWithUnicodes ( const std::vector< std::string > &  glyphs)
inline

Definition at line 39 of file normstrngs_test.h.

39 {
40 std::string result;
41 for (const auto &s : glyphs) {
42 result += "Glyph:";
43 result += PrintString32WithUnicodes(s) + "\n";
44 }
45 return result;
46}
std::string PrintString32WithUnicodes(const std::string &str)

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs ( int  width,
int  height,
TRand randomizer,
float **  im_coeffs,
float **  box_coeffs 
)

Definition at line 263 of file degradeimage.cpp.

264 {
265 // Setup "from" points.
266 Pta *src_pts = ptaCreate(4);
267 ptaAddPt(src_pts, 0.0f, 0.0f);
268 ptaAddPt(src_pts, width, 0.0f);
269 ptaAddPt(src_pts, width, height);
270 ptaAddPt(src_pts, 0.0f, height);
271 // Extract factors from pseudo-random sequence.
272 float factors[FN_NUM_FACTORS];
273 float shear = 0.0f; // Shear is signed.
274 for (int i = 0; i < FN_NUM_FACTORS; ++i) {
275 // Everything is squared to make wild values rarer.
276 if (i == FN_SHEAR) {
277 // Shear is signed.
278 shear = randomizer->SignedRand(0.5 / 3.0);
279 shear = shear >= 0.0 ? shear * shear : -shear * shear;
280 // Keep the sheared points within the original rectangle.
281 if (shear < -factors[FN_X0]) {
282 shear = -factors[FN_X0];
283 }
284 if (shear > factors[FN_X1]) {
285 shear = factors[FN_X1];
286 }
287 factors[i] = shear;
288 } else if (i != FN_INCOLOR) {
289 factors[i] = fabs(randomizer->SignedRand(1.0));
290 if (i <= FN_Y3) {
291 factors[i] *= 5.0 / 8.0;
292 } else {
293 factors[i] *= 0.5;
294 }
295 factors[i] *= factors[i];
296 }
297 }
298 // Setup "to" points.
299 Pta *dest_pts = ptaCreate(4);
300 ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
301 ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
302 ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, (1 - factors[FN_Y2]) * height);
303 ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, (1 - factors[FN_Y3]) * height);
304 getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
305 getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
306 ptaDestroy(&src_pts);
307 ptaDestroy(&dest_pts);
308 return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
309}

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 198 of file publictypes.h.

198 {
199 return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
200}

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 192 of file publictypes.h.

192 {
193 return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
194}

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 201 of file publictypes.h.

201 {
202 return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
203}

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int  pageseg_mode)
inline

Definition at line 189 of file publictypes.h.

189 {
190 return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
191}

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int  pageseg_mode)
inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. Depend critically on the order of elements of PageSegMode. NOTE that arg is an int for compatibility with INT_PARAM.

Definition at line 186 of file publictypes.h.

186 {
187 return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
188}

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int  pageseg_mode)
inline

Definition at line 195 of file publictypes.h.

195 {
196 return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
197}

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 204 of file publictypes.h.

204 {
205 return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
206 pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
207}

◆ PTIsImageType()

bool tesseract::PTIsImageType ( PolyBlockType  type)
inline

Returns true if PolyBlockType is of image type

Definition at line 75 of file publictypes.h.

75 {
78}

◆ PTIsLineType()

bool tesseract::PTIsLineType ( PolyBlockType  type)
inline

Returns true if PolyBlockType is of line type (horizontal or vertical)

Definition at line 71 of file publictypes.h.

71 {
72 return type == PT_HORZ_LINE || type == PT_VERT_LINE;
73}

◆ PTIsPulloutType()

bool tesseract::PTIsPulloutType ( PolyBlockType  type)
inline

Definition at line 87 of file publictypes.h.

87 {
89}

◆ PTIsTextType()

bool tesseract::PTIsTextType ( PolyBlockType  type)
inline

Returns true if PolyBlockType is of text type

Definition at line 80 of file publictypes.h.

◆ push()

TESS_API LIST tesseract::push ( LIST  list,
void *  element 
)

Definition at line 178 of file oldlist.cpp.

178 {
179 LIST t;
180
181 t = new list_rec;
182 t->node = static_cast<LIST>(element);
183 set_rest(t, list);
184 return (t);
185}
list_rec * node
Definition: oldlist.h:104

◆ push_back_new()

template<class T >
void tesseract::push_back_new ( std::vector< T > &  vector,
const T &  data 
)

Definition at line 418 of file paragraphs.cpp.

418 {
419 if (std::find(vector.begin(), vector.end(), data) == vector.end()) {
420 vector.push_back(data);
421 }
422}

◆ push_last()

TESS_API LIST tesseract::push_last ( LIST  list,
void *  item 
)

Definition at line 192 of file oldlist.cpp.

192 {
193 LIST t;
194
195 if (list != NIL_LIST) {
196 t = last(list);
197 t->next = push(NIL_LIST, item);
198 return (list);
199 } else {
200 return (push(NIL_LIST, item));
201 }
202}
LIST last(LIST var_list)
Definition: oldlist.cpp:153
list_rec * next
Definition: oldlist.h:105

◆ QueryInSearch()

int tesseract::QueryInSearch ( KDTREE tree)

◆ read_info()

bool tesseract::read_info ( TFile f,
FontInfo fi 
)

Definition at line 143 of file fontinfo.cpp.

143 {
144 uint32_t size;
145 if (!f->DeSerialize(&size)) {
146 return false;
147 }
148 char *font_name = new char[size + 1];
149 fi->name = font_name;
150 if (!f->DeSerialize(font_name, size)) {
151 return false;
152 }
153 font_name[size] = '\0';
154 return f->DeSerialize(&fi->properties);
155}
uint32_t properties
Definition: fontinfo.h:135
bool DeSerialize(std::string &data)
Definition: serialis.cpp:94

◆ read_spacing_info()

bool tesseract::read_spacing_info ( TFile f,
FontInfo fi 
)

Definition at line 163 of file fontinfo.cpp.

163 {
164 int32_t vec_size, kern_size;
165 if (!f->DeSerialize(&vec_size)) {
166 return false;
167 }
168 ASSERT_HOST(vec_size >= 0);
169 if (vec_size == 0) {
170 return true;
171 }
172 fi->init_spacing(vec_size);
173 for (int i = 0; i < vec_size; ++i) {
174 auto *fs = new FontSpacingInfo();
175 if (!f->DeSerialize(&fs->x_gap_before) || !f->DeSerialize(&fs->x_gap_after) ||
176 !f->DeSerialize(&kern_size)) {
177 delete fs;
178 return false;
179 }
180 if (kern_size < 0) { // indication of a nullptr entry in fi->spacing_vec
181 delete fs;
182 continue;
183 }
184 if (kern_size > 0 &&
185 (!f->DeSerialize(fs->kerned_unichar_ids) || !f->DeSerialize(fs->kerned_x_gaps))) {
186 delete fs;
187 return false;
188 }
189 fi->add_spacing(i, fs);
190 }
191 return true;
192}
void init_spacing(int unicharset_size)
Definition: fontinfo.h:79
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info)
Definition: fontinfo.h:85

◆ read_unlv_file()

bool tesseract::read_unlv_file ( std::string &  name,
int32_t  xsize,
int32_t  ysize,
BLOCK_LIST *  blocks 
)

Definition at line 36 of file blread.cpp.

41 {
42 FILE *pdfp; // file pointer
43 BLOCK *block; // current block
44 int x; // current top-down coords
45 int y;
46 int width; // of current block
47 int height;
48 BLOCK_IT block_it = blocks; // block iterator
49
50 name += UNLV_EXT; // add extension
51 if ((pdfp = fopen(name.c_str(), "rb")) == nullptr) {
52 return false; // didn't read one
53 } else {
54 while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
55 // make rect block
56 block = new BLOCK(name.c_str(), true, 0, 0, static_cast<int16_t>(x),
57 static_cast<int16_t>(ysize - y - height), static_cast<int16_t>(x + width),
58 static_cast<int16_t>(ysize - y));
59 // on end of list
60 block_it.add_to_end(block);
61 }
62 fclose(pdfp);
63 }
64 tprintf("UZN file %s loaded.\n", name.c_str());
65 return true;
66}
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:189
#define UNLV_EXT
Definition: blread.cpp:28

◆ ReadAdaptedClass()

ADAPT_CLASS_STRUCT * tesseract::ReadAdaptedClass ( TFile fp)

Read an adapted class description from file and return a ptr to the adapted class.

Parameters
fp: open file to read adapted class from
Returns
Ptr to new adapted class.
Note
Globals: none

Definition at line 186 of file adaptive.cpp.

186 {
187 int NumTempProtos;
188 int NumConfigs;
189 int i;
190 ADAPT_CLASS_STRUCT *Class;
191
192 /* first read high level adapted class structure */
193 Class = new ADAPT_CLASS_STRUCT;
194 fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
195
196 /* then read in the definitions of the permanent protos and configs */
197 Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
198 Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
199 fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
200 fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
201
202 /* then read in the list of temporary protos */
203 fp->FRead(&NumTempProtos, sizeof(int), 1);
204 Class->TempProtos = NIL_LIST;
205 for (i = 0; i < NumTempProtos; i++) {
206 auto TempProto = new TEMP_PROTO_STRUCT;
207 fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
208 Class->TempProtos = push_last(Class->TempProtos, TempProto);
209 }
210
211 /* then read in the adapted configs */
212 fp->FRead(&NumConfigs, sizeof(int), 1);
213 for (i = 0; i < NumConfigs; i++) {
214 if (test_bit(Class->PermConfigs, i)) {
215 Class->Config[i].Perm = ReadPermConfig(fp);
216 } else {
217 Class->Config[i].Temp = ReadTempConfig(fp);
218 }
219 }
220
221 return (Class);
222
223} /* ReadAdaptedClass */
PERM_CONFIG_STRUCT * ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:262
TEMP_CONFIG_STRUCT * ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:285
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:192
PERM_CONFIG_STRUCT * Perm
Definition: adaptive.h:52
TEMP_CONFIG_STRUCT * Temp
Definition: adaptive.h:51
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:64

◆ ReadAllBoxes()

bool tesseract::ReadAllBoxes ( int  target_page,
bool  skip_blanks,
const char *  filename,
std::vector< TBOX > *  boxes,
std::vector< std::string > *  texts,
std::vector< std::string > *  box_texts,
std::vector< int > *  pages 
)

Definition at line 76 of file boxread.cpp.

78 {
79 std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | std::ios::binary);
80 if (input.fail()) {
81 tprintf("Cannot read box data from '%s'.\n", BoxFileName(filename).c_str());
82 tprintf("Does it exists?\n");
83 return false;
84 }
85 std::vector<char> box_data(std::istreambuf_iterator<char>(input), {});
86 if (box_data.empty()) {
87 tprintf("No box data found in '%s'.\n", BoxFileName(filename).c_str());
88 return false;
89 }
90 // Convert the array of bytes to a string, so it can be used by the parser.
91 box_data.push_back('\0');
92 return ReadMemBoxes(target_page, skip_blanks, &box_data[0],
93 /*continue_on_failure*/ true, boxes, texts, box_texts, pages);
94}
bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)
Definition: boxread.cpp:97

◆ ReadCharDescription()

TESS_API CHAR_DESC_STRUCT * tesseract::ReadCharDescription ( const FEATURE_DEFS_STRUCT FeatureDefs,
FILE *  File 
)

Read a character description from File, and return a data structure containing this information. The data is formatted as follows:

  NumberOfSets
          ShortNameForSet1 Set1
          ShortNameForSet2 Set2
          ...

Globals:

  • none
Parameters
FeatureDefs: definitions of feature types/extractors
File: open text file to read character description from
Returns
Character description read from File.

Definition at line 172 of file featdefs.cpp.

172 {
173 int NumSetsToRead;
174 char ShortName[FEAT_NAME_SIZE];
175 int Type;
176
177 ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1);
178 ASSERT_HOST(NumSetsToRead >= 0);
179 ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes);
180
181 auto CharDesc = new CHAR_DESC_STRUCT(FeatureDefs);
182 for (; NumSetsToRead > 0; NumSetsToRead--) {
183 tfscanf(File, "%s", ShortName);
184 Type = ShortNameToFeatureType(FeatureDefs, ShortName);
185 CharDesc->FeatureSets[Type] = ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]);
186 }
187 return CharDesc;
188}
#define FEAT_NAME_SIZE
Definition: ocrfeatures.h:32
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:203
FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:82

◆ ReadFeatureSet()

FEATURE_SET tesseract::ReadFeatureSet ( FILE *  File,
const FEATURE_DESC_STRUCT FeatureDesc 
)

Create a new feature set of the specified type and read in the features from File. The correct text representation for a feature set is an integer which specifies the number (N) of features in a set followed by a list of N feature descriptions.

Parameters
File: open text file to read new feature set from
FeatureDesc: specifies type of feature to read from File
Returns
New feature set read from File.

Definition at line 82 of file ocrfeatures.cpp.

82 {
83 int NumFeatures;
84 ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1);
85 ASSERT_HOST(NumFeatures >= 0);
86
87 auto FeatureSet = new FEATURE_SET_STRUCT(NumFeatures);
88 for (int i = 0; i < NumFeatures; i++) {
89 AddFeature(FeatureSet, ReadFeature(File, FeatureDesc));
90 }
91
92 return FeatureSet;
93}

◆ ReadFile()

TESS_UNICHARSET_TRAINING_API std::string tesseract::ReadFile ( const std::string &  filename,
FileReader  reader 
)

Definition at line 63 of file lang_model_helpers.cpp.

63 {
64 if (filename.empty()) {
65 return std::string();
66 }
67 std::vector<char> data;
68 bool read_result;
69 if (reader == nullptr) {
70 read_result = LoadDataFromFile(filename.c_str(), &data);
71 } else {
72 read_result = (*reader)(filename.c_str(), &data);
73 }
74 if (read_result) {
75 return std::string(&data[0], data.size());
76 }
77 tprintf("Failed to read data from: %s\n", filename.c_str());
78 return std::string();
79}

◆ ReadMemBoxes()

TESS_API bool tesseract::ReadMemBoxes ( int  target_page,
bool  skip_blanks,
const char *  box_data,
bool  continue_on_failure,
std::vector< TBOX > *  boxes,
std::vector< std::string > *  texts,
std::vector< std::string > *  box_texts,
std::vector< int > *  pages 
)

Definition at line 97 of file boxread.cpp.

99 {
100 std::string box_str(box_data);
101 std::vector<std::string> lines = split(box_str, '\n');
102 if (lines.empty()) {
103 return false;
104 }
105 int num_boxes = 0;
106 for (auto &line : lines) {
107 int page = 0;
108 std::string utf8_str;
109 TBOX box;
110 if (!ParseBoxFileStr(line.c_str(), &page, utf8_str, &box)) {
111 if (continue_on_failure) {
112 continue;
113 } else {
114 return false;
115 }
116 }
117 if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) {
118 continue;
119 }
120 if (target_page >= 0 && page != target_page) {
121 continue;
122 }
123 if (boxes != nullptr) {
124 boxes->push_back(box);
125 }
126 if (texts != nullptr) {
127 texts->push_back(utf8_str);
128 }
129 if (box_texts != nullptr) {
130 std::string full_text;
131 MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
132 box_texts->push_back(full_text);
133 }
134 if (pages != nullptr) {
135 pages->push_back(page);
136 }
137 ++num_boxes;
138 }
139 return num_boxes > 0;
140}
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str, TBOX *bounding_box)
Definition: boxread.cpp:205
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str)
Definition: boxread.cpp:280

◆ ReadNextBox() [1/2]

TESS_API bool tesseract::ReadNextBox ( int *  line_number,
FILE *  box_file,
std::string &  utf8_str,
TBOX bounding_box 
)

Definition at line 153 of file boxread.cpp.

153 {
154 return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box);
155}
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box)
Definition: boxread.cpp:160

◆ ReadNextBox() [2/2]

TESS_API bool tesseract::ReadNextBox ( int  target_page,
int *  line_number,
FILE *  box_file,
std::string &  utf8_str,
TBOX bounding_box 
)

Definition at line 160 of file boxread.cpp.

161 {
162 int page = 0;
163 char buff[kBoxReadBufSize]; // boxfile read buffer
164 char *buffptr = buff;
165
166 while (fgets(buff, sizeof(buff) - 1, box_file)) {
167 (*line_number)++;
168
169 buffptr = buff;
170 const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
171 if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
172 buffptr += 3; // Skip unicode file designation.
173 }
174 // Check for blank lines in box file
175 if (*buffptr == '\n' || *buffptr == '\0') {
176 continue;
177 }
178 // Skip blank boxes.
179 if (*buffptr == ' ' || *buffptr == '\t') {
180 continue;
181 }
182 if (*buffptr != '\0') {
183 if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
184 tprintf("Box file format error on line %i; ignored\n", *line_number);
185 continue;
186 }
187 if (target_page >= 0 && target_page != page) {
188 continue; // Not on the appropriate page.
189 }
190 return true; // Successfully read a box.
191 }
192 }
193 fclose(box_file);
194 return false; // EOF
195}

◆ ReadParamDesc()

PARAM_DESC * tesseract::ReadParamDesc ( TFile fp,
uint16_t  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters
fp: open text file to read N parameter descriptions from
N: number of parameter descriptions to read
Returns
Pointer to an array of parameter descriptors.
Note
Globals: None

Definition at line 134 of file clusttool.cpp.

134 {
135 auto ParamDesc = new PARAM_DESC[N];
136 for (int i = 0; i < N; i++) {
137 const int kMaxLineSize = TOKENSIZE * 4;
138 char line[kMaxLineSize];
139 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
140 std::istringstream stream(line);
141 // Use "C" locale (needed for float values Min, Max).
142 stream.imbue(std::locale::classic());
143 std::string linear_token;
144 stream >> linear_token;
145 std::string essential_token;
146 stream >> essential_token;
147 stream >> ParamDesc[i].Min;
148 stream >> ParamDesc[i].Max;
149 ASSERT_HOST(!stream.fail());
150 ParamDesc[i].Circular = (linear_token[0] == 'c');
151 ParamDesc[i].NonEssential = (essential_token[0] != 'e');
152 ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
153 ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
154 ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
155 }
156 return (ParamDesc);
157}
#define TOKENSIZE
max size of tokens read from an input file
Definition: clusttool.cpp:29
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:195

◆ ReadPermConfig()

PERM_CONFIG_STRUCT * tesseract::ReadPermConfig ( TFile fp)

Read a permanent configuration description from file and return a ptr to it.

Parameters
fp: open file to read permanent config from
Returns
Ptr to new permanent configuration description.
Note
Globals: none

Definition at line 262 of file adaptive.cpp.

262 {
263 auto Config = new PERM_CONFIG_STRUCT;
264 uint8_t NumAmbigs;
265 fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
266 Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
267 fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
268 Config->Ambigs[NumAmbigs] = -1;
269 fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
270
271 return (Config);
272
273} /* ReadPermConfig */
int UNICHAR_ID
Definition: unichar.h:34

◆ ReadPrototype()

PROTOTYPE * tesseract::ReadPrototype ( TFile fp,
uint16_t  N 
)

This routine reads a textual description of a prototype from the specified file.

Parameters
fp: open text file to read prototype from
N: number of dimensions used in prototype
Returns
Pointer to the newly read prototype, or nullptr if the prototype description is invalid
Note
Globals: None

Definition at line 168 of file clusttool.cpp.

168 {
169 char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
170 int SampleCount;
171 int i;
172
173 const int kMaxLineSize = TOKENSIZE * 4;
174 char line[kMaxLineSize];
175 if (fp->FGets(line, kMaxLineSize) == nullptr ||
176 sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", sig_token, shape_token,
177 &SampleCount) != 3) {
178 tprintf("Invalid prototype: %s\n", line);
179 return nullptr;
180 }
181 auto Proto = new PROTOTYPE;
182 Proto->Cluster = nullptr;
183 Proto->Significant = (sig_token[0] == 's');
184
185 switch (shape_token[0]) {
186 case 's':
187 Proto->Style = spherical;
188 break;
189 case 'e':
190 Proto->Style = elliptical;
191 break;
192 case 'a':
193 Proto->Style = automatic;
194 break;
195 default:
196 tprintf("Invalid prototype style specification:%s\n", shape_token);
197 Proto->Style = elliptical;
198 }
199
200 ASSERT_HOST(SampleCount >= 0);
201 Proto->NumSamples = SampleCount;
202
203 Proto->Mean.resize(N);
204 ReadNFloats(fp, N, &Proto->Mean[0]);
205
206 switch (Proto->Style) {
207 case spherical:
208 ReadNFloats(fp, 1, &(Proto->Variance.Spherical));
209 Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
210 Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast<float>(N));
211 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
212 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
213 Proto->Distrib.clear();
214 break;
215 case elliptical:
216 Proto->Variance.Elliptical = new float[N];
217 ReadNFloats(fp, N, Proto->Variance.Elliptical);
218 Proto->Magnitude.Elliptical = new float[N];
219 Proto->Weight.Elliptical = new float[N];
220 Proto->TotalMagnitude = 1.0;
221 for (i = 0; i < N; i++) {
222 Proto->Magnitude.Elliptical[i] = 1.0f / sqrt(2.0f * M_PI * Proto->Variance.Elliptical[i]);
223 Proto->Weight.Elliptical[i] = 1.0f / Proto->Variance.Elliptical[i];
224 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
225 }
226 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
227 Proto->Distrib.clear();
228 break;
229 default:
230 delete Proto;
231 tprintf("Invalid prototype style\n");
232 return nullptr;
233 }
234 return Proto;
235}
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:30

◆ ReadSampleSize()

uint16_t tesseract::ReadSampleSize ( TFile fp)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
fp: open text file to read sample size from
Returns
Sample size
Note
Globals: None

Definition at line 114 of file clusttool.cpp.

114 {
115 int SampleSize = 0;
116
117 const int kMaxLineSize = 100;
118 char line[kMaxLineSize];
119 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
120 ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
121 ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
122 return SampleSize;
123}
#define MAXSAMPLESIZE
max num of dimensions in feature space
Definition: clusttool.cpp:31

◆ ReadTempConfig()

TEMP_CONFIG_STRUCT * tesseract::ReadTempConfig ( TFile fp)

Read a temporary configuration description from file and return a ptr to it.

Parameters
fp: open file to read temporary config from
Returns
Ptr to new temporary configuration description.
Note
Globals: none

Definition at line 285 of file adaptive.cpp.

285 {
286 auto Config = new TEMP_CONFIG_STRUCT;
287 fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
288
289 Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
290 fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
291
292 return (Config);
293
294} /* ReadTempConfig */
const size_t BITSINLONG
Definition: bitvec.h:31

◆ ReadTrainingSamples()

TESS_COMMON_TRAINING_API void tesseract::ReadTrainingSamples ( const FEATURE_DEFS_STRUCT feature_definitions,
const char *  feature_name,
int  max_samples,
UNICHARSET unicharset,
FILE *  file,
LIST training_samples 
)

This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. The function itself returns nothing; the samples are added to the list pointed to by training_samples.

Parameters
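file: open text file to read samples from
feature_definitions: feature definitions used to look up the feature type and to read each character description
feature_name: short name of the feature type whose samples are kept
max_samples: maximum number of samples to keep per character from this file; a value <= 0 means no limit
unicharset: if not null, unichars not already present are inserted into it
training_samples: list that receives the samples, one labeled entry per unichar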

Definition at line 330 of file commontraining.cpp.

332 {
333 char buffer[2048];
334 char unichar[UNICHAR_LEN + 1];
335 LABELEDLIST char_sample;
336 FEATURE_SET feature_samples;
337 uint32_t feature_type = ShortNameToFeatureType(feature_definitions, feature_name);
338
339 // Zero out the font_sample_count for all the classes.
340 LIST it = *training_samples;
341 iterate(it) {
342 char_sample = reinterpret_cast<LABELEDLIST>(it->first_node());
343 char_sample->font_sample_count = 0;
344 }
345
346 while (fgets(buffer, 2048, file) != nullptr) {
347 if (buffer[0] == '\n') {
348 continue;
349 }
350
351 sscanf(buffer, "%*s %s", unichar);
352 if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) {
353 unicharset->unichar_insert(unichar);
354 if (unicharset->size() > MAX_NUM_CLASSES) {
355 tprintf(
356 "Error: Size of unicharset in training is "
357 "greater than MAX_NUM_CLASSES\n");
358 exit(1);
359 }
360 }
361 char_sample = FindList(*training_samples, unichar);
362 if (char_sample == nullptr) {
363 char_sample = new LABELEDLISTNODE(unichar);
364 *training_samples = push(*training_samples, char_sample);
365 }
366 auto char_desc = ReadCharDescription(feature_definitions, file);
367 feature_samples = char_desc->FeatureSets[feature_type];
368 if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
369 char_sample->List = push(char_sample->List, feature_samples);
370 char_sample->SampleCount++;
371 char_sample->font_sample_count++;
372 } else {
373 delete feature_samples;
374 }
375 for (size_t i = 0; i < char_desc->NumFeatureSets; i++) {
376 if (feature_type != i) {
377 delete char_desc->FeatureSets[i];
378 }
379 char_desc->FeatureSets[i] = nullptr;
380 }
381 delete char_desc;
382 }
383} // ReadTrainingSamples
#define UNICHAR_LEN
Definition: unichar.h:31
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
CHAR_DESC_STRUCT * ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
Definition: featdefs.cpp:172
LABELEDLIST FindList(LIST List, const std::string &Label)
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:654
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:695
size_t size() const
Definition: unicharset.h:355

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses ( std::vector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  percentile 
)

Definition at line 1612 of file paragraphs.cpp.

1613 {
1614 if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) {
1615 return;
1616 }
1617
1618 int lmin, lmax, rmin, rmax;
1619 lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_;
1620 rmin = rmax = (*rows)[start].rmargin_ + (*rows)[start].rindent_;
1621 for (int i = start; i < end; i++) {
1622 RowScratchRegisters &sr = (*rows)[i];
1623 sr.SetUnknown();
1624 if (sr.ri_->num_words == 0) {
1625 continue;
1626 }
1627 UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
1628 UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
1629 }
1630 STATS lefts(lmin, lmax);
1631 STATS rights(rmin, rmax);
1632 for (int i = start; i < end; i++) {
1633 RowScratchRegisters &sr = (*rows)[i];
1634 if (sr.ri_->num_words == 0) {
1635 continue;
1636 }
1637 lefts.add(sr.lmargin_ + sr.lindent_, 1);
1638 rights.add(sr.rmargin_ + sr.rindent_, 1);
1639 }
1640 int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0);
1641 int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0);
1642 for (int i = start; i < end; i++) {
1643 RowScratchRegisters &sr = (*rows)[i];
1644 int ldelta = ignorable_left - sr.lmargin_;
1645 sr.lmargin_ += ldelta;
1646 sr.lindent_ -= ldelta;
1647 int rdelta = ignorable_right - sr.rmargin_;
1648 sr.rmargin_ += rdelta;
1649 sr.rindent_ -= rdelta;
1650 }
1651}

◆ RefreshWordBlobsFromNewBlobs()

void tesseract::RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 474 of file ocrblock.cpp.

475 {
476 // Now iterate over all the blobs in the segmentation_block_list_, and just
477 // replace the corresponding c-blobs inside the werds.
478 BLOCK_IT block_it(block_list);
479 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
480 BLOCK *block = block_it.data();
481 if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
482 continue; // Don't touch non-text blocks.
483 }
484 // Iterate over all rows in the block.
485 ROW_IT row_it(block->row_list());
486 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
487 ROW *row = row_it.data();
488 // Iterate over all werds in the row.
489 WERD_IT werd_it(row->word_list());
490 WERD_LIST new_words;
491 WERD_IT new_words_it(&new_words);
492 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
493 WERD *werd = werd_it.extract();
494 WERD *new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, not_found_blobs);
495 if (new_werd) {
496 // Insert this new werd into the actual row's werd-list. Remove the
497 // existing one.
498 new_words_it.add_after_then_move(new_werd);
499 delete werd;
500 } else {
501 // Reinsert the older word back, for lack of better options.
502 // This is critical since dropping the words messes up segmentation:
503 // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
504 new_words_it.add_after_then_move(werd);
505 }
506 }
507 // Get rid of the old word list & replace it with the new one.
508 row->word_list()->clear();
509 werd_it.move_to_first();
510 werd_it.add_list_after(&new_words);
511 }
512 }
513}

◆ reject_blanks()

void tesseract::reject_blanks ( WERD_RES word)

Definition at line 182 of file reject.cpp.

182 {
183 int16_t i;
184 int16_t offset;
185
186 for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
187 offset += word->best_choice->unichar_lengths()[i], i += 1) {
188 if (word->best_choice->unichar_string()[offset] == ' ') {
189 // rej unrecognised blobs
190 word->reject_map[i].setrej_tess_failure();
191 }
192 }
193}
const std::string & unichar_lengths() const
Definition: ratngs.h:533

◆ reject_poor_matches()

void tesseract::reject_poor_matches ( WERD_RES word)

Definition at line 208 of file reject.cpp.

208 {
209 float threshold = compute_reject_threshold(word->best_choice);
210 for (unsigned i = 0; i < word->best_choice->length(); ++i) {
211 if (word->best_choice->unichar_id(i) == UNICHAR_SPACE) {
212 word->reject_map[i].setrej_tess_failure();
213 } else if (word->best_choice->certainty(i) < threshold) {
214 word->reject_map[i].setrej_poor_match();
215 }
216 }
217}
float compute_reject_threshold(WERD_CHOICE *word)
Definition: reject.cpp:227

◆ reject_whole_page()

void tesseract::reject_whole_page ( PAGE_RES_IT page_res_it)

Definition at line 363 of file docqual.cpp.

363 {
364 page_res_it.restart_page();
365 while (page_res_it.word() != nullptr) {
366 page_res_it.word()->reject_map.rej_word_doc_rej();
367 page_res_it.forward();
368 }
369 // whole page is rejected
370 page_res_it.page_res->rejected = true;
371}
PAGE_RES * page_res
Definition: pageres.h:684
WERD_RES * forward()
Definition: pageres.h:743
WERD_RES * word() const
Definition: pageres.h:763
WERD_RES * restart_page()
Definition: pageres.h:710
void rej_word_doc_rej()
Definition: rejctmap.cpp:195

◆ remove_edgept()

void tesseract::remove_edgept ( EDGEPT point)

Definition at line 199 of file split.cpp.

199 {
200 EDGEPT *prev = point->prev;
201 EDGEPT *next = point->next;
202 // Add point's steps onto prev's steps if they are from the same outline.
203 if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) {
204 prev->step_count += point->step_count;
205 }
206 prev->next = next;
207 next->prev = prev;
208 prev->vec.x = next->pos.x - prev->pos.x;
209 prev->vec.y = next->pos.y - prev->pos.y;
210 delete point;
211}

◆ RemoveInsignificantProtos()

TESS_COMMON_TRAINING_API tesseract::LIST tesseract::RemoveInsignificantProtos ( LIST  ProtoList,
bool  KeepSigProtos,
bool  KeepInsigProtos,
int  N 
)

Definition at line 544 of file commontraining.cpp.

546{
547 LIST NewProtoList = NIL_LIST;
548 auto pProtoList = ProtoList;
549 iterate(pProtoList) {
550 auto Proto = reinterpret_cast<PROTOTYPE *>(pProtoList->first_node());
551 if ((Proto->Significant && KeepSigProtos) || (!Proto->Significant && KeepInsigProtos)) {
552 auto NewProto = new PROTOTYPE;
553 NewProto->Mean = Proto->Mean;
554 NewProto->Significant = Proto->Significant;
555 NewProto->Style = Proto->Style;
556 NewProto->NumSamples = Proto->NumSamples;
557 NewProto->Cluster = nullptr;
558 NewProto->Distrib.clear();
559
560 if (Proto->Variance.Elliptical != nullptr) {
561 NewProto->Variance.Elliptical = new float[N];
562 for (int i = 0; i < N; i++) {
563 NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
564 }
565 } else {
566 NewProto->Variance.Elliptical = nullptr;
567 }
568 //---------------------------------------------
569 if (Proto->Magnitude.Elliptical != nullptr) {
570 NewProto->Magnitude.Elliptical = new float[N];
571 for (int i = 0; i < N; i++) {
572 NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
573 }
574 } else {
575 NewProto->Magnitude.Elliptical = nullptr;
576 }
577 //------------------------------------------------
578 if (Proto->Weight.Elliptical != nullptr) {
579 NewProto->Weight.Elliptical = new float[N];
580 for (int i = 0; i < N; i++) {
581 NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
582 }
583 } else {
584 NewProto->Weight.Elliptical = nullptr;
585 }
586
587 NewProto->TotalMagnitude = Proto->TotalMagnitude;
588 NewProto->LogMagnitude = Proto->LogMagnitude;
589 NewProtoList = push_last(NewProtoList, NewProto);
590 }
591 }
592 FreeProtoList(&ProtoList);
593 return (NewProtoList);
594} /* RemoveInsignificantProtos */

◆ render_blob()

void tesseract::render_blob ( ScrollView window,
TBLOB blob,
ScrollView::Color  color 
)

Definition at line 71 of file render.cpp.

71 {
72 /* No outline */
73 if (!blob) {
74 return;
75 }
76
77 render_outline(window, blob->outlines, color);
78}
void render_outline(ScrollView *window, TESSLINE *outline, ScrollView::Color color)
Definition: render.cpp:111

◆ render_edgepts()

void tesseract::render_edgepts ( ScrollView window,
EDGEPT edgept,
ScrollView::Color  color 
)

Definition at line 86 of file render.cpp.

86 {
87 if (!edgept) {
88 return;
89 }
90
91 float x = edgept->pos.x;
92 float y = edgept->pos.y;
93 EDGEPT *this_edge = edgept;
94
95 window->Pen(color);
96 window->SetCursor(x, y);
97 do {
98 this_edge = this_edge->next;
99 x = this_edge->pos.x;
100 y = this_edge->pos.y;
101 window->DrawTo(x, y);
102 } while (edgept != this_edge);
103}

◆ render_outline()

void tesseract::render_outline ( ScrollView window,
TESSLINE outline,
ScrollView::Color  color 
)

Definition at line 111 of file render.cpp.

111 {
112 /* No outline */
113 if (!outline) {
114 return;
115 }
116 /* Draw Compact outline */
117 if (outline->loop) {
118 render_edgepts(window, outline->loop, color);
119 }
120 /* Add on next outlines */
121 render_outline(window, outline->next, color);
122}

◆ RenderIntFeature()

TESS_API void tesseract::RenderIntFeature ( ScrollView window,
const INT_FEATURE_STRUCT Feature,
ScrollView::Color  color 
)

This routine renders the specified feature as a line segment in the given ScrollView window.

Parameters
window: to add feature rendering to
Feature: feature to be rendered
color: color to use for feature rendering
Returns
Nothing (void); the rendering of Feature is drawn directly into window.
Note
Globals: none

Definition at line 1500 of file intproto.cpp.

1501 {
1502 float X, Y, Dx, Dy, Length;
1503
1504 window->Pen(color);
1505 assert(Feature != nullptr);
1506 assert(color != 0);
1507
1508 X = Feature->X;
1509 Y = Feature->Y;
1510 Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE;
1511 // The -PI has no significant effect here, but the value of Theta is computed
1512 // using BinaryAnglePlusPi in intfx.cpp.
1513 Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI);
1514 Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI);
1515
1516 window->SetCursor(X, Y);
1517 window->DrawTo(X + Dx, Y + Dy);
1518} /* RenderIntFeature */

◆ RenderIntProto()

void tesseract::RenderIntProto ( ScrollView window,
INT_CLASS_STRUCT Class,
PROTO_ID  ProtoId,
ScrollView::Color  color 
)

This routine extracts the parameters of the specified proto from the class description and draws a rendering of the proto as a line segment in the given ScrollView window.

Parameters
window: ScrollView instance
Class: class that proto is contained in
ProtoId: id of proto to be rendered
color: color to render proto in

Globals: none

Returns
Nothing (void); the rendering of the proto is drawn directly into window.

Definition at line 1534 of file intproto.cpp.

1535 {
1536 INT_PROTO_STRUCT *Proto;
1537 int ProtoSetIndex;
1538 int ProtoWordIndex;
1539 float Length;
1540 int Xmin, Xmax, Ymin, Ymax;
1541 float X, Y, Dx, Dy;
1542 uint32_t ProtoMask;
1543 int Bucket;
1544
1545 assert(ProtoId >= 0);
1546 assert(Class != nullptr);
1547 assert(ProtoId < Class->NumProtos);
1548 assert(color != 0);
1549 window->Pen(color);
1550
1551 auto ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
1552 ProtoSetIndex = IndexForProto(ProtoId);
1553 Proto = &(ProtoSet->Protos[ProtoSetIndex]);
1554 Length = (Class->ProtoLengths[ProtoId] * GetPicoFeatureLength() * INT_CHAR_NORM_RANGE);
1555 ProtoMask = PPrunerMaskFor(ProtoId);
1556 ProtoWordIndex = PPrunerWordIndexFor(ProtoId);
1557
1558 // find the x and y extent of the proto from the proto pruning table
1559 Xmin = Ymin = NUM_PP_BUCKETS;
1560 Xmax = Ymax = 0;
1561 for (Bucket = 0; Bucket < NUM_PP_BUCKETS; Bucket++) {
1562 if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_X][Bucket][ProtoWordIndex]) {
1563 UpdateRange(Bucket, &Xmin, &Xmax);
1564 }
1565
1566 if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_Y][Bucket][ProtoWordIndex]) {
1567 UpdateRange(Bucket, &Ymin, &Ymax);
1568 }
1569 }
1570 X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE;
1571 Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE;
1572 // The -PI has no significant effect here, but the value of Theta is computed
1573 // using BinaryAnglePlusPi in intfx.cpp.
1574 Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);
1575 Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);
1576
1577 window->SetCursor(X - Dx, Y - Dy);
1578 window->DrawTo(X + Dx, Y + Dy);
1579} /* RenderIntProto */
#define PPrunerWordIndexFor(I)
Definition: intproto.h:149
#define PPrunerMaskFor(I)
Definition: intproto.h:151
#define PROTO_PRUNER_SCALE
Definition: intproto.cpp:50

◆ restore_underlined_blobs()

void tesseract::restore_underlined_blobs ( TO_BLOCK block)

Definition at line 32 of file underlin.cpp.

34 {
35 int16_t chop_coord; // chop boundary
36 TBOX blob_box; // of underline
37 BLOBNBOX *u_line; // underline bit
38 TO_ROW *row; // best row for blob
39 ICOORDELT_LIST chop_cells; // blobs to cut out
40 // real underlines
41 BLOBNBOX_LIST residual_underlines;
42 C_OUTLINE_LIST left_coutlines;
43 C_OUTLINE_LIST right_coutlines;
44 ICOORDELT_IT cell_it = &chop_cells;
45 // under lines
46 BLOBNBOX_IT under_it = &block->underlines;
47 BLOBNBOX_IT ru_it = &residual_underlines;
48
49 if (block->get_rows()->empty()) {
50 return; // Don't crash if there are no rows.
51 }
52 for (under_it.mark_cycle_pt(); !under_it.cycled_list(); under_it.forward()) {
53 u_line = under_it.extract();
54 blob_box = u_line->bounding_box();
55 row = most_overlapping_row(block->get_rows(), u_line);
56 if (row == nullptr) {
57 return; // Don't crash if there is no row.
58 }
59 find_underlined_blobs(u_line, &row->baseline, row->xheight,
60 row->xheight * textord_underline_offset, &chop_cells);
61 cell_it.set_to_list(&chop_cells);
62 for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
63 chop_coord = cell_it.data()->x();
64 if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) {
65 split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines,
66 &right_coutlines);
67 if (!left_coutlines.empty()) {
68 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
69 }
70 chop_coord = cell_it.data()->y();
71 split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines,
72 &right_coutlines);
73 if (!left_coutlines.empty()) {
74 row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
75 }
76 u_line = nullptr; // no more blobs to add
77 }
78 delete cell_it.extract();
79 }
80 if (!right_coutlines.empty()) {
81 split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, &left_coutlines,
82 &right_coutlines);
83 if (!left_coutlines.empty()) {
84 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
85 }
86 }
87 delete u_line;
88 }
89 if (!ru_it.empty()) {
90 ru_it.move_to_first();
91 for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
92 under_it.add_after_then_move(ru_it.extract());
93 }
94 }
95}
TO_ROW * most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob)
Definition: underlin.cpp:103
double textord_underline_offset
Definition: underlin.cpp:23
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:158
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:773
BLOBNBOX_LIST underlines
Definition: blobbox.h:777

◆ Reverse32()

void tesseract::Reverse32 ( void *  ptr)
inline

Definition at line 196 of file helpers.h.

196 {
197 ReverseN(ptr, 4);
198}

◆ ReverseN()

void tesseract::ReverseN ( void *  ptr,
int  num_bytes 
)
inline

Definition at line 184 of file helpers.h.

184 {
185 assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8);
186 char *cptr = static_cast<char *>(ptr);
187 int halfsize = num_bytes / 2;
188 for (int i = 0; i < halfsize; ++i) {
189 char tmp = cptr[i];
190 cptr[i] = cptr[num_bytes - 1 - i];
191 cptr[num_bytes - 1 - i] = tmp;
192 }
193}

◆ RightWordAttributes()

TESS_API void tesseract::RightWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const std::string &  utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

Definition at line 477 of file paragraphs.cpp.

478 {
479 *is_list = false;
480 *starts_idea = false;
481 *ends_idea = false;
482 if (utf8.empty() || (werd != nullptr && werd->empty())) { // Empty
483 *ends_idea = true;
484 return;
485 }
486
487 if (unicharset && werd) { // We have a proper werd and unicharset so use it.
488 if (UniLikelyListItem(unicharset, werd)) {
489 *is_list = true;
490 *starts_idea = true;
491 }
492 UNICHAR_ID last_letter = werd->unichar_id(werd->length() - 1);
493 if (unicharset->get_ispunctuation(last_letter)) {
494 *ends_idea = true;
495 }
496 } else { // Assume utf8 is mostly ASCII
497 if (AsciiLikelyListItem(utf8)) {
498 *is_list = true;
499 *starts_idea = true;
500 }
501 int last_letter = utf8[utf8.size() - 1];
502 if (IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter)) {
503 *ends_idea = true;
504 }
505 }
506}

◆ RoundUp()

int tesseract::RoundUp ( int  n,
int  block_size 
)
inline

Definition at line 99 of file helpers.h.

99 {
100 return block_size * ((n + block_size - 1) / block_size);
101}

◆ row_pitch_stats()

bool tesseract::row_pitch_stats ( TO_ROW row,
int32_t  maxwidth,
bool  testing_on 
)

Definition at line 648 of file topitch.cpp.

652 {
653 BLOBNBOX *blob; // current blob
654 int gap_index; // current gap
655 int32_t prev_x; // end of prev blob
656 int32_t cluster_count; // no of clusters
657 int32_t prev_count; // of clusters
658 int32_t smooth_factor; // for smoothing stats
659 TBOX blob_box; // bounding box
660 float lower, upper; // cluster thresholds
661 // gap sizes
662 float gaps[BLOCK_STATS_CLUSTERS];
663 // blobs
664 BLOBNBOX_IT blob_it = row->blob_list();
665 STATS gap_stats(0, maxwidth - 1);
666 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
667 // clusters
668
669 smooth_factor = static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
670 if (!blob_it.empty()) {
671 prev_x = blob_it.data()->bounding_box().right();
672 blob_it.forward();
673 while (!blob_it.at_first()) {
674 blob = blob_it.data();
675 if (!blob->joined_to_prev()) {
676 blob_box = blob->bounding_box();
677 if (blob_box.left() - prev_x < maxwidth) {
678 gap_stats.add(blob_box.left() - prev_x, 1);
679 }
680 prev_x = blob_box.right();
681 }
682 blob_it.forward();
683 }
684 }
685 if (gap_stats.get_total() == 0) {
686 return false;
687 }
688 cluster_count = 0;
689 lower = row->xheight * words_initial_lower;
690 upper = row->xheight * words_initial_upper;
691 gap_stats.smooth(smooth_factor);
692 do {
693 prev_count = cluster_count;
694 cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop,
695 BLOCK_STATS_CLUSTERS, cluster_stats);
696 } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
697 if (cluster_count < 1) {
698 return false;
699 }
700 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
701 gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
702 }
703 // get medians
704 if (testing_on) {
705 tprintf("cluster_count=%d:", cluster_count);
706 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
707 tprintf(" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
708 }
709 tprintf("\n");
710 }
711 qsort(gaps, cluster_count, sizeof(float), sort_floats);
712
713 // Try to find proportional non-space and space for row.
715 upper = row->xheight * textord_words_min_minspace;
716 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < lower; gap_index++) {
717 ;
718 }
719 if (gap_index == 0) {
720 if (testing_on) {
721 tprintf("No clusters below nonspace threshold!!\n");
722 }
723 if (cluster_count > 1) {
724 row->pr_nonsp = gaps[0];
725 row->pr_space = gaps[1];
726 } else {
727 row->pr_nonsp = lower;
728 row->pr_space = gaps[0];
729 }
730 } else {
731 row->pr_nonsp = gaps[gap_index - 1];
732 while (gap_index < cluster_count && gaps[gap_index] < upper) {
733 gap_index++;
734 }
735 if (gap_index == cluster_count) {
736 if (testing_on) {
737 tprintf("No clusters above nonspace threshold!!\n");
738 }
740 } else {
741 row->pr_space = gaps[gap_index];
742 }
743 }
744
745 // Now try to find the fixed pitch space and non-space.
746 upper = row->xheight * words_default_fixed_space;
747 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < upper; gap_index++) {
748 ;
749 }
750 if (gap_index == 0) {
751 if (testing_on) {
752 tprintf("No clusters below space threshold!!\n");
753 }
754 row->fp_nonsp = upper;
755 row->fp_space = gaps[0];
756 } else {
757 row->fp_nonsp = gaps[gap_index - 1];
758 if (gap_index == cluster_count) {
759 if (testing_on) {
760 tprintf("No clusters above space threshold!!\n");
761 }
762 row->fp_space = row->xheight;
763 } else {
764 row->fp_space = gaps[gap_index];
765 }
766 }
767 if (testing_on) {
768 tprintf(
769 "Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, "
770 "fp_space=%g\n",
771 row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
772 }
773 return true; // computed some stats
774}
#define BLOCK_STATS_CLUSTERS
Definition: topitch.cpp:52
double words_initial_upper
Definition: tovars.cpp:47
double textord_wordstats_smooth_factor
Definition: tovars.cpp:31
double words_initial_lower
Definition: tovars.cpp:46
double words_default_fixed_space
Definition: tovars.cpp:49
double textord_words_min_minspace
Definition: tovars.cpp:35

◆ row_words()

int32_t tesseract::row_words ( TO_BLOCK block,
TO_ROW row,
int32_t  maxwidth,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 168 of file wordseg.cpp.

174 {
175 bool testing_row; // contains testpt
176 bool prev_valid; // if decent size
177 int32_t prev_x; // end of prev blob
178 int32_t cluster_count; // no of clusters
179 int32_t gap_index; // which cluster
180 int32_t smooth_factor; // for smoothing stats
181 BLOBNBOX *blob; // current blob
182 float lower, upper; // clustering parameters
183 float gaps[3]; // gap clusers
184 ICOORD testpt;
185 TBOX blob_box; // bounding box
186 // iterator
187 BLOBNBOX_IT blob_it = row->blob_list();
188 STATS gap_stats(0, maxwidth - 1);
189 STATS cluster_stats[4]; // clusters
190
192 smooth_factor = static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
193 // if (testing_on)
194 // tprintf("Row smooth factor=%d\n",smooth_factor);
195 prev_valid = false;
196 prev_x = -INT32_MAX;
197 testing_row = false;
198 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
199 blob = blob_it.data();
200 blob_box = blob->bounding_box();
201 if (blob_box.contains(testpt)) {
202 testing_row = true;
203 }
204 gap_stats.add(blob_box.width(), 1);
205 }
206 gap_stats.clear();
207 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
208 blob = blob_it.data();
209 if (!blob->joined_to_prev()) {
210 blob_box = blob->bounding_box();
211 if (prev_valid && blob_box.left() - prev_x < maxwidth) {
212 gap_stats.add(blob_box.left() - prev_x, 1);
213 }
214 prev_valid = true;
215 prev_x = blob_box.right();
216 }
217 }
218 if (gap_stats.get_total() == 0) {
219 row->min_space = 0; // no evidence
220 row->max_nonspace = 0;
221 return 0;
222 }
223 gap_stats.smooth(smooth_factor);
226 cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats);
227 while (cluster_count < 2 && std::ceil(lower) < std::floor(upper)) {
228 // shrink gap
229 upper = (upper * 3 + lower) / 4;
230 lower = (lower * 3 + upper) / 4;
231 cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats);
232 }
233 if (cluster_count < 2) {
234 row->min_space = 0; // no evidence
235 row->max_nonspace = 0;
236 return 0;
237 }
238 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
239 gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
240 }
241 // get medians
242 if (cluster_count > 2) {
243 if (testing_on && textord_show_initial_words) {
244 tprintf("Row at %g has 3 sizes of gap:%g,%g,%g\n", row->intercept(),
245 cluster_stats[1].ile(0.5), cluster_stats[2].ile(0.5), cluster_stats[3].ile(0.5));
246 }
247 lower = gaps[0];
248 if (gaps[1] > lower) {
249 upper = gaps[1]; // prefer most frequent
250 if (upper < block->xheight * textord_words_min_minspace && gaps[2] > gaps[1]) {
251 upper = gaps[2];
252 }
253 } else if (gaps[2] > lower && gaps[2] >= block->xheight * textord_words_min_minspace) {
254 upper = gaps[2];
255 } else if (lower >= block->xheight * textord_words_min_minspace) {
256 upper = lower; // not nice
257 lower = gaps[1];
258 if (testing_on && textord_show_initial_words) {
259 tprintf("Had to switch most common from lower to upper!!\n");
260 gap_stats.print();
261 }
262 } else {
263 row->min_space = 0; // no evidence
264 row->max_nonspace = 0;
265 return 0;
266 }
267 } else {
268 if (gaps[1] < gaps[0]) {
269 if (testing_on && textord_show_initial_words) {
270 tprintf("Had to switch most common from lower to upper!!\n");
271 gap_stats.print();
272 }
273 lower = gaps[1];
274 upper = gaps[0];
275 } else {
276 upper = gaps[1];
277 lower = gaps[0];
278 }
279 }
280 if (upper < block->xheight * textord_words_min_minspace) {
281 row->min_space = 0; // no evidence
282 row->max_nonspace = 0;
283 return 0;
284 }
285 if (upper * 3 < block->min_space * 2 + block->max_nonspace ||
286 lower * 3 > block->min_space * 2 + block->max_nonspace) {
287 if (testing_on && textord_show_initial_words) {
288 tprintf("Disagreement between block and row at %g!!\n", row->intercept());
289 tprintf("Lower=%g, upper=%g, Stats:\n", lower, upper);
290 gap_stats.print();
291 }
292 }
293 row->min_space =
294 static_cast<int32_t>(ceil(upper - (upper - lower) * textord_words_definite_spread));
295 row->max_nonspace =
296 static_cast<int32_t>(floor(lower + (upper - lower) * textord_words_definite_spread));
297 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
298 row->space_size = upper;
299 row->kern_size = lower;
300 if (testing_on && textord_show_initial_words) {
301 if (testing_row) {
302 tprintf("GAP STATS\n");
303 gap_stats.print();
304 tprintf("SPACE stats\n");
305 cluster_stats[2].print_summary();
306 tprintf("NONSPACE stats\n");
307 cluster_stats[1].print_summary();
308 }
309 tprintf("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", row->intercept(), row->min_space,
310 upper, row->max_nonspace, lower);
311 }
312 return cluster_stats[2].get_total();
313}
double textord_words_initial_upper
Definition: tovars.cpp:38
double textord_words_initial_lower
Definition: tovars.cpp:37
double textord_words_definite_spread
Definition: tovars.cpp:51
void print_summary() const
Definition: statistc.cpp:572

◆ row_words2()

int32_t tesseract::row_words2 ( TO_BLOCK block,
TO_ROW row,
int32_t  maxwidth,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 321 of file wordseg.cpp.

// row_words2: estimates the kern (non-space) and space gap sizes for one row
// by histogramming the horizontal gaps between its blobs, clustering the
// histogram, and picking the cluster medians on either side of the block-level
// threshold block->max_nonspace.
// Writes row->min_space, row->max_nonspace, row->space_threshold,
// row->space_size and row->kern_size.
// Returns 0 (with min_space and max_nonspace set to 0) when no gaps were
// collected or no cluster was found, otherwise 1.
// `rotation` is not referenced in the lines shown here.
// NOTE(review): listing line 350 is absent from this extract.
327 {
328 bool prev_valid; // if decent size
329 bool this_valid; // current blob big enough
330 int32_t prev_x; // end of prev blob
331 int32_t min_width; // min interesting width
332 int32_t valid_count; // good gaps
333 int32_t total_count; // total gaps
334 int32_t cluster_count; // no of clusters
335 int32_t prev_count; // previous cluster_count
336 int32_t gap_index; // which cluster
337 int32_t smooth_factor; // for smoothing stats
338 BLOBNBOX *blob; // current blob
339 float lower, upper; // clustering parameters
340 ICOORD testpt;
341 TBOX blob_box; // bounding box
342 // iterator
343 BLOBNBOX_IT blob_it = row->blob_list();
344 STATS gap_stats(0, maxwidth - 1);
345 // gap sizes
346 float gaps[BLOCK_STATS_CLUSTERS];
347 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
348 // clusters
349
351 smooth_factor = static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
352 // if (testing_on)
353 // tprintf("Row smooth factor=%d\n",smooth_factor);
354 prev_valid = false;
355 prev_x = -INT16_MAX;
// testing_row is a compile-time false, so the detailed dump guarded by it
// near the end of the function never runs.
356 const bool testing_row = false;
357 // min blob size
358 min_width = static_cast<int32_t>(block->pr_space);
359 total_count = 0;
// First pass: record a gap only when both the current and the previous blob
// are at least min_width wide and the gap is below maxwidth. Blobs flagged
// joined_to_prev() are skipped entirely.
360 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
361 blob = blob_it.data();
362 if (!blob->joined_to_prev()) {
363 blob_box = blob->bounding_box();
364 this_valid = blob_box.width() >= min_width;
365 if (this_valid && prev_valid && blob_box.left() - prev_x < maxwidth) {
366 gap_stats.add(blob_box.left() - prev_x, 1);
367 }
368 total_count++; // count possibles
369 prev_x = blob_box.right();
370 prev_valid = this_valid;
371 }
372 }
373 valid_count = gap_stats.get_total();
// Too few gaps qualified: discard them and rebuild the histogram from every
// gap below maxwidth, ignoring blob width.
374 if (valid_count < total_count * textord_words_minlarge) {
375 gap_stats.clear();
376 prev_x = -INT16_MAX;
377 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
378 blob = blob_it.data();
379 if (!blob->joined_to_prev()) {
380 blob_box = blob->bounding_box();
381 if (blob_box.left() - prev_x < maxwidth) {
382 gap_stats.add(blob_box.left() - prev_x, 1);
383 }
384 prev_x = blob_box.right();
385 }
386 }
387 }
388 if (gap_stats.get_total() == 0) {
389 row->min_space = 0; // no evidence
390 row->max_nonspace = 0;
391 return 0;
392 }
393
394 cluster_count = 0;
395 lower = block->xheight * words_initial_lower;
396 upper = block->xheight * words_initial_upper;
397 gap_stats.smooth(smooth_factor);
// Repeat the clustering call for as long as it keeps returning more clusters
// than the previous call, up to BLOCK_STATS_CLUSTERS.
398 do {
399 prev_count = cluster_count;
400 cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop,
401 BLOCK_STATS_CLUSTERS, cluster_stats);
402 } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
403 if (cluster_count < 1) {
404 row->min_space = 0;
405 row->max_nonspace = 0;
406 return 0;
407 }
// gaps[i] is the median of cluster_stats[i + 1]; cluster_stats[0] is not read
// here.
408 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
409 gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
410 }
411 // get medians
412 if (testing_on) {
413 tprintf("cluster_count=%d:", cluster_count);
414 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
415 tprintf(" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
416 }
417 tprintf("\n");
418 }
419
420 // Try to find proportional non-space and space for row.
// lower: the first cluster median that is <= block->max_nonspace, else the
// block default pr_nonsp.
421 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] > block->max_nonspace;
422 gap_index++) {
423 ;
424 }
425 if (gap_index < cluster_count) {
426 lower = gaps[gap_index]; // most frequent below
427 } else {
428 if (testing_on) {
429 tprintf("No cluster below block threshold!, using default=%g\n", block->pr_nonsp);
430 }
431 lower = block->pr_nonsp;
432 }
// upper: the first cluster median that is > block->max_nonspace, else the
// block default pr_space.
433 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] <= block->max_nonspace;
434 gap_index++) {
435 ;
436 }
437 if (gap_index < cluster_count) {
438 upper = gaps[gap_index]; // most frequent above
439 } else {
440 if (testing_on) {
441 tprintf("No cluster above block threshold!, using default=%g\n", block->pr_space);
442 }
443 upper = block->pr_space;
444 }
// min_space and max_nonspace are upper and lower each moved toward the other
// by textord_words_definite_spread times their separation; the threshold is
// the integer midpoint of the two.
445 row->min_space =
446 static_cast<int32_t>(ceil(upper - (upper - lower) * textord_words_definite_spread));
447 row->max_nonspace =
448 static_cast<int32_t>(floor(lower + (upper - lower) * textord_words_definite_spread));
449 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
450 row->space_size = upper;
451 row->kern_size = lower;
452 if (testing_on) {
453 if (testing_row) {
454 tprintf("GAP STATS\n");
455 gap_stats.print();
456 tprintf("SPACE stats\n");
457 cluster_stats[2].print_summary();
458 tprintf("NONSPACE stats\n");
459 cluster_stats[1].print_summary();
460 }
461 tprintf("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", row->intercept(), row->min_space,
462 upper, row->max_nonspace, lower);
463 }
464 return 1;
465}
#define BLOCK_STATS_CLUSTERS
Definition: wordseg.cpp:44
double textord_words_minlarge
Definition: tovars.cpp:39

◆ RowsFitModel()

bool tesseract::RowsFitModel ( const std::vector< RowScratchRegisters > *  rows,
int  start,
int  end,
const ParagraphModel model 
)

Definition at line 1859 of file paragraphs.cpp.

1860 {
1861 if (!AcceptableRowArgs(0, 1, __func__, rows, start, end)) {
1862 return false;
1863 }
1864 if (!ValidFirstLine(rows, start, model)) {
1865 return false;
1866 }
1867 for (int i = start + 1; i < end; i++) {
1868 if (!ValidBodyLine(rows, i, model)) {
1869 return false;
1870 }
1871 }
1872 return true;
1873}
bool ValidBodyLine(const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool ValidFirstLine(const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

◆ SaveDataToFile() [1/2]

bool tesseract::SaveDataToFile ( const GenericVector< char > &  data,
const char *  filename 
)
inline

Definition at line 254 of file genericvector.h.

254 {
255 FILE *fp = fopen(filename, "wb");
256 if (fp == nullptr) {
257 return false;
258 }
259 bool result = fwrite(&data[0], 1, data.size(), fp) == data.size();
260 fclose(fp);
261 return result;
262}
unsigned size() const
Definition: genericvector.h:70

◆ SaveDataToFile() [2/2]

TESS_API bool tesseract::SaveDataToFile ( const std::vector< char > &  data,
const char *  filename 
)

Definition at line 53 of file serialis.cpp.

// Writes every byte of `data` to `filename`, which is opened in binary write
// mode ("wb"). An empty vector produces an empty file.
// Returns true only when the file could be opened, all bytes were written and
// the stream closed without error; returns false otherwise.
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // data.data() is valid for an empty vector, whereas &data[0] is not.
  const bool written = fwrite(data.data(), 1, data.size(), fp) == data.size();
  // Buffered output may only reach the file during fclose, so a failure there
  // means the data was not saved.
  const bool closed = fclose(fp) == 0;
  return written && closed;
}

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos  script_pos)

Definition at line 193 of file ratngs.cpp.

193 {
194 switch (script_pos) {
195 case SP_NORMAL:
196 return "NORM";
197 case SP_SUBSCRIPT:
198 return "SUB";
199 case SP_SUPERSCRIPT:
200 return "SUPER";
201 case SP_DROPCAP:
202 return "DROPC";
203 }
204 return "SP_UNKNOWN";
205}

◆ search()

LIST tesseract::search ( LIST  list,
void *  key,
int_compare  is_equal 
)

Definition at line 211 of file oldlist.cpp.

// search: scans `list` for a node whose first_node() matches `key` according
// to the is_equal callback. A null is_equal is replaced by is_same.
// Returns the value of `list` at the point of the match, or NIL_LIST when no
// node matches.
// NOTE(review): the traversal relies on the iterate() macro advancing `list`
// itself; its definition is not shown here - confirm.
211 {
212 if (is_equal == nullptr) {
213 is_equal = is_same;
214 }
215
216 iterate(list) if ((*is_equal)(list->first_node(), key)) return list;
217 return (NIL_LIST);
218}

◆ segment_baseline()

bool tesseract::segment_baseline ( TO_ROW row,
TO_BLOCK block,
int32_t &  segments,
int32_t *  xstarts 
)

Definition at line 2083 of file makerow.cpp.

// segment_baseline: splits a row into horizontal segments according to how
// far blob bottoms sit from the fitted line y = line_m * x + line_c.
// A sliding window of textord_spline_medianwin shifts is kept in `yshifts`;
// its middle entry is classified as above (+1), below (-1) or on (0) the line
// using textord_spline_shift_fraction * block->line_size as the tolerance.
// A new segment starts when that state changes and more than
// textord_spline_minblobs blobs have been seen since the last break.
// Outputs: `segments` receives the segment count and xstarts[0..segments]
// the boundary x coordinates.
// Returns true if any window was classified off the line. Returns false when
// every window was on the line, and also when the row has too few blobs to
// fill a window (a single segment spanning the row is written in that case).
2088 {
2089 bool needs_curve; // needs curved line
2090 int blobcount; // no of blobs
2091 int blobindex; // current blob
2092 int last_state; // above, on , below
2093 int state; // of current blob
2094 float yshift; // from baseline
2095 TBOX box; // blob box
2096 TBOX new_box; // new_it box
2097 float middle; // xcentre of blob
2098 // blobs
2099 BLOBNBOX_IT blob_it = row->blob_list();
2100 BLOBNBOX_IT new_it = blob_it; // front end
2101 SORTED_FLOATS yshifts; // shifts from baseline
2102
2103 needs_curve = false;
2104 box = box_next_pre_chopped(&blob_it);
2105 xstarts[0] = box.left();
2106 segments = 1;
2107 blobcount = row->blob_list()->length();
2108 if (textord_oldbl_debug) {
2109 tprintf("Segmenting baseline of %d blobs at (%d,%d)\n", blobcount, box.left(), box.bottom());
2110 }
// Not enough blobs for a median window: emit one segment ending at the right
// edge of the last blob.
2111 if (blobcount <= textord_spline_medianwin || blobcount < textord_spline_minblobs) {
2112 blob_it.move_to_last();
2113 box = blob_it.data()->bounding_box();
2114 xstarts[1] = box.right();
2115 return false;
2116 }
2117 last_state = 0;
2118 new_it.mark_cycle_pt();
// Prime the window with the first textord_spline_medianwin shifts, keyed by
// blob index. If new_it wraps while priming, fall back to a single segment.
2119 for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2120 new_box = box_next_pre_chopped(&new_it);
2121 middle = (new_box.left() + new_box.right()) / 2.0;
2122 yshift = new_box.bottom() - row->line_m() * middle - row->line_c();
2123 // record shift
2124 yshifts.add(yshift, blobindex);
2125 if (new_it.cycled_list()) {
2126 xstarts[1] = new_box.right();
2127 return false;
2128 }
2129 }
// Advance blob_it by half a window. blobcount is reused as the loop counter
// and afterwards counts the blobs seen since the last segment break.
2130 for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++) {
2131 box = box_next_pre_chopped(&blob_it);
2132 }
2133 do {
2134 new_box = box_next_pre_chopped(&new_it);
2135 // get middle one
// NOTE(review): presumably SORTED_FLOATS keeps entries ordered by value, so
// this index is the window median - confirm.
2136 yshift = yshifts[textord_spline_medianwin / 2];
2137 if (yshift > textord_spline_shift_fraction * block->line_size) {
2138 state = 1;
2139 } else if (-yshift > textord_spline_shift_fraction * block->line_size) {
2140 state = -1;
2141 } else {
2142 state = 0;
2143 }
2144 if (state != 0) {
2145 needs_curve = true;
2146 }
2147 // tprintf("State=%d, prev=%d, shift=%g\n",
2148 // state,last_state,yshift);
// State changed and the current run is long enough: start a new segment at
// the left edge of the box tracked by blob_it.
2149 if (state != last_state && blobcount > textord_spline_minblobs) {
2150 xstarts[segments++] = box.left();
2151 blobcount = 0;
2152 }
2153 last_state = state;
// Slide the window: drop the entry keyed one window back and add the shift of
// the box just read through new_it.
2154 yshifts.remove(blobindex - textord_spline_medianwin);
2155 box = box_next_pre_chopped(&blob_it);
2156 middle = (new_box.left() + new_box.right()) / 2.0;
2157 yshift = new_box.bottom() - row->line_m() * middle - row->line_c();
2158 yshifts.add(yshift, blobindex);
2159 blobindex++;
2160 blobcount++;
2161 } while (!new_it.cycled_list());
// Close the final segment. If it is too short and is not the only segment,
// merge it into the previous one by overwriting the last boundary.
2162 if (blobcount > textord_spline_minblobs || segments == 1) {
2163 xstarts[segments] = new_box.right();
2164 } else {
2165 xstarts[--segments] = new_box.right();
2166 }
2167 if (textord_oldbl_debug) {
2168 tprintf("Made %d segments on row at (%d,%d)\n", segments, box.right(), box.bottom());
2169 }
2170 return needs_curve;
2171}
int textord_spline_minblobs
Definition: makerow.cpp:67
double textord_spline_shift_fraction
Definition: makerow.cpp:71
void remove(int32_t key)
Definition: sortflts.cpp:53
void add(float value, int32_t key)
Definition: sortflts.cpp:28

◆ segment_spline()

int tesseract::segment_spline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
int  degree,
int  pointcount,
int  xstarts[] 
)

Definition at line 1006 of file oldbasel.cpp.

// segment_spline: chooses the x coordinates at which a baseline described by
// the points (xcoords[i], ycoords[i]) is divided into spline segments.
// Step 1 collects turning points: indices where ycoords has a local minimum
// or maximum that differs by more than TURNLIMIT from the last extreme of the
// opposite kind. At most SPLINESIZE - 1 turning points are stored.
// Step 2 places one boundary between each consecutive pair of turning points.
// xstarts[0] is xcoords[0] - 1 and the final entry is
// xcoords[pointcount - 1] + 1.
// Returns the number of segments; xstarts[0..return value] are written.
// When degree < 2 the turning-point search is skipped, which gives one segment.
// xcoords[0] and xcoords[pointcount - 1] are read unconditionally, so
// pointcount must be at least 1.
// blobcoords and blobcount are not referenced in the lines shown here.
1013 {
1014 int ptindex; /*no along text line */
1015 int segment; /*partition no */
1016 int lastmin, lastmax; /*possible turn points */
1017 int turnpoints[SPLINESIZE]; /*good turning points */
1018 int turncount; /*no of turning points */
1019 int max_x; // max specified coord
1020
1021 xstarts[0] = xcoords[0] - 1; // leftmost defined pt
1022 max_x = xcoords[pointcount - 1] + 1;
// max_x is captured above before pointcount is zeroed here.
1023 if (degree < 2) {
1024 pointcount = 0;
1025 }
1026 turncount = 0; /*no turning points yet */
1027 if (pointcount > 3) {
1028 ptindex = 1;
1029 lastmax = lastmin = 0; /*start with first one */
// Scan interior points. A sufficiently deep minimum confirms the pending
// maximum as a turning point, and a sufficiently high maximum confirms the
// pending minimum.
1030 while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
1031 /*minimum */
1032 if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1033 if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
1034 if (turncount == 0 || turnpoints[turncount - 1] != lastmax) {
1035 /*new max point */
1036 turnpoints[turncount++] = lastmax;
1037 }
1038 lastmin = ptindex; /*latest minimum */
1039 } else if (ycoords[ptindex] < ycoords[lastmin]) {
1040 lastmin = ptindex; /*lower minimum */
1041 }
1042 }
1043
1044 /*maximum */
1045 if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1046 if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
1047 if (turncount == 0 || turnpoints[turncount - 1] != lastmin) {
1048 /*new min point */
1049 turnpoints[turncount++] = lastmin;
1050 }
1051 lastmax = ptindex; /*latest maximum */
1052 } else if (ycoords[ptindex] > ycoords[lastmax]) {
1053 lastmax = ptindex; /*higher maximum */
1054 }
1055 }
1056 ptindex++;
1057 }
// Handle the point at which the scan stopped: treat it as a possible final
// extreme, otherwise close off whichever extreme is still pending. Every
// store is guarded so that turncount stays below SPLINESIZE.
1058 /*possible global min */
1059 if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT &&
1060 (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1061 if (turncount < SPLINESIZE - 1) {
1062 /*2 more turns */
1063 turnpoints[turncount++] = lastmax;
1064 }
1065 if (turncount < SPLINESIZE - 1) {
1066 turnpoints[turncount++] = ptindex;
1067 }
1068 } else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
1069 /*possible global max */
1070 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1071 if (turncount < SPLINESIZE - 1) {
1072 /*2 more turns */
1073 turnpoints[turncount++] = lastmin;
1074 }
1075 if (turncount < SPLINESIZE - 1) {
1076 turnpoints[turncount++] = ptindex;
1077 }
1078 } else if (turncount > 0 && turnpoints[turncount - 1] == lastmin &&
1079 turncount < SPLINESIZE - 1) {
1080 if (ycoords[ptindex] > ycoords[lastmax]) {
1081 turnpoints[turncount++] = ptindex;
1082 } else {
1083 turnpoints[turncount++] = lastmax;
1084 }
1085 } else if (turncount > 0 && turnpoints[turncount - 1] == lastmax &&
1086 turncount < SPLINESIZE - 1) {
1087 if (ycoords[ptindex] < ycoords[lastmin]) {
1088 turnpoints[turncount++] = ptindex;
1089 } else {
1090 turnpoints[turncount++] = lastmin;
1091 }
1092 }
1093 }
1094
1095 if (textord_oldbl_debug && turncount > 0) {
1096 tprintf("First turn is %d at (%d,%d)\n", turnpoints[0], xcoords[turnpoints[0]],
1097 ycoords[turnpoints[0]]);
1098 }
// One boundary per pair of adjacent turning points. lastmax is reused here to
// hold the y value midway between the two turns.
1099 for (segment = 1; segment < turncount; segment++) {
1100 /*centre y coord */
1101 lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1102
1103 /* fix alg so that it works with both rising and falling sections */
1104 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) {
1105 /*find rising y centre */
1106 for (ptindex = turnpoints[segment - 1] + 1;
1107 ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++) {
1108 }
1109 } else {
1110 /*find falling y centre */
1111 for (ptindex = turnpoints[segment - 1] + 1;
1112 ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++) {
1113 }
1114 }
1115
// The boundary is the rounded mean of four x values: the two points around
// the mid-height crossing and the two turning points.
1116 /*centre x */
1117 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] + xcoords[turnpoints[segment - 1]] +
1118 xcoords[turnpoints[segment]] + 2) /
1119 4;
1120 /*halfway between turns */
1121 if (textord_oldbl_debug) {
1122 tprintf("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", segment,
1123 turnpoints[segment], xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1124 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
1125 }
1126 }
1127
// After the loop `segment` is max(1, turncount), so at least one segment is
// always reported.
1128 xstarts[segment] = max_x;
1129 return segment; /*no of splines */
1130}
#define TURNLIMIT
Definition: oldbasel.cpp:56

◆ separate_underlines()

void tesseract::separate_underlines ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 1781 of file makerow.cpp.

// separate_underlines: walks every blob of every row in `block` and pulls out
// blobs wider than block->line_size * textord_underline_width.
// Each such blob is rotated by blob_rotation and passed to test_underline.
// If it is accepted, the blob is moved to block->underlines. Otherwise, if it
// overlaps more than textord_max_blob_overlaps blobs of its row, it is moved
// to block->large_blobs. The rotated copy is deleted in every case.
// NOTE(review): listing lines 1817-1818 (the remainder of the test_underline
// call) are absent from this extract.
1784 { // correct orientation
1785 BLOBNBOX *blob; // current blob
1786 C_BLOB *rotated_blob; // rotated blob
1787 TO_ROW *row; // current row
1788 float length; // of g_vec
1789 TBOX blob_box;
1790 FCOORD blob_rotation; // inverse of rotation
1791 FCOORD g_vec; // skew rotation
1792 BLOBNBOX_IT blob_it; // iterator
1793 // iterator
1794 BLOBNBOX_IT under_it = &block->underlines;
1795 BLOBNBOX_IT large_it = &block->large_blobs;
1796 TO_ROW_IT row_it = block->get_rows();
1797 int min_blob_height = static_cast<int>(textord_min_blob_height_fraction * block->line_size + 0.5);
1798
// g_vec is the unit vector (1, -gradient) / length. blob_rotation starts as
// `rotation` with its y component negated and is then rotated by g_vec.
1799 // length of vector
1800 length = std::sqrt(1 + gradient * gradient);
1801 g_vec = FCOORD(1 / length, -gradient / length);
1802 blob_rotation = FCOORD(rotation.x(), -rotation.y());
1803 blob_rotation.rotate(g_vec); // undoing everything
1804 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1805 row = row_it.data();
1806 // get blobs
1807 blob_it.set_to_list(row->blob_list());
1808 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1809 blob = blob_it.data();
1810 blob_box = blob->bounding_box();
1811 if (blob_box.width() > block->line_size * textord_underline_width) {
1812 ASSERT_HOST(blob->cblob() != nullptr);
1813 rotated_blob = crotate_cblob(blob->cblob(), blob_rotation);
1814 if (test_underline(testing_on && textord_show_final_rows, rotated_blob,
1815 static_cast<int16_t>(row->intercept()),
1816 static_cast<int16_t>(block->line_size *
1819 under_it.add_after_then_move(blob_it.extract());
1820 if (testing_on && textord_show_final_rows) {
1821 tprintf("Underlined blob at:");
1822 rotated_blob->bounding_box().print();
1823 tprintf("Was:");
1824 blob_box.print();
1825 }
1826 } else if (CountOverlaps(blob->bounding_box(), min_blob_height, row->blob_list()) >
1827 textord_max_blob_overlaps) {
1828 large_it.add_after_then_move(blob_it.extract());
1829 if (testing_on && textord_show_final_rows) {
1830 tprintf("Large blob overlaps %d blobs at:",
1831 CountOverlaps(blob_box, min_blob_height, row->blob_list()));
1832 blob_box.print();
1833 }
1834 }
// rotated_blob is the copy returned by crotate_cblob above; it is released
// here whichever branch was taken.
1835 delete rotated_blob;
1836 }
1837 }
1838 }
1839}
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:614
bool textord_show_final_rows
Definition: makerow.cpp:50
bool test_underline(bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)
Definition: blkocc.cpp:47
double textord_underline_width
Definition: makerow.cpp:83

◆ Serialize() [1/2]

template<typename T >
bool tesseract::Serialize ( FILE *  fp,
const std::vector< T > &  data 
)

Definition at line 236 of file helpers.h.

// Serializes a vector to `fp`: a 32-bit element count followed by the
// elements.
//  - Class types: each element writes itself through item.Serialize(fp).
//  - Pointer types: each element is preceded by a one-byte presence flag, and
//    non-null elements write themselves through item->Serialize(fp).
//  - All other types: the elements are written as one raw block of
//    sizeof(T) * size bytes.
// Returns false on the first failed write, and also when the element count
// does not fit in the 32-bit header (nothing is written in that case).
template <typename T>
bool Serialize(FILE *fp, const std::vector<T> &data) {
  const auto size = static_cast<uint32_t>(data.size());
  // Refuse to write a header that would disagree with the payload.
  if (size != data.size()) {
    return false;
  }
  if (fwrite(&size, sizeof(size), 1, fp) != 1) {
    return false;
  }
  if constexpr (std::is_class<T>::value) {
    // Serialize a tesseract class.
    for (const auto &item : data) {
      if (!item.Serialize(fp)) {
        return false;
      }
    }
  } else if constexpr (std::is_pointer<T>::value) {
    // Serialize pointers.
    for (const auto &item : data) {
      const uint8_t non_null = (item != nullptr);
      if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
        return false;
      }
      if (non_null && !item->Serialize(fp)) {
        return false;
      }
    }
  } else if (size > 0) {
    if (fwrite(data.data(), sizeof(T), size, fp) != size) {
      return false;
    }
  }
  return true;
}
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:236

◆ Serialize() [2/2]

template<typename T >
bool tesseract::Serialize ( FILE *  fp,
const T *  data,
size_t  n = 1 
)

Definition at line 55 of file serialis.h.

// Writes `n` objects of type T, starting at `data`, to `fp` as raw bytes.
// Returns true only if fwrite reports that all `n` objects were written.
template <typename T>
bool Serialize(FILE *fp, const T *data, size_t n = 1) {
  const size_t items_written = fwrite(data, sizeof(T), n, fp);
  return items_written == n;
}

◆ set_row_spaces()

void tesseract::set_row_spaces ( TO_BLOCK block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 128 of file wordseg.cpp.

132 {
133 TO_ROW *row; // current row
134 TO_ROW_IT row_it = block->get_rows();
135
136 if (row_it.empty()) {
137 return; // empty block
138 }
139 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
140 row = row_it.data();
141 if (row->fixed_pitch == 0) {
142 row->min_space = static_cast<int32_t>(
143 ceil(row->pr_space - (row->pr_space - row->pr_nonsp) * textord_words_definite_spread));
144 row->max_nonspace = static_cast<int32_t>(
145 floor(row->pr_nonsp + (row->pr_space - row->pr_nonsp) * textord_words_definite_spread));
146 if (testing_on && textord_show_initial_words) {
147 tprintf("Assigning defaults %d non, %d space to row at %g\n", row->max_nonspace,
148 row->min_space, row->intercept());
149 }
150 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
151 row->space_size = row->pr_space;
152 row->kern_size = row->pr_nonsp;
153 }
154#ifndef GRAPHICS_DISABLED
155 if (textord_show_initial_words && testing_on) {
156 plot_word_decisions(to_win, static_cast<int16_t>(row->fixed_pitch), row);
157 }
158#endif
159 }
160}

◆ SetAdaptiveThreshold()

void tesseract::SetAdaptiveThreshold ( float  Threshold)

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth ( Image  pix,
BLOBNBOX blob 
)

Definition at line 68 of file tordmain.cpp.

// SetBlobStrokeWidth: estimates the horizontal and vertical stroke widths of
// `blob` and stores them with set_horz_stroke_width / set_vert_stroke_width.
// The blob's bounding box is clipped out of `pix` and a distance function is
// computed on the clip. Each row (for horizontal width) and each column (for
// vertical width) is then scanned for local maxima of that distance, and each
// maximum contributes a width sample of 2 * value - 1 (single peak) or
// 2 * value (double peak). The stored width is the median of the samples,
// or 0 when there are too few of them.
68 {
69 // Cut the blob rectangle into a Pix.
70 int pix_height = pixGetHeight(pix);
71 const TBOX &box = blob->bounding_box();
72 int width = box.width();
73 int height = box.height();
// NOTE(review): pix_height - box.top() presumably converts a bottom-up box
// coordinate into a top-down image row - confirm.
74 Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height);
75 Image pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
76 boxDestroy(&blob_pix_box);
77 Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
78 pix_blob.destroy();
79 // Compute the stroke widths.
80 uint32_t *data = pixGetData(dist_pix);
81 int wpl = pixGetWpl(dist_pix);
82 // Horizontal width of stroke.
83 STATS h_stats(0, width);
// Within a row, `pixel` always holds the value at column x - 1, with
// prev_pixel to its left and next_pixel (column x) to its right.
84 for (int y = 0; y < height; ++y) {
85 uint32_t *pixels = data + y * wpl;
86 int prev_pixel = 0;
87 int pixel = GET_DATA_BYTE(pixels, 0);
88 for (int x = 1; x < width; ++x) {
89 int next_pixel = GET_DATA_BYTE(pixels, x);
90 // We are looking for a pixel that is equal to its vertical neighbours,
91 // yet greater than its left neighbour.
92 if (prev_pixel < pixel && (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
93 (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
94 if (pixel > next_pixel) {
95 // Single local max, so an odd width.
96 h_stats.add(pixel * 2 - 1, 1);
97 } else if (pixel == next_pixel && x + 1 < width && pixel > GET_DATA_BYTE(pixels, x + 1)) {
98 // Double local max, so an even width.
99 h_stats.add(pixel * 2, 1);
100 }
101 }
102 prev_pixel = pixel;
103 pixel = next_pixel;
104 }
105 }
106 // Vertical width of stroke.
107 STATS v_stats(0, height);
// Within a column, `pixel` always holds the value at row y - 1; `pixels`
// points at row y, so `pixels - wpl` is the row that `pixel` came from.
108 for (int x = 0; x < width; ++x) {
109 int prev_pixel = 0;
110 int pixel = GET_DATA_BYTE(data, x);
111 for (int y = 1; y < height; ++y) {
112 uint32_t *pixels = data + y * wpl;
113 int next_pixel = GET_DATA_BYTE(pixels, x);
114 // We are looking for a pixel that is equal to its horizontal neighbours,
115 // yet greater than its upper neighbour.
116 if (prev_pixel < pixel && (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
117 (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
118 if (pixel > next_pixel) {
119 // Single local max, so an odd width.
120 v_stats.add(pixel * 2 - 1, 1);
121 } else if (pixel == next_pixel && y + 1 < height &&
122 pixel > GET_DATA_BYTE(pixels + wpl, x)) {
123 // Double local max, so an even width.
124 v_stats.add(pixel * 2, 1);
125 }
126 }
127 prev_pixel = pixel;
128 pixel = next_pixel;
129 }
130 }
131 dist_pix.destroy();
132 // Store the horizontal and vertical width in the blob, keeping both
133 // widths if there is enough information, otherwise only the one with
134 // the most samples.
135 // If there are insufficient samples, store zero, rather than using
136 // 2*area/perimeter, as the numbers that gives do not match the numbers
137 // from the distance method.
// "Enough" means at least (width + height) / 4 samples.
138 if (h_stats.get_total() >= (width + height) / 4) {
139 blob->set_horz_stroke_width(h_stats.ile(0.5f));
140 if (v_stats.get_total() >= (width + height) / 4) {
141 blob->set_vert_stroke_width(v_stats.ile(0.5f));
142 } else {
143 blob->set_vert_stroke_width(0.0f);
144 }
145 } else {
146 if (v_stats.get_total() >= (width + height) / 4 || v_stats.get_total() > h_stats.get_total()) {
147 blob->set_horz_stroke_width(0.0f);
148 blob->set_vert_stroke_width(v_stats.ile(0.5f));
149 } else {
// Horizontal has at least as many samples as vertical but not enough to count
// as reliable: keep its median only if there are more than 2 samples.
150 blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f) : 0.0f);
151 blob->set_vert_stroke_width(0.0f);
152 }
153 }
154}
void set_horz_stroke_width(float width)
Definition: blobbox.h:355
void set_vert_stroke_width(float width)
Definition: blobbox.h:361

◆ SetPropertiesForInputFile()

TESS_UNICHARSET_TRAINING_API void tesseract::SetPropertiesForInputFile ( const std::string &  script_dir,
const std::string &  input_unicharset_file,
const std::string &  output_unicharset_file,
const std::string &  output_xheights_file 
)

Definition at line 184 of file unicharset_training_utils.cpp.

187 {
188 UNICHARSET unicharset;
189
190 // Load the input unicharset
191 unicharset.load_from_file(input_unicharset_file.c_str());
192 tprintf("Loaded unicharset of size %zu from file %s\n", unicharset.size(),
193 input_unicharset_file.c_str());
194
195 // Set unichar properties
196 tprintf("Setting unichar properties\n");
197 SetupBasicProperties(true, false, &unicharset);
198 tprintf("Setting script properties\n");
199 SetScriptProperties(script_dir, &unicharset);
200 if (!output_xheights_file.empty()) {
201 std::string xheights_str = GetXheightString(script_dir, unicharset);
202 File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
203 }
204
205 // Write the output unicharset
206 tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
207 unicharset.save_to_file(output_unicharset_file.c_str());
208}
std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset)
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset)
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:391
bool save_to_file(const char *const filename) const
Definition: unicharset.h:361

◆ SetScriptProperties()

TESS_UNICHARSET_TRAINING_API void tesseract::SetScriptProperties ( const std::string &  script_dir,
UNICHARSET unicharset 
)

Definition at line 145 of file unicharset_training_utils.cpp.

145 {
146 for (int s = 0; s < unicharset->get_script_table_size(); ++s) {
147 // Load the unicharset for the script if available.
148 std::string filename =
149 script_dir + "/" + unicharset->get_script_from_script_id(s) + ".unicharset";
150 UNICHARSET script_set;
151 if (script_set.load_from_file(filename.c_str())) {
152 unicharset->SetPropertiesFromOther(script_set);
153 } else if (s != unicharset->common_sid() && s != unicharset->null_sid()) {
154 tprintf("Failed to load script unicharset from:%s\n", filename.c_str());
155 }
156 }
157 for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset->size(); ++c) {
158 if (unicharset->PropertiesIncomplete(c)) {
159 tprintf("Warning: properties incomplete for index %d = %s\n", c,
160 unicharset->id_to_unichar(c));
161 }
162 }
163}
int common_sid() const
Definition: unicharset.h:919
int null_sid() const
Definition: unicharset.h:916
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const
Definition: unicharset.h:662
void SetPropertiesFromOther(const UNICHARSET &src)
Definition: unicharset.h:563

◆ SetupBasicProperties() [1/2]

TESS_UNICHARSET_TRAINING_API void tesseract::SetupBasicProperties ( bool  report_errors,
bool  decompose,
UNICHARSET unicharset 
)

Definition at line 40 of file unicharset_training_utils.cpp.

// SetupBasicProperties: fills in per-unichar properties of `unicharset`.
// For every unichar id it:
//  - replaces the string with its custom-ligature counterpart if it matches
//    an entry of UNICHARSET::kCustomLigatures;
//  - converts the string to UTF-32 and sets isalpha / islower / isupper /
//    isdigit / ispunctuation if ANY code point has that property;
//  - sets the script from the first code point;
//  - sets other_case to the case-swapped string's id when that string is in
//    the unicharset, otherwise leaves it pointing at itself;
//  - sets direction from the first code point, and the mirror id when the
//    mirrored string is in the unicharset;
//  - stores a normalized form, falling back to the unichar string itself.
// post_load_setup() is called once at the end.
// `report_errors` controls whether missing other-case / mirror entries are
// printed. `decompose` is not referenced in the lines shown here.
// NOTE(review): listing lines 86 and 130-132 are absent from this extract;
// the declaration of `err` and the normalization call are therefore not
// visible.
40 {
41 for (size_t unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
42 // Convert any custom ligatures.
43 const char *unichar_str = unicharset->id_to_unichar(unichar_id);
44 for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
45 if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
46 unichar_str = UNICHARSET::kCustomLigatures[i][0];
47 break;
48 }
49 }
50
51 // Convert the unichar to UTF32 representation
52 std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);
53
54 // Assume that if the property is true for any character in the string,
55 // then it holds for the whole "character".
56 bool unichar_isalpha = false;
57 bool unichar_islower = false;
58 bool unichar_isupper = false;
59 bool unichar_isdigit = false;
60 bool unichar_ispunct = false;
61
62 for (char32 u_ch : uni_vector) {
63 if (u_isalpha(u_ch)) {
64 unichar_isalpha = true;
65 }
66 if (u_islower(u_ch)) {
67 unichar_islower = true;
68 }
69 if (u_isupper(u_ch)) {
70 unichar_isupper = true;
71 }
72 if (u_isdigit(u_ch)) {
73 unichar_isdigit = true;
74 }
75 if (u_ispunct(u_ch)) {
76 unichar_ispunct = true;
77 }
78 }
79
80 unicharset->set_isalpha(unichar_id, unichar_isalpha);
81 unicharset->set_islower(unichar_id, unichar_islower);
82 unicharset->set_isupper(unichar_id, unichar_isupper);
83 unicharset->set_isdigit(unichar_id, unichar_isdigit);
84 unicharset->set_ispunctuation(unichar_id, unichar_ispunct);
85
// uni_vector[0] is read unconditionally here.
// NOTE(review): this assumes UTF8ToUTF32 never returns an empty vector for a
// unichar in the set - confirm.
87 unicharset->set_script(unichar_id, uscript_getName(uscript_getScript(uni_vector[0], err)));
88
89 const int num_code_points = uni_vector.size();
90 // Obtain the lower/upper case if needed and record it in the properties.
// Default: a unichar is its own other case.
91 unicharset->set_other_case(unichar_id, unichar_id);
92 if (unichar_islower || unichar_isupper) {
93 std::vector<char32> other_case(num_code_points, 0);
94 for (int i = 0; i < num_code_points; ++i) {
95 // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
96 // However since they deal with UChars (so need a conversion function
97 // from char32 or UTF8string) and require a meaningful locale string,
98 // for now u_tolower()/u_toupper() are used.
99 other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) : u_tolower(uni_vector[i]);
100 }
101 std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
102 UNICHAR_ID other_case_id = unicharset->unichar_to_id(other_case_uch.c_str());
103 if (other_case_id != INVALID_UNICHAR_ID) {
104 unicharset->set_other_case(unichar_id, other_case_id);
105 } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
106 tprintf("Other case %s of %s is not in unicharset\n", other_case_uch.c_str(), unichar_str);
107 }
108 }
109
110 // Set RTL property and obtain mirror unichar ID from ICU.
111 std::vector<char32> mirrors(num_code_points, 0);
112 for (int i = 0; i < num_code_points; ++i) {
113 mirrors[i] = u_charMirror(uni_vector[i]);
114 if (i == 0) { // set directionality to that of the 1st code point
115 unicharset->set_direction(
116 unichar_id, static_cast<UNICHARSET::Direction>(u_charDirection(uni_vector[i])));
117 }
118 }
119 std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
120 UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
121 if (mirror_uch_id != INVALID_UNICHAR_ID) {
122 unicharset->set_mirror(unichar_id, mirror_uch_id);
123 } else if (report_errors) {
124 tprintf("Mirror %s of %s is not in unicharset\n", mirror_uch.c_str(), unichar_str);
125 }
126
127 // Record normalized version of this unichar.
// Unichar id 0 always takes the fallback branch and keeps its own string.
128 std::string normed_str;
129 if (unichar_id != 0 &&
133 &normed_str) &&
134 !normed_str.empty()) {
135 unicharset->set_normed(unichar_id, normed_str.c_str());
136 } else {
137 unicharset->set_normed(unichar_id, unichar_str);
138 }
139 ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
140 }
141 unicharset->post_load_setup();
142}
signed int char32
void set_mirror(UNICHAR_ID unichar_id, UNICHAR_ID mirror)
Definition: unicharset.h:483
void set_script(UNICHAR_ID unichar_id, const char *value)
Definition: unicharset.h:468
void set_isupper(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:447
void set_normed(UNICHAR_ID unichar_id, const char *normed)
Definition: unicharset.h:488
void set_direction(UNICHAR_ID unichar_id, UNICHARSET::Direction value)
Definition: unicharset.h:478
void set_ispunctuation(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:457
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:703
void set_isalpha(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:437
void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case)
Definition: unicharset.h:473
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:186
void set_islower(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:442
void set_isdigit(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:452

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
UNICHARSET unicharset 
)
inline

Definition at line 38 of file unicharset_training_utils.h.

38 {
39 SetupBasicProperties(report_errors, false, unicharset);
40}
void SetupBasicProperties(bool report_errors, UNICHARSET *unicharset)

◆ SetUpForClustering()

TESS_COMMON_TRAINING_API tesseract::CLUSTERER * tesseract::SetUpForClustering ( const FEATURE_DEFS_STRUCT FeatureDefs,
LABELEDLIST  char_sample,
const char *  program_feature_type 
)

This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.

Parameters
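char_sample  LABELEDLIST that holds all the feature information for a given character.
FeatureDefs  definitions of feature types/extractors
program_feature_type  short name of the feature type whose parameters are clustered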
Returns
Pointer to new clusterer data structure.
Note
Globals: None

Definition at line 434 of file commontraining.cpp.

435 {
436 uint16_t N;
437 CLUSTERER *Clusterer;
438 LIST FeatureList = nullptr;
439 FEATURE_SET FeatureSet = nullptr;
440
441 int32_t desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
442 N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
443 Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
444
445 FeatureList = char_sample->List;
446 uint32_t CharID = 0;
447 std::vector<float> Sample;
448 iterate(FeatureList) {
449 FeatureSet = reinterpret_cast<FEATURE_SET>(FeatureList->first_node());
450 for (int i = 0; i < FeatureSet->MaxNumFeatures; i++) {
451 if (Sample.empty()) {
452 Sample.resize(N);
453 }
454 for (int j = 0; j < N; j++) {
455 Sample[j] = FeatureSet->Features[i]->Params[j];
456 }
457 MakeSample(Clusterer, &Sample[0], CharID);
458 }
459 CharID++;
460 }
461 return Clusterer;
462
463} /* SetUpForClustering */
CLUSTERER * MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:1440
SAMPLE * MakeSample(CLUSTERER *Clusterer, const float *Feature, uint32_t CharID)
Definition: cluster.cpp:1491
const PARAM_DESC * ParamDesc
Definition: ocrfeatures.h:54

◆ SetUpForFloat2Int()

TESS_COMMON_TRAINING_API tesseract::CLASS_STRUCT * tesseract::SetUpForFloat2Int ( const UNICHARSET unicharset,
LIST  LabeledClassList 
)

Definition at line 631 of file commontraining.cpp.

631 {
632 MERGE_CLASS MergeClass;
633 CLASS_TYPE Class;
634 int NumProtos;
635 int NumConfigs;
636 int NumWords;
637 int i, j;
638 float Values[3];
639 PROTO_STRUCT *NewProto;
640 PROTO_STRUCT *OldProto;
641 BIT_VECTOR NewConfig;
642 BIT_VECTOR OldConfig;
643
644 // printf("Float2Int ...\n");
645
646 auto *float_classes = new CLASS_STRUCT[unicharset.size()];
647 iterate(LabeledClassList) {
648 UnicityTable<int> font_set;
649 MergeClass = reinterpret_cast<MERGE_CLASS>(LabeledClassList->first_node());
650 Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label.c_str())];
651 NumProtos = MergeClass->Class->NumProtos;
652 NumConfigs = MergeClass->Class->NumConfigs;
653 font_set.move(&MergeClass->Class->font_set);
654 Class->NumProtos = NumProtos;
655 Class->MaxNumProtos = NumProtos;
656 Class->Prototypes.resize(NumProtos);
657 for (i = 0; i < NumProtos; i++) {
658 NewProto = ProtoIn(Class, i);
659 OldProto = ProtoIn(MergeClass->Class, i);
660 Values[0] = OldProto->X;
661 Values[1] = OldProto->Y;
662 Values[2] = OldProto->Angle;
663 Normalize(Values);
664 NewProto->X = OldProto->X;
665 NewProto->Y = OldProto->Y;
666 NewProto->Length = OldProto->Length;
667 NewProto->Angle = OldProto->Angle;
668 NewProto->A = Values[0];
669 NewProto->B = Values[1];
670 NewProto->C = Values[2];
671 }
672
673 Class->NumConfigs = NumConfigs;
674 Class->MaxNumConfigs = NumConfigs;
675 Class->font_set.move(&font_set);
676 Class->Configurations.resize(NumConfigs);
677 NumWords = WordsInVectorOfSize(NumProtos);
678 for (i = 0; i < NumConfigs; i++) {
679 NewConfig = NewBitVector(NumProtos);
680 OldConfig = MergeClass->Class->Configurations[i];
681 for (j = 0; j < NumWords; j++) {
682 NewConfig[j] = OldConfig[j];
683 }
684 Class->Configurations[i] = NewConfig;
685 }
686 }
687 return float_classes;
688} // SetUpForFloat2Int
#define ProtoIn(Class, Pid)
Definition: protos.h:70
void Normalize(float *Values)
void move(UnicityTable< T > *from)
UnicityTable< int > font_set
Definition: protos.h:47

◆ ShortNameToFeatureType()

TESS_API uint32_t tesseract::ShortNameToFeatureType ( const FEATURE_DEFS_STRUCT FeatureDefs,
const char *  ShortName 
)

Search through all features currently defined and return the feature type for the feature with the specified short name. Trap an error if the specified name is not found.

Globals:

  • none
Parameters
FeatureDefs  definitions of feature types/extractors
ShortName  short name of a feature type
Returns
Feature type which corresponds to ShortName.

Definition at line 203 of file featdefs.cpp.

203 {
204 for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++) {
205 if (!strcmp((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) {
206 return static_cast<uint32_t>(i);
207 }
208 }
209 ASSERT_HOST(!"Illegal short name for a feature");
210 return 0;
211}

◆ ShowMatchDisplay()

void tesseract::ShowMatchDisplay ( )

◆ SoftmaxInPlace()

template<typename T >
void tesseract::SoftmaxInPlace ( int  n,
T *  inout 
)
inline

Definition at line 181 of file functions.h.

181 {
182 if (n <= 0) {
183 return;
184 }
185 // A limit on the negative range input to exp to guarantee non-zero output.
186 const T kMaxSoftmaxActivation = 86;
187
188 T max_output = inout[0];
189 for (int i = 1; i < n; i++) {
190 T output = inout[i];
191 if (output > max_output) {
192 max_output = output;
193 }
194 }
195 T prob_total = 0;
196 for (int i = 0; i < n; i++) {
197 T prob = inout[i] - max_output;
198 prob = std::exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
199 prob_total += prob;
200 inout[i] = prob;
201 }
202 if (prob_total > 0) {
203 for (int i = 0; i < n; i++) {
204 inout[i] /= prob_total;
205 }
206 }
207}

◆ sort_cmp()

template<typename T >
int tesseract::sort_cmp ( const void *  t1,
const void *  t2 
)

Definition at line 269 of file genericvector.h.

269 {
270 const T *a = static_cast<const T *>(t1);
271 const T *b = static_cast<const T *>(t2);
272 if (*a < *b) {
273 return -1;
274 }
275 if (*b < *a) {
276 return 1;
277 }
278 return 0;
279}

◆ sort_ptr_cmp()

template<typename T >
int tesseract::sort_ptr_cmp ( const void *  t1,
const void *  t2 
)

Definition at line 286 of file genericvector.h.

286 {
287 const T *a = *static_cast<T *const *>(t1);
288 const T *b = *static_cast<T *const *>(t2);
289 if (*a < *b) {
290 return -1;
291 }
292 if (*b < *a) {
293 return 1;
294 }
295 return 0;
296}

◆ SortByBoxBottom()

template<class BBC >
int tesseract::SortByBoxBottom ( const void *  void1,
const void *  void2 
)

Definition at line 449 of file bbgrid.h.

449 {
450 // The void*s are actually doubly indirected, so get rid of one level.
451 const BBC *p1 = *static_cast<const BBC *const *>(void1);
452 const BBC *p2 = *static_cast<const BBC *const *>(void2);
453 int result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
454 if (result != 0) {
455 return result;
456 }
457 result = p1->bounding_box().top() - p2->bounding_box().top();
458 if (result != 0) {
459 return result;
460 }
461 result = p1->bounding_box().left() - p2->bounding_box().left();
462 if (result != 0) {
463 return result;
464 }
465 return p1->bounding_box().right() - p2->bounding_box().right();
466}

◆ SortByBoxLeft()

template<class BBC >
int tesseract::SortByBoxLeft ( const void *  void1,
const void *  void2 
)

Definition at line 367 of file bbgrid.h.

367 {
368 // The void*s are actually doubly indirected, so get rid of one level.
369 const BBC *p1 = *static_cast<const BBC *const *>(void1);
370 const BBC *p2 = *static_cast<const BBC *const *>(void2);
371 int result = p1->bounding_box().left() - p2->bounding_box().left();
372 if (result != 0) {
373 return result;
374 }
375 result = p1->bounding_box().right() - p2->bounding_box().right();
376 if (result != 0) {
377 return result;
378 }
379 result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
380 if (result != 0) {
381 return result;
382 }
383 return p1->bounding_box().top() - p2->bounding_box().top();
384}

◆ SortByRating()

template<class BLOB_CHOICE >
int tesseract::SortByRating ( const void *  void1,
const void *  void2 
)

Definition at line 79 of file pieces.cpp.

79 {
80 const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE *const *>(void1);
81 const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE *const *>(void2);
82
83 if (p1->rating() < p2->rating()) {
84 return 1;
85 }
86 return -1;
87}
float rating() const
Definition: ratngs.h:84

◆ SortByUnicharID()

template<class BLOB_CHOICE >
int tesseract::SortByUnicharID ( const void *  void1,
const void *  void2 
)

Definition at line 71 of file pieces.cpp.

71 {
72 const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE *const *>(void1);
73 const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE *const *>(void2);
74
75 return p1->unichar_id() - p2->unichar_id();
76}

◆ SortRightToLeft()

template<class BBC >
int tesseract::SortRightToLeft ( const void *  void1,
const void *  void2 
)

Definition at line 408 of file bbgrid.h.

408 {
409 // The void*s are actually doubly indirected, so get rid of one level.
410 const BBC *p1 = *static_cast<const BBC *const *>(void1);
411 const BBC *p2 = *static_cast<const BBC *const *>(void2);
412 int result = p2->bounding_box().right() - p1->bounding_box().right();
413 if (result != 0) {
414 return result;
415 }
416 result = p2->bounding_box().left() - p1->bounding_box().left();
417 if (result != 0) {
418 return result;
419 }
420 result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
421 if (result != 0) {
422 return result;
423 }
424 return p1->bounding_box().top() - p2->bounding_box().top();
425}

◆ SpanUTF8NotWhitespace()

TESS_UNICHARSET_TRAINING_API unsigned int tesseract::SpanUTF8NotWhitespace ( const char *  text)

Definition at line 249 of file normstrngs.cpp.

249 {
250 int n_notwhite = 0;
251 for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
252 it != UNICHAR::end(text, strlen(text)); ++it) {
253 if (IsWhitespace(*it)) {
254 break;
255 }
256 n_notwhite += it.utf8_len();
257 }
258 return n_notwhite;
259}
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:228

◆ SpanUTF8Whitespace()

TESS_UNICHARSET_TRAINING_API unsigned int tesseract::SpanUTF8Whitespace ( const char *  text)

Definition at line 237 of file normstrngs.cpp.

237 {
238 int n_white = 0;
239 for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
240 it != UNICHAR::end(text, strlen(text)); ++it) {
241 if (!IsWhitespace(*it)) {
242 break;
243 }
244 n_white += it.utf8_len();
245 }
246 return n_white;
247}

◆ split()

const std::vector< std::string > tesseract::split ( const std::string &  s,
char  c 
)
inline

Definition at line 43 of file helpers.h.

43 {
44 std::string buff;
45 std::vector<std::string> v;
46 for (auto n : s) {
47 if (n != c) {
48 buff += n;
49 } else if (n == c && !buff.empty()) {
50 v.push_back(buff);
51 buff.clear();
52 }
53 }
54 if (!buff.empty()) {
55 v.push_back(buff);
56 }
57 return v;
58}

◆ split_stepped_spline()

bool tesseract::split_stepped_spline ( QSPLINE baseline,
float  jumplimit,
int *  xcoords,
int *  xstarts,
int &  segments 
)

Definition at line 1139 of file oldbasel.cpp.

1145 {
1146 bool doneany; // return value
1147 int segment; /*partition no */
1148 int startindex, centreindex, endindex;
1149 float leftcoord, rightcoord;
1150 int leftindex, rightindex;
1151 float step; // spline step
1152
1153 doneany = false;
1154 startindex = 0;
1155 for (segment = 1; segment < segments - 1; segment++) {
1156 step = baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1157 (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1158 if (step < 0) {
1159 step = -step;
1160 }
1161 if (step > jumplimit) {
1162 while (xcoords[startindex] < xstarts[segment - 1]) {
1163 startindex++;
1164 }
1165 centreindex = startindex;
1166 while (xcoords[centreindex] < xstarts[segment]) {
1167 centreindex++;
1168 }
1169 endindex = centreindex;
1170 while (xcoords[endindex] < xstarts[segment + 1]) {
1171 endindex++;
1172 }
1173 if (segments >= SPLINESIZE) {
1174 if (textord_debug_baselines) {
1175 tprintf("Too many segments to resegment spline!!\n");
1176 }
1177 } else if (endindex - startindex >= textord_spline_medianwin * 3) {
1178 while (centreindex - startindex < textord_spline_medianwin * 3 / 2) {
1179 centreindex++;
1180 }
1181 while (endindex - centreindex < textord_spline_medianwin * 3 / 2) {
1182 centreindex--;
1183 }
1184 leftindex = (startindex + startindex + centreindex) / 3;
1185 rightindex = (centreindex + endindex + endindex) / 3;
1186 leftcoord = (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1187 rightcoord = (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1188 while (xcoords[leftindex] > leftcoord &&
1189 leftindex - startindex > textord_spline_medianwin) {
1190 leftindex--;
1191 }
1192 while (xcoords[leftindex] < leftcoord &&
1193 centreindex - leftindex > textord_spline_medianwin / 2) {
1194 leftindex++;
1195 }
1196 if (xcoords[leftindex] - leftcoord > leftcoord - xcoords[leftindex - 1]) {
1197 leftindex--;
1198 }
1199 while (xcoords[rightindex] > rightcoord &&
1200 rightindex - centreindex > textord_spline_medianwin / 2) {
1201 rightindex--;
1202 }
1203 while (xcoords[rightindex] < rightcoord &&
1204 endindex - rightindex > textord_spline_medianwin) {
1205 rightindex++;
1206 }
1207 if (xcoords[rightindex] - rightcoord > rightcoord - xcoords[rightindex - 1]) {
1208 rightindex--;
1209 }
1210 if (textord_debug_baselines) {
1211 tprintf("Splitting spline at %d with step %g at (%d,%d)\n", xstarts[segment],
1212 baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1213 (xstarts[segment] + xstarts[segment + 1]) / 2.0),
1214 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1215 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1216 }
1217 insert_spline_point(xstarts, segment, (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1218 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2, segments);
1219 doneany = true;
1220 } else if (textord_debug_baselines) {
1221 tprintf("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", startindex,
1222 centreindex, endindex, (int32_t)textord_spline_medianwin);
1223 }
1224 }
1225 // else tprintf("Spline step at %d is %g\n",
1226 // xstarts[segment],
1227 // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
1228 // (xstarts[segment]+xstarts[segment+1])/2.0));
1229 }
1230 return doneany;
1231}
void insert_spline_point(int xstarts[], int segment, int coord1, int coord2, int &segments)
Definition: oldbasel.cpp:1239

◆ split_to_blob()

void tesseract::split_to_blob ( BLOBNBOX blob,
int16_t  chop_coord,
float  pitch_error,
C_OUTLINE_LIST *  left_coutlines,
C_OUTLINE_LIST *  right_coutlines 
)

Definition at line 260 of file fpchop.cpp.

265 {
266 C_BLOB *real_cblob; // cblob to chop
267
268 if (blob != nullptr) {
269 real_cblob = blob->remove_cblob();
270 } else {
271 real_cblob = nullptr;
272 }
273 if (!right_coutlines->empty() || real_cblob != nullptr) {
274 fixed_chop_cblob(real_cblob, chop_coord, pitch_error, left_coutlines, right_coutlines);
275 }
276
277 delete blob;
278}

◆ StandardDeviation()

float tesseract::StandardDeviation ( PROTOTYPE Proto,
uint16_t  Dimension 
)

This routine returns the standard deviation of the prototype in the indicated dimension.

Parameters
Proto  prototype to return standard deviation of
Dimension  dimension whose stddev is to be returned
Returns
Standard deviation of Prototype in Dimension

Definition at line 1673 of file cluster.cpp.

1673 {
1674 switch (Proto->Style) {
1675 case spherical:
1676 return std::sqrt(Proto->Variance.Spherical);
1677 case elliptical:
1678 return std::sqrt(Proto->Variance.Elliptical[Dimension]);
1679 case mixed:
1680 switch (Proto->Distrib[Dimension]) {
1681 case normal:
1682 return std::sqrt(Proto->Variance.Elliptical[Dimension]);
1683 case uniform:
1684 case D_random:
1685 return Proto->Variance.Elliptical[Dimension];
1686 case DISTRIBUTION_COUNT:
1687 ASSERT_HOST(!"Distribution count not allowed!");
1688 }
1689 }
1690 return 0.0f;
1691} // StandardDeviation
unsigned Style
Definition: cluster.h:79
std::vector< DISTRIBUTION > Distrib
Definition: cluster.h:82

◆ start_seam_list()

void tesseract::start_seam_list ( TWERD word,
std::vector< SEAM * > *  seam_array 
)

Definition at line 262 of file seam.cpp.

262 {
263 seam_array->clear();
264 TPOINT location;
265
266 for (unsigned b = 1; b < word->NumBlobs(); ++b) {
267 TBOX bbox = word->blobs[b - 1]->bounding_box();
268 TBOX nbox = word->blobs[b]->bounding_box();
269 location.x = (bbox.right() + nbox.left()) / 2;
270 location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
271 seam_array->push_back(new SEAM(0.0f, location));
272 }
273}
std::vector< TBLOB * > blobs
Definition: blobs.h:462

◆ StartParamDesc() [1/4]

MicroFeatureParams tesseract::StartParamDesc ( CharNormParams  )

◆ StartParamDesc() [2/4]

MicroFeatureParams CharNormParams tesseract::StartParamDesc ( IntFeatParams  )

◆ StartParamDesc() [3/4]

tesseract::StartParamDesc ( MicroFeatureParams  )

◆ StartParamDesc() [4/4]

tesseract::StartParamDesc ( PicoFeatParams  )

◆ StdSortByBoxLeft()

template<class BBC >
bool tesseract::StdSortByBoxLeft ( const void *  void1,
const void *  void2 
)

Definition at line 387 of file bbgrid.h.

387 {
388 // The void*s are actually doubly indirected, so get rid of one level.
389 const BBC *p1 = *static_cast<const BBC *const *>(void1);
390 const BBC *p2 = *static_cast<const BBC *const *>(void2);
391 int result = p1->bounding_box().left() - p2->bounding_box().left();
392 if (result != 0) {
393 return result < 0;
394 }
395 result = p1->bounding_box().right() - p2->bounding_box().right();
396 if (result != 0) {
397 return result < 0;
398 }
399 result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
400 if (result != 0) {
401 return result < 0;
402 }
403 return p1->bounding_box().top() < p2->bounding_box().top();
404}

◆ StdSortRightToLeft()

template<class BBC >
bool tesseract::StdSortRightToLeft ( const void *  void1,
const void *  void2 
)

Definition at line 428 of file bbgrid.h.

428 {
429 // The void*s are actually doubly indirected, so get rid of one level.
430 const BBC *p1 = *static_cast<const BBC *const *>(void1);
431 const BBC *p2 = *static_cast<const BBC *const *>(void2);
432 int result = p2->bounding_box().right() - p1->bounding_box().right();
433 if (result != 0) {
434 return result < 0;
435 }
436 result = p2->bounding_box().left() - p1->bounding_box().left();
437 if (result != 0) {
438 return result < 0;
439 }
440 result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
441 if (result != 0) {
442 return result < 0;
443 }
444 return p1->bounding_box().top() < p2->bounding_box().top();
445}

◆ STILL_LINKED()

constexpr ERRCODE tesseract::STILL_LINKED ( "Attempting to add an element with non nullptr links, to a list" )
constexpr

◆ STRING_PARAM_FLAG() [1/9]

tesseract::STRING_PARAM_FLAG ( D  ,
""  ,
"Directory to write output files to"   
)

◆ STRING_PARAM_FLAG() [2/9]

tesseract::STRING_PARAM_FLAG ( F  ,
"font_properties"  ,
"File listing font properties"   
)

◆ STRING_PARAM_FLAG() [3/9]

tesseract::STRING_PARAM_FLAG ( fontconfig_tmpdir  ,
""  ,
""   
)

◆ STRING_PARAM_FLAG() [4/9]

tesseract::STRING_PARAM_FLAG ( fonts_dir  ,
""  ,
""   
)

◆ STRING_PARAM_FLAG() [5/9]

tesseract::STRING_PARAM_FLAG ( O  ,
""  ,
"File to write unicharset to"   
)

◆ STRING_PARAM_FLAG() [6/9]

tesseract::STRING_PARAM_FLAG ( output_trainer  ,
""  ,
"File to write trainer to"   
)

◆ STRING_PARAM_FLAG() [7/9]

tesseract::STRING_PARAM_FLAG ( test_ch  ,
""  ,
"UTF8 test character string"   
)

◆ STRING_PARAM_FLAG() [8/9]

tesseract::STRING_PARAM_FLAG ( U  ,
"unicharset"  ,
"File to load unicharset from"   
)

◆ STRING_PARAM_FLAG() [9/9]

tesseract::STRING_PARAM_FLAG ( X  ,
""  ,
"File listing font xheights"   
)

◆ STRING_VAR_H() [1/2]

tesseract::STRING_VAR_H ( editor_image_win_name  )

◆ STRING_VAR_H() [2/2]

tesseract::STRING_VAR_H ( editor_word_name  )

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel model)
inline

Definition at line 69 of file paragraphs_internal.h.

69 {
70 return model != nullptr && model != kCrownLeft && model != kCrownRight;
71}

◆ SumVectors()

void tesseract::SumVectors ( int  n,
const TFloat v1,
const TFloat v2,
const TFloat v3,
const TFloat v4,
const TFloat v5,
TFloat sum 
)
inline

Definition at line 236 of file functions.h.

237 {
238 for (int i = 0; i < n; ++i) {
239 sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i];
240 }
241}

◆ Tanh()

TFloat tesseract::Tanh ( TFloat  x)
inline

Definition at line 44 of file functions.h.

44 {
45 if (x < 0) {
46 return -Tanh(-x);
47 }
48 x *= kScaleFactor;
49 auto index = static_cast<unsigned>(x);
50 if (index >= (kTableSize - 1)) {
51 return 1;
52 }
53 TFloat tanh_i0 = TanhTable[index];
54 TFloat tanh_i1 = TanhTable[index + 1];
55 // Linear interpolation.
56 return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
57}
TFloat Tanh(TFloat x)
Definition: functions.h:44
const TFloat TanhTable[]
Definition: functions.cpp:4

◆ TEST() [1/88]

tesseract::TEST ( CleanNamespaceTess  ,
DummyTest   
)

Definition at line 26 of file cleanapi_test.cc.

◆ TEST() [2/88]

tesseract::TEST ( ConvertBasicLatinToFullwidthLatinTest  ,
DoesConvertBasicLatin   
)

Definition at line 451 of file stringrenderer_test.cc.

451 {
452 const std::string kHalfAlpha = "ABCD";
453 const std::string kFullAlpha = "ABCD";
454 EXPECT_EQ(kFullAlpha, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfAlpha));
455
456 const std::string kHalfDigit = "0123";
457 const std::string kFullDigit = "0123";
458 EXPECT_EQ(kFullDigit, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfDigit));
459
460 const std::string kHalfSym = "()[]:;!?";
461 const std::string kFullSym = "()[]:;!?";
462 EXPECT_EQ(kFullSym, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSym));
463}

◆ TEST() [3/88]

tesseract::TEST ( ConvertBasicLatinToFullwidthLatinTest  ,
DoesNotConvertFullwidthLatin   
)

Definition at line 465 of file stringrenderer_test.cc.

465 {
466 const std::string kFullAlpha = "ABCD";
467 EXPECT_EQ(kFullAlpha, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullAlpha));
468
469 const std::string kFullDigit = "0123";
470 EXPECT_EQ(kFullDigit, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullDigit));
471
472 const std::string kFullSym = "()[]:;!?";
473 EXPECT_EQ(kFullSym, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSym));
474}

◆ TEST() [4/88]

tesseract::TEST ( ConvertBasicLatinToFullwidthLatinTest  ,
DoesNotConvertNonLatin   
)

Definition at line 476 of file stringrenderer_test.cc.

476 {
477 const std::string kHalfKana = "アイウエオ";
478 const std::string kFullKana = "アイウエオ";
479 EXPECT_EQ(kHalfKana, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfKana));
480 EXPECT_EQ(kFullKana, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullKana));
481}

◆ TEST() [5/88]

tesseract::TEST ( ConvertBasicLatinToFullwidthLatinTest  ,
DoesNotConvertSpace   
)

Definition at line 483 of file stringrenderer_test.cc.

483 {
484 const std::string kHalfSpace = " ";
485 const std::string kFullSpace = " ";
486 EXPECT_EQ(kHalfSpace, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSpace));
487 EXPECT_EQ(kFullSpace, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSpace));
488}

◆ TEST() [6/88]

tesseract::TEST ( ConvertFullwidthLatinToBasicLatinTest  ,
DoesConvertFullwidthLatin   
)

Definition at line 492 of file stringrenderer_test.cc.

492 {
493 const std::string kHalfAlpha = "ABCD";
494 const std::string kFullAlpha = "ABCD";
495 EXPECT_EQ(kHalfAlpha, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullAlpha));
496
497 const std::string kHalfDigit = "0123";
498 const std::string kFullDigit = "0123";
499 EXPECT_EQ(kHalfDigit, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullDigit));
500
501 const std::string kHalfSym = "()[]:;!?";
502 const std::string kFullSym = "()[]:;!?";
503 EXPECT_EQ(kHalfSym, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSym));
504}

◆ TEST() [7/88]

tesseract::TEST ( ConvertFullwidthLatinToBasicLatinTest  ,
DoesNotConvertBasicLatin   
)

Definition at line 506 of file stringrenderer_test.cc.

506 {
507 const std::string kHalfAlpha = "ABCD";
508 EXPECT_EQ(kHalfAlpha, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfAlpha));
509
510 const std::string kHalfDigit = "0123";
511 EXPECT_EQ(kHalfDigit, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfDigit));
512
513 const std::string kHalfSym = "()[]:;!?";
514 EXPECT_EQ(kHalfSym, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSym));
515}

◆ TEST() [8/88]

tesseract::TEST ( ConvertFullwidthLatinToBasicLatinTest  ,
DoesNotConvertNonLatin   
)

Definition at line 517 of file stringrenderer_test.cc.

517 {
518 const std::string kHalfKana = "アイウエオ";
519 const std::string kFullKana = "アイウエオ";
520 EXPECT_EQ(kHalfKana, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfKana));
521 EXPECT_EQ(kFullKana, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullKana));
522}

◆ TEST() [9/88]

tesseract::TEST ( ConvertFullwidthLatinToBasicLatinTest  ,
DoesNotConvertSpace   
)

Definition at line 524 of file stringrenderer_test.cc.

524 {
525 const std::string kHalfSpace = " ";
526 const std::string kFullSpace = " ";
527 EXPECT_EQ(kHalfSpace, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSpace));
528 EXPECT_EQ(kFullSpace, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSpace));
529}

◆ TEST() [10/88]

tesseract::TEST ( FileTest  ,
JoinPath   
)

Definition at line 20 of file fileio_test.cc.

20 {
21 EXPECT_EQ("/abc/def", File::JoinPath("/abc", "def"));
22 EXPECT_EQ("/abc/def", File::JoinPath("/abc/", "def"));
23 EXPECT_EQ("def", File::JoinPath("", "def"));
24}

◆ TEST() [11/88]

tesseract::TEST ( InputBufferTest  ,
Read   
)

Definition at line 45 of file fileio_test.cc.

45 {
46 const int kMaxBufSize = 128;
47 char buffer[kMaxBufSize];
48 auto s = "Hello\n world!";
49 strncpy(buffer, s, kMaxBufSize);
50 EXPECT_STREQ(s, buffer);
51 FILE *fp = tmpfile();
52 CHECK(fp != nullptr);
53 fwrite(buffer, strlen(s), 1, fp);
54 rewind(fp);
55
56 std::string str;
57 auto input = std::make_unique<InputBuffer>(fp);
58 EXPECT_TRUE(input->Read(&str));
59 std::vector<std::string> lines = split(str, '\n');
60 EXPECT_EQ(2, lines.size());
61 EXPECT_EQ("Hello", lines[0]);
62 EXPECT_EQ(" world!", lines[1]);
63}
#define EXPECT_STREQ(s1, s2)
Definition: gtest.h:2112
#define CHECK(condition)
Definition: include_gunit.h:76

◆ TEST() [12/88]

tesseract::TEST ( LangModelTest  ,
AddACharacter   
)

Definition at line 32 of file lang_model_test.cc.

32 {
33 constexpr char kTestString[] = "Simple ASCII string to encode !@#$%&";
34 constexpr char kTestStringRupees[] = "ASCII string with Rupee symbol ₹";
35 // Setup the arguments.
36 std::string script_dir = LANGDATA_DIR;
37 std::string eng_dir = file::JoinPath(script_dir, "eng");
38 std::string unicharset_path = TestDataNameToPath("eng_beam.unicharset");
39 UNICHARSET unicharset;
40 EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
41 std::string version_str = "TestVersion";
42 file::MakeTmpdir();
43 std::string output_dir = FLAGS_test_tmpdir;
44 LOG(INFO) << "Output dir=" << output_dir << "\n";
45 std::string lang1 = "eng";
46 bool pass_through_recoder = false;
47 // If these reads fail, we get a warning message and an empty list of words.
48 std::vector<std::string> words = split(ReadFile(file::JoinPath(eng_dir, "eng.wordlist")), '\n');
49 EXPECT_GT(words.size(), 0);
50 std::vector<std::string> puncs = split(ReadFile(file::JoinPath(eng_dir, "eng.punc")), '\n');
51 EXPECT_GT(puncs.size(), 0);
52 std::vector<std::string> numbers = split(ReadFile(file::JoinPath(eng_dir, "eng.numbers")), '\n');
53 EXPECT_GT(numbers.size(), 0);
54 bool lang_is_rtl = false;
55 // Generate the traineddata file.
56 EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang1,
57 pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
58 nullptr));
59 // Init a trainer with it, and encode kTestString.
60 std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
61 LSTMTrainer trainer1;
62 trainer1.InitCharSet(traineddata1);
63 std::vector<int> labels1;
64 EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
65 std::string test1_decoded = trainer1.DecodeLabels(labels1);
66 std::string test1_str(&test1_decoded[0], test1_decoded.length());
67 LOG(INFO) << "Labels1=" << test1_str << "\n";
68
69 // Add a new character to the unicharset and try again.
70 int size_before = unicharset.size();
71 unicharset.unichar_insert("₹");
72 SetupBasicProperties(/*report_errors*/ true, /*decompose (NFD)*/ false, &unicharset);
73 EXPECT_EQ(size_before + 1, unicharset.size());
74 // Generate the traineddata file.
75 std::string lang2 = "extended";
76 EXPECT_EQ(EXIT_SUCCESS, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang2,
77 pass_through_recoder, words, puncs, numbers, lang_is_rtl,
78 nullptr, nullptr));
79 // Init a trainer with it, and encode kTestString.
80 std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
81 LSTMTrainer trainer2;
82 trainer2.InitCharSet(traineddata2);
83 std::vector<int> labels2;
84 EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
85 std::string test2_decoded = trainer2.DecodeLabels(labels2);
86 std::string test2_str(&test2_decoded[0], test2_decoded.length());
87 LOG(INFO) << "Labels2=" << test2_str << "\n";
88 // encode kTestStringRupees.
89 std::vector<int> labels3;
90 EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
91 std::string test3_decoded = trainer2.DecodeLabels(labels3);
92 std::string test3_str(&test3_decoded[0], test3_decoded.length());
93 LOG(INFO) << "labels3=" << test3_str << "\n";
94 // Copy labels1 to a std::vector, renumbering the null char to match trainer2.
95 // Since Tensor Flow's CTC implementation insists on having the null be the
96 // last label, and we want to be compatible, null has to be renumbered when
97 // we add a class.
98 int null1 = trainer1.null_char();
99 int null2 = trainer2.null_char();
100 EXPECT_EQ(null1 + 1, null2);
101 std::vector<int> labels1_v(labels1.size());
102 for (unsigned i = 0; i < labels1.size(); ++i) {
103 if (labels1[i] == null1) {
104 labels1_v[i] = null2;
105 } else {
106 labels1_v[i] = labels1[i];
107 }
108 }
109 EXPECT_THAT(labels1_v, testing::ElementsAreArray(&labels2[0], labels2.size()));
110 // To make sure we are not cheating somehow, we can now encode the Rupee
111 // symbol, which we could not do before.
112 EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
113 EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));
114}
#define EXPECT_THAT(value, matcher)
#define EXPECT_GT(val1, val2)
Definition: gtest.h:2053
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
std::string TestDataNameToPath(const std::string &name)
int CombineLangModel(const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const std::vector< std::string > &words, const std::vector< std::string > &puncs, const std::vector< std::string > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
std::string DecodeLabels(const std::vector< int > &labels)
bool EncodeString(const std::string &str, std::vector< int > *labels) const
Definition: lstmtrainer.h:254
bool InitCharSet(const std::string &traineddata_path)
Definition: lstmtrainer.h:100
static void MakeTmpdir()
Definition: include_gunit.h:38
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65

◆ TEST() [13/88]

tesseract::TEST ( LangModelTest  ,
AddACharacterHindi   
)

Definition at line 117 of file lang_model_test.cc.

117 {
118 constexpr char kTestString[] = "हिन्दी में एक लाइन लिखें";
119 constexpr char kTestStringRupees[] = "हिंदी में रूपये का चिन्ह प्रयोग करें ₹१००.००";
120 // Setup the arguments.
121 std::string script_dir = LANGDATA_DIR;
122 std::string hin_dir = file::JoinPath(script_dir, "hin");
123 std::string unicharset_path = TestDataNameToPath("hin_beam.unicharset");
124 UNICHARSET unicharset;
125 EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
126 std::string version_str = "TestVersion";
128 std::string output_dir = FLAGS_test_tmpdir;
129 LOG(INFO) << "Output dir=" << output_dir << "\n";
130 std::string lang1 = "hin";
131 bool pass_through_recoder = false;
132 // If these reads fail, we get a warning message and an empty list of words.
133 std::vector<std::string> words = split(ReadFile(file::JoinPath(hin_dir, "hin.wordlist")), '\n');
134 EXPECT_GT(words.size(), 0);
135 std::vector<std::string> puncs = split(ReadFile(file::JoinPath(hin_dir, "hin.punc")), '\n');
136 EXPECT_GT(puncs.size(), 0);
137 std::vector<std::string> numbers = split(ReadFile(file::JoinPath(hin_dir, "hin.numbers")), '\n');
138 EXPECT_GT(numbers.size(), 0);
139 bool lang_is_rtl = false;
140 // Generate the traineddata file.
141 EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang1,
142 pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
143 nullptr));
144 // Init a trainer with it, and encode kTestString.
145 std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
146 LSTMTrainer trainer1;
147 trainer1.InitCharSet(traineddata1);
148 std::vector<int> labels1;
149 EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
150 std::string test1_decoded = trainer1.DecodeLabels(labels1);
151 std::string test1_str(&test1_decoded[0], test1_decoded.length());
152 LOG(INFO) << "Labels1=" << test1_str << "\n";
153
154 // Add a new character to the unicharset and try again.
155 int size_before = unicharset.size();
156 unicharset.unichar_insert("₹");
157 SetupBasicProperties(/*report_errors*/ true, /*decompose (NFD)*/ false, &unicharset);
158 EXPECT_EQ(size_before + 1, unicharset.size());
159 // Generate the traineddata file.
160 std::string lang2 = "extendedhin";
161 EXPECT_EQ(EXIT_SUCCESS, CombineLangModel(unicharset, script_dir, version_str, output_dir, lang2,
162 pass_through_recoder, words, puncs, numbers, lang_is_rtl,
163 nullptr, nullptr));
164 // Init a trainer with it, and encode kTestString.
165 std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
166 LSTMTrainer trainer2;
167 trainer2.InitCharSet(traineddata2);
168 std::vector<int> labels2;
169 EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
170 std::string test2_decoded = trainer2.DecodeLabels(labels2);
171 std::string test2_str(&test2_decoded[0], test2_decoded.length());
172 LOG(INFO) << "Labels2=" << test2_str << "\n";
173 // encode kTestStringRupees.
174 std::vector<int> labels3;
175 EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
176 std::string test3_decoded = trainer2.DecodeLabels(labels3);
177 std::string test3_str(&test3_decoded[0], test3_decoded.length());
178 LOG(INFO) << "labels3=" << test3_str << "\n";
179 // Copy labels1 to a std::vector, renumbering the null char to match trainer2.
180 // Since Tensor Flow's CTC implementation insists on having the null be the
181 // last label, and we want to be compatible, null has to be renumbered when
182 // we add a class.
183 int null1 = trainer1.null_char();
184 int null2 = trainer2.null_char();
185 EXPECT_EQ(null1 + 1, null2);
186 std::vector<int> labels1_v(labels1.size());
187 for (unsigned i = 0; i < labels1.size(); ++i) {
188 if (labels1[i] == null1) {
189 labels1_v[i] = null2;
190 } else {
191 labels1_v[i] = labels1[i];
192 }
193 }
194 EXPECT_THAT(labels1_v, testing::ElementsAreArray(&labels2[0], labels2.size()));
195 // To make sure we are not cheating somehow, we can now encode the Rupee
196 // symbol, which we could not do before.
197 EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
198 EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));
199}

◆ TEST() [14/88]

tesseract::TEST ( NormstrngsTest  ,
AllScriptsRegtest   
)

Definition at line 183 of file normstrngs_test.cc.

183 {
184 // Tests some valid text in a large number of scripts, some of which were
185 // found to be rejected by an earlier version.
186 const std::vector<std::pair<std::string, std::string>> kScriptText(
187 {{"Arabic",
188 " فكان منهم علقمة بن قيس ، و إبراهيم النخعي ، و الأسود بن"
189 "توفي بالمدينة في هذه السنة وهو ابن مائة وعشرين سنة "
190 "مجموعه هیچ اثری در فنون هنر و ادب و ترجمه، تقدیم پیشگاه ارجمند "
191 "سازنده تاریخ نگاه میکرد و به اصطلاح انسان و فطرت انسانی را زیربنای"},
192 {"Armenian",
193 "անտիկ աշխարհի փիլիսոփաների կենսագրությունը, թե′ նրանց ուս-"
194 "պատրաստւում է դալ (բուլամա): Կովկասում կաթից նաև պատ-"
195 "Հոգաբարձութեան յղել այդ անձին յիմարութիւնը հաստա-"
196 "գծերը եւ միջագծերը կը համրուին վարէն վեր:"},
197 {"Bengali",
198 "এসে দাঁড়ায় দাও খানি উঁচিয়ে নিয়ে । ঝরনার স্বচ্ছ জলে প্রতিবিম্বিত "
199 "পাঠিয়ে, গোবিন্দ স্মরণ করে, নির্ভয়ে রওনা হয়েছিল। তাতে সে "
200 "সুলতার। মনে পড়ে বিয়ের সময় বাবা এদের বাড়ি থেকে ঘুরে "
201 "কিন্তু তারপর মাতৃহৃদয় কেমন করে আছে? কী"},
202 {"Cyrillic",
203 "достей, є ще нагороди й почесті, є хай і сумнівна, але слава, "
204 "вып., 96б). Параўн. найсвятший у 1 знач., насвятейший у 1 знач., "
205 "»Правді«, — гітлерівські окупанти винищували нижчі раси, після дру- "
206 "І знов майдан зачорнів од народу. Всередині чоло-"},
207 {"Devanagari",
208 "डा॰ नै हात्तीमाथि चढेर त्यो भएनेर आइपुगे। राजालाई देखी "
209 "बाबतीत लिहिणे ही एक मोठीच जबाबदारी आहे. काकासाहेबांच्या कार्याचा "
210 "प्रबंध, आधोगिक प्रबंध तथा बैंकिंग एवम वाणिज्य आदि विषयों में "
211 "चित्रकृती दिल्या. शंभराहून अधिक देश आज आपापले चित्रपट निर्माण करीत"},
212 {"Greek",
213 "Μέσα ένα τετράδιο είχα στριμώξει το πρώτο "
214 "νον αξίως τού ευαγγελίου τού χριστού πολιτεύεσθε, ίνα "
215 "οὐδεμία ὑπ' αὐτοῦ μνεία γίνεται τῶν οἰκείων χωρίων. "
216 "είτα την φάσιν αυτήν ην ούτος εποιήσατο κατά του Μίκω-"},
217 {"Gujarati",
218 "ઉપહારગૃહે ને નાટ્યસ્થળે આ એ જ તેલ કડકડતું "
219 "શકી. ભાવવધારો અટકાવી નથી શકી અને બેકારીને "
220 "ત્યાં વાંકુથી પાછે આવ્યો, ચોરીનો માલ સોંપવા ! "
221 "કહી. એણે રેશમના કપડામાં વીંટી રાખેલ કુંવરીની છબી"},
222 {"Gurmukhi",
223 "ਯਾਦ ਰਹੇ ਕਿ ‘ਨਫਰਤ ’ ਦਾ ਵਿਸ਼ਾ ਕ੍ਰਾਤੀ ਨਹੀ ਹੈ ਅਤੇ ਕਵੀ ਦੀ ਇਹ "
224 "ਮਹਾਂ ਨੰਦਾ ਕੋਲ ਇਕ ਚੀਜ਼ ਸੀ ਉਹ ਸੀ ਸਚ, ਕੋਰਾ ਸਚ, ਬੇਧਤ੍ਰਕ ਕਹਿੳ "
225 "ਭੂਰਾ ਸਾਨੂੰ ਥੜਾ ਚੰਗਾ ਲਗਦਾ ਸੀ । ਉਸ ਦਾ ਇਕ ਪੈਰ ਜਨਮ ਤੋ "
226 "ਨੂੰ ਇਹ ਅਧਿਕਾਰ ਦਿੱਤਾ ਕਿ ਉਹ ਸਿੱਖ ਵਿਰੋਧ ਦਾ ਸੰਗਠਨ ਕਰੇ ਅਤੇ 3 ਸਤੰਬਰ,"},
227 {"Hangul",
228 "로 들어갔다. 이대통령은 아이젠하워 대통령의 뒷모습을 보면서 "
229 "그것뿐인 줄 아요? 노름도 했다 캅니다. 빌어묵을 놈이 그러 "
230 "의 가장 과학적 태도이며, 우리 역사를 가장 정확하게 학습할 수 있는 "
231 "마르크스 레"
232 "각하는 그는 그들의 식사보장을 위해 때때로 집에"},
233 {"HanS",
234 "大凡世界上的先生可 分 三 种: 第一种只会教书, 只会拿一 "
235 "书像是探宝一样,在茶叶店里我买过西湖龙井﹑黄山毛峰﹑福建的铁观音﹑大红"
236 " "
237 "持 “左” 倾冒险主义的干部,便扣上 “富农 "
238 "笑说:“我听说了,王总工程师也跟我说过了,只是工作忙,谁"},
239 {"HanT",
240 "叁、 銀行資產管理的群組分析模式 "
241 "民國六十三年,申請就讀台灣大學歷史研究所,並從事著述,"
242 "質言之﹐在社會結構中﹐性質﹑特徵﹑地位相類似的一羣人﹐由於 "
243 "董橋,一九四二年生,福建晉江人,國立成功大學外"},
244 {"Hebrew",
245 " אֵ-לִי, אֵ-לִי, כֵּיַצד מְטַפְּסִים בְּקִירוֹת שֶׁל זְכוּכִי"
246 " הראשון חוצה אותי שוב. אני בסיבוב הרביעי, הוא בטח מתחיל את"
247 " ווערטער געהאט, אבער דער עיקר איז ניט דאָס וואָרט, נאָר"
248 " על גחלת היהדות המקורית בעירך, נתת צביון ואופי מיוחד"},
249 {"Japanese",
250 "は異民族とみなされていた。楚の荘王(前613〜前 "
251 "を詳細に吟味する。実際の治療活動の領域は便宜上、(1) 障害者 "
252 "困難性は多角企業の場合原則として部門別に判断されている.). "
253 "☆ご希望の団体には見本をお送りします"},
254 {"Kannada",
255 "ಕೂಡ ಯುದ್ಧ ಮಾಡಿ ಜಯಪಡೆ. ನಂತರ ನಗರದೊಳಕ್ಕೆ ನಡೆ ಇದನ್ನು "
256 "ಅಸಹ್ಯದೃಶ್ಯ ಯಾರಿಗಾದರೂ ನಾಚಿಕೆತರುವಂತಹದಾಗಿದೆ. ಆರೋಗ್ಯ ದೃಷ್ಟಿ "
257 "ಯಾಗಲಿ, ಮೋಹನನಾಗಲಿ ಇಂಥ ಬಿಸಿಲಿನಲ್ಲಿ ಎಂದೂ ಬಹಳ ಹೊತ್ತು "
258 "\"ಇದೆ...ಖಂಡಿತಾ ಇದೆ\" ಅಂದ ಮನಸ್ಸಿನಲ್ಲಿಯೇ ವಂದಿಸುತ್ತಾ,"},
259 {"Khmer",
260 "សិតសក់និងផ្លាស់សម្លៀកបំពាក់ពេលយប់ចេញ។ "
261 "និយាយអំពីនគរនេះ ប្រាប់ដល់លោកទាំងមូលឲ្យដឹងច្បាស់លាស់អំពី "
262 "កន្លះកាថាសម្រាប់ទន្ទេញឲ្យងាយចាំ បោះពុម្ពនៅក្នុងទ្រង់ទ្រាយបច្ចុប្បន្ន "
263 "ឯកសារនេះបានផ្សព្វផ្សាយនៅក្នុងសន្និសីទ"},
264 {"Lao",
265 "ເອີຍ ! ຟັງສຽງຟ້າມັນຮ້ອງຮ່ວນ ມັນດັງໄກໆ ເອີຍ "
266 "ໄດລຽງດູລາວມາດວບຄວາມລາບາກຫລາຍ; "
267 "ບາງໄດ້ ເຈົ້າລອງສູ້ບໍ່ໄດ້ຈຶ່ງຫນີລົງມາວຽງຈັນ. "
268 "ລົບອອກຈາກ 3 ເຫລືອ 1, ຂ້ອຍຂຽນ 1 (1)"},
269 {"Latin",
270 "režisoru, palīdzēja to manu domīgo, kluso Dzejas metru ielikt "
271 "Ešte nedávno sa chcel mladý Novomeský „liečiť” "
272 "tiivisia kysymyksiä, mistä seuraa, että spekula- | don luonteesta "
273 "Grabiel Sanchez, yang bertani selama 120 tahun meninggal"},
274 {"Malayalam",
275 "അമൂർത്തചിത്രമായിരിക്കും. ഛേ! ആ വീട്ടിലേക്ക് അവളൊന്നിച്ച് പോകേണ്ടതാ "
276 "മൃഗങ്ങൾക്ക് എന്തെക്കിലും പറ്റിയാൽ മാത്രം ഞാനതു "
277 "വെലക്ക് വേണമെങ്കിൽ തരാം. എന്തോ തരും? പറ. "
278 "എല്ലാം കഴിഞ്ഞ് സീനിയറിന്റെ അടുത്തു ചെന്ന് കാൽതൊട്ട"},
279 {"Tamil",
280 "பொருத்தமாகப் பாடினாள் நம் ஔவைப் பாட்டி. காவிரி "
281 "உள்ளடக்கி நிற்பது விநோத வார்த்தையின் அஃறிணை "
282 "சூரிய கிரஹண சமயத்தில் குருக்ஷேத்திரம் செல்வது "
283 "காலங்களில் வெளியே போகும்பொழுது, 'ஸார்', 'ஸார்',"},
284 {"Telugu",
285 "1892లో ఆమె 10వ సంవత్సరంలో గుంటూరు తాలూకా వేములాపాడు "
286 "ఫండ్స్ చట్టము'నందు చేయబడెను. తరువాత క్రీ. శ. "
287 "సంచారము చేయును. మీరు ఇప్పుడే కాళకాలయమునకు "
288 "ఎంతటి సరళమైన భాషలో వ్రాశాడో విశదమవుతుంది. పైగా ఆనాటి భాష"},
289 {"Thai",
290 "อ้อ! กับนัง....แม่ยอดพระกลิ่น นั่นเอง ! หรับก็ย่อมจะรู้โดยชัดเจนว่า "
291 "ถ้าตราบใดยังมีเรือปืนอยู่ใกล้ ๆ แล้ว ตราบนั้น "
292 "พระดำรินี้ ที่มีคตีทำกรวยหมากและธูปเทียน "
293 "อันยานมีเรือเปนต้นฃ้ามยาก ฯ เพราะว่าแม่น้ำนั่นมีน้ำใสยิ่ง แม้เพียง"},
294 {"Vietnamese",
295 "vợ đến tai mụ hung thần Xăng-tô- mê-a. Mụ vô cùng "
296 "chiếc xe con gấu chạy qua nhà. Nhưng thỉnh thoảng "
297 "hòa hoãn với người Pháp để cho họ được dựng một ngôi nhà thờ nhỏ bằng "
298 "Cặp câu đói súc tích mà sâu sắc, là lời chúc lời"}});
299
300 for (const auto &p : kScriptText) {
301 std::string normalized;
304 tesseract::GraphemeNorm::kNormalize, p.second.c_str(), &normalized))
305 << "Script=" << p.first << " text=" << p.second;
306 }
307}
const char * p

◆ TEST() [15/88]

tesseract::TEST ( NormstrngsTest  ,
BasicText   
)

Definition at line 31 of file normstrngs_test.cc.

31 {
32 const char *kBasicText = "AbCd Ef";
33 std::string result;
34 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
35 GraphemeNorm::kNormalize, kBasicText, &result));
36 EXPECT_STREQ(kBasicText, result.c_str());
37}

◆ TEST() [16/88]

tesseract::TEST ( NormstrngsTest  ,
DandaOK   
)

Definition at line 176 of file normstrngs_test.cc.

176 {
177 std::string str = "\u0964"; // Single danda.
178 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 1, 1, 1, str);
179 str = "\u0965"; // Double danda.
180 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 1, 1, 1, str);
181}
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)

◆ TEST() [17/88]

tesseract::TEST ( NormstrngsTest  ,
DetectsCorrectText   
)

Definition at line 89 of file normstrngs_test.cc.

89 {
90 std::string chars;
91 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone, GraphemeNorm::kNormalize,
92 kEngText, &chars));
93 EXPECT_STREQ(kEngText, chars.c_str());
94
95 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone, GraphemeNorm::kNormalize,
96 kHinText, &chars))
97 << "Incorrect text: '" << kHinText << "'";
98 EXPECT_STREQ(kHinText, chars.c_str());
99
100 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone, GraphemeNorm::kNormalize,
101 kKorText, &chars));
102 EXPECT_STREQ(kKorText, chars.c_str());
103}
const char kHinText[]
const char kEngText[]
const char kKorText[]

◆ TEST() [18/88]

tesseract::TEST ( NormstrngsTest  ,
DetectsIncorrectText   
)

Definition at line 105 of file normstrngs_test.cc.

105 {
106 for (auto &kBadlyFormedHinWord : kBadlyFormedHinWords) {
107 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
108 GraphemeNorm::kNormalize, kBadlyFormedHinWord, nullptr))
109 << kBadlyFormedHinWord;
110 }
111 for (auto &kBadlyFormedThaiWord : kBadlyFormedThaiWords) {
112 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone,
113 GraphemeNorm::kNormalize, kBadlyFormedThaiWord, nullptr))
114 << kBadlyFormedThaiWord;
115 }
116}
const char * kBadlyFormedHinWords[]
const char * kBadlyFormedThaiWords[]

◆ TEST() [19/88]

tesseract::TEST ( NormstrngsTest  ,
DigitOK   
)

Definition at line 171 of file normstrngs_test.cc.

171 {
172 std::string str = "\u0cea"; // Digit 4.
173 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 1, 1, 1, str);
174}

◆ TEST() [20/88]

tesseract::TEST ( NormstrngsTest  ,
FullwidthToHalfwidth   
)

Definition at line 389 of file normstrngs_test.cc.

389 {
390 // U+FF21 -> U+0041 (Latin capital letter A)
391 EXPECT_EQ('A', FullwidthToHalfwidth(0xFF21));
392 // U+FF05 -> U+0025 (percent sign)
393 EXPECT_EQ('%', FullwidthToHalfwidth(0xFF05));
394 // U+FFE6 -> U+20A9 (won sign)
395 EXPECT_EQ(0x20A9, FullwidthToHalfwidth(0xFFE6));
396
397#if defined(MISSING_CODE) && defined(INCLUDE_TENSORFLOW)
398 // Skipped because of missing UniLib::FullwidthToHalfwidth.
399 const int32_t kMinUnicodeValue = 33;
400 const int32_t kMaxUnicodeValue = 0x10FFFF;
401 for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
402 if (!IsValidCodepoint(ch))
403 continue;
404 char text[80];
405 snprintf(text, sizeof(text), "Failed at U+%x", ch);
406 SCOPED_TRACE(text);
407 std::string str = EncodeAsUTF8(ch);
408 const std::string expected_half_str =
409 UniLib::FullwidthToHalfwidth(str.c_str(), str.length(), true);
410 EXPECT_EQ(expected_half_str, EncodeAsUTF8(FullwidthToHalfwidth(ch)));
411 }
412#endif
413}
#define SCOPED_TRACE(message)
Definition: gtest.h:2281
char32 FullwidthToHalfwidth(const char32 ch)
Definition: normstrngs.cpp:282

◆ TEST() [21/88]

tesseract::TEST ( NormstrngsTest  ,
IsInterchangeValid   
)

Definition at line 353 of file normstrngs_test.cc.

353 {
354#ifdef INCLUDE_TENSORFLOW
355 const int32_t kMinUnicodeValue = 33;
356 const int32_t kMaxUnicodeValue = 0x10FFFF;
357 for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
358 char text[80];
359 snprintf(text, sizeof(text), "Failed at U+%x", ch);
360 SCOPED_TRACE(text);
362 }
363#else
364 GTEST_SKIP();
365#endif
366}
#define GTEST_SKIP()
Definition: gtest.h:1889
bool IsInterchangeValid(const char32 ch)
Definition: normstrngs.cpp:261
bool IsInterchangeValid(char32 c)
Definition: unilib.cc:33

◆ TEST() [22/88]

tesseract::TEST ( NormstrngsTest  ,
IsInterchangeValid7BitAscii   
)

Definition at line 370 of file normstrngs_test.cc.

370 {
371#if defined(MISSING_CODE) && defined(INCLUDE_TENSORFLOW)
372 const int32_t kMinUnicodeValue = 33;
373 const int32_t kMaxUnicodeValue = 0x10FFFF;
374 for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
375 char text[80];
376 snprintf(text, sizeof(text), "Failed at U+%x", ch);
377 SCOPED_TRACE(text);
378 std::string str = EncodeAsUTF8(ch);
380 }
381#else
382 // Skipped because of missing UniLib::IsInterchangeValid7BitAscii.
383 GTEST_SKIP();
384#endif
385}
bool IsInterchangeValid7BitAscii(const char32 ch)
Definition: normstrngs.cpp:276

◆ TEST() [23/88]

tesseract::TEST ( NormstrngsTest  ,
IsWhitespace   
)

Definition at line 309 of file normstrngs_test.cc.

309 {
310 // U+0020 is whitespace
315 // U+2000 through U+200A
316 for (char32 ch = 0x2000; ch <= 0x200A; ++ch) {
317 char text[80];
318 snprintf(text, sizeof(text), "Failed at U+%x", ch);
319 SCOPED_TRACE(text);
321 }
322 // U+3000 is whitespace
323 EXPECT_TRUE(IsWhitespace(0x3000));
324 // ZWNBSP is not considered a space.
325 EXPECT_FALSE(IsWhitespace(0xFEFF));
326}

◆ TEST() [24/88]

tesseract::TEST ( NormstrngsTest  ,
JoinersStayInArabic   
)

Definition at line 165 of file normstrngs_test.cc.

165 {
166 std::string str = "\u0628\u200c\u0628\u200d\u0628";
167 // Returns true, string untouched.
168 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 5, 5, 2, str);
169}

◆ TEST() [25/88]

tesseract::TEST ( NormstrngsTest  ,
LigatureText   
)

Definition at line 39 of file normstrngs_test.cc.

39 {
40 const char *kTwoByteLigText = "ĳ"; // U+0133 (ĳ) -> ij
41 std::string result;
42 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
43 GraphemeNorm::kNormalize, kTwoByteLigText, &result));
44 EXPECT_STREQ("ij", result.c_str());
45
46 const char *kThreeByteLigText = "ﬁnds"; // U+FB01 (ﬁ) -> fi
47 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
48 GraphemeNorm::kNormalize, kThreeByteLigText, &result));
49 EXPECT_STREQ("finds", result.c_str());
50}

◆ TEST() [26/88]

tesseract::TEST ( NormstrngsTest  ,
NoLonelyJoiners   
)

Definition at line 127 of file normstrngs_test.cc.

127 {
128 std::string str = "x\u200d\u0d06\u0d34\u0d02";
129 std::vector<std::string> glyphs;
130 // Returns true, but the joiner is gone.
131 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
132 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
134 EXPECT_EQ(glyphs.size(), 3);
135 EXPECT_EQ(glyphs[0], std::string("x"));
136 EXPECT_EQ(glyphs[1], std::string("\u0d06"));
137 EXPECT_EQ(glyphs[2], std::string("\u0d34\u0d02"));
138}

◆ TEST() [27/88]

tesseract::TEST ( NormstrngsTest  ,
NoLonelyJoinersNonAlpha   
)

Definition at line 153 of file normstrngs_test.cc.

153 {
154 std::string str = "\u200d+\u200c\u200d";
155 // Returns true, but the joiners are gone.
156 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 1, 1, 1, std::string("+"));
157 str = "\u200d\u200c\u200d";
158 // Without the plus, the string is invalid.
159 std::string result;
160 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
161 str.c_str(), &result))
162 << PrintString32WithUnicodes(result);
163}

◆ TEST() [28/88]

tesseract::TEST ( NormstrngsTest  ,
NoLonelyJoinersPlus   
)

Definition at line 140 of file normstrngs_test.cc.

140 {
141 std::string str = "\u0d2a\u200d+\u0d2a\u0d4b";
142 std::vector<std::string> glyphs;
143 // Returns true, but the joiner is gone.
144 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
145 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
147 EXPECT_EQ(glyphs.size(), 3);
148 EXPECT_EQ(glyphs[0], std::string("\u0d2a"));
149 EXPECT_EQ(glyphs[1], std::string("+"));
150 EXPECT_EQ(glyphs[2], std::string("\u0d2a\u0d4b"));
151}

◆ TEST() [29/88]

tesseract::TEST ( NormstrngsTest  ,
NonIndicTextDoesntBreakIndicRules   
)

Definition at line 118 of file normstrngs_test.cc.

118 {
119 std::string nonindic = "Here's some latin text.";
120 std::string dest;
121 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
122 nonindic.c_str(), &dest))
123 << PrintString32WithUnicodes(nonindic);
124 EXPECT_EQ(dest, nonindic);
125}

◆ TEST() [30/88]

tesseract::TEST ( NormstrngsTest  ,
OcrSpecificNormalization   
)

Definition at line 52 of file normstrngs_test.cc.

52 {
53 const char *kSingleQuoteText = "‘Hi"; // U+2018 (‘) -> U+0027 (')
54 std::string result;
55 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
56 GraphemeNorm::kNormalize, kSingleQuoteText, &result));
57 EXPECT_STREQ("'Hi", result.c_str());
58
59 const char *kDoubleQuoteText = "“Hi"; // U+201C (“) -> U+0022 (")
60 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
61 GraphemeNorm::kNormalize, kDoubleQuoteText, &result));
62 EXPECT_STREQ("\"Hi", result.c_str());
63
64 const char *kEmDash = "Hi—"; // U+2014 (—) -> U+002D (-)
65 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
66 GraphemeNorm::kNormalize, kEmDash, &result));
67 EXPECT_STREQ("Hi-", result.c_str());
68 // Without the ocr normalization, these changes are not made.
69 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone, GraphemeNorm::kNormalize,
70 kSingleQuoteText, &result));
71 EXPECT_STREQ(kSingleQuoteText, result.c_str());
72 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone, GraphemeNorm::kNormalize,
73 kDoubleQuoteText, &result));
74 EXPECT_STREQ(kDoubleQuoteText, result.c_str());
75 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNone, GraphemeNorm::kNormalize,
76 kEmDash, &result));
77 EXPECT_STREQ(kEmDash, result.c_str());
78}

◆ TEST() [31/88]

tesseract::TEST ( NormstrngsTest  ,
SpanUTF8NotWhitespace   
)

Definition at line 335 of file normstrngs_test.cc.

335 {
336 const char kHinText[] = "पिताने विवाह";
337 const char kKorText[] = "이는 것으로 다시 넣을";
338 const char kMixedText[] = "والفكر 123 والصراع abc";
339
342 EXPECT_EQ(0, SpanUTF8NotWhitespace("\rabc"));
343 EXPECT_EQ(0, SpanUTF8NotWhitespace("\tabc"));
344 EXPECT_EQ(0, SpanUTF8NotWhitespace("\nabc"));
345 EXPECT_EQ(3, SpanUTF8NotWhitespace("abc def"));
349}
const char kMixedText[]
unsigned int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:249

◆ TEST() [32/88]

tesseract::TEST ( NormstrngsTest  ,
SpanUTF8Whitespace   
)

Definition at line 328 of file normstrngs_test.cc.

328 {
329 EXPECT_EQ(4, SpanUTF8Whitespace(" \t\r\n"));
330 EXPECT_EQ(4, SpanUTF8Whitespace(" \t\r\nabc"));
331 EXPECT_EQ(0, SpanUTF8Whitespace("abc \t\r\nabc"));
333}

◆ TEST() [33/88]

tesseract::TEST ( OutputBufferTest  ,
WriteString   
)

Definition at line 26 of file fileio_test.cc.

26 {
27 const int kMaxBufSize = 128;
28 char buffer[kMaxBufSize];
29 for (char &i : buffer) {
30 i = '\0';
31 }
32 FILE *fp = tmpfile();
33 CHECK(fp != nullptr);
34
35 auto output = std::make_unique<OutputBuffer>(fp);
36 output->WriteString("Hello ");
37 output->WriteString("world!");
38
39 rewind(fp);
40 auto s = "Hello world!";
41 fread(buffer, strlen(s), 1, fp);
42 EXPECT_STREQ(s, buffer);
43}

◆ TEST() [34/88]

tesseract::TEST ( ParagraphsTest  ,
IndexPageTest   
)

Definition at line 724 of file paragraphs_test.cc.

724 {
726}
const TextAndModel kNewZealandIndex[]
constexpr size_t countof(T const (&)[N]) noexcept
Definition: serialis.h:34
void TestParagraphDetection(const TextAndModel *correct, int num_rows)

◆ TEST() [35/88]

◆ TEST() [36/88]

tesseract::TEST ( ParagraphsTest  ,
NotDistractedBySourceCode   
)

Definition at line 603 of file paragraphs_test.cc.

◆ TEST() [37/88]

tesseract::TEST ( ParagraphsTest  ,
NotOverlyAggressiveWithBlockQuotes   
)

Definition at line 687 of file paragraphs_test.cc.

687 {
689}
const TextAndModel kOldManAndSea[]

◆ TEST() [38/88]

tesseract::TEST ( ParagraphsTest  ,
TestComplexPage1   
)

Definition at line 408 of file paragraphs_test.cc.

408 {
410}
const TextAndModel kComplexPage1[]

◆ TEST() [39/88]

tesseract::TEST ( ParagraphsTest  ,
TestComplexPage2   
)

Definition at line 451 of file paragraphs_test.cc.

451 {
453}
const TextAndModel kComplexPage2[]

◆ TEST() [40/88]

tesseract::TEST ( ParagraphsTest  ,
TestCrownParagraphDetection   
)

Definition at line 275 of file paragraphs_test.cc.

◆ TEST() [41/88]

tesseract::TEST ( ParagraphsTest  ,
TestFewCluesWithCrown   
)

Definition at line 258 of file paragraphs_test.cc.

◆ TEST() [42/88]

tesseract::TEST ( ParagraphsTest  ,
TestRightAlignedParagraph   
)

Definition at line 342 of file paragraphs_test.cc.

342 {
344}
const TextAndModel kRightAligned[]

◆ TEST() [43/88]

tesseract::TEST ( ParagraphsTest  ,
TestSimpleParagraphDetection   
)

Definition at line 243 of file paragraphs_test.cc.

◆ TEST() [44/88]

tesseract::TEST ( ParagraphsTest  ,
TestSingleFullPageContinuation   
)

Definition at line 315 of file paragraphs_test.cc.

315 {
317 int num_rows = countof(kSingleFullPageContinuation);
318 std::vector<RowInfo> row_infos;
319 std::vector<PARA *> row_owners;
320 PARA_LIST paragraphs;
321 std::vector<ParagraphModel *> models;
322 models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));
323 MakeAsciiRowInfos(correct, num_rows, &row_infos);
324 tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);
325 EvaluateParagraphDetection(correct, num_rows, row_owners);
326 for (auto *model : models) {
327 delete model;
328 }
329}
const TextAndModel kSingleFullPageContinuation[]
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
const ParagraphJustification kLeft
void EvaluateParagraphDetection(const TextAndModel *correct, int n, const std::vector< PARA * > &detector_output)
void MakeAsciiRowInfos(const TextAndModel *row_infos, int n, std::vector< RowInfo > *output)

◆ TEST() [45/88]

tesseract::TEST ( ParagraphsTest  ,
TestSplitsOutLeaderLines   
)

Definition at line 568 of file paragraphs_test.cc.

568 {
570}
const TextAndModel kTableOfContents[]

◆ TEST() [46/88]

tesseract::TEST ( ParagraphsTest  ,
TestStrayLineInBlock   
)

Definition at line 470 of file paragraphs_test.cc.

470 {
472}
const TextAndModel kSubtleCrown[]

◆ TEST() [47/88]

tesseract::TEST ( ParagraphsTest  ,
TestSubtleCrown   
)

Definition at line 466 of file paragraphs_test.cc.

◆ TEST() [48/88]

tesseract::TEST ( ParagraphsTest  ,
TestTinyParagraphs   
)

Definition at line 364 of file paragraphs_test.cc.

364 {
366}
const TextAndModel kTinyParagraphs[]

◆ TEST() [49/88]

tesseract::TEST ( ParagraphsTest  ,
TestUnlvInsurance   
)

Definition at line 543 of file paragraphs_test.cc.

543 {
545}
const TextAndModel kUnlvRep3AO[]

◆ TEST() [50/88]

tesseract::TEST ( ParagraphsText  ,
TestRealFlushLeftParagraphs   
)

Definition at line 294 of file paragraphs_test.cc.

◆ TEST() [51/88]

tesseract::TEST ( QRSequenceGenerator  ,
GetBinaryReversedInteger   
)

Definition at line 30 of file qrsequence_test.cc.

30 {
31 const int kRangeSize = 8;
32 TestableQRSequenceGenerator generator(kRangeSize);
33 int reversed_vals[kRangeSize] = {0, 4, 2, 6, 1, 5, 3, 7};
34 for (int i = 0; i < kRangeSize; ++i) {
35 EXPECT_EQ(reversed_vals[i], generator.GetBinaryReversedInteger(i));
36 }
37}

◆ TEST() [52/88]

tesseract::TEST ( QuickTest  ,
ClassicProgressReporting   
)

Definition at line 148 of file progress_test.cc.

148 {
149 ClassicProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", "eng");
150}
void ClassicProgressTester(const char *imgname, const char *tessdatadir, const char *lang)

◆ TEST() [53/88]

tesseract::TEST ( QuickTest  ,
NewProgressReporting   
)

Definition at line 152 of file progress_test.cc.

152 {
153 NewProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", "eng");
154}
void NewProgressTester(const char *imgname, const char *tessdatadir, const char *lang)

◆ TEST() [54/88]

tesseract::TEST ( TesseractInstanceTest  ,
TestMultipleTessInstances   
)

Definition at line 313 of file baseapi_test.cc.

313 {
314 int num_langs = 0;
315 while (langs[num_langs] != nullptr) {
316 ++num_langs;
317 }
318
319 const std::string kTessdataPath = TESSDATA_DIR;
320
321 // Preload images and verify that OCR is correct on them individually.
322 std::vector<Image > pix(num_langs);
323 for (int i = 0; i < num_langs; ++i) {
324 std::string tracestring = "Single instance test with lang = ";
325 tracestring += langs[i];
326 SCOPED_TRACE(tracestring);
327 std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
328 pix[i] = pixRead(path.c_str());
329 QCHECK(pix[i] != nullptr) << "Could not read " << path;
330
332 EXPECT_EQ(0, tess.Init(kTessdataPath.c_str(), langs[i]));
333 std::string ocr_result = GetCleanedTextResult(&tess, pix[i]);
334 EXPECT_STREQ(gt_text[i], ocr_result.c_str());
335 }
336
337 // Process the images in all pairwise combinations of associated languages.
338 std::string ocr_result[2];
339 for (int i = 0; i < num_langs; ++i) {
340 for (int j = i + 1; j < num_langs; ++j) {
341 tesseract::TessBaseAPI tess1, tess2;
342 tess1.Init(kTessdataPath.c_str(), langs[i]);
343 tess2.Init(kTessdataPath.c_str(), langs[j]);
344
345 ocr_result[0] = GetCleanedTextResult(&tess1, pix[i]);
346 ocr_result[1] = GetCleanedTextResult(&tess2, pix[j]);
347
348 EXPECT_FALSE(strcmp(gt_text[i], ocr_result[0].c_str()) ||
349 strcmp(gt_text[j], ocr_result[1].c_str()))
350 << "OCR failed on language pair " << langs[i] << "-" << langs[j];
351 }
352 }
353
354 for (int i = 0; i < num_langs; ++i) {
355 pix[i].destroy();
356 }
357}
std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix)
Definition: baseapi_test.cc:45
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:368

◆ TEST() [55/88]

tesseract::TEST ( TesseractInstanceTest  ,
TestMultipleTessInstanceVariables   
)

Definition at line 360 of file baseapi_test.cc.

360 {
361 std::string illegal_name = "an_illegal_name";
362 std::string langs[2] = {"eng", "hin"};
363 std::string int_param_name = "tessedit_pageseg_mode";
364 int int_param[2] = {1, 2};
365 std::string int_param_str[2] = {"1", "2"};
366 std::string bool_param_name = "tessedit_ambigs_training";
367 bool bool_param[2] = {false, true};
368 std::string bool_param_str[2] = {"F", "T"};
369 std::string str_param_name = "tessedit_char_blacklist";
370 std::string str_param[2] = {"abc", "def"};
371 std::string double_param_name = "segment_penalty_dict_frequent_word";
372 std::string double_param_str[2] = {"0.01", "2"};
373 double double_param[2] = {0.01, 2};
374
375 const std::string kTessdataPath = TESSDATA_DIR;
376
377 tesseract::TessBaseAPI tess1, tess2;
378 for (int i = 0; i < 2; ++i) {
379 tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
380 api->Init(kTessdataPath.c_str(), langs[i].c_str());
381 api->SetVariable(illegal_name.c_str(), "none");
382 api->SetVariable(int_param_name.c_str(), int_param_str[i].c_str());
383 api->SetVariable(bool_param_name.c_str(), bool_param_str[i].c_str());
384 api->SetVariable(str_param_name.c_str(), str_param[i].c_str());
385 api->SetVariable(double_param_name.c_str(), double_param_str[i].c_str());
386 }
387 for (int i = 0; i < 2; ++i) {
388 tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
389 EXPECT_FALSE(api->GetStringVariable(illegal_name.c_str()));
390 int intvar;
391 EXPECT_TRUE(api->GetIntVariable(int_param_name.c_str(), &intvar));
392 EXPECT_EQ(int_param[i], intvar);
393 bool boolvar;
394 EXPECT_TRUE(api->GetBoolVariable(bool_param_name.c_str(), &boolvar));
395 EXPECT_EQ(bool_param[i], boolvar);
396 EXPECT_STREQ(str_param[i].c_str(), api->GetStringVariable(str_param_name.c_str()));
397 double doublevar;
398 EXPECT_TRUE(api->GetDoubleVariable(double_param_name.c_str(), &doublevar));
399 EXPECT_EQ(double_param[i], doublevar);
400 }
401}
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:294
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:279
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:314
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:304
bool GetDoubleVariable(const char *name, double *value) const
Definition: baseapi.cpp:320

◆ TEST() [56/88]

tesseract::TEST ( UnicharsetTest  ,
Basics   
)

Definition at line 29 of file unicharset_test.cc.

29 {
30 // This test verifies basic insertion, unichar_to_id, and encode.
31 UNICHARSET u;
32 u.unichar_insert("a");
33 EXPECT_EQ(u.size(), 4);
34 u.unichar_insert("f");
35 EXPECT_EQ(u.size(), 5);
36 u.unichar_insert("i");
37 EXPECT_EQ(u.size(), 6);
38 // The fi ligature is NOT added because it can be encoded with a cleanup as f
39 // then i.
40 u.unichar_insert("\ufb01");
41 EXPECT_EQ(u.size(), 6);
42 u.unichar_insert("e");
43 EXPECT_EQ(u.size(), 7);
44 u.unichar_insert("n");
45 EXPECT_EQ(u.size(), 8);
46 EXPECT_EQ(u.unichar_to_id("f"), 4);
47 EXPECT_EQ(u.unichar_to_id("i"), 5);
48 // The fi ligature has no valid id.
49 EXPECT_EQ(u.unichar_to_id("\ufb01"), INVALID_UNICHAR_ID);
50 // The fi pair has no valid id.
51 EXPECT_EQ(u.unichar_to_id("fi"), INVALID_UNICHAR_ID);
52 std::vector<int> labels;
53 EXPECT_TRUE(u.encode_string("affine", true, &labels, nullptr, nullptr));
54 std::vector<int> v(&labels[0], &labels[0] + labels.size());
55 EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6}));
56 // With the fi ligature encoding fails without a pre-cleanup.
57 std::string lig_str = "af\ufb01ne";
58 EXPECT_FALSE(u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr));
59 lig_str = u.CleanupString(lig_str.c_str());
60 EXPECT_TRUE(u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr));
61 v = std::vector<int>(&labels[0], &labels[0] + labels.size());
62 EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6}));
63}
bool encode_string(const char *str, bool give_up_on_failure, std::vector< UNICHAR_ID > *encoding, std::vector< char > *lengths, unsigned *encoded_length) const
Definition: unicharset.cpp:239
static std::string CleanupString(const char *utf8_str)
Definition: unicharset.h:265

◆ TEST() [57/88]

tesseract::TEST ( UnicharsetTest  ,
Multibyte   
)

Definition at line 65 of file unicharset_test.cc.

65 {
66 // This test verifies basic insertion, unichar_to_id, and encode.
67 // The difference from Basic above is that now we are testing multi-byte
68 // unicodes instead of single byte.
69 UNICHARSET u;
70 // Insert some Arabic letters.
71 u.unichar_insert("\u0627");
72 EXPECT_EQ(u.size(), 4);
73 u.unichar_insert("\u062c");
74 EXPECT_EQ(u.size(), 5);
75 u.unichar_insert("\u062f");
76 EXPECT_EQ(u.size(), 6);
77 u.unichar_insert("\ufb01"); // fi ligature is added as fi pair.
78 EXPECT_EQ(u.size(), 7);
79 u.unichar_insert("\u062b");
80 EXPECT_EQ(u.size(), 8);
81 u.unichar_insert("\u0635");
82 EXPECT_EQ(u.size(), 9);
83 EXPECT_EQ(u.unichar_to_id("\u0627"), 3);
84 EXPECT_EQ(u.unichar_to_id("\u062c"), 4);
85 // The first two bytes of this string are \u0627, which matches id 3.
86 EXPECT_EQ(u.unichar_to_id("\u0627\u062c", 2), 3);
87 EXPECT_EQ(u.unichar_to_id("\u062f"), 5);
88 // Individual f and i are not present, but they are there as a pair.
89 EXPECT_EQ(u.unichar_to_id("f"), INVALID_UNICHAR_ID);
90 EXPECT_EQ(u.unichar_to_id("i"), INVALID_UNICHAR_ID);
91 EXPECT_EQ(u.unichar_to_id("fi"), 6);
92 // The fi ligature is findable.
93 EXPECT_EQ(u.unichar_to_id("\ufb01"), 6);
94 std::vector<int> labels;
95 EXPECT_TRUE(
96 u.encode_string("\u0627\u062c\u062c\u062f\u0635\u062b", true, &labels, nullptr, nullptr));
97 std::vector<int> v(&labels[0], &labels[0] + labels.size());
98 EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 8, 7}));
99 // With the fi ligature the fi is picked out.
100 std::vector<char> lengths;
101 unsigned encoded_length;
102 std::string src_str = "\u0627\u062c\ufb01\u0635\u062b";
103 // src_str has to be pre-cleaned for lengths to be correct.
104 std::string cleaned = u.CleanupString(src_str.c_str());
105 EXPECT_TRUE(u.encode_string(cleaned.c_str(), true, &labels, &lengths, &encoded_length));
106 EXPECT_EQ(encoded_length, cleaned.size());
107 std::string len_str(&lengths[0], lengths.size());
108 EXPECT_STREQ(len_str.c_str(), "\002\002\002\002\002");
109 v = std::vector<int>(&labels[0], &labels[0] + labels.size());
110 EXPECT_THAT(v, ElementsAreArray({3, 4, 6, 8, 7}));
111}

◆ TEST() [58/88]

tesseract::TEST ( UnicharsetTest  ,
MultibyteBigrams   
)

Definition at line 113 of file unicharset_test.cc.

113 {
114 // This test verifies basic insertion, unichar_to_id, and encode.
115 // The difference from Basic above is that now we are testing multi-byte
116 // unicodes instead of single byte.
117 UNICHARSET u;
118 // Insert some Kannada letters.
119 u.unichar_insert("\u0c9c");
120 EXPECT_EQ(u.size(), 4);
121 u.unichar_insert("\u0cad");
122 EXPECT_EQ(u.size(), 5);
123 u.unichar_insert("\u0ccd\u0c9c");
124 EXPECT_EQ(u.size(), 6);
125 u.unichar_insert("\u0ccd");
126 EXPECT_EQ(u.size(), 7);
127 // By default the encodable bigram is NOT added.
128 u.unichar_insert("\u0ccd\u0cad");
129 EXPECT_EQ(u.size(), 7);
130 // It is added if we force it to be.
131 u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue);
132 EXPECT_EQ(u.size(), 8);
133 std::vector<char> data;
134 TFile fp;
135 fp.OpenWrite(&data);
136 u.save_to_file(&fp);
137 fp.Open(&data[0], data.size());
138 UNICHARSET v;
139 v.load_from_file(&fp, false);
140 EXPECT_EQ(v.unichar_to_id("\u0c9c"), 3);
141 EXPECT_EQ(v.unichar_to_id("\u0cad"), 4);
142 EXPECT_EQ(v.unichar_to_id("\u0ccd\u0c9c"), 5);
143 EXPECT_EQ(v.unichar_to_id("\u0ccd"), 6);
144 EXPECT_EQ(v.unichar_to_id("\u0ccd\u0cad"), 7);
145}
void OpenWrite(std::vector< char > *data)
Definition: serialis.cpp:246

◆ TEST() [59/88]

tesseract::TEST ( UnicharsetTest  ,
OldStyle   
)

Definition at line 147 of file unicharset_test.cc.

147 {
148 // This test verifies an old unicharset that contains fi/fl ligatures loads
149 // and keeps all the entries.
150 std::string filename = file::JoinPath(TESTDATA_DIR, "eng.unicharset");
151 UNICHARSET u;
152 LOG(INFO) << "Filename=" << filename;
153 EXPECT_TRUE(u.load_from_file(filename.c_str()));
154 EXPECT_EQ(u.size(), 111);
155}

◆ TEST() [60/88]

tesseract::TEST ( UnicharTest  ,
Conversion   
)

Definition at line 18 of file unichar_test.cc.

18 {
19 // This test verifies that Unichar::UTF8ToUTF32 and Unichar::UTF32ToUTF8
20 // show the required conversion properties.
21 // Test for round-trip utf8-32-8 for 1, 2, 3 and 4 byte codes.
22 const char *kUTF8Src = "a\u05d0\u0ca4\U0002a714";
23 const std::vector<char32> kUTF32Src = {'a', 0x5d0, 0xca4, 0x2a714};
24 // Check for round-trip conversion.
25 std::vector<char32> utf32 = UNICHAR::UTF8ToUTF32(kUTF8Src);
26 EXPECT_THAT(utf32, testing::ElementsAreArray(kUTF32Src));
27 std::string utf8 = UNICHAR::UTF32ToUTF8(utf32);
28 EXPECT_STREQ(kUTF8Src, utf8.c_str());
29}

◆ TEST() [61/88]

tesseract::TEST ( UnicharTest  ,
InvalidText   
)

Definition at line 31 of file unichar_test.cc.

31 {
32 // This test verifies that Unichar correctly deals with invalid text.
33 const char *kInvalidUTF8 = "a b\200d string";
34 const std::vector<char32> kInvalidUTF32 = {'a', ' ', 0x200000, 'x'};
35 // Invalid utf8 produces an empty vector.
36 std::vector<char32> utf32 = UNICHAR::UTF8ToUTF32(kInvalidUTF8);
37 EXPECT_TRUE(utf32.empty());
38 // Invalid utf32 produces an empty string.
39 std::string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32);
40 EXPECT_TRUE(utf8.empty());
41}

◆ TEST() [62/88]

tesseract::TEST ( ValidateGraphemeTest  ,
ExplicitViramaNonJoiner   
)

Definition at line 121 of file validate_grapheme_test.cc.

121 {
122 std::string str = "\u0d15\u0d4d\u200c\u0d24"; // KA Virama ZWNJ Ta
123 std::vector<std::string> glyphs;
124 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
125 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
127 EXPECT_EQ(glyphs.size(), 2);
128 EXPECT_EQ(glyphs[1], std::string("\u0d24"));
129 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
130 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
131 &glyphs))
133 EXPECT_EQ(glyphs.size(), 3);
134 EXPECT_EQ(glyphs[1], std::string("\u0d4d\u200c"));
135}

◆ TEST() [63/88]

tesseract::TEST ( ValidateGraphemeTest  ,
HalfFormJoiner   
)

Definition at line 67 of file validate_grapheme_test.cc.

67 {
68 std::string str = "\u0d15\u0d4d\u200d\u0d24"; // KA Virama ZWJ Ta
69 std::vector<std::string> glyphs;
70 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
71 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
73 EXPECT_EQ(glyphs.size(), 1);
74 EXPECT_EQ(glyphs[0], str);
75 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
76 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
77 &glyphs))
79 EXPECT_EQ(glyphs.size(), 2) << PrintStringVectorWithUnicodes(glyphs);
80 EXPECT_EQ(glyphs[0], std::string("\u0d15\u0d4d\u200d"));
81}

◆ TEST() [64/88]

tesseract::TEST ( ValidateGraphemeTest  ,
MultipleSyllablesAreNotASingleGrapheme   
)

Definition at line 18 of file validate_grapheme_test.cc.

18 {
19 std::string str = "\u0c15\u0c3f\u0c15\u0c0e"; // KA - dep I - KA - ind E.
20 std::vector<std::string> glyphs;
21 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
22 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
24 // It made 3 graphemes.
25 EXPECT_EQ(glyphs.size(), 3);
26 EXPECT_EQ(glyphs[0], std::string("\u0c15\u0c3f"));
27 EXPECT_EQ(glyphs[1], std::string("\u0c15"));
28 EXPECT_EQ(glyphs[2], std::string("\u0c0e"));
29}

◆ TEST() [65/88]

tesseract::TEST ( ValidateGraphemeTest  ,
NoLonelyJoinersQuote   
)

Definition at line 154 of file validate_grapheme_test.cc.

154 {
155 std::string str = "'\u0d24\u0d23\u0d32\u0d4d'\u200d";
156 std::vector<std::string> glyphs;
157 // Returns true, but the joiner is gone.
158 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
159 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
161 EXPECT_EQ(glyphs.size(), 5);
162 EXPECT_EQ(glyphs[0], std::string("'"));
163 EXPECT_EQ(glyphs[1], std::string("\u0d24"));
164 EXPECT_EQ(glyphs[2], std::string("\u0d23"));
165 EXPECT_EQ(glyphs[3], std::string("\u0d32\u0d4d\u200c"));
166 EXPECT_EQ(glyphs[4], std::string("'"));
167}

◆ TEST() [66/88]

tesseract::TEST ( ValidateGraphemeTest  ,
OpenConjunctNonJoiner   
)

Definition at line 99 of file validate_grapheme_test.cc.

99 {
100 std::string str = "\u0d15\u200c\u0d4d\u0d24"; // KA ZWNJ Virama Ta
101 std::vector<std::string> glyphs;
102 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
103 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
105 EXPECT_EQ(glyphs.size(), 1);
106 EXPECT_EQ(glyphs[0], str);
107 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
108 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
109 &glyphs))
111 EXPECT_EQ(glyphs.size(), 3);
112 EXPECT_EQ(glyphs[1], std::string("\u200c\u0d4d"));
113 // Malayalam only, so not allowed in Telugu.
114 str = "\u0c15\u200c\u0c4d\u0c24"; // KA ZWNJ Virama Ta
115 EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
116 GraphemeNormMode::kCombined, true, str.c_str(),
117 &glyphs))
119}

◆ TEST() [67/88]

tesseract::TEST ( ValidateGraphemeTest  ,
SimpleCV   
)

Definition at line 41 of file validate_grapheme_test.cc.

41 {
42 std::string str = "\u0cb9\u0cbf"; // HA I
43 std::vector<std::string> glyphs;
44 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
45 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
47 EXPECT_EQ(glyphs.size(), 1);
48 EXPECT_EQ(glyphs[0], str);
49}

◆ TEST() [68/88]

tesseract::TEST ( ValidateGraphemeTest  ,
SingleConsonantOK   
)

Definition at line 31 of file validate_grapheme_test.cc.

31 {
32 std::string str = "\u0cb9"; // HA
33 std::vector<std::string> glyphs;
34 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
35 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
37 EXPECT_EQ(glyphs.size(), 1);
38 EXPECT_EQ(glyphs[0], str);
39}

◆ TEST() [69/88]

tesseract::TEST ( ValidateGraphemeTest  ,
SubscriptConjunct   
)

Definition at line 51 of file validate_grapheme_test.cc.

51 {
52 std::string str = "\u0cb9\u0ccd\u0c95\u0cbf"; // HA Virama KA I
53 std::vector<std::string> glyphs;
54 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
55 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
57 EXPECT_EQ(glyphs.size(), 1);
58 EXPECT_EQ(glyphs[0], str);
59 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
60 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
61 &glyphs))
63 EXPECT_EQ(glyphs.size(), 3);
64 EXPECT_EQ(glyphs[1], std::string("\u0ccd\u0c95"));
65}

◆ TEST() [70/88]

tesseract::TEST ( ValidateGraphemeTest  ,
ThaiGraphemes   
)

Definition at line 137 of file validate_grapheme_test.cc.

137 {
138 // This is a single grapheme unless in glyph split mode
139 std::string str = "\u0e14\u0e38\u0e4a";
140 std::vector<std::string> glyphs;
141 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
142 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
144 EXPECT_EQ(glyphs.size(), 1);
145 EXPECT_EQ(glyphs[0], str);
146 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
147 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
148 &glyphs))
150 EXPECT_EQ(glyphs.size(), 3);
151 EXPECT_EQ(glyphs[0], std::string("\u0e14"));
152}

◆ TEST() [71/88]

tesseract::TEST ( ValidateGraphemeTest  ,
TraditionalConjunctJoiner   
)

Definition at line 83 of file validate_grapheme_test.cc.

83 {
84 std::string str = "\u0d15\u200d\u0d4d\u0d24"; // KA ZWJ Virama Ta
85 std::vector<std::string> glyphs;
86 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
87 GraphemeNormMode::kCombined, true, str.c_str(), &glyphs))
89 EXPECT_EQ(glyphs.size(), 1);
90 EXPECT_EQ(glyphs[0], str);
91 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
92 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
93 &glyphs))
95 EXPECT_EQ(glyphs.size(), 3);
96 EXPECT_EQ(glyphs[1], std::string("\u200d\u0d4d"));
97}

◆ TEST() [72/88]

tesseract::TEST ( ValidateIndicTest  ,
AddsJoinerToTerminalVirama   
)

Definition at line 28 of file validate_indic_test.cc.

28 {
29 std::string str = "\u0c15\u0c4d"; // KA - virama
30 std::string target_str = "\u0c15\u0c4d\u200c"; // KA - virama - ZWNJ
31 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 3, 2, 1, target_str);
32 // Same result if we started with the normalized string.
33 ExpectGraphemeModeResults(target_str, UnicodeNormMode::kNFC, 3, 2, 1, target_str);
34}

◆ TEST() [73/88]

tesseract::TEST ( ValidateIndicTest  ,
MatrasFollowConsonantsNotVowels   
)

Definition at line 102 of file validate_indic_test.cc.

102 {
103 std::string str = "\u0c05\u0c47"; // A EE
104 std::string dest;
105 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
106 str.c_str(), &dest))
108 str = "\u0c1e\u0c3e"; // NYA AA
109 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
110 str.c_str(), &dest))
112 EXPECT_EQ(dest, str);
113}

◆ TEST() [74/88]

tesseract::TEST ( ValidateIndicTest  ,
Nukta   
)

Definition at line 128 of file validate_indic_test.cc.

128 {
129 std::string str = "\u0c95\u0cbc\u0ccd\u0cb9"; // KA Nukta Virama HA
130 std::vector<std::string> glyphs;
131 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
132 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
133 &glyphs));
134 EXPECT_EQ(glyphs.size(), 3);
135 EXPECT_EQ(glyphs[2], std::string("\u0ccd\u0cb9"));
136 // Swapped Nukta and Virama are not allowed, but NFC normalization fixes it.
137 std::string str2 = "\u0c95\u0ccd\u0cbc\u0cb9"; // KA Virama Nukta HA
138 ExpectGraphemeModeResults(str2, UnicodeNormMode::kNFC, 4, 3, 1, str);
139}

◆ TEST() [75/88]

tesseract::TEST ( ValidateIndicTest  ,
OnlyOneDependentVowel   
)

Definition at line 37 of file validate_indic_test.cc.

37 {
38 std::string str = "\u0d15\u0d3e\u0d42"; // KA AA UU
39 std::string dest;
40 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
41 str.c_str(), &dest))
43}

◆ TEST() [76/88]

tesseract::TEST ( ValidateIndicTest  ,
OnlyOneVowelModifier   
)

Definition at line 53 of file validate_indic_test.cc.

53 {
54 std::string str = "\u0c26\u0c4d\u0c01"; // DA virama candrabindu
55 std::string result;
56 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
57 str.c_str(), &result));
58 // It made 1 grapheme of 4 chars, by terminating the explicit virama.
59 EXPECT_EQ(std::string("\u0c26\u0c4d\u200c\u0c01"), result);
60
61 str = "\u0995\u0983\u0981"; // KA visarga candrabindu
62 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
63 str.c_str(), &result));
64
65 // Exception: Malayalam allows multiple anusvara.
66 str = "\u0d15\u0d02\u0d02"; // KA Anusvara Anusvara
67 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
68 str.c_str(), &result));
69 EXPECT_EQ(str, result);
70}

◆ TEST() [77/88]

tesseract::TEST ( ValidateIndicTest  ,
SinhalaRakaransaya   
)

Definition at line 142 of file validate_indic_test.cc.

142 {
143 std::string str = "\u0d9a\u0dca\u200d\u0dbb"; // KA Virama ZWJ Rayanna
144 std::string dest;
145 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
146 str.c_str(), &dest))
148 EXPECT_EQ(dest, str);
149 std::vector<std::string> glyphs;
150 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
151 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
152 &glyphs));
153 EXPECT_EQ(glyphs.size(), 2);
154 EXPECT_EQ(glyphs[1], std::string("\u0dca\u200d\u0dbb"));
155 // Can be followed by a dependent vowel.
156 str += "\u0dd9"; // E
157 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
158 str.c_str(), &dest))
160 EXPECT_EQ(dest, str);
161}

◆ TEST() [78/88]

tesseract::TEST ( ValidateIndicTest  ,
SinhalaRepaya   
)

Definition at line 184 of file validate_indic_test.cc.

184 {
185 std::string str = "\u0d9a\u0dbb\u0dca\u200d\u0db8"; // KA Rayanna Virama ZWJ MA
186 std::vector<std::string> glyphs;
187 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
188 GraphemeNormMode::kCombined, true, str.c_str(),
189 &glyphs));
190 EXPECT_EQ(glyphs.size(), 2);
191 EXPECT_EQ(glyphs[1], std::string("\u0dbb\u0dca\u200d\u0db8"));
192 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
193 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
194 &glyphs));
195 EXPECT_EQ(glyphs.size(), 3);
196 EXPECT_EQ(glyphs[1], std::string("\u0dbb\u0dca\u200d"));
197}

◆ TEST() [79/88]

tesseract::TEST ( ValidateIndicTest  ,
SinhalaSpecials   
)

Definition at line 199 of file validate_indic_test.cc.

199 {
200 // Sinhala has some exceptions from the usual rules.
201 std::string str = "\u0dc0\u0d9c\u0dca\u200d\u0dbb\u0dca\u200d\u0dbb\u0dca\u200d";
202 std::vector<std::string> glyphs;
203 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
204 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
205 &glyphs));
206 EXPECT_EQ(glyphs.size(), 5) << PrintStringVectorWithUnicodes(glyphs);
207 EXPECT_EQ(glyphs[0], std::string("\u0dc0"));
208 EXPECT_EQ(glyphs[1], std::string("\u0d9c"));
209 EXPECT_EQ(glyphs[2], std::string("\u0dca\u200d\u0dbb"));
210 EXPECT_EQ(glyphs[3], std::string("\u0dca\u200d"));
211 EXPECT_EQ(glyphs[4], std::string("\u0dbb\u0dca\u200d"));
212 str = "\u0dc3\u0dbb\u0dca\u200d\u0dbb\u0dca\u200d\u0dcf";
213 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
214 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
215 &glyphs));
216 EXPECT_EQ(glyphs.size(), 4) << PrintStringVectorWithUnicodes(glyphs);
217 EXPECT_EQ(glyphs[0], std::string("\u0dc3"));
218 EXPECT_EQ(glyphs[1], std::string("\u0dbb\u0dca\u200d"));
219 EXPECT_EQ(glyphs[2], std::string("\u0dbb\u0dca\u200d"));
220 EXPECT_EQ(glyphs[3], std::string("\u0dcf"));
221}

◆ TEST() [80/88]

tesseract::TEST ( ValidateIndicTest  ,
SinhalaYansaya   
)

Definition at line 163 of file validate_indic_test.cc.

163 {
164 std::string str = "\u0d9a\u0dca\u200d\u0dba"; // KA Virama ZWJ Yayanna
165 std::string dest;
166 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
167 str.c_str(), &dest))
169 EXPECT_EQ(dest, str);
170 // Can be followed by a dependent vowel.
171 str += "\u0ddd"; // OO
172 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
173 str.c_str(), &dest))
175 EXPECT_EQ(dest, str);
176 std::vector<std::string> glyphs;
177 EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
178 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
179 &glyphs));
180 EXPECT_EQ(glyphs.size(), 3);
181 EXPECT_EQ(glyphs[1], std::string("\u0dca\u200d\u0dba"));
182}

◆ TEST() [81/88]

tesseract::TEST ( ValidateIndicTest  ,
SubGraphemes   
)

Definition at line 116 of file validate_indic_test.cc.

116 {
117 std::string str = "\u0d3e"; // AA
118 std::string dest;
119 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
120 str.c_str(), &dest))
122 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNone,
123 str.c_str(), &dest))
125 EXPECT_EQ(dest, str);
126}

◆ TEST() [82/88]

tesseract::TEST ( ValidateIndicTest  ,
VowelModifierMustBeLast   
)

Definition at line 80 of file validate_indic_test.cc.

80 {
81 std::string str = "\u0c28\u0c02\u0c3f"; // NA Sunna I
82 std::string dest;
83 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
84 str.c_str(), &dest))
86 // Swap c02/c3f and all is ok.
87 str = "\u0c28\u0c3f\u0c02"; // NA I Sunna
88 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
89 str.c_str(), &dest))
91 EXPECT_EQ(dest, str);
92}

◆ TEST() [83/88]

tesseract::TEST ( ValidateKhmerTest  ,
BadKhmerWords   
)

Definition at line 31 of file validate_khmer_test.cc.

31 {
32 std::string result;
33 // Multiple dependent vowels not allowed
34 std::string str = "\u1796\u17b6\u17b7";
35 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
36 str.c_str(), &result));
37 // Multiple shifters not allowed
38 str = "\u1798\u17c9\u17ca";
39 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
40 str.c_str(), &result));
41 // Multiple signs not allowed
42 str = "\u1780\u17b6\u17cb\u17cd";
43 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
44 str.c_str(), &result));
45}

◆ TEST() [84/88]

tesseract::TEST ( ValidateKhmerTest  ,
GoodKhmerWords   
)

Definition at line 19 of file validate_khmer_test.cc.

19 {
20 std::string str = "ព័ត៏មានប្លែកៗ";
21 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 13, 12, 7, str);
22 str = "ទំនុកច្រៀង";
23 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 10, 9, 5, str);
24 str = "កាលីហ្វូញ៉ា";
25 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 11, 10, 4, str);
26 str = "ចាប់ពីផ្លូវ";
27 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 11, 10, 5, str);
28}

◆ TEST() [85/88]

tesseract::TEST ( ValidateMyanmarTest  ,
BadMyanmarWords   
)

Definition at line 27 of file validate_myanmar_test.cc.

27 {
28 std::string str = "က်န္းမာေရး";
29 std::vector<std::string> glyphs;
30 EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
31 GraphemeNormMode::kCombined, true, str.c_str(),
32 &glyphs));
33 std::string result;
34 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
35 str.c_str(), &result));
36 // It works if the grapheme normalization is turned off.
37 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNone,
38 str.c_str(), &result));
39 EXPECT_EQ(str, result);
40 str = "ခုႏွစ္";
41 EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(UnicodeNormMode::kNFC, OCRNorm::kNone,
42 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
43 &glyphs));
44 EXPECT_FALSE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNormalize,
45 str.c_str(), &result));
46 // It works if the grapheme normalization is turned off.
47 EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone, GraphemeNorm::kNone,
48 str.c_str(), &result));
49 EXPECT_EQ(str, result);
50}

◆ TEST() [86/88]

tesseract::TEST ( ValidateMyanmarTest  ,
GoodMyanmarWords   
)

Definition at line 19 of file validate_myanmar_test.cc.

19 {
20 std::string str = "လျှာကသိသည် "; // No viramas in this one.
21 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 11, 11, 5, str);
22 str = "တုန္လႈပ္မႈ ";
23 ExpectGraphemeModeResults(str, UnicodeNormMode::kNFC, 11, 9, 4, str);
24}

◆ TEST() [87/88]

tesseract::TEST ( ValidatorTest  ,
Idempotency   
)

Definition at line 49 of file validator_test.cc.

49 {
50 std::vector<char32> str1({0xd24, 0xd23, 0xd32, 0xd4d, '\'', 0x200d, 0x200c, 0x200d, 0x200c});
51 std::vector<char32> str2({0xd24, 0xd23, 0xd32, 0xd4d, 0x200c, 0x200d, 0x200c, 0x200d, '\''});
52 std::vector<std::vector<char32>> result1, result2, result3, result4;
53 EXPECT_TRUE(
54 Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, str1, &result1));
55 EXPECT_TRUE(Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, result1[0],
56 &result2));
57 EXPECT_EQ(result1.size(), result2.size());
58 EXPECT_THAT(result2[0], testing::ElementsAreArray(result1[0]));
59 EXPECT_TRUE(
60 Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, str2, &result3));
61 EXPECT_TRUE(Validator::ValidateCleanAndSegment(GraphemeNormMode::kSingleString, true, result3[0],
62 &result4));
63 EXPECT_EQ(result3.size(), result4.size());
64 EXPECT_THAT(result4[0], testing::ElementsAreArray(result3[0]));
65}

◆ TEST() [88/88]

tesseract::TEST ( ValidatorTest  ,
MostFrequentViramaScript   
)

Definition at line 28 of file validator_test.cc.

28 {
29 // The most frequent virama script should come out correct, despite
30 // distractions from other scripts.
31 EXPECT_EQ(ViramaScript::kTelugu, TestableValidator::TestableMostFrequentViramaScript({0xc05}));
32 // It is still Telugu surrounded by Latin.
33 EXPECT_EQ(ViramaScript::kTelugu,
34 TestableValidator::TestableMostFrequentViramaScript({'a', 0xc05, 'b', 'c'}));
35 // But it is no longer Telugu when surrounded by Devanagari.
36 EXPECT_EQ(ViramaScript::kDevanagari,
37 TestableValidator::TestableMostFrequentViramaScript({0x905, 0xc05, 0x906, 0x907}));
38 EXPECT_EQ(ViramaScript::kKannada,
39 TestableValidator::TestableMostFrequentViramaScript({0xc85, 0xc05, 0xc86, 0xc87}));
40 EXPECT_EQ(ViramaScript::kBengali,
41 TestableValidator::TestableMostFrequentViramaScript({0x985, 0xc05, 0x986, 0x987}));
42 // Danda and double Danda don't count as Devanagari, as they are common.
43 EXPECT_EQ(ViramaScript::kTelugu,
44 TestableValidator::TestableMostFrequentViramaScript({0x964, 0xc05, 0x965, 0x965}));
45}

◆ TEST_F() [1/229]

tesseract::TEST_F ( ApplyBoxTest  ,
ItalicCharLevel   
)

Definition at line 115 of file applybox_test.cc.

115 {
116 VerifyBoxesAndText("trainingital.tif", kTruthTextWords, "trainingital.box", false);
117}
const char * kTruthTextWords

◆ TEST_F() [2/229]

tesseract::TEST_F ( ApplyBoxTest  ,
ItalLineLevel   
)

Definition at line 125 of file applybox_test.cc.

125 {
126 VerifyBoxesAndText("trainingitalline.tif", kTruthTextLine, "trainingital.box", true);
127}
const char * kTruthTextLine

◆ TEST_F() [3/229]

tesseract::TEST_F ( ApplyBoxTest  ,
TimesCharLevel   
)

Definition at line 110 of file applybox_test.cc.

110 {
111 VerifyBoxesAndText("trainingtimes.tif", kTruthTextWords, "trainingtimes.box", false);
112}

◆ TEST_F() [4/229]

tesseract::TEST_F ( ApplyBoxTest  ,
TimesLineLevel   
)

Definition at line 120 of file applybox_test.cc.

120 {
121 VerifyBoxesAndText("trainingtimesline.tif", kTruthTextLine, "trainingtimes.box", true);
122}

◆ TEST_F() [5/229]

tesseract::TEST_F ( BaseapiThreadTest  ,
TestAll   
)

Definition at line 221 of file baseapi_thread_test.cc.

221 {
222#ifdef INCLUDE_TENSORFLOW
223 const int n = num_langs_ * FLAGS_reps;
224 ResetPool();
225 for (int i = 0; i < n; ++i) {
226 pool_->Schedule(std::bind(VerifyTextResult, nullptr, pix_[i], langs_[i % num_langs_],
227 gt_text_[i % num_langs_]));
228 }
229 WaitForPoolWorkers();
230#endif
231}

◆ TEST_F() [6/229]

tesseract::TEST_F ( BaseapiThreadTest  ,
TestBasicSanity   
)

Definition at line 179 of file baseapi_thread_test.cc.

179 {
180 for (int i = 0; i < num_langs_; ++i) {
181 TessBaseAPI tess;
182 InitTessInstance(&tess, langs_[i]);
183 std::string ocr_text;
184 GetCleanedText(&tess, pix_[i], ocr_text);
185 CHECK(strcmp(gt_text_[i].c_str(), ocr_text.c_str()) == 0) << "Failed with lang = " << langs_[i];
186 }
187}

◆ TEST_F() [7/229]

tesseract::TEST_F ( BaseapiThreadTest  ,
TestInit   
)

Definition at line 190 of file baseapi_thread_test.cc.

190 {
191#ifdef INCLUDE_TENSORFLOW
192 const int n = num_langs_ * FLAGS_reps;
193 ResetPool();
194 std::vector<TessBaseAPI> tess(n);
195 for (int i = 0; i < n; ++i) {
196 pool_->Schedule(std::bind(InitTessInstance, &tess[i], langs_[i % num_langs_]));
197 }
198 WaitForPoolWorkers();
199#endif
200}

◆ TEST_F() [8/229]

tesseract::TEST_F ( BaseapiThreadTest  ,
TestRecognition   
)

Definition at line 203 of file baseapi_thread_test.cc.

203 {
204#ifdef INCLUDE_TENSORFLOW
205 const int n = num_langs_ * FLAGS_reps;
206 std::vector<TessBaseAPI> tess(n);
207 // Initialize api instances in a single thread.
208 for (int i = 0; i < n; ++i) {
209 InitTessInstance(&tess[i], langs_[i % num_langs_]);
210 }
211
212 ResetPool();
213 for (int i = 0; i < n; ++i) {
214 pool_->Schedule(std::bind(VerifyTextResult, &tess[i], pix_[i], langs_[i % num_langs_],
215 gt_text_[i % num_langs_]));
216 }
217 WaitForPoolWorkers();
218#endif
219}

◆ TEST_F() [9/229]

tesseract::TEST_F ( BitVectorTest  ,
Primes   
)

Definition at line 116 of file bitvector_test.cc.

116 {
117 BitVector map;
118 ComputePrimes(&map);
119 TestPrimes(map);
120 // It still works if we use the copy constructor.
121 BitVector map2(map);
122 TestPrimes(map2);
123 // Or if we assign it.
124 BitVector map3;
125 map3 = map;
126 TestPrimes(map3);
127 // Test file i/o too.
128 std::string filename = OutputNameToPath("primesbitvector");
129 FILE *fp = fopen(filename.c_str(), "wb");
130 ASSERT_TRUE(fp != nullptr);
131 EXPECT_TRUE(map.Serialize(fp));
132 fclose(fp);
133 fp = fopen(filename.c_str(), "rb");
134 ASSERT_TRUE(fp != nullptr);
135 BitVector read_map;
136 EXPECT_TRUE(read_map.DeSerialize(false, fp));
137 fclose(fp);
138 TestPrimes(read_map);
139}
bool DeSerialize(bool swap, FILE *fp)
Definition: bitvector.cpp:97
bool Serialize(FILE *fp) const
Definition: bitvector.cpp:87

◆ TEST_F() [10/229]

tesseract::TEST_F ( BitVectorTest  ,
SetAll   
)

Definition at line 142 of file bitvector_test.cc.

142 {
143 // Test the default constructor and set/resetall.
144 BitVector map(42);
145 TestAll(map, false);
146 map.SetAllTrue();
147 TestAll(map, true);
148 map.SetAllFalse();
149 TestAll(map, false);
150}

◆ TEST_F() [11/229]

tesseract::TEST_F ( BitVectorTest  ,
TestNextSetBit   
)

Definition at line 155 of file bitvector_test.cc.

155 {
156 BitVector bv;
157 for (int spacing = 1; spacing <= 5; ++spacing) {
158 SetBitPattern(0, 256, spacing, &bv);
159 ExpectCorrectBits(bv);
160 }
161}

◆ TEST_F() [12/229]

tesseract::TEST_F ( BitVectorTest  ,
TestNumSetBits   
)

Definition at line 165 of file bitvector_test.cc.

165 {
166 BitVector bv;
167 for (int byte = 0; byte < 256; ++byte) {
168 SetBitPattern(byte, byte + 1, 1, &bv);
169 ExpectCorrectBits(bv);
170 }
171}

◆ TEST_F() [13/229]

tesseract::TEST_F ( ColPartitionTest  ,
IsInSameColumnAsBorders   
)

Definition at line 44 of file colpartition_test.cc.

44 {
45 TestableColPartition a, b, c, d;
46 a.SetColumnRange(0, 1);
47 b.SetColumnRange(1, 2);
48 c.SetColumnRange(2, 3);
49 d.SetColumnRange(4, 5);
50
56}
bool IsInSameColumnAs(const ColPartition &part) const
void SetColumnRange(int first, int last)

◆ TEST_F() [14/229]

tesseract::TEST_F ( ColPartitionTest  ,
IsInSameColumnAsPartialOverlap   
)

Definition at line 67 of file colpartition_test.cc.

67 {
69 a.SetColumnRange(3, 8);
70 b.SetColumnRange(6, 10);
71
74}

◆ TEST_F() [15/229]

tesseract::TEST_F ( ColPartitionTest  ,
IsInSameColumnAsReflexive   
)

Definition at line 35 of file colpartition_test.cc.

35 {
37 a.SetColumnRange(1, 2);
38 b.SetColumnRange(3, 3);
39
42}

◆ TEST_F() [16/229]

tesseract::TEST_F ( ColPartitionTest  ,
IsInSameColumnAsSuperset   
)

Definition at line 58 of file colpartition_test.cc.

58 {
60 a.SetColumnRange(4, 7);
61 b.SetColumnRange(2, 8);
62
65}

◆ TEST_F() [17/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
ExitsWithErrorOnInvalidFlag   
)

Definition at line 66 of file commandlineflags_test.cc.

66 {
67 const char *argv[] = {"", "--test_nonexistent_flag"};
68 EXPECT_EXIT(TestParser(countof(argv), argv), ::testing::ExitedWithCode(1),
69 "ERROR: Non-existent flag");
70}

◆ TEST_F() [18/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
ParseBoolFlags   
)

Definition at line 121 of file commandlineflags_test.cc.

121 {
122 const char *argv[] = {"", "--foo_bool=true", "--bar_bool=1"};
123 FLAGS_foo_bool.set_value(false);
124 FLAGS_bar_bool.set_value(false);
125 TestParser(countof(argv), argv);
126 // Verify changed value
127 EXPECT_TRUE(FLAGS_foo_bool);
128 EXPECT_TRUE(FLAGS_bar_bool);
129
130 const char *inv_argv[] = {"", "--foo_bool=false", "--bar_bool=0"};
131 FLAGS_foo_bool.set_value(true);
132 FLAGS_bar_bool.set_value(true);
133 TestParser(3, inv_argv);
134 // Verify changed value
135 EXPECT_FALSE(FLAGS_foo_bool);
136 EXPECT_FALSE(FLAGS_bar_bool);
137
138 const char *arg_implied_true[] = {"", "--bar_bool"};
139 FLAGS_bar_bool.set_value(false);
140 TestParser(2, arg_implied_true);
141 EXPECT_TRUE(FLAGS_bar_bool);
142
143 const char *arg_missing_val[] = {"", "--bar_bool="};
144 EXPECT_EXIT(TestParser(2, arg_missing_val), ::testing::ExitedWithCode(1), "ERROR");
145}

◆ TEST_F() [19/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
ParseDoubleFlags   
)

Definition at line 91 of file commandlineflags_test.cc.

91 {
92 const char *argv[] = {"", "--foo_double=3.14", "--bar_double", "1.2"};
93 TestParser(countof(argv), argv);
94
95 EXPECT_EQ(3.14, FLAGS_foo_double);
96 EXPECT_EQ(1.2, FLAGS_bar_double);
97
98 const char *arg_no_value[] = {"", "--bar_double"};
99 EXPECT_EXIT(TestParser(2, arg_no_value), ::testing::ExitedWithCode(1), "ERROR");
100
101 const char *arg_bad_format[] = {"", "--bar_double="};
102 EXPECT_EXIT(TestParser(2, arg_bad_format), ::testing::ExitedWithCode(1), "ERROR");
103}

◆ TEST_F() [20/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
ParseIntegerFlags   
)

Definition at line 72 of file commandlineflags_test.cc.

72 {
73 const char *argv[] = {"", "--foo_int=3", "--bar_int", "-4"};
74 TestParser(countof(argv), argv);
75 EXPECT_EQ(3, FLAGS_foo_int);
76 EXPECT_EQ(-4, FLAGS_bar_int);
77
78 const char *arg_no_value[] = {"", "--bar_int"};
79 EXPECT_EXIT(TestParser(countof(arg_no_value), arg_no_value), ::testing::ExitedWithCode(1),
80 "ERROR");
81
82 const char *arg_invalid_value[] = {"", "--bar_int", "--foo_int=3"};
83 EXPECT_EXIT(TestParser(countof(arg_invalid_value), arg_invalid_value),
84 ::testing::ExitedWithCode(1), "ERROR");
85
86 const char *arg_bad_format[] = {"", "--bar_int="};
87 EXPECT_EXIT(TestParser(countof(arg_bad_format), arg_bad_format), ::testing::ExitedWithCode(1),
88 "ERROR");
89}

◆ TEST_F() [21/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
ParseOldFlags   
)

Definition at line 147 of file commandlineflags_test.cc.

147 {
148 EXPECT_STREQ("", FLAGS_q.c_str());
149 const char *argv[] = {"", "-q", "text"};
150 TestParser(countof(argv), argv);
151 EXPECT_STREQ("text", FLAGS_q.c_str());
152}

◆ TEST_F() [22/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
ParseStringFlags   
)

Definition at line 105 of file commandlineflags_test.cc.

105 {
106 const char *argv[] = {"", "--foo_string=abc", "--bar_string", "def"};
107 TestParser(countof(argv), argv);
108
109 EXPECT_STREQ("abc", FLAGS_foo_string.c_str());
110 EXPECT_STREQ("def", FLAGS_bar_string.c_str());
111
112 const char *arg_no_value[] = {"", "--bar_string"};
113 EXPECT_EXIT(TestParser(2, arg_no_value), ::testing::ExitedWithCode(1), "ERROR");
114
115 FLAGS_bar_string.set_value("bar");
116 const char *arg_empty_string[] = {"", "--bar_string="};
117 TestParser(2, arg_empty_string);
118 EXPECT_STREQ("", FLAGS_bar_string.c_str());
119}

◆ TEST_F() [23/229]

tesseract::TEST_F ( CommandlineflagsTest  ,
RemoveFlags   
)

Definition at line 44 of file commandlineflags_test.cc.

44 {
45 const char *const_argv[] = {"Progname", "--foo_int", "3", "file1.h", "file2.h"};
46 int argc = countof(const_argv);
47 char **argv = const_cast<char **>(const_argv);
48 tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
49
50 // argv should be rearranged to look like { "Progname", "file1.h", "file2.h" }
51 EXPECT_EQ(3, argc);
52 EXPECT_STREQ("Progname", argv[0]);
53 EXPECT_STREQ("file1.h", argv[1]);
54 EXPECT_STREQ("file2.h", argv[2]);
55}

◆ TEST_F() [24/229]

tesseract::TEST_F ( DawgTest  ,
TestDawgConversion   
)

Definition at line 85 of file dawg_test.cc.

85 {
86 TestDawgRoundTrip("eng.unicharset", "eng.wordlist.clean.freq");
87}

◆ TEST_F() [25/229]

tesseract::TEST_F ( DawgTest  ,
TestMatching   
)

Definition at line 89 of file dawg_test.cc.

89 {
90 UNICHARSET unicharset;
91 unicharset.load_from_file(file::JoinPath(TESTING_DIR, "eng.unicharset").c_str());
92 tesseract::Trie trie(tesseract::DAWG_TYPE_WORD, "basic_dawg", NGRAM_PERM, unicharset.size(), 0);
93 WERD_CHOICE space_apos(" '", unicharset);
94 trie.add_word_to_dawg(space_apos);
95
96 WERD_CHOICE space(" ", unicharset);
97
98 // partial match ok - then good!
99 EXPECT_TRUE(trie.prefix_in_dawg(space, false));
100 // require complete match - not present.
101 EXPECT_FALSE(trie.word_in_dawg(space));
102 EXPECT_FALSE(trie.prefix_in_dawg(space, true));
103
104 // partial or complete match ok for full word:
105 EXPECT_TRUE(trie.prefix_in_dawg(space_apos, false));
106 EXPECT_TRUE(trie.word_in_dawg(space_apos));
107 EXPECT_TRUE(trie.prefix_in_dawg(space_apos, true));
108}

◆ TEST_F() [26/229]

tesseract::TEST_F ( DENORMTest  ,
Multiple   
)

Definition at line 83 of file denorm_test.cc.

83 {
84 DENORM denorm;
85 denorm.SetupNormalization(nullptr, nullptr, nullptr, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f,
86 static_cast<float>(kBlnBaselineOffset));
87
88 DENORM denorm2;
89 FCOORD rotation90(0.0f, 1.0f);
90 denorm2.SetupNormalization(nullptr, &rotation90, &denorm, 128.0f, 128.0f, 0.5f, 0.25f, 0.0f,
91 0.0f);
92 TPOINT pt1(1050, 2000);
93 TPOINT result1(100, kBlnBaselineOffset);
94 ExpectCorrectTransform(denorm, pt1, result1, true);
95 ExpectCorrectTransform(denorm, pt1, result1, false);
96 TPOINT result2(kBlnBaselineOffset / 4, -14);
97 ExpectCorrectTransform(denorm2, result1, result2, true);
98 ExpectCorrectTransform(denorm2, pt1, result2, false);
99}
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:99

◆ TEST_F() [27/229]

tesseract::TEST_F ( DENORMTest  ,
NoRotations   
)

Definition at line 51 of file denorm_test.cc.

51 {
52 DENORM denorm;
53 denorm.SetupNormalization(nullptr, nullptr, nullptr, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f,
54 static_cast<float>(kBlnBaselineOffset));
55 TPOINT pt1(1100, 2000);
56 TPOINT result1(200, kBlnBaselineOffset);
57 ExpectCorrectTransform(denorm, pt1, result1, true);
58 ExpectCorrectTransform(denorm, pt1, result1, false);
59 TPOINT pt2(900, 2100);
60 TPOINT result2(-200, 300 + kBlnBaselineOffset);
61 ExpectCorrectTransform(denorm, pt2, result2, true);
62 ExpectCorrectTransform(denorm, pt2, result2, false);
63}

◆ TEST_F() [28/229]

tesseract::TEST_F ( DENORMTest  ,
WithRotations   
)

Definition at line 66 of file denorm_test.cc.

66 {
67 DENORM denorm;
68 FCOORD rotation90(0.0f, 1.0f);
69 denorm.SetupNormalization(nullptr, &rotation90, nullptr, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f,
70 static_cast<float>(kBlnBaselineOffset));
71
72 TPOINT pt1(1100, 2000);
73 TPOINT result1(0, 200 + kBlnBaselineOffset);
74 ExpectCorrectTransform(denorm, pt1, result1, true);
75 ExpectCorrectTransform(denorm, pt1, result1, false);
76 TPOINT pt2(900, 2100);
77 TPOINT result2(-300, kBlnBaselineOffset - 200);
78 ExpectCorrectTransform(denorm, pt2, result2, true);
79 ExpectCorrectTransform(denorm, pt2, result2, false);
80}

◆ TEST_F() [29/229]

tesseract::TEST_F ( EquationFinderTest  ,
CheckSeedBlobsCount   
)

Definition at line 330 of file equationdetect_test.cc.

330 {
331 TBOX box(0, 950, 999, 999);
332 ColPartition *part1 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
333 ColPartition *part2 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
334 ColPartition *part3 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
335 ColPartition *part4 = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
336
337 // Part 1: 8 math, 0 digit, 20 total.
338 equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
339 EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part1));
340
341 // Part 2: 1 math, 8 digit, 20 total.
342 equation_det_->AddMathDigitBlobs(1, 8, 20, part2);
343 EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part2));
344
345 // Part 3: 3 math, 8 digit, 20 total.
346 equation_det_->AddMathDigitBlobs(3, 8, 20, part3);
347 EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part3));
348
349 // Part 4: 0 math, 0 digit, 8 total.
350 equation_det_->AddMathDigitBlobs(0, 0, 8, part4);
351 EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part4));
352
353 // Release memory.
354 part1->DeleteBoxes();
355 delete (part1);
356 part2->DeleteBoxes();
357 delete (part2);
358 part3->DeleteBoxes();
359 delete (part3);
360 part4->DeleteBoxes();
361 delete (part4);
362}

◆ TEST_F() [30/229]

tesseract::TEST_F ( EquationFinderTest  ,
ComputeCPsSuperBBox   
)

Definition at line 404 of file equationdetect_test.cc.

404 {
405 Image pix = pixCreate(1001, 1001, 1);
406 equation_det_->SetPixBinary(pix);
407 ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
408
409 TBOX box1(0, 0, 999, 99);
410 ColPartition *part1 = ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
411 TBOX box2(0, 100, 499, 199);
412 ColPartition *part2 = ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
413 TBOX box3(500, 100, 999, 199);
414 ColPartition *part3 = ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
415 TBOX box4(0, 200, 999, 299);
416 ColPartition *part4 = ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
417 TBOX box5(0, 900, 999, 999);
418 ColPartition *part5 = ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
419
420 // Add part1->part3 into part_grid and test.
421 part_grid.InsertBBox(true, true, part1);
422 part_grid.InsertBBox(true, true, part2);
423 part_grid.InsertBBox(true, true, part3);
424 TBOX super_box(0, 0, 999, 199);
425 equation_det_->TestComputeCPsSuperBBox(super_box, &part_grid);
426
427 // Add part4 and test.
428 part_grid.InsertBBox(true, true, part4);
429 TBOX super_box2(0, 0, 999, 299);
430 equation_det_->TestComputeCPsSuperBBox(super_box2, &part_grid);
431
432 // Add part5 and test.
433 part_grid.InsertBBox(true, true, part5);
434 TBOX super_box3(0, 0, 999, 999);
435 equation_det_->TestComputeCPsSuperBBox(super_box3, &part_grid);
436
437 // Release memory.
438 part1->DeleteBoxes();
439 delete (part1);
440 part2->DeleteBoxes();
441 delete (part2);
442 part3->DeleteBoxes();
443 delete (part3);
444 part4->DeleteBoxes();
445 delete (part4);
446 part5->DeleteBoxes();
447 delete (part5);
448}

◆ TEST_F() [31/229]

tesseract::TEST_F ( EquationFinderTest  ,
ComputeForegroundDensity   
)

Definition at line 364 of file equationdetect_test.cc.

364 {
365 // Create the pix with top half foreground, bottom half background.
366 int width = 1024, height = 768;
367 Image pix = pixCreate(width, height, 1);
368 pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
369 TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
370 box3(100, height - 40, 140, height);
371 equation_det_->SetPixBinary(pix);
372
373 // Verify
374 EXPECT_NEAR(0.0, equation_det_->RunComputeForegroundDensity(box1), 0.0001f);
375 EXPECT_NEAR(0.5, equation_det_->RunComputeForegroundDensity(box2), 0.0001f);
376 EXPECT_NEAR(1.0, equation_det_->RunComputeForegroundDensity(box3), 0.0001f);
377}
#define EXPECT_NEAR(val1, val2, abs_error)
Definition: gtest.h:2160

◆ TEST_F() [32/229]

tesseract::TEST_F ( EquationFinderTest  ,
CountAlignment   
)

Definition at line 379 of file equationdetect_test.cc.

379 {
380 std::vector<int> vec;
381 vec.push_back(1);
382 vec.push_back(1);
383 vec.push_back(1);
384 vec.push_back(100);
385 vec.push_back(200);
386 vec.push_back(200);
387
388 // Test the right point.
389 EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 1));
390 EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 100));
391 EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 200));
392
393 // Test the near neighbors.
394 EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 3));
395 EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 99));
396 EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 202));
397
398 // Test the far neighbors.
399 EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 150));
400 EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 50));
401 EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 250));
402}

◆ TEST_F() [33/229]

tesseract::TEST_F ( EquationFinderTest  ,
EstimateTypeForUnichar   
)

Definition at line 231 of file equationdetect_test.cc.

231 {
232 // Test abc characters.
233 EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("a"));
234 EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("c"));
235
236 // Test punctuation characters.
237 EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("'"));
238 EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar(","));
239
240 // Test digits.
241 EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("1"));
242 EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("4"));
243 EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("|"));
244
245 // Test math symbols.
246 EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("("));
247 EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("+"));
248}

◆ TEST_F() [34/229]

tesseract::TEST_F ( EquationFinderTest  ,
IdentifySpecialText   
)

Definition at line 180 of file equationdetect_test.cc.

180 {
181#if !ENABLE_IdentifySpecialText_TEST
182 GTEST_SKIP();
183#else // TODO: missing equ_gt1.tif
184 // Load Image.
185 std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
186 Image pix_binary = pixRead(imagefile.c_str());
187 CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
188
189 // Get components.
190 BLOCK_LIST blocks;
191 TO_BLOCK_LIST to_blocks;
192 AddPageBlock(pix_binary, &blocks);
193 Textord *textord = tesseract_->mutable_textord();
194 textord->find_components(pix_binary, &blocks, &to_blocks);
195
196 // Identify special texts from to_blocks.
197 TO_BLOCK_IT to_block_it(&to_blocks);
198 std::map<int, int> stt_count;
199 for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list(); to_block_it.forward()) {
200 TO_BLOCK *to_block = to_block_it.data();
201 BLOBNBOX_IT blob_it(&(to_block->blobs));
202 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
203 BLOBNBOX *blob = blob_it.data();
204 // blob->set_special_text_type(BSTT_NONE);
205 equation_det_->RunIdentifySpecialText(blob, 0);
206 tensorflow::gtl::InsertIfNotPresent(&stt_count, blob->special_text_type(), 0);
207 stt_count[blob->special_text_type()]++;
208 }
209 }
210
211 // Verify the number, but allow a range of +/- kCountRange before squealing.
212 const int kCountRange = 3;
213 EXPECT_GE(39 + kCountRange, stt_count[BSTT_NONE]);
214 EXPECT_LE(39 - kCountRange, stt_count[BSTT_NONE]);
215
216 // if you count all the subscripts etc, there are ~45 italic chars.
217 EXPECT_GE(45 + kCountRange, stt_count[BSTT_ITALIC]);
218 EXPECT_LE(45 - kCountRange, stt_count[BSTT_ITALIC]);
219 EXPECT_GE(41 + kCountRange, stt_count[BSTT_DIGIT]);
220 EXPECT_LE(41 - kCountRange, stt_count[BSTT_DIGIT]);
221 EXPECT_GE(50 + kCountRange, stt_count[BSTT_MATH]);
222 EXPECT_LE(50 - kCountRange, stt_count[BSTT_MATH]);
223 EXPECT_GE(10 + kCountRange, stt_count[BSTT_UNCLEAR]);
224 EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
225
226 // Release memory.
227 pix_binary.destroy();
228#endif
229}
#define EXPECT_LE(val1, val2)
Definition: gtest.h:2047
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:304

◆ TEST_F() [35/229]

tesseract::TEST_F ( EquationFinderTest  ,
IsIndented   
)

Definition at line 250 of file equationdetect_test.cc.

250 {
251 ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
252
253 // Create five ColPartitions:
254 // part 1: ************
255 // part 2: *********
256 // part 3: *******
257 // part 4: *****
258 //
259 // part 5: ********
260 TBOX box1(0, 950, 999, 999);
261 ColPartition *part1 = ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
262 part_grid.InsertBBox(true, true, part1);
263 TBOX box2(300, 920, 900, 940);
264 ColPartition *part2 = ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
265 part_grid.InsertBBox(true, true, part2);
266 TBOX box3(0, 900, 600, 910);
267 ColPartition *part3 = ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
268 part_grid.InsertBBox(true, true, part3);
269 TBOX box4(300, 890, 600, 899);
270 ColPartition *part4 = ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
271 part_grid.InsertBBox(true, true, part4);
272 TBOX box5(300, 500, 900, 510);
273 ColPartition *part5 = ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
274 part_grid.InsertBBox(true, true, part5);
275
276 // Test
277 // part1 should be no indent.
278 EXPECT_EQ(EquationDetect::NO_INDENT, equation_det_->RunIsIndented(&part_grid, part1));
279 // part2 should be left indent in terms of part1.
280 EXPECT_EQ(EquationDetect::LEFT_INDENT, equation_det_->RunIsIndented(&part_grid, part2));
281 // part3 should be right indent.
282 EXPECT_EQ(EquationDetect::RIGHT_INDENT, equation_det_->RunIsIndented(&part_grid, part3));
283 // part4 should be both indented.
284 EXPECT_EQ(EquationDetect::BOTH_INDENT, equation_det_->RunIsIndented(&part_grid, part4));
285 // part5 should be no indent because it is too far from part1.
286 EXPECT_EQ(EquationDetect::NO_INDENT, equation_det_->RunIsIndented(&part_grid, part5));
287
288 // Release memory.
289 part1->DeleteBoxes();
290 delete (part1);
291 part2->DeleteBoxes();
292 delete (part2);
293 part3->DeleteBoxes();
294 delete (part3);
295 part4->DeleteBoxes();
296 delete (part4);
297 part5->DeleteBoxes();
298 delete (part5);
299}

◆ TEST_F() [36/229]

tesseract::TEST_F ( EquationFinderTest  ,
IsNearSmallNeighbor   
)

Definition at line 301 of file equationdetect_test.cc.

301 {
302 // Create four tboxes:
303 // part 1, part 2
304 // ***** *****
305 // part 3: *****
306 //
307 // part 4: *****************
308 TBOX box1(0, 950, 499, 999);
309 TBOX box2(500, 950, 999, 998);
310 TBOX box3(0, 900, 499, 949);
311 TBOX box4(0, 550, 499, 590);
312
313 // Test
314 // box2 should be box1's near neighbor but not vice versa.
315 EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box2));
316 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box1));
317 // box1 and box3 should be near neighbors of each other.
318 EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box3));
319 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
320 // box2 and box3 should not be near neighbors of each other.
321 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
322 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box2));
323
324 // box4 should not be the near neighbor of any one.
325 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box1, box4));
326 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box4));
327 EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box4));
328}

◆ TEST_F() [37/229]

tesseract::TEST_F ( EquationFinderTest  ,
SplitCPHor   
)

Definition at line 484 of file equationdetect_test.cc.

484 {
485 TBOX box(0, 0, 999, 99);
486 ColPartition *part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
487 part->DeleteBoxes();
488 part->set_median_width(10);
489 std::vector<ColPartition *> parts_splitted;
490
491 // Test an empty part.
492 equation_det_->RunSplitCPHor(part, &parts_splitted);
493 EXPECT_TRUE(parts_splitted.empty());
494 // Test with one blob.
495 AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
496
497 equation_det_->RunSplitCPHor(part, &parts_splitted);
498 EXPECT_EQ(1, parts_splitted.size());
499 EXPECT_TRUE(TBOX(0, 0, 10, 50) == parts_splitted[0]->bounding_box());
500
501 // Add more blobs and test.
502 AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
503 AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
504 AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
505 AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
506 AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
507 equation_det_->RunSplitCPHor(part, &parts_splitted);
508
509 // Verify.
510 EXPECT_EQ(3, parts_splitted.size());
511 EXPECT_TRUE(TBOX(0, 0, 30, 60) == parts_splitted[0]->bounding_box());
512 EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
513 EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());
514
515 for (auto part_splitted : parts_splitted) {
516 delete part_splitted;
517 }
518 part->DeleteBoxes();
519 delete (part);
520}
void set_median_width(int width)
Definition: colpartition.h:144

◆ TEST_F() [38/229]

tesseract::TEST_F ( EquationFinderTest  ,
SplitCPHorLite   
)

Definition at line 450 of file equationdetect_test.cc.

450 {
451 TBOX box(0, 0, 999, 99);
452 ColPartition *part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
453 part->DeleteBoxes();
454 part->set_median_width(10);
455 std::vector<TBOX> splitted_boxes;
456
457 // Test an empty part.
458 equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
459 EXPECT_TRUE(splitted_boxes.empty());
460
461 // Test with one blob.
462 AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
463 equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
464 EXPECT_EQ(1, splitted_boxes.size());
465 EXPECT_TRUE(TBOX(0, 0, 10, 50) == splitted_boxes[0]);
466
467 // Add more blobs and test.
468 AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
469 AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
470 AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
471 AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
472 AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
473 equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
474 // Verify.
475 EXPECT_EQ(3, splitted_boxes.size());
476 EXPECT_TRUE(TBOX(0, 0, 30, 60) == splitted_boxes[0]);
477 EXPECT_TRUE(TBOX(100, 0, 140, 45) == splitted_boxes[1]);
478 EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);
479
480 part->DeleteBoxes();
481 delete (part);
482}

◆ TEST_F() [39/229]

tesseract::TEST_F ( EuroText  ,
FastLatinOCR   
)

Definition at line 105 of file apiexample_test.cc.

105 {
106 OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt", TESSDATA_DIR "_fast",
107 "script/Latin");
108}
void OCRTester(const char *imgname, const char *groundtruth, const char *tessdatadir, const char *lang)

◆ TEST_F() [40/229]

tesseract::TEST_F ( FontUtilsTest  ,
DoesDetectMissingFonts   
)

Definition at line 220 of file pango_font_info_test.cc.

220 {
221 // Only bold italic face is available.
222 EXPECT_FALSE(FontUtils::IsAvailableFont("Arial"));
223 // Don't have a ttf for the Courier family.
224 EXPECT_FALSE(FontUtils::IsAvailableFont("Courier"));
225 // Pango "synthesizes" the italic font from the available Verdana Regular and
226 // includes it in its list, but it is not really loadable.
227 EXPECT_FALSE(FontUtils::IsAvailableFont("Verdana Italic"));
228 // We have "DejaVu Sans Ultra-Light" but not its medium weight counterpart.
229 EXPECT_FALSE(FontUtils::IsAvailableFont("DejaVu Sans"));
230}

◆ TEST_F() [41/229]

tesseract::TEST_F ( FontUtilsTest  ,
DoesFailToSelectFont   
)

Definition at line 276 of file pango_font_info_test.cc.

276 {
277 const char kMixedScriptText[] = "पिताने विवाह की | والفكر والصراع";
278 std::vector<std::string> graphemes;
279 std::string selected_font;
280 EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, strlen(kMixedScriptText), &selected_font,
281 &graphemes));
282}

◆ TEST_F() [42/229]

tesseract::TEST_F ( FontUtilsTest  ,
DoesFindAvailableFonts   
)

Definition at line 210 of file pango_font_info_test.cc.

210 {
211 EXPECT_TRUE(FontUtils::IsAvailableFont("Arial Bold Italic"));
212 EXPECT_TRUE(FontUtils::IsAvailableFont("Verdana"));
213 EXPECT_TRUE(FontUtils::IsAvailableFont("DejaVu Sans Ultra-Light"));
214
215 // Test that we can support font name convention for Pango v1.30.2 even when
216 // we are running an older version.
217 EXPECT_TRUE(FontUtils::IsAvailableFont("Times New Roman,"));
218}

◆ TEST_F() [43/229]

tesseract::TEST_F ( FontUtilsTest  ,
DoesListAvailableFonts   
)

Definition at line 232 of file pango_font_info_test.cc.

232 {
233 const std::vector<std::string> &fonts = FontUtils::ListAvailableFonts();
234 EXPECT_THAT(fonts, ::testing::ElementsAreArray(kExpectedFontNames));
235 for (auto &font : fonts) {
236 PangoFontInfo font_info;
237 EXPECT_TRUE(font_info.ParseFontDescriptionName(font));
238 }
239}
const char * kExpectedFontNames[]
bool ParseFontDescriptionName(const std::string &name)

◆ TEST_F() [44/229]

tesseract::TEST_F ( FontUtilsTest  ,
DoesSelectFont   
)

Definition at line 262 of file pango_font_info_test.cc.

262 {
263 const char *kLangText[] = {kArabicText, kEngText, kHinText, kKorText, nullptr};
264 const char *kLangNames[] = {"Arabic", "English", "Hindi", "Korean", nullptr};
265 for (int i = 0; kLangText[i] != nullptr; ++i) {
266 SCOPED_TRACE(kLangNames[i]);
267 std::vector<std::string> graphemes;
268 std::string selected_font;
270 FontUtils::SelectFont(kLangText[i], strlen(kLangText[i]), &selected_font, &graphemes));
271 EXPECT_TRUE(selected_font.size());
272 EXPECT_TRUE(graphemes.size());
273 }
274}
const char kArabicText[]

◆ TEST_F() [45/229]

tesseract::TEST_F ( HeapTest  ,
DoublePtrTest   
)

Definition at line 186 of file heap_test.cc.

186 {
187 DoublePtr ptr1;
188 DoublePtr ptr2;
189 ptr1.Connect(&ptr2);
190 // Check that the correct copy constructor is used.
191 DoublePtr ptr3(ptr1);
192 EXPECT_EQ(&ptr3, ptr3.OtherEnd()->OtherEnd());
193 EXPECT_TRUE(ptr1.OtherEnd() == nullptr);
194 // Check that the correct operator= is used.
195 ptr1 = ptr3;
196 EXPECT_EQ(&ptr1, ptr1.OtherEnd()->OtherEnd());
197 EXPECT_TRUE(ptr3.OtherEnd() == nullptr);
198}
DoublePtr * OtherEnd() const
Definition: doubleptr.h:80
void Connect(DoublePtr *other)
Definition: doubleptr.h:66

◆ TEST_F() [46/229]

tesseract::TEST_F ( HeapTest  ,
MixedTest   
)

Definition at line 94 of file heap_test.cc.

94 {
96 KDVector v;
97 // Push the test data onto both the heap and the KDVector.
98 PushTestData(&heap, &v);
99 // Sort the vector and remove the first 5 values from both heap and v.
100 std::sort(v.begin(), v.end());
101 for (int i = 0; i < 5; ++i) {
102 heap.Pop(nullptr);
103 v.erase(v.begin());
104 }
105 // Push the test data onto both the heap and the KDVector.
106 PushTestData(&heap, &v);
107 // Heap and vector should still match!
108 VerifyHeapVectorMatch(&heap, &v);
109}
bool Pop(Pair *entry)
Definition: genericheap.h:120

◆ TEST_F() [47/229]

tesseract::TEST_F ( HeapTest  ,
PopWorstTest   
)

Definition at line 113 of file heap_test.cc.

113 {
115 KDVector v;
116 // Push the test data onto both the heap and the KDVector.
117 PushTestData(&heap, &v);
118 // Get the worst element off the heap.
119 IntKDPair pair;
120 heap.PopWorst(&pair);
121 EXPECT_EQ(pair.key(), 65536);
122 EXPECT_EQ(pair.data(), 6);
123 // Sort and remove the worst element from the vector.
124 std::sort(v.begin(), v.end());
125 v.resize(v.size() - 1);
126 // After that they should still match!
127 VerifyHeapVectorMatch(&heap, &v);
128}
bool PopWorst(Pair *entry)
Definition: genericheap.h:144
Data & data()
Definition: kdpair.h:41
Key & key()
Definition: kdpair.h:47

◆ TEST_F() [48/229]

tesseract::TEST_F ( HeapTest  ,
RevalueTest   
)

Definition at line 132 of file heap_test.cc.

132 {
133 // Here the data element of the pair is a DoublePtr, which links the entries
134 // in the vector and heap, and we test a MAX heap.
135 typedef KDPairDec<int, DoublePtr> PtrPair;
137 std::vector<PtrPair> v;
138 // Push the test data onto both the heap and the vector.
139 for (int i : test_data) {
140 PtrPair h_pair;
141 h_pair.key() = i;
142 PtrPair v_pair;
143 v_pair.key() = i;
144 h_pair.data().Connect(&v_pair.data());
145 heap.Push(&h_pair);
146 v.push_back(v_pair);
147 }
148 // Test changes both ways. Index 0 is 8, so change it to -1.
149 v[0].key() = -1;
150 // v[0].data().OtherEnd() is a pointer to the data element in the appropriate
151 // heap entry, wherever it may be. We can change its value via that pointer.
152 // Without Reshuffle, that would be a terribly bad thing to do, as it violates
153 // the heap invariant, making the heap corrupt.
154 auto *pair_ptr = reinterpret_cast<PtrPair *>(v[0].data().OtherEnd());
155 pair_ptr->key() = v[0].key();
156 heap.Reshuffle(pair_ptr);
157 // Index 1 is 1. Change to 32767.
158 v[1].key() = 32767;
159 pair_ptr = reinterpret_cast<PtrPair *>(v[1].data().OtherEnd());
160 pair_ptr->key() = v[1].key();
161 heap.Reshuffle(pair_ptr);
162 // After the changes, popping the heap should still match the sorted order
163 // of the vector.
164 std::sort(v.begin(), v.end());
165 EXPECT_GT(v[0].key(), v.back().key());
166 for (auto &i : v) {
167 EXPECT_EQ(i.key(), heap.PeekTop().key());
168 EXPECT_FALSE(heap.empty());
169 heap.Pop(nullptr);
170 }
171 EXPECT_TRUE(heap.empty());
172}
int test_data[]
Definition: heap_test.cc:23
bool empty() const
Definition: genericheap.h:68
const Pair & PeekTop() const
Definition: genericheap.h:108
void Reshuffle(Pair *pair)
Definition: genericheap.h:193
void Push(Pair *entry)
Definition: genericheap.h:95

◆ TEST_F() [49/229]

tesseract::TEST_F ( HeapTest  ,
SortTest   
)

Definition at line 81 of file heap_test.cc.

81 {
83 EXPECT_TRUE(heap.empty());
84 KDVector v;
85 EXPECT_EQ(heap.size(), v.size());
86 // Push the test data onto both the heap and the KDVector.
87 PushTestData(&heap, &v);
88 VerifyHeapVectorMatch(&heap, &v);
89}

◆ TEST_F() [50/229]

tesseract::TEST_F ( ImagedataTest  ,
CachesMultiDocs   
)

Definition at line 90 of file imagedata_test.cc.

90 {
91 // This test verifies that DocumentCache works to store multiple DocumentData
92 // and the two caching strategies read images in the right order.
93 // Number of pages in each document.
94 const std::vector<int> kNumPages = {6, 5, 7};
95 std::vector<std::vector<std::string>> page_texts;
96 std::vector<std::string> filenames;
97 for (size_t d = 0; d < kNumPages.size(); ++d) {
98 page_texts.emplace_back(std::vector<std::string>());
99 std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back());
100 filenames.push_back(filename);
101 }
102 // Now try getting them back with different cache strategies and check that
103 // the pages come out in the right order.
104 DocumentCache robin_cache(8000000);
105 robin_cache.LoadDocuments(filenames, tesseract::CS_ROUND_ROBIN, nullptr);
106 DocumentCache serial_cache(8000000);
107 serial_cache.LoadDocuments(filenames, tesseract::CS_SEQUENTIAL, nullptr);
108 for (int p = 0; p <= 21; ++p) {
109 LOG(INFO) << "Page " << p;
110 const ImageData *robin_data = robin_cache.GetPageBySerial(p);
111 const ImageData *serial_data = serial_cache.GetPageBySerial(p);
112 CHECK(robin_data != nullptr);
113 CHECK(serial_data != nullptr);
114 int robin_doc = p % kNumPages.size();
115 int robin_page = p / kNumPages.size() % kNumPages[robin_doc];
116 // Check that this is the right page.
117 EXPECT_STREQ(page_texts[robin_doc][robin_page].c_str(), robin_data->transcription().c_str());
118 int serial_doc = p / kNumPages[0] % kNumPages.size();
119 int serial_page = p % kNumPages[0] % kNumPages[serial_doc];
120 EXPECT_STREQ(page_texts[serial_doc][serial_page].c_str(), serial_data->transcription().c_str());
121 }
122}

◆ TEST_F() [51/229]

tesseract::TEST_F ( ImagedataTest  ,
CachesProperly   
)

Definition at line 60 of file imagedata_test.cc.

60 {
61 // This test verifies that Imagedata can be stored in a DocumentData and a
62 // collection of them is cached correctly given limited memory.
63 // Number of pages to put in the fake document.
64 const int kNumPages = 12;
65 // Allowances to read the document. Big enough for 1, 3, 0, all pages.
66 const int kMemoryAllowances[] = {2000000, 4000000, 1000000, 100000000, 0};
67 // Order in which to read the pages, with some sequential and some seeks.
68 const int kPageReadOrder[] = {0, 1, 2, 3, 8, 4, 5, 6, 7, 11, 10, 9, -1};
69
70 std::vector<std::string> page_texts;
71 std::string filename = MakeFakeDoc(kNumPages, 0, &page_texts);
72 // Now try getting it back with different memory allowances and check that
73 // the pages can still be read.
74 for (int m = 0; kMemoryAllowances[m] > 0; ++m) {
75 DocumentData read_doc("My document");
76 EXPECT_TRUE(read_doc.LoadDocument(filename.c_str(), 0, kMemoryAllowances[m], nullptr));
77 LOG(ERROR) << "Allowance = " << kMemoryAllowances[m];
78 // Read the pages in a specific order.
79 for (int p = 0; kPageReadOrder[p] >= 0; ++p) {
80 int page = kPageReadOrder[p];
81 const ImageData *imagedata = read_doc.GetPage(page);
82 EXPECT_NE(nullptr, imagedata);
83 // EXPECT_NE(reinterpret_cast<ImageData*>(nullptr), imagedata);
84 // Check that this is the right page.
85 EXPECT_STREQ(page_texts[page].c_str(), imagedata->transcription().c_str());
86 }
87 }
88}
@ ERROR
Definition: log.h:28
#define EXPECT_NE(val1, val2)
Definition: gtest.h:2045

◆ TEST_F() [52/229]

tesseract::TEST_F ( IndexMapBiDiTest  ,
ManyToOne   
)

Definition at line 101 of file indexmapbidi_test.cc.

101 {
102 // Test the example in the comment on CompleteMerges.
103 IndexMapBiDi map;
104 map.Init(13, false);
105 map.SetMap(2, true);
106 map.SetMap(4, true);
107 map.SetMap(7, true);
108 map.SetMap(9, true);
109 map.SetMap(11, true);
110 map.Setup();
111 map.Merge(map.SparseToCompact(2), map.SparseToCompact(9));
112 map.Merge(map.SparseToCompact(4), map.SparseToCompact(11));
113 map.CompleteMerges();
114 EXPECT_EQ(3, map.CompactSize());
115 EXPECT_EQ(13, map.SparseSize());
116 EXPECT_EQ(1, map.SparseToCompact(4));
117 EXPECT_EQ(4, map.CompactToSparse(1));
118 EXPECT_EQ(1, map.SparseToCompact(11));
119}
int CompactSize() const
Definition: indexmapbidi.h:63
int CompactToSparse(int compact_index) const
Definition: indexmapbidi.h:55
void Init(int size, bool all_mapped)
bool Merge(int compact_index1, int compact_index2)
void SetMap(int sparse_index, bool mapped)
int SparseSize() const override
Definition: indexmapbidi.h:144
int SparseToCompact(int sparse_index) const override
Definition: indexmapbidi.h:140

◆ TEST_F() [53/229]

tesseract::TEST_F ( IndexMapBiDiTest  ,
Primes   
)

Definition at line 74 of file indexmapbidi_test.cc.

74 {
75 IndexMapBiDi map;
76 ComputePrimes(&map);
77 TestPrimes(map);
78 // It still works if we assign it to another.
79 IndexMapBiDi map2;
80 map2.CopyFrom(map);
81 TestPrimes(map2);
82 // Or if we assign it to a base class.
83 IndexMap base_map;
84 base_map.CopyFrom(map);
85 TestPrimes(base_map);
86 // Test file i/o too.
87 std::string filename = OutputNameToPath("primesmap");
88 FILE *fp = fopen(filename.c_str(), "wb");
89 CHECK(fp != nullptr);
90 EXPECT_TRUE(map.Serialize(fp));
91 fclose(fp);
92 fp = fopen(filename.c_str(), "rb");
93 CHECK(fp != nullptr);
94 IndexMapBiDi read_map;
95 EXPECT_TRUE(read_map.DeSerialize(false, fp));
96 fclose(fp);
97 TestPrimes(read_map);
98}
void CopyFrom(const IndexMap &src)
void CopyFrom(const IndexMapBiDi &src)
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)

◆ TEST_F() [54/229]

tesseract::TEST_F ( IntFeatureMapTest  ,
Exhaustive   
)

Definition at line 42 of file intfeaturemap_test.cc.

42 {
43#ifdef DISABLED_LEGACY_ENGINE
44 // Skip test because IntFeatureSpace is missing.
45 GTEST_SKIP();
46#else
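47 IntFeatureSpace space;
48 space.Init(kXBuckets, kYBuckets, kThetaBuckets);
49 IntFeatureMap map;
50 map.Init(space);
51 int total_size = kIntFeatureExtent * kIntFeatureExtent * kIntFeatureExtent;
52 auto features = std::make_unique<INT_FEATURE_STRUCT[]>(total_size);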
53 // Fill the features with every value.
54 for (int y = 0; y < kIntFeatureExtent; ++y) {
55 for (int x = 0; x < kIntFeatureExtent; ++x) {
56 for (int theta = 0; theta < kIntFeatureExtent; ++theta) {
57 int f_index = (y * kIntFeatureExtent + x) * kIntFeatureExtent + theta;
58 features[f_index].X = x;
59 features[f_index].Y = y;
60 features[f_index].Theta = theta;
61 }
62 }
63 }
64 std::vector<int> index_features;
65 map.IndexAndSortFeatures(features.get(), total_size, &index_features);
66 EXPECT_EQ(total_size, index_features.size());
67 int total_buckets = kXBuckets * kYBuckets * kThetaBuckets;
68 std::vector<int> map_features;
69 int misses = map.MapIndexedFeatures(index_features, &map_features);
70 EXPECT_EQ(0, misses);
71 EXPECT_EQ(total_buckets, map_features.size());
72 ExpectContiguous(map_features, 0, total_buckets);
73 EXPECT_EQ(total_buckets, map.compact_size());
74 EXPECT_EQ(total_buckets, map.sparse_size());
75
76 // Every offset should be within dx, dy, dtheta of the start point.
77 int dx = kIntFeatureExtent / kXBuckets + 1;
78 int dy = kIntFeatureExtent / kYBuckets + 1;
79 int dtheta = kIntFeatureExtent / kThetaBuckets + 1;
80 int bad_offsets = 0;
81 for (int index = 0; index < total_buckets; ++index) {
82 for (int dir = -tesseract::kNumOffsetMaps; dir <= tesseract::kNumOffsetMaps; ++dir) {
83 int offset_index = map.OffsetFeature(index, dir);
84 if (dir == 0) {
85 EXPECT_EQ(index, offset_index);
86 } else if (offset_index >= 0) {
87 INT_FEATURE_STRUCT f = map.InverseIndexFeature(index);
88 INT_FEATURE_STRUCT f2 = map.InverseIndexFeature(offset_index);
89 EXPECT_TRUE(f.X != f2.X || f.Y != f2.Y || f.Theta != f2.Theta);
90 EXPECT_LE(abs(f.X - f2.X), dx);
91 EXPECT_LE(abs(f.Y - f2.Y), dy);
92 int theta_delta = abs(f.Theta - f2.Theta);
93 if (theta_delta > kIntFeatureExtent / 2) {
94 theta_delta = kIntFeatureExtent - theta_delta;
95 }
96 EXPECT_LE(theta_delta, dtheta);
97 } else {
98 ++bad_offsets;
99 INT_FEATURE_STRUCT f = map.InverseIndexFeature(index);
100 }
101 }
102 }
103 EXPECT_LE(bad_offsets, (kXBuckets + kYBuckets) * kThetaBuckets);
104
105 // To test the mapping further, delete the 1st and last map feature, and
106 // test again.
107 map.DeleteMapFeature(0);
108 map.DeleteMapFeature(total_buckets - 1);
109 map.FinalizeMapping(nullptr);
110 map.IndexAndSortFeatures(features.get(), total_size, &index_features);
111 // Has no effect on index features.
112 EXPECT_EQ(total_size, index_features.size());
113 misses = map.MapIndexedFeatures(index_features, &map_features);
114 int expected_misses = (kIntFeatureExtent / kXBuckets) * (kIntFeatureExtent / kYBuckets) *
116 expected_misses += (kIntFeatureExtent / kXBuckets) * (kIntFeatureExtent / kYBuckets + 1) *
118 EXPECT_EQ(expected_misses, misses);
119 EXPECT_EQ(total_buckets - 2, map_features.size());
120 ExpectContiguous(map_features, 0, total_buckets - 2);
121 EXPECT_EQ(total_buckets - 2, map.compact_size());
122 EXPECT_EQ(total_buckets, map.sparse_size());
123#endif
124}
const int kIntFeatureExtent
const int kThetaBuckets
const int kYBuckets
const int kXBuckets
void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets)
int MapIndexedFeatures(const std::vector< int > &index_features, std::vector< int > *map_features) const
INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const
int OffsetFeature(int index_feature, int dir) const
void DeleteMapFeature(int map_feature)
void Init(const IntFeatureSpace &feature_space)
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, std::vector< int > *sorted_features) const
int FinalizeMapping(SampleIterator *it)

◆ TEST_F() [55/229]

tesseract::TEST_F ( IntSimdMatrixTest  ,
AVX2   
)

Definition at line 125 of file intsimdmatrix_test.cc.

125 {
126#if defined(HAVE_AVX2)
127 if (!SIMDDetect::IsAVX2Available()) {
128 GTEST_LOG_(INFO) << "No AVX2 found! Not tested!";
129 GTEST_SKIP();
130 }
131 ExpectEqualResults(IntSimdMatrix::intSimdMatrixAVX2);
132#else
133 GTEST_LOG_(INFO) << "AVX2 unsupported! Not tested!";
134 GTEST_SKIP();
135#endif
136}
#define GTEST_LOG_(severity)
Definition: gtest-port.h:984

◆ TEST_F() [56/229]

tesseract::TEST_F ( IntSimdMatrixTest  ,
C   
)

Definition at line 105 of file intsimdmatrix_test.cc.

105 {
106 static const IntSimdMatrix matrix = {nullptr, 1, 1, 1, 1};
107 ExpectEqualResults(matrix);
108}

◆ TEST_F() [57/229]

tesseract::TEST_F ( IntSimdMatrixTest  ,
SSE   
)

Definition at line 111 of file intsimdmatrix_test.cc.

111 {
112#if defined(HAVE_SSE4_1)
113 if (!SIMDDetect::IsSSEAvailable()) {
114 GTEST_LOG_(INFO) << "No SSE found! Not tested!";
115 GTEST_SKIP();
116 }
117 ExpectEqualResults(IntSimdMatrix::intSimdMatrixSSE);
118#else
119 GTEST_LOG_(INFO) << "SSE unsupported! Not tested!";
120 GTEST_SKIP();
121#endif
122}

◆ TEST_F() [58/229]

tesseract::TEST_F ( LayoutTest  ,
ArraySizeTest   
)

Definition at line 191 of file layout_test.cc.

191 {
192 int size = 0;
193 for (size = 0; kPolyBlockNames[size][0] != '\0'; ++size) {
194 ;
195 }
196 EXPECT_EQ(size, PT_COUNT);
197}

◆ TEST_F() [59/229]

tesseract::TEST_F ( LayoutTest  ,
HebrewOrderingAndSkew   
)

Definition at line 214 of file layout_test.cc.

214 {
215 SetImage("hebrew.png", "eng");
216 // Just run recognition.
217 EXPECT_EQ(api_.Recognize(nullptr), 0);
218 tesseract::MutableIterator *it = api_.GetMutableIterator();
219 // In eng mode, block order should not be RTL.
220 VerifyRoughBlockOrder(false, it);
221 VerifyTotalContainment(1, it);
222 delete it;
223 // Now try again using Hebrew.
224 SetImage("hebrew.png", "heb");
225 // Just run recognition.
226 EXPECT_EQ(api_.Recognize(nullptr), 0);
227 it = api_.GetMutableIterator();
228 // In heb mode, block order should be RTL.
229 VerifyRoughBlockOrder(true, it);
230 // And blobs should still be fully contained.
231 VerifyTotalContainment(-1, it);
232 delete it;
233}

◆ TEST_F() [60/229]

tesseract::TEST_F ( LayoutTest  ,
UNLV8087_054   
)

Definition at line 201 of file layout_test.cc.

201 {
202 SetImage("8087_054.3B.tif", "eng");
203 // Just run recognition.
204 EXPECT_EQ(api_.Recognize(nullptr), 0);
205 // Check iterator position.
206 tesseract::ResultIterator *it = api_.GetIterator();
207 VerifyBlockTextOrder(kStrings8087_054, kBlocks8087_054, it);
208 delete it;
209}
const char * kStrings8087_054[]
Definition: layout_test.cc:50
const PolyBlockType kBlocks8087_054[]
Definition: layout_test.cc:51

◆ TEST_F() [61/229]

tesseract::TEST_F ( LigatureTableTest  ,
DoesFillLigatureTables   
)

Definition at line 54 of file ligature_table_test.cc.

54 {
55 EXPECT_GT(lig_table_->norm_to_lig_table().size(), 0);
56 EXPECT_GT(lig_table_->lig_to_norm_table().size(), 0);
57}

◆ TEST_F() [62/229]

tesseract::TEST_F ( LigatureTableTest  ,
TestCustomLigatures   
)

Definition at line 84 of file ligature_table_test.cc.

84 {
85 const char *kTestCases[] = {
86 "act", "a\uE003", "publiſh", "publi\uE006", "ſince",
87 "\uE007nce", "aſleep", "a\uE008eep", "neceſſary", "nece\uE009ary",
88 };
89 for (size_t i = 0; i < countof(kTestCases); i += 2) {
90 EXPECT_STREQ(kTestCases[i + 1], lig_table_->AddLigatures(kTestCases[i], nullptr).c_str());
91 EXPECT_STREQ(kTestCases[i], lig_table_->RemoveLigatures(kTestCases[i + 1]).c_str());
92 EXPECT_STREQ(kTestCases[i], lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str());
93 }
94}

◆ TEST_F() [63/229]

tesseract::TEST_F ( ListTest  ,
TestCLIST   
)

Definition at line 50 of file list_test.cc.

50 {
51 Clst_CLIST list;
52 EXPECT_TRUE(list.empty());
53 EXPECT_EQ(list.length(), 0);
54 auto it = CLIST_ITERATOR(&list);
55 for (unsigned i = 0; i < ListSize; i++) {
56 auto *lst = new Clst(i);
57 it.add_to_end(lst);
58 }
59 EXPECT_TRUE(!list.empty());
60 EXPECT_EQ(list.length(), ListSize);
61 it.move_to_first();
62 unsigned n = 0;
63 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
64 EXPECT_TRUE(n == 0 || !it.at_first());
65 auto *lst = reinterpret_cast<Clst *>(it.data());
66 EXPECT_EQ(lst->value, n);
67 n++;
68 EXPECT_TRUE(n != ListSize || it.at_last());
69 }
70 it.forward();
71 n++;
72 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
73 auto *lst = reinterpret_cast<Clst *>(it.extract());
74 EXPECT_EQ(lst->value, n % ListSize);
75 n++;
76 delete lst;
77 }
78 // TODO: add more tests for CLIST
79}

◆ TEST_F() [64/229]

tesseract::TEST_F ( ListTest  ,
TestELIST   
)

Definition at line 81 of file list_test.cc.

81 {
82 Elst_LIST list;
83 EXPECT_TRUE(list.empty());
84 EXPECT_EQ(list.length(), 0);
85 auto it = ELIST_ITERATOR(&list);
86 for (unsigned i = 0; i < ListSize; i++) {
87 auto *elst = new Elst(i);
88 it.add_to_end(elst);
89 }
90 EXPECT_TRUE(!list.empty());
91 EXPECT_EQ(list.length(), ListSize);
92 it.move_to_first();
93 unsigned n = 0;
94 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
95 EXPECT_TRUE(n == 0 || !it.at_first());
96 auto *elst = reinterpret_cast<Elst *>(it.data());
97 EXPECT_EQ(elst->value, n);
98 n++;
99 EXPECT_TRUE(n != ListSize || it.at_last());
100 }
101 it.forward();
102 n++;
103 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
104 auto *elst = reinterpret_cast<Elst *>(it.extract());
105 EXPECT_EQ(elst->value, n % ListSize);
106 n++;
107 delete elst;
108 }
109 // TODO: add more tests for ELIST
110}

◆ TEST_F() [65/229]

tesseract::TEST_F ( ListTest  ,
TestELIST2   
)

Definition at line 112 of file list_test.cc.

112 {
113 Elst2_LIST list;
114 EXPECT_TRUE(list.empty());
115 EXPECT_EQ(list.length(), 0);
116 auto it = ELIST2_ITERATOR(&list);
117 for (unsigned i = 0; i < ListSize; i++) {
118 auto *lst = new Elst2(i);
119 it.add_to_end(lst);
120 }
121 EXPECT_TRUE(!list.empty());
122 EXPECT_EQ(list.length(), ListSize);
123 it.move_to_first();
124 unsigned n = 0;
125 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
126 EXPECT_TRUE(n == 0 || !it.at_first());
127 auto *lst = reinterpret_cast<Elst2 *>(it.data());
128 EXPECT_EQ(lst->value, n);
129 n++;
130 EXPECT_TRUE(n != ListSize || it.at_last());
131 }
132 it.backward();
133 n--;
134 for (it.mark_cycle_pt(); !it.cycled_list(); it.backward()) {
135 auto *lst = reinterpret_cast<Elst2 *>(it.data());
136 EXPECT_EQ(lst->value, n);
137 n--;
138 }
139 it.forward();
140 n++;
141 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
142 auto *lst = reinterpret_cast<Elst2 *>(it.extract());
143 EXPECT_EQ(lst->value, n % ListSize);
144 n++;
145 delete lst;
146 }
147 // TODO: add more tests for ELIST2
148}

◆ TEST_F() [66/229]

tesseract::TEST_F ( LLSQTest  ,
BasicLines   
)

Definition at line 68 of file linlsq_test.cc.

68 {
69 LLSQ llsq;
70 llsq.add(1.0, 1.0);
71 llsq.add(2.0, 2.0);
72 ExpectCorrectLine(llsq, 1.0, 0.0, 0.0, 1.0, 1e-6);
73 float half_root_2 = sqrt(2.0) / 2.0f;
74 ExpectCorrectVector(llsq, FCOORD(1.5f, 1.5f), FCOORD(half_root_2, half_root_2), 1e-6);
75 llsq.remove(2.0, 2.0);
76 llsq.add(1.0, 2.0);
77 llsq.add(10.0, 1.0);
78 llsq.add(-8.0, 1.0);
79 // The point at 1,2 pulls the result away from what would otherwise be a
80 // perfect fit to a horizontal line by 0.25 unit, with rms error of 0.433.
81 ExpectCorrectLine(llsq, 0.0, 1.25, 0.433, 0.0, 1e-2);
82 ExpectCorrectVector(llsq, FCOORD(1.0f, 1.25f), FCOORD(1.0f, 0.0f), 1e-3);
83 llsq.add(1.0, 2.0, 10.0);
84 // With a heavy weight, the point at 1,2 pulls the line nearer.
85 ExpectCorrectLine(llsq, 0.0, 1.786, 0.41, 0.0, 1e-2);
86 ExpectCorrectVector(llsq, FCOORD(1.0f, 1.786f), FCOORD(1.0f, 0.0f), 1e-3);
87}
void remove(double x, double y)
Definition: linlsq.cpp:82

◆ TEST_F() [67/229]

tesseract::TEST_F ( LLSQTest  ,
RmsOrthWorksAsIntended   
)

Definition at line 104 of file linlsq_test.cc.

104 {
105 std::vector<FCOORD> pts;
106 pts.emplace_back(0.56, 0.95);
107 pts.emplace_back(0.09, 0.09);
108 pts.emplace_back(0.13, 0.77);
109 pts.emplace_back(0.16, 0.83);
110 pts.emplace_back(0.45, 0.79);
111 VerifyRmsOrth(pts, FCOORD(1, 0));
112 VerifyRmsOrth(pts, FCOORD(1, 1));
113 VerifyRmsOrth(pts, FCOORD(1, 2));
114 VerifyRmsOrth(pts, FCOORD(2, 1));
115}

◆ TEST_F() [68/229]

tesseract::TEST_F ( LLSQTest  ,
Vectors   
)

Definition at line 90 of file linlsq_test.cc.

90 {
91 LLSQ llsq;
92 llsq.add(1.0, 1.0);
93 llsq.add(1.0, -1.0);
94 ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-6);
95 llsq.add(0.9, -2.0);
96 llsq.add(1.1, -3.0);
97 llsq.add(0.9, 2.0);
98 llsq.add(1.10001, 3.0);
99 ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-3);
100}

◆ TEST_F() [69/229]

tesseract::TEST_F ( LoadLang  ,
engBest   
)

Definition at line 555 of file loadlang_test.cc.

555 {
556 LangLoader("eng", TESSDATA_DIR "_best");
557}
void LangLoader(const char *lang, const char *tessdatadir)

◆ TEST_F() [70/229]

tesseract::TEST_F ( LoadLang  ,
engBestInt   
)

Definition at line 558 of file loadlang_test.cc.

558 {
559 LangLoader("eng", TESSDATA_DIR);
560}

◆ TEST_F() [71/229]

tesseract::TEST_F ( LoadLang  ,
engFast   
)

Definition at line 552 of file loadlang_test.cc.

552 {
553 LangLoader("eng", TESSDATA_DIR "_fast");
554}

◆ TEST_F() [72/229]

tesseract::TEST_F ( LoadLang  ,
kmrBest   
)

Definition at line 566 of file loadlang_test.cc.

566 {
567 LangLoader("kmr", TESSDATA_DIR "_best");
568}

◆ TEST_F() [73/229]

tesseract::TEST_F ( LoadLang  ,
kmrFast   
)

Definition at line 563 of file loadlang_test.cc.

563 {
564 LangLoader("kmr", TESSDATA_DIR "_fast");
565}

◆ TEST_F() [74/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
BasicTest   
)

Definition at line 29 of file lstm_test.cc.

29 {
30 // A Convolver sliding window classifier without LSTM.
31 SetupTrainer(
32 "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
33 "Ct1,1,64O1c1]",
34 "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false, 2e-4, false, "eng");
35 double non_lstm_err = TrainIterations(kTrainerIterations * 4);
36 EXPECT_LT(non_lstm_err, 98);
37 LOG(INFO) << "********** Expected < 98 ************\n";
38
39 // A basic single-layer, single direction LSTM.
40 SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);
41 double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
42 EXPECT_LT(lstm_uni_err, 86);
43 LOG(INFO) << "********** Expected < 86 ************\n";
44 // Beats the convolver. (Although it does have a lot more weights, it still
45 // iterates faster.)
46 EXPECT_LT(lstm_uni_err, non_lstm_err);
47}
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
const int kTrainerIterations
Definition: lstm_test.h:34

◆ TEST_F() [75/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
BidiTest   
)

Definition at line 59 of file lstm_test.cc.

59 {
60 // A basic single-layer, bi-di 1d LSTM.
61 SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);
62 double lstm_bi_err = TrainIterations(kTrainerIterations);
63 EXPECT_LT(lstm_bi_err, 75);
64 LOG(INFO) << "********** Expected < 75 ************\n";
65 // Int mode training is dead, so convert the trained network to int and check
66 // that its error rate is close to the float version.
67 TestIntMode(kTrainerIterations);
68}

◆ TEST_F() [76/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
ColorTest   
)

Definition at line 50 of file lstm_test.cc.

50 {
51 // A basic single-layer, single direction LSTM.
52 SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2D-color-lstm", true, true);
53 double lstm_uni_err = TrainIterations(kTrainerIterations);
54 EXPECT_LT(lstm_uni_err, 85);
55 // EXPECT_GT(lstm_uni_err, 66);
56 LOG(INFO) << "********** Expected < 85 ************\n";
57}

◆ TEST_F() [77/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
ConvertModel   
)

Definition at line 67 of file lstmtrainer_test.cc.

67 {
68 // Setup a trainer with a deu charset.
69 LSTMTrainer deu_trainer;
70 deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
71 // Load the fra traineddata, strip out the model, and save to a tmp file.
73 std::string fra_data = file::JoinPath(TESSDATA_DIR "_best", "fra.traineddata");
74 CHECK(mgr.Init(fra_data.c_str()));
75 LOG(INFO) << "Load " << fra_data << "\n";
77 std::string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
78 CHECK(mgr.ExtractToFile(model_path.c_str()));
79 LOG(INFO) << "Extract " << model_path << "\n";
80 // Load the fra model into the deu_trainer, and save the converted model.
81 CHECK(deu_trainer.TryLoadingCheckpoint(model_path.c_str(), fra_data.c_str()));
82 LOG(INFO) << "Checkpoint load for " << model_path << " and " << fra_data << "\n";
83 std::string deu_data = file::JoinPath(FLAGS_test_tmpdir, "deu.traineddata");
84 CHECK(deu_trainer.SaveTraineddata(deu_data.c_str()));
85 LOG(INFO) << "Save " << deu_data << "\n";
86 // Now run the saved model on phototest. (See BasicTesseractTest in
87 // baseapi_test.cc).
88 TessBaseAPI api;
89 api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY);
90 Image src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
91 CHECK(src_pix);
92 api.SetImage(src_pix);
93 std::unique_ptr<char[]> result(api.GetUTF8Text());
94 std::string truth_text;
95 CHECK_OK(
96 file::GetContents(TestingNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
97
98 EXPECT_STREQ(truth_text.c_str(), result.get());
99 src_pix.destroy();
100}
#define CHECK_OK(test)
Definition: include_gunit.h:84
bool ExtractToFile(const char *filename)
bool Init(const char *data_file_name)
bool SaveTraineddata(const char *filename)
bool TryLoadingCheckpoint(const char *filename, const char *old_traineddata)
static int Defaults()
Definition: include_gunit.h:61
static bool GetContents(const std::string &filename, std::string *out, int)
Definition: include_gunit.h:52

◆ TEST_F() [78/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
DeterminismTest   
)

Definition at line 109 of file lstm_test.cc.

109 {
110 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
111 false);
112 double lstm_2d_err_a = TrainIterations(kTrainerIterations);
113 double act_error_a = trainer_->ActivationError();
114 double char_error_a = trainer_->CharError();
115 std::vector<char> trainer_a_data;
116 EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, *trainer_, &trainer_a_data));
117 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
118 false);
119 double lstm_2d_err_b = TrainIterations(kTrainerIterations);
120 double act_error_b = trainer_->ActivationError();
121 double char_error_b = trainer_->CharError();
122 EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
123 EXPECT_FLOAT_EQ(act_error_a, act_error_b);
124 EXPECT_FLOAT_EQ(char_error_a, char_error_b);
125 // Now train some more iterations.
126 lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);
127 act_error_b = trainer_->ActivationError();
128 char_error_b = trainer_->CharError();
129 // Unpack into a new trainer and train that some more too.
130 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
131 false);
132 EXPECT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, *trainer_));
133 lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);
134 act_error_a = trainer_->ActivationError();
135 char_error_a = trainer_->CharError();
136 EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
137 EXPECT_FLOAT_EQ(act_error_a, act_error_b);
138 EXPECT_FLOAT_EQ(char_error_a, char_error_b);
139 LOG(INFO) << "********** *** ************\n";
140}
#define EXPECT_FLOAT_EQ(val1, val2)
Definition: gtest.h:2144

◆ TEST_F() [79/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodeDecodeBothTestKor   
)

Definition at line 41 of file lstm_recode_test.cc.

41 {
42 TestEncodeDecodeBoth("kor", "한국어 위키백과에 오신 것을 환영합니다!");
43}

◆ TEST_F() [80/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodedSoftmaxTest   
)

Definition at line 175 of file lstm_test.cc.

175 {
176 // LSTM with a built-in encoded softmax can beat the external softmax.
177 SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);
178 double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
179 EXPECT_LT(lstm_sm_err, 62.0);
180 LOG(INFO) << "********** Expected < 62 ************\n";
181 // Check that it works in int mode too.
182 TestIntMode(kTrainerIterations);
183}

◆ TEST_F() [81/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodesEng   
)

Definition at line 18 of file lstmtrainer_test.cc.

18 {
19 TestEncodeDecodeBoth("eng", "The quick brown 'fox' jumps over: the lazy dog!");
20}

◆ TEST_F() [82/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodesKan   
)

Definition at line 22 of file lstmtrainer_test.cc.

22 {
23 TestEncodeDecodeBoth("kan", "ಫ್ರಬ್ರವರಿ ತತ್ವಾಂಶಗಳೆಂದರೆ ಮತ್ತು ಜೊತೆಗೆ ಕ್ರಮವನ್ನು");
24}

◆ TEST_F() [83/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodesKor   
)

Definition at line 26 of file lstmtrainer_test.cc.

26 {
27 TestEncodeDecodeBoth("kor", "이는 것으로 다시 넣을 수는 있지만 선택의 의미는");
28}

◆ TEST_F() [84/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
MapCoder   
)

Definition at line 30 of file lstmtrainer_test.cc.

30 {
31 LSTMTrainer fra_trainer;
32 fra_trainer.InitCharSet(TestDataNameToPath("fra/fra.traineddata"));
33 LSTMTrainer deu_trainer;
34 deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
35 // A string that uses characters common to French and German.
36 std::string kTestStr = "The quick brown 'fox' jumps over: the lazy dog!";
37 std::vector<int> deu_labels;
38 EXPECT_TRUE(deu_trainer.EncodeString(kTestStr.c_str(), &deu_labels));
39 // The french trainer cannot decode them correctly.
40 std::string badly_decoded = fra_trainer.DecodeLabels(deu_labels);
41 std::string bad_str(&badly_decoded[0], badly_decoded.length());
42 LOG(INFO) << "bad_str fra=" << bad_str << "\n";
43 EXPECT_NE(kTestStr, bad_str);
44 // Encode the string as fra.
45 std::vector<int> fra_labels;
46 EXPECT_TRUE(fra_trainer.EncodeString(kTestStr.c_str(), &fra_labels));
47 // Use the mapper to compute what the labels are as deu.
48 std::vector<int> mapping =
49 fra_trainer.MapRecoder(deu_trainer.GetUnicharset(), deu_trainer.GetRecoder());
50 std::vector<int> mapped_fra_labels(fra_labels.size(), -1);
51 for (unsigned i = 0; i < fra_labels.size(); ++i) {
52 mapped_fra_labels[i] = mapping[fra_labels[i]];
53 EXPECT_NE(-1, mapped_fra_labels[i]) << "i=" << i << ", ch=" << kTestStr[i];
54 EXPECT_EQ(mapped_fra_labels[i], deu_labels[i])
55 << "i=" << i << ", ch=" << kTestStr[i] << " has deu label=" << deu_labels[i]
56 << ", but mapped to " << mapped_fra_labels[i];
57 }
58 // The german trainer can now decode them correctly.
59 std::string decoded = deu_trainer.DecodeLabels(mapped_fra_labels);
60 std::string ok_str(&decoded[0], decoded.length());
61 LOG(INFO) << "ok_str deu=" << ok_str << "\n";
62 EXPECT_EQ(kTestStr, ok_str);
63}
const UnicharCompress & GetRecoder() const
const UNICHARSET & GetUnicharset() const
std::vector< int > MapRecoder(const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const

◆ TEST_F() [85/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
RecodeTestKor   
)

Definition at line 29 of file lstm_recode_test.cc.

29 {
30 // A basic single-layer, bi-di 1d LSTM on Korean.
31 SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
32 "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
33 double kor_recode_err = TrainIterations(kTrainerIterations);
34 EXPECT_LT(kor_recode_err, 60);
35 LOG(INFO) << "********** Expected < 60 ************\n";
36}

◆ TEST_F() [86/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
RecodeTestKorBase   
)

Definition at line 19 of file lstm_recode_test.cc.

19 {
20 // A basic single-layer, bi-di 1d LSTM on Korean.
21 SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
22 "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
23 double kor_full_err = TrainIterations(kTrainerIterations * 2);
24 EXPECT_LT(kor_full_err, 88);
25 // EXPECT_GT(kor_full_err, 85);
26 LOG(INFO) << "********** Expected < 88 ************\n";
27}

◆ TEST_F() [87/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
SoftmaxBaselineTest   
)

Definition at line 143 of file lstm_test.cc.

143 {
144 // A basic single-layer, single direction LSTM.
145 SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);
146 double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
147 EXPECT_LT(lstm_uni_err, 60);
148 // EXPECT_GT(lstm_uni_err, 48);
149 LOG(INFO) << "********** Expected < 60 ************\n";
150 // Check that it works in int mode too.
151 TestIntMode(kTrainerIterations);
152 // If we run TestIntMode again, it tests that int_mode networks can
153 // serialize and deserialize correctly.
154 double delta = TestIntMode(kTrainerIterations);
155 // The two tests (both of int mode this time) should be almost identical.
156 LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";
157 EXPECT_LT(delta, 0.01);
158}

◆ TEST_F() [88/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
SoftmaxTest   
)

Definition at line 163 of file lstm_test.cc.

163 {
164 // LSTM with a built-in softmax can beat the external softmax.
165 SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);
166 double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
167 EXPECT_LT(lstm_sm_err, 49.0);
168 LOG(INFO) << "********** Expected < 49 ************\n";
169 // Check that it works in int mode too.
170 TestIntMode(kTrainerIterations);
171}

◆ TEST_F() [89/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
SpeedTest   
)

Definition at line 98 of file lstm_test.cc.

98 {
99 SetupTrainerEng(
100 "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
101 "O1c1]",
102 "2-D-2-layer-lstm", false, true);
103 TrainIterations(kTrainerIterations);
104 LOG(INFO) << "********** *** ************\n";
105}

◆ TEST_F() [90/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
Test2D   
)

Definition at line 72 of file lstm_test.cc.

72 {
73 // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
74 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
75 false);
76 double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
77 EXPECT_LT(lstm_2d_err, 98);
78 // EXPECT_GT(lstm_2d_err, 90);
79 LOG(INFO) << "********** Expected < 98 ************\n";
80 // Int mode training is dead, so convert the trained network to int and check
81 // that its error rate is close to the float version.
82 TestIntMode(kTrainerIterations);
83}

◆ TEST_F() [91/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
TestAdam   
)

Definition at line 87 of file lstm_test.cc.

87 {
88 // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
89 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
90 true);
91 double lstm_2d_err = TrainIterations(kTrainerIterations);
92 EXPECT_LT(lstm_2d_err, 70);
93 LOG(INFO) << "********** Expected < 70 ************\n";
94 TestIntMode(kTrainerIterations);
95}

◆ TEST_F() [92/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
TestLayerAccess   
)

Definition at line 186 of file lstm_test.cc.

186 {
187 // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.
188 SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm", false, false);
189 // Number of layers.
190 const size_t kNumLayers = 8;
191 // Expected layer names.
192 const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};
193 const char *kLayerNames[kNumLayers] = {"Input", "Convolve", "ConvNL", "Maxpool",
194 "Lfys32", "Lbx128LTR", "Lbx128", "Output"};
195 // Expected number of weights.
196 const int kNumWeights[kNumLayers] = {0,
197 0,
198 16 * (25 + 1),
199 0,
200 32 * (4 * (32 + 16 + 1)),
201 128 * (4 * (128 + 32 + 1)),
202 128 * (4 * (128 + 32 + 1)),
203 112 * (2 * 128 + 1)};
204
205 auto layers = trainer_->EnumerateLayers();
206 EXPECT_EQ(kNumLayers, layers.size());
207 for (unsigned i = 0; i < kNumLayers && i < layers.size(); ++i) {
208 EXPECT_STREQ(kLayerIds[i], layers[i].c_str());
209 EXPECT_STREQ(kLayerNames[i], trainer_->GetLayer(layers[i])->name().c_str());
210 EXPECT_EQ(kNumWeights[i], trainer_->GetLayer(layers[i])->num_weights());
211 }
212}

◆ TEST_F() [93/229]

tesseract::TEST_F ( LSTMTrainerTest  ,
TestSquashed   
)

Definition at line 18 of file lstm_squashed_test.cc.

18 {
19 // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom, and
20 // a small convolution/maxpool below that.
21 // Match training conditions to those typically used with this spec:
22 // recoding on, adam on.
23 SetupTrainerEng("[1,32,0,1 Ct3,3,16 Mp3,3 Lfys48 Lbx96 O1c1]", "SQU-2-layer-lstm",
24 /*recode*/ true, /*adam*/ true);
25 double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
26 EXPECT_LT(lstm_2d_err, 80);
27 LOG(INFO) << "********** < 80 ************\n";
28 TestIntMode(kTrainerIterations);
29}

◆ TEST_F() [94/229]

tesseract::TEST_F ( MatrixTest  ,
RotatingTranspose_0_2   
)

Definition at line 120 of file matrix_test.cc.

120 {
122 src_.RotatingTranspose(dims_, kNumDims_, 0, 2, &m);
123 m.ResizeNoInit(kInputSize_ / 2, 2);
124 // Verify that the result is:
125 // output tensor=[[[[0, 1][24, 25][48, 49][72, 73][96, 97]]
126 // [[2, 3][26, 27][50, 51][74, 75][98, 99]]
127 // [[4, 5][28, 29][52, 53][76, 77][100, 101]]]
128 // [[[6, 7]...
129 EXPECT_EQ(0, m(0, 0));
130 EXPECT_EQ(1, m(0, 1));
131 EXPECT_EQ(24, m(1, 0));
132 EXPECT_EQ(25, m(1, 1));
133 EXPECT_EQ(96, m(4, 0));
134 EXPECT_EQ(97, m(4, 1));
135 EXPECT_EQ(2, m(5, 0));
136 EXPECT_EQ(6, m(15, 0));
137}
void ResizeNoInit(int size1, int size2, int pad=0)
Definition: matrix.h:94
void RotatingTranspose(const int *dims, int num_dims, int src_dim, int dest_dim, GENERIC_2D_ARRAY< T > *result) const
Definition: matrix.h:468

◆ TEST_F() [95/229]

tesseract::TEST_F ( MatrixTest  ,
RotatingTranspose_1_3   
)

Definition at line 99 of file matrix_test.cc.

99 {
101 src_.RotatingTranspose(dims_, kNumDims_, 1, 3, &m);
102 m.ResizeNoInit(kInputSize_ / 4, 4);
103 // Verify that the result is:
104 // output tensor=[[[[0, 6, 12, 18][1, 7, 13, 19]]
105 // [[2, 8, 14, 20][3, 9, 15, 21]]
106 // [[4, 10, 16, 22][5, 11, 17, 23]]]
107 // [[[24, 30, 36, 42]...
108 EXPECT_EQ(0, m(0, 0));
109 EXPECT_EQ(6, m(0, 1));
110 EXPECT_EQ(1, m(1, 0));
111 EXPECT_EQ(2, m(2, 0));
112 EXPECT_EQ(3, m(3, 0));
113 EXPECT_EQ(4, m(4, 0));
114 EXPECT_EQ(5, m(5, 0));
115 EXPECT_EQ(24, m(6, 0));
116 EXPECT_EQ(30, m(6, 1));
117}

◆ TEST_F() [96/229]

tesseract::TEST_F ( MatrixTest  ,
RotatingTranspose_2_0   
)

Definition at line 77 of file matrix_test.cc.

77 {
79 src_.RotatingTranspose(dims_, kNumDims_, 2, 0, &m);
80 m.ResizeNoInit(kInputSize_ / 2, 2);
81 // Verify that the result is:
82 // output tensor=[[[[0, 1][6, 7][12, 13][18, 19]]
83 // [[24, 25][30, 31][36, 37][42, 43]]
84 // [[48, 49][54, 55][60, 61][66, 67]]
85 // [[72, 73][78, 79][84, 85][90, 91]]
86 // [[96, 97][102, 103][108, 109][114, 115]]]
87 // [[[2,3]...
88 EXPECT_EQ(0, m(0, 0));
89 EXPECT_EQ(1, m(0, 1));
90 EXPECT_EQ(6, m(1, 0));
91 EXPECT_EQ(7, m(1, 1));
92 EXPECT_EQ(24, m(4, 0));
93 EXPECT_EQ(25, m(4, 1));
94 EXPECT_EQ(30, m(5, 0));
95 EXPECT_EQ(2, m(20, 0));
96}

◆ TEST_F() [97/229]

tesseract::TEST_F ( MatrixTest  ,
RotatingTranspose_3_1   
)

Definition at line 58 of file matrix_test.cc.

58 {
60 src_.RotatingTranspose(dims_, kNumDims_, 3, 1, &m);
61 m.ResizeNoInit(kInputSize_ / 3, 3);
62 // Verify that the result is:
63 // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
64 // [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
65 // [[[24, 26, 28]...
66 EXPECT_EQ(0, m(0, 0));
67 EXPECT_EQ(2, m(0, 1));
68 EXPECT_EQ(4, m(0, 2));
69 EXPECT_EQ(6, m(1, 0));
70 EXPECT_EQ(1, m(4, 0));
71 EXPECT_EQ(24, m(8, 0));
72 EXPECT_EQ(26, m(8, 1));
73 EXPECT_EQ(25, m(12, 0));
74}

◆ TEST_F() [98/229]

tesseract::TEST_F ( NetworkioTest  ,
CopyWithXReversal   
)

Definition at line 142 of file networkio_test.cc.

142 {
143#ifdef INCLUDE_TENSORFLOW
144 NetworkIO nio;
145 SetupNetworkIO(&nio);
146 NetworkIO copy;
147 copy.CopyWithXReversal(nio);
148 StrideMap::Index index(copy.stride_map());
149 int next_t = 0;
150 int pos = 0;
151 std::vector<int> expected_values = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9,
152 8, 16, 15, 14, 13, 12, 21, 20, 19, 18, 17,
153 26, 25, 24, 23, 22, 31, 30, 29, 28, 27};
154 do {
155 int t = index.t();
156 // The indexed values match the expected values.
157 int value = copy.i(t)[0];
158 EXPECT_EQ(value, expected_values[pos]);
159 value = copy.i(t)[1];
160 EXPECT_EQ(value, -expected_values[pos]);
161 // When we skip t values, the data is always 0.
162 while (next_t < t) {
163 EXPECT_EQ(copy.i(next_t)[0], 0) << "Failure t = " << next_t;
164 EXPECT_EQ(copy.i(next_t)[1], 0) << "Failure t = " << next_t;
165 ++next_t;
166 }
167 ++pos;
168 ++next_t;
169 } while (index.Increment());
170 EXPECT_EQ(pos, 32);
171 EXPECT_EQ(next_t, 40);
172#else
173 LOG(INFO) << "Skip test because of missing xla::Array2D";
174 GTEST_SKIP();
175#endif
176}
void CopyWithXReversal(const NetworkIO &src)
Definition: networkio.cpp:888
const StrideMap & stride_map() const
Definition: networkio.h:128
const int8_t * i(int t) const
Definition: networkio.h:118

◆ TEST_F() [99/229]

tesseract::TEST_F ( NetworkioTest  ,
CopyWithXYTranspose   
)

Definition at line 179 of file networkio_test.cc.

179 {
180#ifdef INCLUDE_TENSORFLOW
181 NetworkIO nio;
182 SetupNetworkIO(&nio);
183 NetworkIO copy;
184 copy.CopyWithXYTranspose(nio);
185 StrideMap::Index index(copy.stride_map());
186 int next_t = 0;
187 int pos = 0;
188 std::vector<int> expected_values = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7,
189 11, 12, 17, 22, 27, 13, 18, 23, 28, 14, 19,
190 24, 29, 15, 20, 25, 30, 16, 21, 26, 31};
191 do {
192 int t = index.t();
193 // The indexed values match the expected values.
194 int value = copy.i(t)[0];
195 EXPECT_EQ(value, expected_values[pos]);
196 value = copy.i(t)[1];
197 EXPECT_EQ(value, -expected_values[pos]);
198 // When we skip t values, the data is always 0.
199 while (next_t < t) {
200 EXPECT_EQ(copy.i(next_t)[0], 0);
201 EXPECT_EQ(copy.i(next_t)[1], 0);
202 ++next_t;
203 }
204 ++pos;
205 ++next_t;
206 } while (index.Increment());
207 EXPECT_EQ(pos, 32);
208 EXPECT_EQ(next_t, 40);
209#else
210 LOG(INFO) << "Skip test because of missing xla::Array2D";
211 GTEST_SKIP();
212#endif
213}
void CopyWithXYTranspose(const NetworkIO &src)
Definition: networkio.cpp:906

◆ TEST_F() [100/229]

tesseract::TEST_F ( NetworkioTest  ,
CopyWithYReversal   
)

Definition at line 105 of file networkio_test.cc.

105 {
106#ifdef INCLUDE_TENSORFLOW
107 NetworkIO nio;
108 SetupNetworkIO(&nio);
109 NetworkIO copy;
110 copy.CopyWithYReversal(nio);
111 StrideMap::Index index(copy.stride_map());
112 int next_t = 0;
113 int pos = 0;
114 std::vector<int> expected_values = {8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2,
115 3, 27, 28, 29, 30, 31, 22, 23, 24, 25, 26,
116 17, 18, 19, 20, 21, 12, 13, 14, 15, 16};
117 do {
118 int t = index.t();
119 // The indexed values match the expected values.
120 int value = copy.i(t)[0];
121 EXPECT_EQ(value, expected_values[pos]);
122 value = copy.i(t)[1];
123 EXPECT_EQ(value, -expected_values[pos]);
124 // When we skip t values, the data is always 0.
125 while (next_t < t) {
126 EXPECT_EQ(copy.i(next_t)[0], 0) << "Failure t = " << next_t;
127 EXPECT_EQ(copy.i(next_t)[1], 0) << "Failure t = " << next_t;
128 ++next_t;
129 }
130 ++pos;
131 ++next_t;
132 } while (index.Increment());
133 EXPECT_EQ(pos, 32);
134 EXPECT_EQ(next_t, 40);
135#else
136 LOG(INFO) << "Skip test because of missing xla::Array2D";
137 GTEST_SKIP();
138#endif
139}
void CopyWithYReversal(const NetworkIO &src)
Definition: networkio.cpp:868

◆ TEST_F() [101/229]

tesseract::TEST_F ( NetworkioTest  ,
InitWithZeroFill   
)

Definition at line 65 of file networkio_test.cc.

65 {
66#ifdef INCLUDE_TENSORFLOW
67 NetworkIO nio;
68 nio.Resize2d(true, 32, 2);
69 int width = nio.Width();
70 for (int t = 0; t < width; ++t) {
71 nio.SetPixel(t, 0, 0, 0.0f, 128.0f);
72 nio.SetPixel(t, 1, 0, 0.0f, 128.0f);
73 }
74 // The initialization will wipe out all previously set values.
75 SetupNetworkIO(&nio);
77 StrideMap::Index index(nio.stride_map());
78 int next_t = 0;
79 int pos = 0;
80 do {
81 int t = index.t();
82 // The indexed values just increase monotonically.
83 int value = nio.i(t)[0];
84 EXPECT_EQ(value, pos);
85 value = nio.i(t)[1];
86 EXPECT_EQ(value, -pos);
87 // When we skip t values, the data is always 0.
88 while (next_t < t) {
89 EXPECT_EQ(nio.i(next_t)[0], 0);
90 EXPECT_EQ(nio.i(next_t)[1], 0);
91 ++next_t;
92 }
93 ++pos;
94 ++next_t;
95 } while (index.Increment());
96 EXPECT_EQ(pos, 32);
97 EXPECT_EQ(next_t, 40);
98#else
99 LOG(INFO) << "Skip test because of missing xla::Array2D";
100 GTEST_SKIP();
101#endif
102}
void ZeroInvalidElements()
Definition: networkio.cpp:86
int Width() const
Definition: networkio.h:102
void Resize2d(bool int_mode, int width, int num_features)
Definition: networkio.cpp:35
void SetPixel(int t, int f, int pixel, float black, float contrast)
Definition: networkio.cpp:290

◆ TEST_F() [102/229]

tesseract::TEST_F ( NthItemTest  ,
BoringTest   
)

Definition at line 61 of file nthitem_test.cc.

61 {
62 KDVector v;
63 // Push the test data onto the KDVector.
64 int test_data[] = {8, 8, 8, 8, 8, 7, 7, 7, 7};
65 for (size_t i = 0; i < countof(test_data); ++i) {
66 IntKDPair pair(test_data[i], i);
67 v.push_back(pair);
68 }
69 // The item at index 3 is 7 but the item at index 4 is 8.
70 size_t index = 3;
71 std::nth_element(v.begin(), v.begin() + index, v.end());
72 // The result is 7.
73 EXPECT_EQ(7, v[index].key());
74 index = 4;
75 std::nth_element(v.begin(), v.begin() + index, v.end());
76 // The result is 8.
77 EXPECT_EQ(8, v[index].key());
78 // Get the min item.
79 index = 0;
80 std::nth_element(v.begin(), v.begin() + index, v.end());
81 // The result is 7.
82 EXPECT_EQ(7, v[index].key());
83 // Get the max item.
84 index = v.size() - 1;
85 std::nth_element(v.begin(), v.begin() + index, v.end());
86 // The result is 8.
87 EXPECT_EQ(8, v[index].key());
88}
KDPairInc< int, int > IntKDPair
Definition: kdpair.h:191

◆ TEST_F() [103/229]

tesseract::TEST_F ( NthItemTest  ,
EqualTest   
)

Definition at line 104 of file nthitem_test.cc.

104 {
105 KDVector v;
106 // Push the test data onto the KDVector.
107 PushTestData(&v);
108 // Add an extra 8. This makes the median 7.
109 IntKDPair pair(8, 13);
110 v.push_back(pair);
111 // Get the median item.
112 size_t index = v.size() / 2;
113 std::nth_element(v.begin(), v.begin() + index, v.end());
114 // The result is 7, it started out at index 4 or 12.
115 EXPECT_EQ(7, v[index].key());
116 EXPECT_TRUE(v[index].data() == 4 || v[index].data() == 12);
117}

◆ TEST_F() [104/229]

tesseract::TEST_F ( NthItemTest  ,
GeneralTest   
)

Definition at line 44 of file nthitem_test.cc.

44 {
45 KDVector v;
46 // Push the test data onto the KDVector.
47 PushTestData(&v);
48 // Get the min item.
49 size_t index = 0;
50 std::nth_element(v.begin(), v.begin() + index, v.end());
51 // The result is -32767.
52 EXPECT_EQ(-32767, v[index].key());
53 // Get the max item.
54 index = v.size() - 1;
55 std::nth_element(v.begin(), v.begin() + index, v.end());
56 // The result is 65536.
57 EXPECT_EQ(65536, v[index].key());
58}

◆ TEST_F() [105/229]

tesseract::TEST_F ( NthItemTest  ,
UniqueTest   
)

Definition at line 91 of file nthitem_test.cc.

91 {
92 KDVector v;
93 // Push the test data onto the KDVector.
94 PushTestData(&v);
95 // Get the median item.
96 size_t index = v.size() / 2;
97 std::nth_element(v.begin(), v.begin() + index, v.end());
98 // The result is 6, it started out at index 11.
99 EXPECT_EQ(6, v[index].key());
100 EXPECT_EQ(11, v[index].data());
101}

◆ TEST_F() [106/229]

tesseract::TEST_F ( PageSegModeTest  ,
WordTest   
)

Definition at line 87 of file pagesegmode_test.cc.

87 {
88 std::string filename = file::JoinPath(TESTING_DIR, "segmodeimg.tif");
89 if (!file_exists(filename.c_str())) {
90 LOG(INFO) << "Skip test because of missing " << filename << '\n';
91 GTEST_SKIP();
92 } else {
93 SetImage(filename.c_str());
94 // Test various rectangles around the inverse page number.
95 VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1419, 264, 69, 34);
96 VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1411, 252, 78, 62);
97 VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1396, 218, 114, 102);
98 // Test a random pair of words as a line
99 VerifyRectText(tesseract::PSM_SINGLE_LINE, "What should", 237, 393, 256, 36);
100 #ifdef DISABLED_LEGACY_ENGINE
101 // Skip check as LSTM mode adds a space.
102 LOG(INFO) << "Skip `Whatshould` test in LSTM Mode\n";
103 #else
104 // Test a random pair of words as a word
105 VerifyRectText(tesseract::PSM_SINGLE_WORD, "Whatshould", 237, 393, 256, 36);
106 #endif
107 // Test single block mode.
108 VerifyRectText(tesseract::PSM_SINGLE_BLOCK, "both the\nfrom the", 237, 450, 172, 94);
109 // But doesn't work in line or word mode.
110 NotRectText(tesseract::PSM_SINGLE_LINE, "both the\nfrom the", 237, 450, 172, 94);
111 NotRectText(tesseract::PSM_SINGLE_WORD, "both the\nfrom the", 237, 450, 172, 94);
112 }
113}
@ PSM_SINGLE_WORD
Treat the image as a single word.
Definition: publictypes.h:168
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:166
@ PSM_SINGLE_LINE
Treat the image as a single text line.
Definition: publictypes.h:167

◆ TEST_F() [107/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
CanDropUncoveredChars   
)

Definition at line 154 of file pango_font_info_test.cc.

154 {
155 font_info_.ParseFontDescriptionName("Verdana 12");
156 // Verdana cannot render the "ff" ligature
157 std::string word = "office";
158 EXPECT_EQ(1, font_info_.DropUncoveredChars(&word));
159 EXPECT_EQ("oice", word);
160
161 // Don't drop non-letter characters like word joiners.
162 const char *kJoiners[] = {
163 "\u2060", // U+2060 (WJ)
164 "\u200C", // U+200C (ZWNJ)
165 "\u200D" // U+200D (ZWJ)
166 };
167 for (auto &kJoiner : kJoiners) {
168 word = kJoiner;
169 EXPECT_EQ(0, font_info_.DropUncoveredChars(&word));
170 EXPECT_STREQ(kJoiner, word.c_str());
171 }
172}

◆ TEST_F() [108/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
CannotRenderInvalidString   
)

Definition at line 145 of file pango_font_info_test.cc.

145 {
146 font_info_.ParseFontDescriptionName("Lohit Hindi 12");
147 for (int i = 0; kBadlyFormedHinWords[i] != nullptr; ++i) {
149 font_info_.CanRenderString(kBadlyFormedHinWords[i], strlen(kBadlyFormedHinWords[i])))
150 << "Can render " << kBadlyFormedHinWords[i];
151 }
152}

◆ TEST_F() [109/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
CannotRenderUncoveredString   
)

Definition at line 140 of file pango_font_info_test.cc.

140 {
141 font_info_.ParseFontDescriptionName("Verdana 12");
142 EXPECT_FALSE(font_info_.CanRenderString(kKorText, strlen(kKorText)));
143}

◆ TEST_F() [110/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
CanRenderLigature   
)

Definition at line 131 of file pango_font_info_test.cc.

131 {
132 font_info_.ParseFontDescriptionName("Arab 12");
133 const char kArabicLigature[] = "لا";
134 EXPECT_TRUE(font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature)));
135
136 printf("Next word\n");
137 EXPECT_TRUE(font_info_.CanRenderString(kArabicText, strlen(kArabicText)));
138}

◆ TEST_F() [111/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
CanRenderString   
)

Definition at line 120 of file pango_font_info_test.cc.

120 {
121 font_info_.ParseFontDescriptionName("Verdana 12");
122 EXPECT_TRUE(font_info_.CanRenderString(kEngText, strlen(kEngText)));
123
124 font_info_.ParseFontDescriptionName("UnBatang 12");
125 EXPECT_TRUE(font_info_.CanRenderString(kKorText, strlen(kKorText)));
126
127 font_info_.ParseFontDescriptionName("Lohit Hindi 12");
128 EXPECT_TRUE(font_info_.CanRenderString(kHinText, strlen(kHinText)));
129}

◆ TEST_F() [112/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
DoesGetSpacingProperties   
)

Definition at line 111 of file pango_font_info_test.cc.

111 {
112 EXPECT_TRUE(font_info_.ParseFontDescriptionName("Arial Italic 12"));
113 int x_bearing, x_advance;
114 EXPECT_TRUE(font_info_.GetSpacingProperties("A", &x_bearing, &x_advance));
115 EXPECT_GT(x_advance, 0);
116 EXPECT_TRUE(font_info_.GetSpacingProperties("a", &x_bearing, &x_advance));
117 EXPECT_GT(x_advance, 0);
118}

◆ TEST_F() [113/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
DoesParseFontDescriptionName   
)

Definition at line 84 of file pango_font_info_test.cc.

84 {
85 EXPECT_TRUE(font_info_.ParseFontDescriptionName("Arial Bold Italic 12"));
86 EXPECT_EQ(12, font_info_.font_size());
87 EXPECT_EQ("Arial", font_info_.family_name());
88
89 EXPECT_TRUE(font_info_.ParseFontDescriptionName("Verdana 10"));
90 EXPECT_EQ(10, font_info_.font_size());
91 EXPECT_EQ("Verdana", font_info_.family_name());
92
93 EXPECT_TRUE(font_info_.ParseFontDescriptionName("DejaVu Sans Ultra-Light"));
94 EXPECT_EQ("DejaVu Sans", font_info_.family_name());
95}

◆ TEST_F() [114/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
DoesParseMissingFonts   
)

Definition at line 97 of file pango_font_info_test.cc.

97 {
98 // Font family one of whose faces exists but this one doesn't.
99 EXPECT_TRUE(font_info_.ParseFontDescriptionName("Arial Italic 12"));
100 EXPECT_EQ(12, font_info_.font_size());
101 EXPECT_EQ("Arial", font_info_.family_name());
102
103 // Font family that doesn't exist in testdata. It will still parse the
104 // description name. But without the file, it will not be able to populate
105 // some font family details, like is_monospace().
106 EXPECT_TRUE(font_info_.ParseFontDescriptionName("Georgia 10"));
107 EXPECT_EQ(10, font_info_.font_size());
108 EXPECT_EQ("Georgia", font_info_.family_name());
109}

◆ TEST_F() [115/229]

tesseract::TEST_F ( PangoFontInfoTest  ,
TestNonDefaultConstructor   
)

Definition at line 78 of file pango_font_info_test.cc.

78 {
79 PangoFontInfo font("Arial Bold Italic 12");
80 EXPECT_EQ(12, font.font_size());
81 EXPECT_EQ("Arial", font.family_name());
82}

◆ TEST_F() [116/229]

tesseract::TEST_F ( ParamsModelTest  ,
TestEngParamsModelIO   
)

Definition at line 66 of file params_model_test.cc.

66 {
67#ifdef DISABLED_LEGACY_ENGINE
68 // Skip test because ParamsModel::LoadFromFp is missing.
69 GTEST_SKIP();
70#else
71 TestParamsModelRoundTrip("eng.params_model");
72#endif
73}

◆ TEST_F() [117/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DISABLED_ChiDictionary   
)

Definition at line 439 of file recodebeam_test.cc.

439 {
440 LOG(INFO) << "Testing zh_hans dictionary"
441 << "\n";
442 LoadUnicharset("zh_hans.unicharset");
444 GenerateSyntheticOutputs(kZHTops, kZHTopScores, kZH2nds, kZH2ndScores, nullptr);
446 ExpectCorrect(outputs, "实学储啬投学生", nullptr, &words);
447 // Each is an individual word, with permuter = top choice.
448 EXPECT_EQ(7, words.size());
449 for (int w = 0; w < words.size(); ++w) {
450 EXPECT_EQ(TOP_CHOICE_PERM, words[w]->best_choice->permuter());
451 }
452 // Now try again with the dictionary.
453 LoadDict("zh_hans");
454 ExpectCorrect(outputs, "实学储啬投学生", &lstm_dict_, &words);
455 // Number of words expected.
456 const int kNumWords = 5;
457 // Content of the words.
458 const char *kWords[kNumWords] = {"实学", "储", "啬", "投", "学生"};
459 // Permuters of the words.
460 const int kWordPerms[kNumWords] = {SYSTEM_DAWG_PERM, TOP_CHOICE_PERM, TOP_CHOICE_PERM,
462 EXPECT_EQ(kNumWords, words.size());
463 for (int w = 0; w < kNumWords && w < words.size(); ++w) {
464 EXPECT_STREQ(kWords[w], words[w]->best_choice->unichar_string().c_str());
465 EXPECT_EQ(kWordPerms[w], words[w]->best_choice->permuter());
466 }
467}
const float kZH2ndScores[]
const char * kZH2nds[]
const char * kZHTops[]
const float kZHTopScores[]

◆ TEST_F() [118/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DISABLED_EngDictionary   
)

Definition at line 422 of file recodebeam_test.cc.

422 {
423 LOG(INFO) << "Testing eng dictionary"
424 << "\n";
425 LoadUnicharset("eng_beam.unicharset");
427 GenerateSyntheticOutputs(kGWRTops, kGWRTopScores, kGWR2nds, kGWR2ndScores, nullptr);
428 std::string default_str;
429 for (int i = 0; kGWRTops[i] != nullptr; ++i) {
430 default_str += kGWRTops[i];
431 }
432 PointerVector<WERD_RES> words;
433 ExpectCorrect(outputs, default_str, nullptr, &words);
434 // Now try again with the dictionary.
435 LoadDict("eng_beam");
436 ExpectCorrect(outputs, "Gets words right.", &lstm_dict_, &words);
437}
const float kGWR2ndScores[]
const char * kGWRTops[]
const float kGWRTopScores[]
const char * kGWR2nds[]

◆ TEST_F() [119/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DISABLED_MultiCodeSequences   
)

Definition at line 471 of file recodebeam_test.cc.

471 {
472 LOG(INFO) << "Testing duplicates in multi-code sequences"
473 << "\n";
474 LoadUnicharset("vie.d.unicharset");
475 tesseract::SetupBasicProperties(false, true, &ccutil_.unicharset);
476 TRand random;
478 GenerateSyntheticOutputs(kViTops, kViTopScores, kVi2nds, kVi2ndScores, &random);
480 std::string truth_str;
482 tesseract::GraphemeNorm::kNone, "vậy tội", &truth_str);
483 ExpectCorrect(outputs, truth_str, nullptr, &words);
484}
const char * kVi2nds[]
const char * kViTops[]
const float kViTopScores[]
const float kVi2ndScores[]

◆ TEST_F() [120/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DoesChinese   
)

Definition at line 334 of file recodebeam_test.cc.

334 {
335 LOG(INFO) << "Testing chi_tra"
336 << "\n";
337 LoadUnicharset("chi_tra.unicharset");
338 // Correctly reproduce the first kNumChars characters from easy output.
339 std::vector<int> transcription;
340 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
341 transcription.push_back(i);
342 }
343 GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
344 ExpectCorrect(outputs, transcription);
345 LOG(INFO) << "Testing chi_sim"
346 << "\n";
347 LoadUnicharset("chi_sim.unicharset");
348 // Correctly reproduce the first kNumChars characters from easy output.
349 transcription.clear();
350 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
351 transcription.push_back(i);
352 }
353 outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
354 ExpectCorrect(outputs, transcription);
355}
const int kNumChars
const int kPadding

◆ TEST_F() [121/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DoesEnglish   
)

Definition at line 409 of file recodebeam_test.cc.

409 {
410 LOG(INFO) << "Testing eng"
411 << "\n";
412 LoadUnicharset("eng.unicharset");
413 // Correctly reproduce the first kNumChars characters from easy output.
414 std::vector<int> transcription;
415 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
416 transcription.push_back(i);
417 }
418 GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
419 ExpectCorrect(outputs, transcription);
420}

◆ TEST_F() [122/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DoesJapanese   
)

Definition at line 357 of file recodebeam_test.cc.

357 {
358 LOG(INFO) << "Testing jpn"
359 << "\n";
360 LoadUnicharset("jpn.unicharset");
361 // Correctly reproduce the first kNumChars characters from easy output.
362 std::vector<int> transcription;
363 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
364 transcription.push_back(i);
365 }
366 GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
367 ExpectCorrect(outputs, transcription);
368}

◆ TEST_F() [123/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DoesKannada   
)

Definition at line 383 of file recodebeam_test.cc.

383 {
384 LOG(INFO) << "Testing kan"
385 << "\n";
386 LoadUnicharset("kan.unicharset");
387 // Correctly reproduce the first kNumChars characters from easy output.
388 std::vector<int> transcription;
389 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
390 transcription.push_back(i);
391 }
392 GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
393 ExpectCorrect(outputs, transcription);
394}

◆ TEST_F() [124/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DoesKorean   
)

Definition at line 370 of file recodebeam_test.cc.

370 {
371 LOG(INFO) << "Testing kor"
372 << "\n";
373 LoadUnicharset("kor.unicharset");
374 // Correctly reproduce the first kNumChars characters from easy output.
375 std::vector<int> transcription;
376 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
377 transcription.push_back(i);
378 }
379 GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
380 ExpectCorrect(outputs, transcription);
381}

◆ TEST_F() [125/229]

tesseract::TEST_F ( RecodeBeamTest  ,
DoesMarathi   
)

Definition at line 396 of file recodebeam_test.cc.

396 {
397 LOG(INFO) << "Testing mar"
398 << "\n";
399 LoadUnicharset("mar.unicharset");
400 // Correctly reproduce the first kNumChars characters from easy output.
401 std::vector<int> transcription;
402 for (int i = SPECIAL_UNICHAR_CODES_COUNT; i < kNumChars; ++i) {
403 transcription.push_back(i);
404 }
405 GENERIC_2D_ARRAY<float> outputs = GenerateRandomPaddedOutputs(transcription, kPadding);
406 ExpectCorrect(outputs, transcription);
407}

◆ TEST_F() [126/229]

tesseract::TEST_F ( ResultIteratorTest  ,
ComplexTest   
)

Definition at line 348 of file resultiterator_test.cc.

348 {
349 SetImage("8087_054.3B.tif");
350 // Just run layout analysis.
351 PageIterator *it = api_.AnalyseLayout();
352 EXPECT_FALSE(it == nullptr);
353 // The images should rebuild almost perfectly.
354 VerifyRebuilds(2073, 2073, 2080, 2081, 2090, it);
355 delete it;
356}

◆ TEST_F() [127/229]

tesseract::TEST_F ( ResultIteratorTest  ,
DISABLED_NonNullChoicesTest   
)

Definition at line 537 of file resultiterator_test.cc.

537 {
538 SetImage("5318c4b679264.jpg");
539 char *result = api_.GetUTF8Text();
540 delete[] result;
541 ResultIterator *r_it = api_.GetIterator();
542 // Iterate over the words.
543 do {
544 char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
545 if (word_str != nullptr) {
546 LOG(INFO) << "Word " << word_str << ":\n";
547 ResultIterator s_it = *r_it;
548 do {
549 tesseract::ChoiceIterator c_it(s_it);
550 do {
551 const char *char_str = c_it.GetUTF8Text();
552 if (char_str == nullptr) {
553 LOG(INFO) << "Null char choice"
554 << "\n";
555 } else {
556 LOG(INFO) << "Char choice " << char_str << "\n";
557 }
558 CHECK(char_str != nullptr);
559 } while (c_it.Next());
562 delete[] word_str;
563 }
564 } while (r_it->Next(tesseract::RIL_WORD));
565 delete r_it;
566}
bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const override
virtual char * GetUTF8Text(PageIteratorLevel level) const

◆ TEST_F() [128/229]

tesseract::TEST_F ( ResultIteratorTest  ,
DualStartTextlineOrderTest   
)

Definition at line 478 of file resultiterator_test.cc.

478 {
479 const StrongScriptDirection word_dirs[] = {dL, dL, dN, dL, dN, dR, dR, dR};
480 int reading_order_rtl_context[] = {7, 6, 5, 4, ResultIterator::kMinorRunStart,
481 0, 1, 2, 3, ResultIterator::kMinorRunEnd};
482 int reading_order_ltr_context[] = {
483 0, 1, 2, 3, 4, ResultIterator::kMinorRunStart, 7, 6, 5, ResultIterator::kMinorRunEnd};
484
485 ExpectTextlineReadingOrder(true, word_dirs, countof(word_dirs), reading_order_ltr_context,
486 countof(reading_order_ltr_context));
487 ExpectTextlineReadingOrder(false, word_dirs, countof(word_dirs), reading_order_rtl_context,
488 countof(reading_order_rtl_context));
489}
StrongScriptDirection
Definition: unichar.h:41

◆ TEST_F() [129/229]

tesseract::TEST_F ( ResultIteratorTest  ,
EasyTest   
)

Definition at line 258 of file resultiterator_test.cc.

258 {
259 SetImage("phototest.tif");
260 // Just run layout analysis.
261 PageIterator *p_it = api_.AnalyseLayout();
262 EXPECT_FALSE(p_it == nullptr);
263 // Check iterator position.
265 // This should be a single block.
268
269 // The images should rebuild almost perfectly.
270 LOG(INFO) << "Verifying image rebuilds 1 (pageiterator)"
271 << "\n";
272 VerifyRebuilds(10, 10, 0, 0, 0, p_it);
273 delete p_it;
274
275 char *result = api_.GetUTF8Text();
276 ocr_text_ = result;
277 delete[] result;
278 ResultIterator *r_it = api_.GetIterator();
279 // The images should rebuild almost perfectly.
280 LOG(INFO) << "Verifying image rebuilds 2a (resultiterator)"
281 << "\n";
282 VerifyRebuilds(8, 8, 0, 0, 40, r_it, tesseract::RIL_WORD);
283 // Test the text.
284 LOG(INFO) << "Verifying text rebuilds 1 (resultiterator)"
285 << "\n";
286 VerifyAllText(ocr_text_, r_it);
287
288 // The images should rebuild almost perfectly.
289 LOG(INFO) << "Verifying image rebuilds 2b (resultiterator)"
290 << "\n";
291 VerifyRebuilds(8, 8, 0, 0, 40, r_it, tesseract::RIL_WORD);
292
293 r_it->Begin();
294 // Test baseline of the first line.
295 int x1, y1, x2, y2;
296 r_it->Baseline(tesseract::RIL_TEXTLINE, &x1, &y1, &x2, &y2);
297 LOG(INFO) << "Baseline ("
298 << x1 << ',' << y1 << ")->(" << x2 << ',' << y2 << ")\n";
299 // Make sure we have a decent vector.
300 EXPECT_GE(x2, x1 + 400);
301 // The point 200,116 should be very close to the baseline.
302 // (x3,y3) is the vector from (x1,y1) to (200,116)
303 int x3 = 200 - x1;
304 int y3 = 116 - y1;
305 x2 -= x1;
306 y2 -= y1;
307 // The cross product (x2,y2)x(x3,y3) should be small.
308 int product = x2 * y3 - x3 * y2;
309 EXPECT_LE(abs(product), x2);
310
311 // Test font attributes for each word.
312 do {
313 float confidence = r_it->Confidence(tesseract::RIL_WORD);
314#ifndef DISABLED_LEGACY_ENGINE
315 int pointsize, font_id;
316 bool bold, italic, underlined, monospace, serif, smallcaps;
317 const char *font = r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
318 &smallcaps, &pointsize, &font_id);
319 EXPECT_GE(confidence, 80.0f);
320#endif
321 char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
322
323#ifdef DISABLED_LEGACY_ENGINE
324 LOG(INFO) << "Word " << word_str << ", conf " << confidence << "\n";
325#else
326 LOG(INFO) << "Word " << word_str << " in font " << font
327 << ", id " << font_id << ", size " << pointsize
328 << ", conf " << confidence << "\n";
329#endif // def DISABLED_LEGACY_ENGINE
330 delete[] word_str;
331#ifndef DISABLED_LEGACY_ENGINE
332 EXPECT_FALSE(bold);
333 EXPECT_FALSE(italic);
334 EXPECT_FALSE(underlined);
335 EXPECT_FALSE(monospace);
336 EXPECT_FALSE(serif);
337 // The text is about 31 pixels high. Above we say the source is 200 ppi,
338 // which translates to:
339 // 31 pixels / textline * (72 pts / inch) / (200 pixels / inch) = 11.16 pts
340 EXPECT_GE(pointsize, 11.16 - 1.50);
341 EXPECT_LE(pointsize, 11.16 + 1.50);
342#endif // ndef DISABLED_LEGACY_ENGINE
343 } while (r_it->Next(tesseract::RIL_WORD));
344 delete r_it;
345}
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
float Confidence(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const

◆ TEST_F() [130/229]

tesseract::TEST_F ( ResultIteratorTest  ,
GreyTest   
)

Definition at line 359 of file resultiterator_test.cc.

359 {
360 SetImage("8087_054.3G.tif");
361 // Just run layout analysis.
362 PageIterator *it = api_.AnalyseLayout();
363 EXPECT_FALSE(it == nullptr);
364 // The images should rebuild almost perfectly.
365 VerifyRebuilds(600, 600, 600, 600, 600, it);
366 delete it;
367}

◆ TEST_F() [131/229]

tesseract::TEST_F ( ResultIteratorTest  ,
LeftwardTextlineOrderTest   
)

Definition at line 493 of file resultiterator_test.cc.

493 {
494 const StrongScriptDirection word_dirs[] = {dL, dL, dN, dL, dN, dN, dL, dL};
495 // The order here is just left to right, nothing fancy.
496 int reading_order_ltr_context[] = {0, 1, 2, 3, 4, 5, 6, 7};
497 // In the strange event that this shows up in an RTL paragraph, nonetheless
498 // just presume the whole thing is an LTR line.
499 int reading_order_rtl_context[] = {ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7,
500 ResultIterator::kMinorRunEnd};
501
502 ExpectTextlineReadingOrder(true, word_dirs, countof(word_dirs), reading_order_ltr_context,
503 countof(reading_order_ltr_context));
504 ExpectTextlineReadingOrder(false, word_dirs, countof(word_dirs), reading_order_rtl_context,
505 countof(reading_order_rtl_context));
506}

◆ TEST_F() [132/229]

tesseract::TEST_F ( ResultIteratorTest  ,
NonNullConfidencesTest   
)

Definition at line 569 of file resultiterator_test.cc.

569 {
570 // SetImage("line6.tiff");
571 SetImage("trainingitalline.tif");
572 api_.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
573 // Force recognition so we can use the result iterator.
574 // We don't care about the return from GetUTF8Text.
575 char *result = api_.GetUTF8Text();
576 delete[] result;
577 ResultIterator *r_it = api_.GetIterator();
578 // Iterate over the words.
579 do {
580 char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
581 if (word_str != nullptr) {
584 ResultIterator s_it = *r_it;
585 do {
586 const char *char_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
587 CHECK(char_str != nullptr);
588 float confidence = s_it.Confidence(tesseract::RIL_SYMBOL);
589 LOG(INFO) << "Char " << char_str << " has confidence " << confidence << "\n";
590 delete[] char_str;
593 delete[] word_str;
594 } else {
595 LOG(INFO) << "Empty word found"
596 << "\n";
597 }
598 } while (r_it->Next(tesseract::RIL_WORD));
599 delete r_it;
600}

◆ TEST_F() [133/229]

tesseract::TEST_F ( ResultIteratorTest  ,
RightwardTextlineOrderTest   
)

Definition at line 510 of file resultiterator_test.cc.

510 {
511 const StrongScriptDirection word_dirs[] = {dR, dR, dN, dR, dN, dN, dR, dR};
512 // The order here is just right-to-left, nothing fancy.
513 int reading_order_rtl_context[] = {7, 6, 5, 4, 3, 2, 1, 0};
514 ExpectTextlineReadingOrder(false, word_dirs, countof(word_dirs), reading_order_rtl_context,
515 countof(reading_order_rtl_context));
516}

◆ TEST_F() [134/229]

tesseract::TEST_F ( ResultIteratorTest  ,
SmallCapDropCapTest   
)

Definition at line 370 of file resultiterator_test.cc.

370 {
371#ifdef DISABLED_LEGACY_ENGINE
372 // Skip test as LSTM mode does not recognize smallcaps & dropcaps attributes.
373 GTEST_SKIP();
374#else
375 SetImage("8071_093.3B.tif");
376 char *result = api_.GetUTF8Text();
377 delete[] result;
378 ResultIterator *r_it = api_.GetIterator();
379 // Iterate over the words.
380 int found_dropcaps = 0;
381 int found_smallcaps = 0;
382 int false_positives = 0;
383 do {
384 bool bold, italic, underlined, monospace, serif, smallcaps;
385 int pointsize, font_id;
386 r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps,
387 &pointsize, &font_id);
388 char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
389 if (word_str != nullptr) {
390 LOG(INFO) << "Word " << word_str
391 << " is " << (smallcaps ? "SMALLCAPS" : "Normal") << "\n";
392 if (r_it->SymbolIsDropcap()) {
393 ++found_dropcaps;
394 }
395 if (strcmp(word_str, "SHE") == 0 || strcmp(word_str, "MOPED") == 0 ||
396 strcmp(word_str, "RALPH") == 0 || strcmp(word_str, "KINNEY") == 0 || // Not working yet.
397 strcmp(word_str, "BENNETT") == 0) {
398 EXPECT_TRUE(smallcaps) << word_str;
399 ++found_smallcaps;
400 } else {
401 if (smallcaps) {
402 ++false_positives;
403 }
404 }
405 // No symbol other than the first of any word should be dropcap.
406 ResultIterator s_it(*r_it);
407 while (s_it.Next(tesseract::RIL_SYMBOL) && !s_it.IsAtBeginningOf(tesseract::RIL_WORD)) {
408 if (s_it.SymbolIsDropcap()) {
409 char *sym_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
410 LOG(ERROR) << "Symbol " << sym_str << " of word " << word_str << " is dropcap";
411 delete[] sym_str;
412 }
413 EXPECT_FALSE(s_it.SymbolIsDropcap());
414 }
415 delete[] word_str;
416 }
417 } while (r_it->Next(tesseract::RIL_WORD));
418 delete r_it;
419 EXPECT_EQ(1, found_dropcaps);
420 EXPECT_GE(4, found_smallcaps);
421 EXPECT_LE(false_positives, 3);
422#endif // DISABLED_LEGACY_ENGINE
423}

◆ TEST_F() [135/229]

tesseract::TEST_F ( ResultIteratorTest  ,
TextlineOrderSanityCheck   
)

Definition at line 518 of file resultiterator_test.cc.

518 {
519 // Iterate through all 7-word sequences and make sure that the output
520 // contains each of the indices 0..6 exactly once.
521 const int kNumWords(7);
522 const int kNumCombos = 1 << (2 * kNumWords); // 4 ^ 7 combinations
523 StrongScriptDirection word_dirs[kNumWords];
524 for (int i = 0; i < kNumCombos; i++) {
525 // generate the next combination.
526 int tmp = i;
527 for (auto &word_dir : word_dirs) {
528 word_dir = static_cast<StrongScriptDirection>(tmp % 4);
529 tmp = tmp / 4;
530 }
531 VerifySaneTextlineOrder(true, word_dirs, kNumWords);
532 VerifySaneTextlineOrder(false, word_dirs, kNumWords);
533 }
534}

◆ TEST_F() [136/229]

tesseract::TEST_F ( ScanutilsTest  ,
DoesScanf   
)

Definition at line 24 of file scanutils_test.cc.

24 {
25 // This test verifies that tfscanf does Scanf the same as stdio fscanf.
26 // There are probably a gazillion more test cases that could be added, but
27 // these brought the tesseract and unittest test results in line.
28 std::string filename = file::JoinPath(TESTDATA_DIR, "scanftest.txt");
29 FILE *fp1 = fopen(filename.c_str(), "r");
30 if (fp1 == nullptr) {
31 std::cout << "Failed to open file " << filename << '\n';
32 GTEST_SKIP();
33 }
34 FILE *fp2 = fopen(filename.c_str(), "r");
35 if (fp2 == nullptr) {
36 std::cout << "Failed to open file " << filename << '\n';
37 fclose(fp1);
38 GTEST_SKIP();
39 }
40 // The file contains this:
41 // 42.5 17 0.001000 -0.001000
42 // 0 1 123 -123 0x100
43 // abcdefghijklmnopqrstuvwxyz
44 // abcdefghijklmnopqrstuvwxyz
45 // MF 25 6.25e-2 0.5e5 -1e+4
46 // 42 MF 25 6.25e-2 0.5
47 // 24
48 const int kNumFloats = 4;
49 float f1[kNumFloats], f2[kNumFloats];
50 int r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]);
51 int r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]);
52 EXPECT_EQ(r1, kNumFloats);
53 EXPECT_EQ(r2, kNumFloats);
54 if (r1 == r2) {
55 for (int i = 0; i < r1; ++i) {
56 EXPECT_FLOAT_EQ(f1[i], f2[i]);
57 }
58 }
59 const int kNumInts = 5;
60 int i1[kNumInts], i2[kNumInts];
61 r1 = fscanf(fp1, "%d %d %d %d %i", &i1[0], &i1[1], &i1[2], &i1[3], &i1[4]);
62 r2 = tfscanf(fp2, "%d %d %d %d %i", &i2[0], &i2[1], &i2[2], &i2[3], &i2[4]);
63 EXPECT_EQ(r1, kNumInts);
64 EXPECT_EQ(r2, kNumInts);
65 if (r1 == r2) {
66 for (int i = 0; i < kNumInts; ++i) {
67 EXPECT_EQ(i1[i], i2[i]);
68 }
69 }
70 const int kStrLen = 1024;
71 char s1[kStrLen];
72 char s2[kStrLen];
73 r1 = fscanf(fp1, "%1023s", s1);
74 r2 = tfscanf(fp2, "%1023s", s2);
75 EXPECT_EQ(r1, r2);
76 EXPECT_STREQ(s1, s2);
77 EXPECT_EQ(26, strlen(s2));
78 r1 = fscanf(fp1, "%20s", s1);
79 r2 = tfscanf(fp2, "%20s", s2);
80 EXPECT_EQ(r1, r2);
81 EXPECT_STREQ(s1, s2);
82 EXPECT_EQ(20, strlen(s2));
83 // Now read the rest of the alphabet.
84 r1 = fscanf(fp1, "%1023s", s1);
85 r2 = tfscanf(fp2, "%1023s", s2);
86 EXPECT_EQ(r1, r2);
87 EXPECT_STREQ(s1, s2);
88 EXPECT_EQ(6, strlen(s2));
89 r1 = fscanf(fp1, "%1023s", s1);
90 r2 = tfscanf(fp2, "%1023s", s2);
91 EXPECT_EQ(r1, r2);
92 EXPECT_STREQ(s1, s2);
93 EXPECT_EQ(2, strlen(s2));
94 r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]);
95 r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]);
96 EXPECT_EQ(r1, r2);
97 for (int i = 0; i < kNumFloats; ++i) {
98 EXPECT_FLOAT_EQ(f1[i], f2[i]);
99 }
100 // Test the * for field suppression.
101 r1 = fscanf(fp1, "%d %*s %*d %*f %*f", &i1[0]);
102 r2 = tfscanf(fp2, "%d %*s %*d %*f %*f", &i2[0]);
103 EXPECT_EQ(r1, r2);
104 EXPECT_EQ(i1[0], i2[0]);
105 // We should still see the next value and no phantoms.
106 r1 = fscanf(fp1, "%d %1023s", &i1[0], s1);
107 r2 = tfscanf(fp2, "%d %1023s", &i2[0], s2);
108 EXPECT_EQ(r1, r2);
109 EXPECT_EQ(1, r2);
110 EXPECT_EQ(i1[0], i2[0]);
111 fclose(fp2);
112 fclose(fp1);
113}

◆ TEST_F() [137/229]

tesseract::TEST_F ( ShapeTableTest  ,
FullTest   
)

Definition at line 129 of file shapetable_test.cc.

129 {
130#ifdef DISABLED_LEGACY_ENGINE
131 // Skip test because Shape is missing.
132 GTEST_SKIP();
133#else
134 Shape shape1;
135 Setup352(101, &shape1);
136 // Build a shape table with the same data, but in separate shapes.
137 UNICHARSET unicharset;
138 unicharset.unichar_insert(" ");
139 for (int i = 1; i <= 10; ++i) {
140 char class_str[20];
141 snprintf(class_str, sizeof(class_str), "class%d", i);
142 unicharset.unichar_insert(class_str);
143 }
144 ShapeTable st(unicharset);
145 EXPECT_EQ(0, st.AddShape(3, 101));
146 EXPECT_EQ(1, st.AddShape(5, 101));
147 EXPECT_EQ(2, st.AddShape(2, 101));
148 EXPECT_EQ(3, st.NumShapes());
149 Expect352(101, shape1);
150 EXPECT_EQ(3, st.AddShape(shape1));
151 for (int i = 0; i < 3; ++i) {
152 EXPECT_FALSE(st.MutableShape(i)->IsEqualUnichars(&shape1));
153 }
154 EXPECT_TRUE(st.MutableShape(3)->IsEqualUnichars(&shape1));
155 EXPECT_TRUE(st.AnyMultipleUnichars());
156 st.DeleteShape(3);
157 EXPECT_FALSE(st.AnyMultipleUnichars());
158
159 // Now merge to make a single shape like shape1.
160 EXPECT_EQ(1, st.MasterUnicharCount(0));
161 st.MergeShapes(0, 1);
162 EXPECT_EQ(3, st.MergedUnicharCount(1, 2));
163 st.MergeShapes(1, 2);
164 for (int i = 0; i < 3; ++i) {
165 EXPECT_EQ(3, st.MasterUnicharCount(i));
166 // Master font count is the sum of all the font counts in the shape, not
167 // the actual number of different fonts in the shape.
168 EXPECT_EQ(3, st.MasterFontCount(i));
169 }
170 EXPECT_EQ(0, st.MasterDestinationIndex(1));
171 EXPECT_EQ(0, st.MasterDestinationIndex(2));
172 ShapeTable st2;
173 st2.AppendMasterShapes(st, nullptr);
174 EXPECT_EQ(1, st.NumMasterShapes());
175 EXPECT_EQ(1, st2.NumShapes());
176 EXPECT_TRUE(st2.MutableShape(0)->IsEqualUnichars(&shape1));
177 EXPECT_TRUE(st2.AnyMultipleUnichars());
178#endif
179}

◆ TEST_F() [138/229]

tesseract::TEST_F ( ShapeTest  ,
AddShapeTest   
)

Definition at line 94 of file shapetable_test.cc.

94 {
95#ifdef DISABLED_LEGACY_ENGINE
96 // Skip test because Shape is missing.
97 GTEST_SKIP();
98#else
99 Shape shape1;
100 Setup352(101, &shape1);
101 Expect352(101, shape1);
102 // Now setup a different shape with different content.
103 Shape shape2;
104 shape2.AddToShape(3, 101); // Duplicates shape1.
105 shape2.AddToShape(5, 110); // Different font to shape1.
106 shape2.AddToShape(7, 101); // Different unichar to shape1.
107 // They should NOT be subsets of each other.
108 EXPECT_FALSE(shape1.IsSubsetOf(shape2));
109 EXPECT_FALSE(shape2.IsSubsetOf(shape1));
110 // Now add shape2 to shape1.
111 shape1.AddShape(shape2);
112 // Test subsets again.
113 EXPECT_FALSE(shape1.IsSubsetOf(shape2));
114 EXPECT_TRUE(shape2.IsSubsetOf(shape1));
115 EXPECT_EQ(4, shape1.size());
116 EXPECT_FALSE(shape1.ContainsUnichar(1));
117 EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 101));
118 EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 110));
119 EXPECT_FALSE(shape1.ContainsUnicharAndFont(3, 110));
120 EXPECT_FALSE(shape1.ContainsUnicharAndFont(7, 110));
121 EXPECT_FALSE(shape1.IsEqualUnichars(&shape2));
122#endif
123}
bool IsSubsetOf(const Shape &other) const
Definition: shapetable.cpp:206
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
Definition: shapetable.cpp:133
void AddToShape(int unichar_id, int font_id)
Definition: shapetable.cpp:103
int size() const
Definition: shapetable.h:169
void AddShape(const Shape &other)
Definition: shapetable.cpp:123
bool IsEqualUnichars(Shape *other)
Definition: shapetable.cpp:222
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:150

◆ TEST_F() [139/229]

tesseract::TEST_F ( ShapeTest  ,
BasicTest   
)

Definition at line 62 of file shapetable_test.cc.

62 {
63#ifdef DISABLED_LEGACY_ENGINE
64 // Skip test because Shape is missing.
65 GTEST_SKIP();
66#else
67 Shape shape1;
68 EXPECT_EQ(0, shape1.size());
69 Setup352(101, &shape1);
70 Expect352(101, shape1);
71 // It should still work after file I/O.
72 std::string filename = TmpNameToPath("shapefile");
73 FILE *fp = fopen(filename.c_str(), "wb");
74 ASSERT_TRUE(fp != nullptr);
75 EXPECT_TRUE(shape1.Serialize(fp));
76 fclose(fp);
77 TFile tfp;
78 EXPECT_TRUE(tfp.Open(filename.c_str(), nullptr));
79 Shape shape2;
80 EXPECT_TRUE(shape2.DeSerialize(&tfp));
81 Expect352(101, shape2);
82 // They should be subsets of each other.
83 EXPECT_TRUE(shape1.IsSubsetOf(shape2));
84 EXPECT_TRUE(shape2.IsSubsetOf(shape1));
85 // They should be equal unichars.
86 EXPECT_TRUE(shape1.IsEqualUnichars(&shape2));
87 // and still pass afterwards.
88 Expect352(101, shape1);
89 Expect352(101, shape2);
90#endif
91}
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:86
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:92

◆ TEST_F() [140/229]

tesseract::TEST_F ( STATSTest  ,
BasicStats   
)

Definition at line 37 of file stats_test.cc.

37 {
38 EXPECT_EQ(37, stats_.get_total());
39 EXPECT_EQ(2, stats_.mode());
40 EXPECT_EQ(12, stats_.pile_count(2));
41}

◆ TEST_F() [141/229]

tesseract::TEST_F ( STATSTest  ,
InitStats   
)

Definition at line 43 of file stats_test.cc.

43 {
44 STATS stats;
45 EXPECT_EQ(0, stats.get_total());
46 EXPECT_EQ(0, stats.mode());
47 EXPECT_EQ(0, stats.pile_count(2));
48}

◆ TEST_F() [142/229]

tesseract::TEST_F ( STATSTest  ,
TopNModes   
)

Definition at line 51 of file stats_test.cc.

51 {
52 std::vector<tesseract::KDPairInc<float, int> > modes;
53 int num_modes = stats_.top_n_modes(3, modes);
54 EXPECT_EQ(3, num_modes);
55 // Mode 0 is 12 1 1 = 14 total count with a mean of 2 3/14.
56 EXPECT_FLOAT_EQ(2.0f + 3.0f / 14, modes[0].key());
57 EXPECT_EQ(14, modes[0].data());
58 // Mode 1 is 2 10 1 = 13 total count with a mean of 5 12/13.
59 EXPECT_FLOAT_EQ(5.0f + 12.0f / 13, modes[1].key());
60 EXPECT_EQ(13, modes[1].data());
61 // Mode 2 is 4 1 1 = 6 total count with a mean of 13.5.
62 EXPECT_FLOAT_EQ(13.5f, modes[2].key());
63 EXPECT_EQ(6, modes[2].data());
64}

◆ TEST_F() [143/229]

tesseract::TEST_F ( StridemapTest  ,
Indexing   
)

Definition at line 63 of file stridemap_test.cc.

63 {
64 // This test verifies that with a batch of arrays of different sizes, the
65 // iteration indexes each of them in turn, without going out of bounds.
66#ifdef INCLUDE_TENSORFLOW
67 std::vector<std::unique_ptr<xla::Array2D<int>>> arrays;
68 arrays.push_back(SetupArray(3, 4, 0));
69 arrays.push_back(SetupArray(4, 5, 12));
70 arrays.push_back(SetupArray(4, 4, 32));
71 arrays.push_back(SetupArray(3, 5, 48));
72 std::vector<std::pair<int, int>> h_w_sizes;
73 for (size_t i = 0; i < arrays.size(); ++i) {
74 h_w_sizes.emplace_back(arrays[i].get()->height(), arrays[i].get()->width());
75 }
76 StrideMap stride_map;
77 stride_map.SetStride(h_w_sizes);
78 StrideMap::Index index(stride_map);
79 int pos = 0;
80 do {
81 EXPECT_GE(index.t(), pos);
82 EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
83 pos);
84 EXPECT_EQ(index.IsLast(FD_BATCH), index.index(FD_BATCH) == arrays.size() - 1);
85 EXPECT_EQ(index.IsLast(FD_HEIGHT),
86 index.index(FD_HEIGHT) == arrays[index.index(FD_BATCH)]->height() - 1);
87 EXPECT_EQ(index.IsLast(FD_WIDTH),
88 index.index(FD_WIDTH) == arrays[index.index(FD_BATCH)]->width() - 1);
89 EXPECT_TRUE(index.IsValid());
90 ++pos;
91 } while (index.Increment());
92 LOG(INFO) << "pos=" << pos;
93 index.InitToLast();
94 do {
95 --pos;
96 EXPECT_GE(index.t(), pos);
97 EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
98 pos);
99 StrideMap::Index copy(index);
100 // Since a change in batch index changes the height and width, it isn't
101 // necessarily true that the position is still valid, even when changing
102 // to another valid batch index.
103 if (index.IsLast(FD_BATCH)) {
104 EXPECT_FALSE(copy.AddOffset(1, FD_BATCH));
105 }
106 copy = index;
107 EXPECT_EQ(index.IsLast(FD_HEIGHT), !copy.AddOffset(1, FD_HEIGHT));
108 copy = index;
109 EXPECT_EQ(index.IsLast(FD_WIDTH), !copy.AddOffset(1, FD_WIDTH));
110 copy = index;
111 if (index.index(FD_BATCH) == 0) {
112 EXPECT_FALSE(copy.AddOffset(-1, FD_BATCH));
113 }
114 copy = index;
115 EXPECT_EQ(index.index(FD_HEIGHT) == 0, !copy.AddOffset(-1, FD_HEIGHT));
116 copy = index;
117 EXPECT_EQ(index.index(FD_WIDTH) == 0, !copy.AddOffset(-1, FD_WIDTH));
118 copy = index;
119 EXPECT_FALSE(copy.AddOffset(10, FD_WIDTH));
120 copy = index;
121 EXPECT_FALSE(copy.AddOffset(-10, FD_HEIGHT));
122 EXPECT_TRUE(index.IsValid());
123 } while (index.Decrement());
124#else
125 LOG(INFO) << "Skip test because of missing xla::Array2D";
126 GTEST_SKIP();
127#endif
128}

◆ TEST_F() [144/229]

tesseract::TEST_F ( StridemapTest  ,
Scaling   
)

Definition at line 130 of file stridemap_test.cc.

130 {
131 // This test verifies that with a batch of arrays of different sizes, the
132 // scaling/reduction functions work as expected.
133#ifdef INCLUDE_TENSORFLOW
134 std::vector<std::unique_ptr<xla::Array2D<int>>> arrays;
135 arrays.push_back(SetupArray(3, 4, 0)); // 0-11
136 arrays.push_back(SetupArray(4, 5, 12)); // 12-31
137 arrays.push_back(SetupArray(4, 4, 32)); // 32-47
138 arrays.push_back(SetupArray(3, 5, 48)); // 48-62
139 std::vector<std::pair<int, int>> h_w_sizes;
140 for (size_t i = 0; i < arrays.size(); ++i) {
141 h_w_sizes.emplace_back(arrays[i].get()->height(), arrays[i].get()->width());
142 }
143 StrideMap stride_map;
144 stride_map.SetStride(h_w_sizes);
145
146 // Scale x by 2, keeping y the same.
147 std::vector<int> values_x2 = {0, 1, 4, 5, 8, 9, 12, 13, 17, 18, 22, 23, 27, 28,
148 32, 33, 36, 37, 40, 41, 44, 45, 48, 49, 53, 54, 58, 59};
149 StrideMap test_map(stride_map);
150 test_map.ScaleXY(2, 1);
151 StrideMap::Index index(test_map);
152 int pos = 0;
153 do {
154 int expected_value = values_x2[pos++];
155 EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
156 expected_value);
157 } while (index.Increment());
158 EXPECT_EQ(pos, values_x2.size());
159
160 test_map = stride_map;
161 // Scale y by 2, keeping x the same.
162 std::vector<int> values_y2 = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
163 32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51, 52};
164 test_map.ScaleXY(1, 2);
165 index.InitToFirst();
166 pos = 0;
167 do {
168 int expected_value = values_y2[pos++];
169 EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
170 expected_value);
171 } while (index.Increment());
172 EXPECT_EQ(pos, values_y2.size());
173
174 test_map = stride_map;
175 // Scale x and y by 2.
176 std::vector<int> values_xy2 = {0, 1, 12, 13, 17, 18, 32, 33, 36, 37, 48, 49};
177 test_map.ScaleXY(2, 2);
178 index.InitToFirst();
179 pos = 0;
180 do {
181 int expected_value = values_xy2[pos++];
182 EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
183 expected_value);
184 } while (index.Increment());
185 EXPECT_EQ(pos, values_xy2.size());
186
187 test_map = stride_map;
188 // Reduce Width to 1.
189 std::vector<int> values_x_to_1 = {0, 4, 8, 12, 17, 22, 27, 32, 36, 40, 44, 48, 53, 58};
190 test_map.ReduceWidthTo1();
191 index.InitToFirst();
192 pos = 0;
193 do {
194 int expected_value = values_x_to_1[pos++];
195 EXPECT_EQ((*arrays.at(index.index(FD_BATCH)))(index.index(FD_HEIGHT), index.index(FD_WIDTH)),
196 expected_value);
197 } while (index.Increment());
198 EXPECT_EQ(pos, values_x_to_1.size());
199#else
200 LOG(INFO) << "Skip test because of missing xla::Array2D";
201 GTEST_SKIP();
202#endif
203}

◆ TEST_F() [145/229]

tesseract::TEST_F ( StringRendererTest  ,
ArabicBoxcharsInLTROrder   
)

Definition at line 205 of file stringrenderer_test.cc.

205 {
206 renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
207 Image pix = nullptr;
208 // Arabic letters should be in decreasing x-coordinates
209 const char kArabicWord[] = "\u0644\u0627\u0641\u0643\u0631";
210 const std::string kRevWord = "\u0631\u0643\u0641\u0627\u0644";
211 renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
212 std::string boxes_str = renderer_->GetBoxesStr();
213 // Decode to get the box text strings.
214 EXPECT_FALSE(boxes_str.empty());
215 std::vector<std::string> texts;
216 EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts, nullptr, nullptr));
217 std::string ltr_str;
218 for (auto &text : texts) {
219 ltr_str += text.c_str();
220 }
221 // The string should come out perfectly reversed, despite there being a
222 // ligature.
223 EXPECT_EQ(ltr_str, kRevWord);
224 // Just to prove there was a ligature, the number of texts is less than the
225 // number of unicodes.
226 EXPECT_LT(texts.size(), 5);
227 pix.destroy();
228}

◆ TEST_F() [146/229]

tesseract::TEST_F ( StringRendererTest  ,
DISABLED_DoesDropUncoveredChars   
)

Definition at line 431 of file stringrenderer_test.cc.

431 {
432 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 500, 200);
433 renderer_->set_drop_uncovered_chars(true);
434 const std::string kWord = "office";
435 const std::string kCleanWord = "oice";
436 Image pix = nullptr;
437 EXPECT_FALSE(renderer_->font().CanRenderString(kWord.c_str(), kWord.length()));
438 EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length()));
439 int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix);
440 pix.destroy();
441 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
442 EXPECT_EQ(kWord.length(), offset);
443 ASSERT_EQ(kCleanWord.length(), boxchars.size());
444 for (size_t i = 0; i < boxchars.size(); ++i) {
445 EXPECT_EQ(kCleanWord.substr(i, 1), boxchars[i]->ch());
446 }
447}

◆ TEST_F() [147/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesClearBoxes   
)

Definition at line 295 of file stringrenderer_test.cc.

295 {
296 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
297 Image pix = nullptr;
298 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
299 pix.destroy();
300 EXPECT_GT(renderer_->GetBoxes().size(), 0);
301 const int num_boxes_per_page = renderer_->GetBoxes().size();
302
303 renderer_->ClearBoxes();
304 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
305 pix.destroy();
306 EXPECT_EQ(num_boxes_per_page, renderer_->GetBoxes().size());
307}

◆ TEST_F() [148/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesHandleNewlineCharacters   
)

Definition at line 153 of file stringrenderer_test.cc.

153 {
154 const char kRawText[] = "\n\n\n A \nB \nC \n\n\n";
155 const char kStrippedText[] = " A B C "; // text with newline chars removed
156 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
157 Image pix = nullptr;
158 EXPECT_EQ(strlen(kRawText), renderer_->RenderToImage(kRawText, strlen(kRawText), &pix));
159 EXPECT_TRUE(pix != nullptr);
160 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
161 // 3 characters + 4 spaces => 7 boxes
162 EXPECT_EQ(7, boxchars.size());
163 if (boxchars.size() == 7) {
164 // Verify the text content of the boxchars
165 for (size_t i = 0; i < boxchars.size(); ++i) {
166 EXPECT_EQ(std::string(1, kStrippedText[i]), boxchars[i]->ch());
167 }
168 }
169 DisplayClusterBoxes(pix);
170 pix.destroy();
171}

◆ TEST_F() [149/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesKeepAllImageBoxes   
)

Definition at line 274 of file stringrenderer_test.cc.

274 {
275 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
276 Image pix = nullptr;
277 int num_boxes_per_page = 0;
278 const int kNumTrials = 2;
279 for (int i = 0; i < kNumTrials; ++i) {
280 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
281 EXPECT_TRUE(pix != nullptr);
282 pix.destroy();
283 EXPECT_GT(renderer_->GetBoxes().size(), 0);
284 if (!num_boxes_per_page) {
285 num_boxes_per_page = renderer_->GetBoxes().size();
286 } else {
287 EXPECT_EQ((i + 1) * num_boxes_per_page, renderer_->GetBoxes().size());
288 }
289 for (int j = i * num_boxes_per_page; j < (i + 1) * num_boxes_per_page; ++j) {
290 EXPECT_EQ(i, renderer_->GetBoxes()[j]->page());
291 }
292 }
293}

◆ TEST_F() [150/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesLigatureTextForRendering   
)

Definition at line 309 of file stringrenderer_test.cc.

309 {
310 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
311 renderer_->set_add_ligatures(true);
312 Image pix = nullptr;
314 renderer_->RenderToImage(kEngNonLigatureText, strlen(kEngNonLigatureText), &pix));
315 pix.destroy();
316#if 0 // not with NFC normalization
317 // There should be one less box than letters due to the 'fi' ligature.
318 EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
319 // The output box text should be ligatured.
320 EXPECT_STREQ("fi", renderer_->GetBoxes()[0]->ch().c_str());
321#endif
322}
const char kEngNonLigatureText[]

◆ TEST_F() [151/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesNotRenderWordJoiner   
)

Definition at line 415 of file stringrenderer_test.cc.

415 {
416 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 500, 200);
417 const std::string word = "A- -B C-D A BC";
418 const std::string joined_word = StringRenderer::InsertWordJoiners(word);
419 Image pix = nullptr;
420 renderer_->RenderToImage(joined_word.c_str(), joined_word.length(), &pix);
421 pix.destroy();
422 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
423 const std::string kWordJoinerUTF8 = "\u2060";
424 ASSERT_EQ(word.length(), boxchars.size());
425 for (size_t i = 0; i < boxchars.size(); ++i) {
426 EXPECT_NE(kWordJoinerUTF8, boxchars[i]->ch());
427 EXPECT_EQ(word.substr(i, 1), boxchars[i]->ch());
428 }
429}

◆ TEST_F() [152/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesOutputBoxcharsInReadingOrder   
)

Definition at line 230 of file stringrenderer_test.cc.

230 {
231 renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
232 Image pix = nullptr;
233 // Arabic letters should be in decreasing x-coordinates
234 const char kArabicWord[] = "والفكر";
235 renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
236 EXPECT_GT(renderer_->GetBoxes().size(), 0);
237 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
238 for (size_t i = 1; i < boxchars.size(); ++i) {
239 EXPECT_GT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x) << boxchars[i - 1]->ch();
240 }
241 pix.destroy();
242
243 // English letters should be in increasing x-coordinates
244 const char kEnglishWord[] = "Google";
245 renderer_->ClearBoxes();
246 renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix);
247 EXPECT_EQ(boxchars.size(), strlen(kEnglishWord));
248 for (size_t i = 1; i < boxchars.size(); ++i) {
249 EXPECT_LT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x) << boxchars[i - 1]->ch();
250 }
251 pix.destroy();
252
253 // Mixed text should satisfy both.
254 renderer_->ClearBoxes();
255 renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix);
256 EXPECT_LT(FindBoxCharXCoord(boxchars, "a"), FindBoxCharXCoord(boxchars, "b"));
257 EXPECT_LT(FindBoxCharXCoord(boxchars, "1"), FindBoxCharXCoord(boxchars, "2"));
258 EXPECT_GT(FindBoxCharXCoord(boxchars, "و"), FindBoxCharXCoord(boxchars, "ر"));
259 pix.destroy();
260}

◆ TEST_F() [153/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderAllFontsToImage   
)

Definition at line 395 of file stringrenderer_test.cc.

395 {
396 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 1200, 1200);
397 size_t offset = 0;
398 std::string font_used;
399 do {
400 Image pix = nullptr;
401 font_used.clear();
402 offset += renderer_->RenderAllFontsToImage(1.0, kEngText + offset, strlen(kEngText + offset),
403 &font_used, &pix);
404 if (offset < strlen(kEngText)) {
405 EXPECT_TRUE(pix != nullptr);
406 EXPECT_STRNE("", font_used.c_str());
407 }
408 if (FLAGS_display) {
409 pixDisplay(pix, 0, 0);
410 }
411 pix.destroy();
412 } while (offset < strlen(kEngText));
413}
#define EXPECT_STRNE(s1, s2)
Definition: gtest.h:2114

◆ TEST_F() [154/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderLigatures   
)

Definition at line 173 of file stringrenderer_test.cc.

173 {
174 renderer_ = std::make_unique<StringRenderer>("Arab 12", 600, 250);
175 const char kArabicLigature[] = "لا";
176
177 Image pix = nullptr;
178 EXPECT_EQ(strlen(kArabicLigature),
179 renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix));
180 EXPECT_TRUE(pix != nullptr);
181 EXPECT_GT(renderer_->GetBoxes().size(), 0);
182 const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
183 EXPECT_EQ(1, boxes.size());
184 EXPECT_TRUE(boxes[0]->box() != nullptr);
185 EXPECT_STREQ(kArabicLigature, boxes[0]->ch().c_str());
186 DisplayClusterBoxes(pix);
187 pix.destroy();
188
189 renderer_ = std::make_unique<StringRenderer>("Arab 12", 600, 250);
190 const char kArabicMixedText[] = "والفكر والصراع 1234,\nوالفكر لا والصراع";
191 renderer_->RenderToImage(kArabicMixedText, strlen(kArabicMixedText), &pix);
192 DisplayClusterBoxes(pix);
193 pix.destroy();
194}

◆ TEST_F() [155/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderToImage   
)

Definition at line 82 of file stringrenderer_test.cc.

82 {
83 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
84 Image pix = nullptr;
85 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
86 EXPECT_TRUE(pix != nullptr);
87 EXPECT_GT(renderer_->GetBoxes().size(), 0);
88 DisplayClusterBoxes(pix);
89 pix.destroy();
90
91 renderer_ = std::make_unique<StringRenderer>("UnBatang 10", 600, 600);
92 EXPECT_EQ(strlen(kKorText), renderer_->RenderToImage(kKorText, strlen(kKorText), &pix));
93 EXPECT_GT(renderer_->GetBoxes().size(), 0);
94 DisplayClusterBoxes(pix);
95 pix.destroy();
96
97 renderer_ = std::make_unique<StringRenderer>("Lohit Hindi 10", 600, 600);
98 EXPECT_EQ(strlen(kHinText), renderer_->RenderToImage(kHinText, strlen(kHinText), &pix));
99 EXPECT_GT(renderer_->GetBoxes().size(), 0);
100 DisplayClusterBoxes(pix);
101 pix.destroy();
102
103 // RTL text
104 renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
105 EXPECT_EQ(strlen(kArabicText), renderer_->RenderToImage(kArabicText, strlen(kArabicText), &pix));
106 EXPECT_TRUE(pix != nullptr);
107 EXPECT_GT(renderer_->GetBoxes().size(), 0);
108 DisplayClusterBoxes(pix);
109 pix.destroy();
110
111 // Mixed direction Arabic + english text
112 renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
113 EXPECT_EQ(strlen(kMixedText), renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix));
114 EXPECT_TRUE(pix != nullptr);
115 EXPECT_GT(renderer_->GetBoxes().size(), 0);
116 DisplayClusterBoxes(pix);
117 pix.destroy();
118}

◆ TEST_F() [156/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderToImageWithUnderline   
)

Definition at line 120 of file stringrenderer_test.cc.

120 {
121 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
122 // Underline all words but NOT intervening spaces.
123 renderer_->set_underline_start_prob(1.0);
124 renderer_->set_underline_continuation_prob(0);
125 Image pix = nullptr;
126 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
127 EXPECT_TRUE(pix != nullptr);
128 EXPECT_GT(renderer_->GetBoxes().size(), 0);
129 DisplayClusterBoxes(pix);
130 pix.destroy();
131 renderer_->ClearBoxes();
132
133 // Underline all words AND intervening spaces.
134 renderer_->set_underline_start_prob(1.0);
135 renderer_->set_underline_continuation_prob(1.0);
136 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
137 EXPECT_TRUE(pix != nullptr);
138 EXPECT_GT(renderer_->GetBoxes().size(), 0);
139 DisplayClusterBoxes(pix);
140 pix.destroy();
141 renderer_->ClearBoxes();
142
143 // Underline words and intervening spaces with 0.5 prob.
144 renderer_->set_underline_start_prob(0.5);
145 renderer_->set_underline_continuation_prob(0.5);
146 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
147 EXPECT_TRUE(pix != nullptr);
148 EXPECT_GT(renderer_->GetBoxes().size(), 0);
149 DisplayClusterBoxes(pix);
150 pix.destroy();
151}

◆ TEST_F() [157/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderVerticalText   
)

Definition at line 262 of file stringrenderer_test.cc.

262 {
263 Image pix = nullptr;
264 renderer_ = std::make_unique<StringRenderer>("UnBatang 10", 600, 600);
265 renderer_->set_vertical_text(true);
266 EXPECT_EQ(strlen(kKorText), renderer_->RenderToImage(kKorText, strlen(kKorText), &pix));
267 EXPECT_GT(renderer_->GetBoxes().size(), 0);
268 DisplayClusterBoxes(pix);
269 pix.destroy();
270}

◆ TEST_F() [158/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderWordBoxes   
)

Definition at line 345 of file stringrenderer_test.cc.

345 {
346 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
347 renderer_->set_output_word_boxes(true);
348 Image pix = nullptr;
349 EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
350 pix.destroy();
351 // Verify #boxchars = #words + #spaces
352 std::vector<std::string> words = split(kEngText, ' ');
353 const int kNumSpaces = words.size() - 1;
354 const int kExpectedNumBoxes = words.size() + kNumSpaces;
355 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
356 EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
357 // Verify content of words and spaces
358 for (size_t i = 0; i < boxchars.size(); i += 2) {
359 EXPECT_EQ(words[i / 2], boxchars[i]->ch());
360 if (i < boxchars.size() - 1) {
361 EXPECT_EQ(" ", boxchars[i + 1]->ch());
362 EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
363 }
364 }
365}

◆ TEST_F() [159/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRenderWordBoxesFromMultiLineText   
)

Definition at line 367 of file stringrenderer_test.cc.

367 {
368 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
369 renderer_->set_output_word_boxes(true);
370 Image pix = nullptr;
371 const char kMultlineText[] = "the quick brown fox\njumps over the lazy dog";
372 EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix));
373 pix.destroy();
374 // Verify #boxchars = #words + #spaces + #newlines
375 std::vector<std::string> words;
376 for (auto &line : split(kMultlineText, '\n')) {
377 for (auto &word : split(line, ' ')) {
378 words.push_back(word);
379 }
380 }
381 const int kNumSeparators = words.size() - 1;
382 const int kExpectedNumBoxes = words.size() + kNumSeparators;
383 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
384 EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
385 // Verify content of words and spaces
386 for (size_t i = 0; i < boxchars.size(); i += 2) {
387 EXPECT_EQ(words[i / 2], boxchars[i]->ch());
388 if (i + 1 < boxchars.size()) {
389 EXPECT_EQ(" ", boxchars[i + 1]->ch());
390 EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
391 }
392 }
393}

◆ TEST_F() [160/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesRetainInputLigatureForRendering   
)

Definition at line 324 of file stringrenderer_test.cc.

324 {
325 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
326 Image pix = nullptr;
327 EXPECT_EQ(strlen(kEngLigatureText),
328 renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText), &pix));
329 pix.destroy();
330 // There should be one less box than letters due to the 'fi' ligature.
331 EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
332 // The output box text should be ligatured.
333 EXPECT_STREQ("\uFB01", renderer_->GetBoxes()[0]->ch().c_str());
334}
const char kEngLigatureText[]

◆ TEST_F() [161/229]

tesseract::TEST_F ( StringRendererTest  ,
DoesStripUnrenderableWords   
)

Definition at line 336 of file stringrenderer_test.cc.

336 {
337 // Verdana should only be able to render the english letters and numbers in
338 // the mixed text.
339 renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
340 std::string text(kMixedText);
341 EXPECT_GT(renderer_->StripUnrenderableWords(&text), 0);
342 EXPECT_EQ(" 123 abc", text);
343}

◆ TEST_F() [162/229]

tesseract::TEST_F ( StructuredTableTest  ,
CountHorizontalIntersectionsAll   
)

Definition at line 260 of file tablerecog_test.cc.

260 {
261 table_->set_bounding_box(TBOX(0, 0, 1000, 1000));
262 InsertPartition(0, 3, 100, 10);
263 InsertPartition(110, 5, 200, 16);
264
265 EXPECT_EQ(0, table_->CountHorizontalIntersections(0));
266 EXPECT_EQ(1, table_->CountHorizontalIntersections(4));
267 EXPECT_EQ(2, table_->CountHorizontalIntersections(8));
268 EXPECT_EQ(1, table_->CountHorizontalIntersections(12));
269 EXPECT_EQ(0, table_->CountHorizontalIntersections(20));
270}

◆ TEST_F() [163/229]

tesseract::TEST_F ( StructuredTableTest  ,
CountVerticalIntersectionsAll   
)

Definition at line 245 of file tablerecog_test.cc.

245 {
246 table_->set_bounding_box(TBOX(0, 0, 1000, 1000));
247 InsertPartition(0, 0, 100, 10);
248 InsertPartition(1, 12, 43, 21);
249 EXPECT_EQ(2, table_->CountVerticalIntersections(4));
250 EXPECT_EQ(2, table_->CountVerticalIntersections(20));
251 EXPECT_EQ(2, table_->CountVerticalIntersections(40));
252 EXPECT_EQ(1, table_->CountVerticalIntersections(50));
253 EXPECT_EQ(1, table_->CountVerticalIntersections(60));
254 EXPECT_EQ(1, table_->CountVerticalIntersections(80));
255 EXPECT_EQ(1, table_->CountVerticalIntersections(95));
256 EXPECT_EQ(0, table_->CountVerticalIntersections(104));
257 EXPECT_EQ(0, table_->CountVerticalIntersections(150));
258}

◆ TEST_F() [164/229]

tesseract::TEST_F ( StructuredTableTest  ,
FindWhitespacedColumnsBasic   
)

Definition at line 313 of file tablerecog_test.cc.

313 {
314 InsertPartitions();
315 TBOX guess(0, 0, 500, 800);
316 table_->set_bounding_box(guess);
317 table_->FindWhitespacedColumns();
318 table_->ExpectCellX(1, 25, 25, 475, 499);
319}

◆ TEST_F() [165/229]

tesseract::TEST_F ( StructuredTableTest  ,
FindWhitespacedColumnsSorted   
)

Definition at line 321 of file tablerecog_test.cc.

321 {
322 InsertPartitions();
323 TBOX guess(0, 0, 500, 800);
324 table_->set_bounding_box(guess);
325 table_->FindWhitespacedColumns();
326 table_->ExpectSortedX();
327}

◆ TEST_F() [166/229]

tesseract::TEST_F ( StructuredTableTest  ,
VerifyLinedTableBasicPass   
)

Definition at line 272 of file tablerecog_test.cc.

272 {
273 for (int y = 10; y <= 50; y += 10) {
274 table_->InjectCellY(y);
275 }
276 for (int x = 100; x <= 450; x += 50) {
277 table_->InjectCellX(x);
278 }
279 InsertLines();
280 InsertCellsInLines();
281 table_->set_bounding_box(line_box_);
282 EXPECT_TRUE(table_->VerifyLinedTableCells());
283}

◆ TEST_F() [167/229]

tesseract::TEST_F ( StructuredTableTest  ,
VerifyLinedTableHorizontalFail   
)

Definition at line 285 of file tablerecog_test.cc.

285 {
286 for (int y = 10; y <= 50; y += 10) {
287 table_->InjectCellY(y);
288 }
289 for (int x = 100; x <= 450; x += 50) {
290 table_->InjectCellX(x);
291 }
292 InsertLines();
293 InsertCellsInLines();
294 InsertPartition(101, 11, 299, 19);
295 table_->set_bounding_box(line_box_);
296 EXPECT_FALSE(table_->VerifyLinedTableCells());
297}

◆ TEST_F() [168/229]

tesseract::TEST_F ( StructuredTableTest  ,
VerifyLinedTableVerticalFail   
)

Definition at line 299 of file tablerecog_test.cc.

299 {
300 for (int y = 10; y <= 50; y += 10) {
301 table_->InjectCellY(y);
302 }
303 for (int x = 100; x <= 450; x += 50) {
304 table_->InjectCellX(x);
305 }
306 InsertLines();
307 InsertCellsInLines();
308 InsertPartition(151, 21, 199, 39);
309 table_->set_bounding_box(line_box_);
310 EXPECT_FALSE(table_->VerifyLinedTableCells());
311}

◆ TEST_F() [169/229]

tesseract::TEST_F ( TableFinderTest  ,
GapInXProjectionEdgeGap   
)

Definition at line 141 of file tablefind_test.cc.

141 {
142 int data[100];
143 for (int i = 0; i < 10; ++i) {
144 data[i] = 2;
145 }
146 for (int i = 10; i < 90; ++i) {
147 data[i] = 10;
148 }
149 for (int i = 90; i < 100; ++i) {
150 data[i] = 2;
151 }
152 EXPECT_FALSE(finder_->GapInXProjection(data, 100));
153}

◆ TEST_F() [170/229]

tesseract::TEST_F ( TableFinderTest  ,
GapInXProjectionExists   
)

Definition at line 155 of file tablefind_test.cc.

155 {
156 int data[100];
157 for (int i = 0; i < 10; ++i) {
158 data[i] = 10;
159 }
160 for (int i = 10; i < 90; ++i) {
161 data[i] = 2;
162 }
163 for (int i = 90; i < 100; ++i) {
164 data[i] = 10;
165 }
166 EXPECT_TRUE(finder_->GapInXProjection(data, 100));
167}

◆ TEST_F() [171/229]

tesseract::TEST_F ( TableFinderTest  ,
GapInXProjectionNoGap   
)

Definition at line 133 of file tablefind_test.cc.

133 {
134 int data[100];
135 for (int &i : data) {
136 i = 10;
137 }
138 EXPECT_FALSE(finder_->GapInXProjection(data, 100));
139}

◆ TEST_F() [172/229]

tesseract::TEST_F ( TableFinderTest  ,
HasLeaderAdjacentNoOverlap   
)

Definition at line 181 of file tablefind_test.cc.

181 {
182 InsertLeaderPartition(90, 10, 150, 15);
183 MakePartition(0, 10, 85, 20);
184 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
185 MakePartition(0, 25, 100, 40);
186 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
187 MakePartition(0, 0, 100, 10);
188 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
189 // TODO(nbeato): is this a useful metric? case fails
190 // MakePartition(160, 0, 200, 15); // leader is primarily above it
191 // EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
192}

◆ TEST_F() [173/229]

tesseract::TEST_F ( TableFinderTest  ,
HasLeaderAdjacentOverlapping   
)

Definition at line 169 of file tablefind_test.cc.

169 {
170 InsertLeaderPartition(90, 0, 150, 5);
171 MakePartition(0, 0, 100, 10);
172 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
173 MakePartition(0, 25, 100, 40);
174 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
175 MakePartition(145, 0, 200, 20);
176 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
177 MakePartition(40, 0, 50, 4);
178 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
179}

◆ TEST_F() [174/229]

tesseract::TEST_F ( TableFinderTest  ,
HasLeaderAdjacentPreservesColumns   
)

Definition at line 194 of file tablefind_test.cc.

194 {
195 InsertLeaderPartition(90, 0, 150, 5, 1, 2);
196 MakePartition(0, 0, 85, 10, 0, 0);
197 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
198 MakePartition(0, 0, 100, 10, 0, 1);
199 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
200 MakePartition(0, 0, 200, 10, 0, 5);
201 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
202 MakePartition(155, 0, 200, 10, 5, 5);
203 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
204}

◆ TEST_F() [175/229]

tesseract::TEST_F ( TableFinderTest  ,
SplitAndInsertFragmentedPartitionsBasicFail   
)

Definition at line 248 of file tablefind_test.cc.

248 {
249 finder_->set_global_median_blob_width(3);
250 finder_->set_global_median_xheight(10);
251
252 TBOX part_box(10, 5, 100, 15);
253 auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
254 all->set_type(PT_FLOWING_TEXT);
255 all->set_blob_type(BRT_TEXT);
256 all->set_flow(BTFT_CHAIN);
257 all->set_left_margin(10);
258 all->set_right_margin(100);
259 TBOX blob_box = part_box;
260 for (int i = 10; i <= 95; i += 5) {
261 blob_box.set_left(i + 1);
262 blob_box.set_right(i + 4);
263 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
264 }
265 // TODO(nbeato): Ray's newer code...
266 // all->ClaimBoxes();
267 all->ComputeLimits(); // This is to make sure median iinfo is set.
268 InsertTextPartition(all); // This is to delete blobs
269 ColPartition *fragment_me = all->CopyButDontOwnBlobs();
270
271 finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
272 finder_->ExpectPartition(TBOX(11, 5, 99, 15));
273 finder_->ExpectPartitionCount(1);
274}
void set_right(int x)
Definition: rect.h:92
void set_left(int x)
Definition: rect.h:85

◆ TEST_F() [176/229]

tesseract::TEST_F ( TableFinderTest  ,
SplitAndInsertFragmentedPartitionsBasicPass   
)

Definition at line 208 of file tablefind_test.cc.

208 {
209 finder_->set_global_median_blob_width(3);
210 finder_->set_global_median_xheight(10);
211
212 TBOX part_box(10, 5, 100, 15);
213 auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
214 all->set_type(PT_FLOWING_TEXT);
215 all->set_blob_type(BRT_TEXT);
216 all->set_flow(BTFT_CHAIN);
217 all->set_left_margin(10);
218 all->set_right_margin(100);
219 TBOX blob_box = part_box;
220 for (int i = 10; i <= 20; i += 5) {
221 blob_box.set_left(i + 1);
222 blob_box.set_right(i + 4);
223 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
224 }
225 for (int i = 35; i <= 55; i += 5) {
226 blob_box.set_left(i + 1);
227 blob_box.set_right(i + 4);
228 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
229 }
230 for (int i = 80; i <= 95; i += 5) {
231 blob_box.set_left(i + 1);
232 blob_box.set_right(i + 4);
233 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
234 }
235 // TODO(nbeato): Ray's newer code...
236 // all->ClaimBoxes();
237 all->ComputeLimits(); // This is to make sure median iinfo is set.
238 InsertTextPartition(all); // This is to delete blobs
239 ColPartition *fragment_me = all->CopyButDontOwnBlobs();
240
241 finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
242 finder_->ExpectPartition(TBOX(11, 5, 24, 15));
243 finder_->ExpectPartition(TBOX(36, 5, 59, 15));
244 finder_->ExpectPartition(TBOX(81, 5, 99, 15));
245 finder_->ExpectPartitionCount(3);
246}

◆ TEST_F() [177/229]

tesseract::TEST_F ( TableRecognizerTest  ,
FindLinesBoundingBoxBasic   
)

Definition at line 196 of file tablerecog_test.cc.

196 {
197 InsertLines();
198 TBOX box(0, 0, 200, 50);
199 bool result = recognizer_->FindLinesBoundingBox(&box);
200 EXPECT_TRUE(result);
201 EXPECT_EQ(line_box_.left(), box.left());
202 EXPECT_EQ(line_box_.right(), box.right());
203 EXPECT_EQ(line_box_.bottom(), box.bottom());
204 EXPECT_EQ(line_box_.top(), box.top());
205}

◆ TEST_F() [178/229]

tesseract::TEST_F ( TableRecognizerTest  ,
HasSignificantLinesBasicFail   
)

Definition at line 184 of file tablerecog_test.cc.

184 {
185 InsertLines();
186 TBOX box(370, 35, 500, 45);
187 EXPECT_FALSE(recognizer_->HasSignificantLines(box));
188}

◆ TEST_F() [179/229]

tesseract::TEST_F ( TableRecognizerTest  ,
HasSignificantLinesBasicPass   
)

Definition at line 175 of file tablerecog_test.cc.

175 {
176 InsertLines();
177 TBOX smaller_guess(120, 15, 370, 45);
178 TBOX larger_guess(90, 5, 490, 70);
179 EXPECT_TRUE(recognizer_->HasSignificantLines(line_box_));
180 EXPECT_TRUE(recognizer_->HasSignificantLines(larger_guess));
181 EXPECT_TRUE(recognizer_->HasSignificantLines(smaller_guess));
182}

◆ TEST_F() [180/229]

tesseract::TEST_F ( TableRecognizerTest  ,
HasSignificantLinesHorizontalOnlyFails   
)

Definition at line 190 of file tablerecog_test.cc.

190 {
191 InsertLines();
192 TBOX box(0, 100, 200, 200);
193 EXPECT_FALSE(recognizer_->HasSignificantLines(box));
194}

◆ TEST_F() [181/229]

tesseract::TEST_F ( TableRecognizerTest  ,
RecognizeLinedTableBasic   
)

Definition at line 207 of file tablerecog_test.cc.

207 {
208 InsertLines();
209 TBOX guess(120, 15, 370, 45);
210 StructuredTable table;
211 table.set_text_grid(text_grid_.get());
212 table.set_line_grid(line_grid_.get());
213
214 EXPECT_TRUE(recognizer_->RecognizeLinedTable(guess, &table));
215 EXPECT_EQ(line_box_.bottom(), table.bounding_box().bottom());
216 EXPECT_EQ(line_box_.top(), table.bounding_box().top());
217 EXPECT_EQ(line_box_.left(), table.bounding_box().left());
218 EXPECT_EQ(line_box_.right(), table.bounding_box().right());
219 EXPECT_EQ(line_box_.area(), table.bounding_box().area());
220 EXPECT_EQ(7, table.column_count());
221 EXPECT_EQ(4, table.row_count());
222 EXPECT_EQ(28, table.cell_count());
223 EXPECT_TRUE(table.is_lined());
224}
int32_t area() const
Definition: rect.h:134
unsigned column_count() const
Definition: tablerecog.cpp:117
const TBOX & bounding_box() const
Definition: tablerecog.cpp:126
unsigned cell_count() const
Definition: tablerecog.cpp:120
void set_line_grid(ColPartitionGrid *lines)
Definition: tablerecog.cpp:105
void set_text_grid(ColPartitionGrid *text)
Definition: tablerecog.cpp:102
unsigned row_count() const
Definition: tablerecog.cpp:114

◆ TEST_F() [182/229]

tesseract::TEST_F ( TableRecognizerTest  ,
RecognizeWhitespacedTableBasic   
)

Definition at line 226 of file tablerecog_test.cc.

226 {
227 InsertPartitions();
228 TBOX guess(0, 0, 500, 800);
229
230 StructuredTable table;
231 table.set_text_grid(text_grid_.get());
232 table.set_line_grid(line_grid_.get());
233 EXPECT_TRUE(recognizer_->RecognizeWhitespacedTable(guess, &table));
234 EXPECT_EQ(1, table.bounding_box().bottom());
235 EXPECT_EQ(799, table.bounding_box().top());
236 EXPECT_EQ(1, table.bounding_box().left());
237 EXPECT_EQ(499, table.bounding_box().right());
238 EXPECT_EQ(798 * 498, table.bounding_box().area());
239 EXPECT_EQ(500 / 25, table.column_count());
240 EXPECT_EQ(800 / 20, table.row_count());
241 EXPECT_EQ(500 * 800 / 20 / 25, table.cell_count());
242 EXPECT_FALSE(table.is_lined());
243}

◆ TEST_F() [183/229]

tesseract::TEST_F ( TabVectorTest  ,
SetStartEndPointsMatch   
)

Definition at line 38 of file tabvector_test.cc.

38 {
39 vector_ = std::make_unique<TabVector>();
40 ICOORD start(51, 65);
41 ICOORD end(7568, 234);
42 // Test coordinates individually to avoid adding an ostream operator
43 // explicitly to the ICOORD class (Droid doesn't support it).
44 vector_->set_startpt(start);
45 EXPECT_EQ(start.x(), vector_->startpt().x());
46 EXPECT_EQ(start.y(), vector_->startpt().y());
47 vector_->set_endpt(end);
48 EXPECT_EQ(end.x(), vector_->endpt().x());
49 EXPECT_EQ(end.y(), vector_->endpt().y());
50}

◆ TEST_F() [184/229]

tesseract::TEST_F ( TabVectorTest  ,
VOverlapInRangeSimple   
)

Definition at line 107 of file tabvector_test.cc.

107 {
108 MakeSimpleTabVector(0, 0, 100, 100);
109 int overlap = vector_->VOverlap(90, 10);
110 EXPECT_EQ(80, overlap);
111 overlap = vector_->VOverlap(100, 0);
112 EXPECT_EQ(100, overlap);
113}

◆ TEST_F() [185/229]

tesseract::TEST_F ( TabVectorTest  ,
VOverlapOutOfRange   
)

Definition at line 115 of file tabvector_test.cc.

115 {
116 MakeSimpleTabVector(0, 10, 100, 90);
117 int overlap = vector_->VOverlap(100, 0);
118 EXPECT_EQ(80, overlap);
119}

◆ TEST_F() [186/229]

tesseract::TEST_F ( TabVectorTest  ,
XAtY45DegreeSlopeInRangeExact   
)

Definition at line 52 of file tabvector_test.cc.

52 {
53 MakeSimpleTabVector(0, 0, 100, 100);
54 for (int y = 0; y <= 100; ++y) {
55 int x = vector_->XAtY(y);
56 EXPECT_EQ(y, x);
57 }
58}

◆ TEST_F() [187/229]

tesseract::TEST_F ( TabVectorTest  ,
XAtYHorizontal   
)

Definition at line 69 of file tabvector_test.cc.

69 {
70 const int y = 76; // arbitrary
71 MakeSimpleTabVector(0, y, 100, y);
72 EXPECT_EQ(0, vector_->XAtY(y));
73 // TODO(nbeato): What's the failure condition?
74 // Undefined! Should not pass! Allow until resolved answer.
75 EXPECT_EQ(0, vector_->XAtY(10));
76}

◆ TEST_F() [188/229]

tesseract::TEST_F ( TabVectorTest  ,
XAtYHorizontalInRangeExact   
)

Definition at line 99 of file tabvector_test.cc.

99 {
100 const int y = 120; // Arbitrary choice
101 MakeSimpleTabVector(50, y, 150, y);
102
103 int x = vector_->XAtY(y);
104 EXPECT_EQ(50, x);
105}

◆ TEST_F() [189/229]

tesseract::TEST_F ( TabVectorTest  ,
XAtYLargeNumbers   
)

Definition at line 90 of file tabvector_test.cc.

90 {
91 // Assume a document is 800 DPI,
92 // the width of a page is 10 inches across (8000 pixels), and
93 // the height of the page is 15 inches (12000 pixels).
94 MakeSimpleTabVector(7804, 504, 7968, 11768); // Arbitrary for vertical line
95 int x = vector_->XAtY(6136); // test mid point
96 EXPECT_EQ(7886, x);
97}

◆ TEST_F() [190/229]

tesseract::TEST_F ( TabVectorTest  ,
XAtYRoundingSimple   
)

Definition at line 78 of file tabvector_test.cc.

78 {
79 MakeSimpleTabVector(0, 0, 2, 10000);
80 int x = vector_->XAtY(1);
81 EXPECT_EQ(0, x);
82 x = vector_->XAtY(4999);
83 EXPECT_EQ(0, x);
84 x = vector_->XAtY(5001);
85 EXPECT_EQ(1, x);
86 x = vector_->XAtY(9999);
87 EXPECT_EQ(1, x);
88}

◆ TEST_F() [191/229]

tesseract::TEST_F ( TabVectorTest  ,
XAtYVerticalInRangeExact   
)

Definition at line 60 of file tabvector_test.cc.

60 {
61 const int x = 120; // Arbitrary choice
62 MakeSimpleTabVector(x, 0, x, 100);
63 for (int y = 0; y <= 100; ++y) {
64 int result_x = vector_->XAtY(y);
65 EXPECT_EQ(x, result_x);
66 }
67}

◆ TEST_F() [192/229]

tesseract::TEST_F ( TabVectorTest  ,
XYFlip   
)

Definition at line 121 of file tabvector_test.cc.

121 {
122 MakeSimpleTabVector(1, 2, 3, 4);
123 vector_->XYFlip();
124 EXPECT_EQ(2, vector_->startpt().x());
125 EXPECT_EQ(1, vector_->startpt().y());
126 EXPECT_EQ(4, vector_->endpt().x());
127 EXPECT_EQ(3, vector_->endpt().y());
128}

◆ TEST_F() [193/229]

tesseract::TEST_F ( TatweelTest  ,
DictIgnoresTatweel   
)

Definition at line 76 of file tatweel_test.cc.

76 {
77 // This test verifies that the dictionary ignores the Tatweel character.
78 tesseract::Trie trie(tesseract::DAWG_TYPE_WORD, "ara", SYSTEM_DAWG_PERM, unicharset_.size(), 0);
79 std::string filename = TestDataNameToPath("ara.wordlist");
80 if (!file_exists(filename.c_str())) {
81 LOG(INFO) << "Skip test because of missing " << filename;
82 GTEST_SKIP();
83 } else {
84 EXPECT_TRUE(trie.read_and_add_word_list(filename.c_str(), unicharset_,
85 tesseract::Trie::RRP_REVERSE_IF_HAS_RTL));
86 EXPECT_EQ(0, trie.check_for_words(filename.c_str(), unicharset_, false));
87 }
88}
@ RRP_REVERSE_IF_HAS_RTL
Definition: trie.h:57

◆ TEST_F() [194/229]

tesseract::TEST_F ( TatweelTest  ,
UnicharsetIgnoresTatweel   
)

Definition at line 68 of file tatweel_test.cc.

68 {
69 // This test verifies that the unicharset ignores the Tatweel character.
70 for (int i = 0; i < unicharset_.size(); ++i) {
71 const char *utf8 = unicharset_.id_to_unichar(i);
72 EXPECT_EQ(strstr(utf8, reinterpret_cast<const char *>(u8"\u0640")), nullptr);
73 }
74}

◆ TEST_F() [195/229]

tesseract::TEST_F ( TatweelTest  ,
UnicharsetLoadKeepsTatweel   
)

Definition at line 90 of file tatweel_test.cc.

90 {
91 // This test verifies that a load of an existing unicharset keeps any
92 // existing tatweel for backwards compatibility.
93 std::string filename = TestDataNameToPath("ara.unicharset");
94 if (!file_exists(filename.c_str())) {
95 LOG(INFO) << "Skip test because of missing " << filename;
96 GTEST_SKIP();
97 } else {
98 EXPECT_TRUE(unicharset_.load_from_file(filename.c_str()));
99 int num_tatweel = 0;
100 for (int i = 0; i < unicharset_.size(); ++i) {
101 const char *utf8 = unicharset_.id_to_unichar(i);
102 if (strstr(utf8, reinterpret_cast<const char *>(u8"\u0640")) != nullptr) {
103 ++num_tatweel;
104 }
105 }
106 LOG(INFO) << "Num tatweels in unicharset=" << num_tatweel;
107 EXPECT_EQ(num_tatweel, 4);
108 }
109}

◆ TEST_F() [196/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapBoolCorners   
)

Definition at line 37 of file rect_test.cc.

37 {
38 TBOX mid(10, 10, 30, 30);
39 TBOX bottom_left(5, 5, 15, 15);
40 TBOX top_left(5, 25, 15, 35);
41 // other corners covered by symmetry
42
43 EXPECT_TRUE(mid.overlap(bottom_left));
44 EXPECT_TRUE(bottom_left.overlap(mid));
45 EXPECT_TRUE(mid.overlap(top_left));
46 EXPECT_TRUE(top_left.overlap(mid));
47}

◆ TEST_F() [197/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapBoolSides   
)

Definition at line 61 of file rect_test.cc.

61 {
62 TBOX mid(10, 10, 30, 30);
63 TBOX left(5, 15, 15, 25);
64 TBOX bottom(15, 5, 25, 15);
65 // other sides covered by symmetry
66
67 EXPECT_TRUE(mid.overlap(left));
68 EXPECT_TRUE(left.overlap(mid));
69 EXPECT_TRUE(mid.overlap(bottom));
70 EXPECT_TRUE(bottom.overlap(mid));
71}

◆ TEST_F() [198/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapBoolSpan   
)

Definition at line 85 of file rect_test.cc.

85 {
86 TBOX mid(10, 10, 30, 30);
87 TBOX vertical(15, 5, 25, 35);
88 TBOX horizontal(5, 15, 35, 25);
89 // other sides covered by symmetry in other test cases
90
91 EXPECT_TRUE(mid.overlap(vertical));
92 EXPECT_TRUE(vertical.overlap(mid));
93 EXPECT_TRUE(mid.overlap(horizontal));
94 EXPECT_TRUE(horizontal.overlap(mid));
95}

◆ TEST_F() [199/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapFractionCorners   
)

Definition at line 49 of file rect_test.cc.

49 {
50 TBOX mid(10, 10, 30, 30);
51 TBOX bottom_left(5, 5, 15, 15);
52 TBOX top_left(5, 25, 15, 35);
53 // other corners covered by symmetry
54
55 EXPECT_DOUBLE_EQ((5.0 * 5.0) / (20.0 * 20.0), mid.overlap_fraction(bottom_left));
56 EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0), bottom_left.overlap_fraction(mid));
57 EXPECT_DOUBLE_EQ((5.0 * 5.0) / (20.0 * 20.0), mid.overlap_fraction(top_left));
58 EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0), top_left.overlap_fraction(mid));
59}
#define EXPECT_DOUBLE_EQ(val1, val2)
Definition: gtest.h:2148

◆ TEST_F() [200/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapFractionSides   
)

Definition at line 73 of file rect_test.cc.

73 {
74 TBOX mid(10, 10, 30, 30);
75 TBOX left(5, 15, 15, 25);
76 TBOX bottom(15, 5, 25, 15);
77 // other sides covered by symmetry
78
79 EXPECT_DOUBLE_EQ((5.0 * 10.0) / (20.0 * 20.0), mid.overlap_fraction(left));
80 EXPECT_DOUBLE_EQ((5.0 * 10.0) / (10.0 * 10.0), left.overlap_fraction(mid));
81 EXPECT_DOUBLE_EQ((5.0 * 10.0) / (20.0 * 20.0), mid.overlap_fraction(bottom));
82 EXPECT_DOUBLE_EQ((5.0 * 10.0) / (10.0 * 10.0), bottom.overlap_fraction(mid));
83}

◆ TEST_F() [201/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapFractionSpan   
)

Definition at line 97 of file rect_test.cc.

97 {
98 TBOX mid(10, 10, 30, 30);
99 TBOX vertical(15, 5, 25, 35);
100 TBOX horizontal(5, 15, 35, 25);
101 // other sides covered by symmetry in other test cases
102
103 EXPECT_DOUBLE_EQ((10.0 * 20.0) / (20.0 * 20.0), mid.overlap_fraction(vertical));
104 EXPECT_DOUBLE_EQ((10.0 * 20.0) / (10.0 * 30.0), vertical.overlap_fraction(mid));
105 EXPECT_DOUBLE_EQ((20.0 * 10.0) / (20.0 * 20.0), mid.overlap_fraction(horizontal));
106 EXPECT_DOUBLE_EQ((20.0 * 10.0) / (30.0 * 10.0), horizontal.overlap_fraction(mid));
107}

◆ TEST_F() [202/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapInside   
)

Definition at line 27 of file rect_test.cc.

27 {
28 TBOX a(10, 10, 20, 20);
29 TBOX b(11, 11, 12, 12);
30
31 EXPECT_TRUE(a.overlap(b));
32 EXPECT_TRUE(b.overlap(a));
33 EXPECT_DOUBLE_EQ(0.01, a.overlap_fraction(b));
34 EXPECT_DOUBLE_EQ(1.0, b.overlap_fraction(a));
35}

◆ TEST_F() [203/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapOutsideTests   
)

Definition at line 110 of file rect_test.cc.

110 {
111 TBOX mid(10, 10, 30, 30);
112 TBOX left(0, 15, 5, 25);
113
114 EXPECT_FALSE(mid.overlap(left));
115 EXPECT_FALSE(left.overlap(mid));
116 EXPECT_DOUBLE_EQ(0.0, mid.overlap_fraction(left));
117 EXPECT_DOUBLE_EQ(0.0, left.overlap_fraction(mid));
118}

◆ TEST_F() [204/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapXFraction   
)

Definition at line 120 of file rect_test.cc.

120 {
121 TBOX a(10, 10, 20, 20);
122 TBOX b(12, 100, 26, 200);
123 TBOX c(0, 0, 100, 100);
124 TBOX d(0, 0, 1, 1);
125
126 EXPECT_DOUBLE_EQ(8.0 / 10.0, a.x_overlap_fraction(b));
127 EXPECT_DOUBLE_EQ(8.0 / 14.0, b.x_overlap_fraction(a));
128 EXPECT_DOUBLE_EQ(1.0, a.x_overlap_fraction(c));
129 EXPECT_DOUBLE_EQ(10.0 / 100.0, c.x_overlap_fraction(a));
130 EXPECT_DOUBLE_EQ(0.0, a.x_overlap_fraction(d));
131 EXPECT_DOUBLE_EQ(0.0, d.x_overlap_fraction(a));
132}

◆ TEST_F() [205/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapXFractionZeroSize   
)

Definition at line 148 of file rect_test.cc.

148 {
149 TBOX zero(10, 10, 10, 10);
150 TBOX big(0, 0, 100, 100);
151 TBOX small(0, 0, 1, 1);
152
153 EXPECT_DOUBLE_EQ(1.0, zero.x_overlap_fraction(big));
154 EXPECT_DOUBLE_EQ(0.0, big.x_overlap_fraction(zero));
155 EXPECT_DOUBLE_EQ(0.0, zero.x_overlap_fraction(small));
156 EXPECT_DOUBLE_EQ(0.0, small.x_overlap_fraction(zero));
157}

◆ TEST_F() [206/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapYFraction   
)

Definition at line 134 of file rect_test.cc.

134 {
135 TBOX a(10, 10, 20, 20);
136 TBOX b(100, 12, 200, 26);
137 TBOX c(0, 0, 100, 100);
138 TBOX d(0, 0, 1, 1);
139
140 EXPECT_DOUBLE_EQ(8.0 / 10.0, a.y_overlap_fraction(b));
141 EXPECT_DOUBLE_EQ(8.0 / 14.0, b.y_overlap_fraction(a));
142 EXPECT_DOUBLE_EQ(1.0, a.y_overlap_fraction(c));
143 EXPECT_DOUBLE_EQ(10.0 / 100.0, c.y_overlap_fraction(a));
144 EXPECT_DOUBLE_EQ(0.0, a.y_overlap_fraction(d));
145 EXPECT_DOUBLE_EQ(0.0, d.y_overlap_fraction(a));
146}

◆ TEST_F() [207/229]

tesseract::TEST_F ( TBOXTest  ,
OverlapYFractionZeroSize   
)

Definition at line 159 of file rect_test.cc.

159 {
160 TBOX zero(10, 10, 10, 10);
161 TBOX big(0, 0, 100, 100);
162 TBOX small(0, 0, 1, 1);
163
164 EXPECT_DOUBLE_EQ(1.0, zero.y_overlap_fraction(big));
165 EXPECT_DOUBLE_EQ(0.0, big.y_overlap_fraction(zero));
166 EXPECT_DOUBLE_EQ(0.0, zero.y_overlap_fraction(small));
167 EXPECT_DOUBLE_EQ(0.0, small.y_overlap_fraction(zero));
168}

◆ TEST_F() [208/229]

tesseract::TEST_F ( TesseractTest  ,
AdaptToWordStrTest   
)

Definition at line 163 of file baseapi_test.cc.

163 {
164#ifdef DISABLED_LEGACY_ENGINE
165 // Skip test because TessBaseAPI::AdaptToWordStr is missing.
166 GTEST_SKIP();
167#else
168 static const char *kTrainingPages[] = {"136.tif", "256.tif", "410.tif", "432.tif", "540.tif",
169 "692.tif", "779.tif", "793.tif", "808.tif", "815.tif",
170 "12.tif", "12.tif", nullptr};
171 static const char *kTrainingText[] = {"1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0",
172 "6 9 2", "7 7 9", "7 9 3", "8 0 8", "8 1 5",
173 "1 2", "1 2", nullptr};
174 static const char *kTestPages[] = {"324.tif", "433.tif", "12.tif", nullptr};
175 static const char *kTestText[] = {"324", "433", "12", nullptr};
176 tesseract::TessBaseAPI api;
177 std::string truth_text;
178 std::string ocr_text;
179 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
180 // eng.traineddata not found.
181 GTEST_SKIP();
182 return;
183 }
184 api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
185 api.SetVariable("classify_class_pruner_threshold", "220");
186 // Train on the training text.
187 for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
188 std::string image_file = TestDataNameToPath(kTrainingPages[i]);
189 Image src_pix = pixRead(image_file.c_str());
190 CHECK(src_pix);
191 api.SetImage(src_pix);
192 EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
193 << "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file;
194 src_pix.destroy();
195 }
196 // Test the test text.
197 api.SetVariable("tess_bn_matching", "1");
198 api.SetPageSegMode(tesseract::PSM_SINGLE_WORD);
199 for (int i = 0; kTestPages[i] != nullptr; ++i) {
200 Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
201 CHECK(src_pix);
202 ocr_text = GetCleanedTextResult(&api, src_pix);
203 trim(truth_text);
204 EXPECT_STREQ(kTestText[i], ocr_text.c_str());
205 src_pix.destroy();
206 }
207#endif
208}
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:264
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:511
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Definition: baseapi.cpp:1835

◆ TEST_F() [209/229]

tesseract::TEST_F ( TesseractTest  ,
BasicLSTMTest   
)

Definition at line 211 of file baseapi_test.cc.

211 {
213 std::string truth_text;
214 std::string ocr_text;
215 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
216 // eng.traineddata not found.
217 GTEST_SKIP();
218 return;
219 }
220 Image src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
221 CHECK(src_pix);
222 ocr_text = GetCleanedTextResult(&api, src_pix);
223 CHECK_OK(
224 file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
225 trim(truth_text);
226 EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
227 src_pix.destroy();
228}

◆ TEST_F() [210/229]

tesseract::TEST_F ( TesseractTest  ,
BasicTesseractTest   
)

Definition at line 72 of file baseapi_test.cc.

72 {
74 std::string truth_text;
75 std::string ocr_text;
76 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
77 Image src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
78 CHECK(src_pix);
79 ocr_text = GetCleanedTextResult(&api, src_pix);
81 file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
82 trim(truth_text);
83 EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
84 src_pix.destroy();
85 } else {
86 // eng.traineddata not found.
87 GTEST_SKIP();
88 }
89}

◆ TEST_F() [211/229]

tesseract::TEST_F ( TesseractTest  ,
HOCRContainsBaseline   
)

Definition at line 141 of file baseapi_test.cc.

141 {
143 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
144 // eng.traineddata not found.
145 GTEST_SKIP();
146 return;
147 }
148 Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
149 CHECK(src_pix);
150 api.SetInputName("HelloGoogle.tif");
151 api.SetImage(src_pix);
152 char *result = api.GetHOCRText(0);
153 EXPECT_TRUE(result != nullptr);
154 EXPECT_THAT(result, HasSubstr("Hello"));
155 EXPECT_TRUE(std::regex_search(
156 result, std::regex{"<span class='ocr_line'[^>]* baseline [-.0-9]+ [-.0-9]+"}));
157
158 delete[] result;
159 src_pix.destroy();
160}
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
void SetInputName(const char *name)
Definition: baseapi.cpp:270

◆ TEST_F() [212/229]

tesseract::TEST_F ( TesseractTest  ,
HOCRWorksWithoutSetInputName   
)

Definition at line 122 of file baseapi_test.cc.

122 {
124 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
125 // eng.traineddata not found.
126 GTEST_SKIP();
127 return;
128 }
129 Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
130 CHECK(src_pix);
131 api.SetImage(src_pix);
132 char *result = api.GetHOCRText(0);
133 EXPECT_TRUE(result != nullptr);
134 EXPECT_THAT(result, HasSubstr("Hello"));
135 EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
136 delete[] result;
137 src_pix.destroy();
138}

◆ TEST_F() [213/229]

tesseract::TEST_F ( TesseractTest  ,
InitConfigOnlyTest   
)

Definition at line 280 of file baseapi_test.cc.

280 {
281 // Languages for testing initialization.
282 const char *langs[] = {"eng", "chi_tra", "jpn", "vie"};
283 std::unique_ptr<tesseract::TessBaseAPI> api;
284 CycleTimer timer;
285 for (auto &lang : langs) {
286 api = std::make_unique<tesseract::TessBaseAPI>();
287 timer.Restart();
288 EXPECT_EQ(0, api->Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY));
289 timer.Stop();
290 LOG(INFO) << "Lang " << lang << " took " << timer.GetInMs() << "ms in regular init";
291 }
292 // Init variables to set for config-only initialization.
293 std::vector<std::string> vars_vec, vars_values;
294 vars_vec.emplace_back("tessedit_init_config_only");
295 vars_values.emplace_back("1");
296 LOG(INFO) << "Switching to config only initialization:";
297 for (auto &lang : langs) {
298 api = std::make_unique<tesseract::TessBaseAPI>();
299 timer.Restart();
300 EXPECT_EQ(0, api->Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY, nullptr, 0,
301 &vars_vec, &vars_values, false));
302 timer.Stop();
303 LOG(INFO) << "Lang " << lang << " took " << timer.GetInMs() << "ms in config-only init";
304 }
305}
void Stop()
Definition: cycletimer.h:48
void Restart()
Definition: cycletimer.h:43
int64_t GetInMs() const
Definition: cycletimer.h:54

◆ TEST_F() [214/229]

tesseract::TEST_F ( TesseractTest  ,
IteratesParagraphsEvenIfNotDetected   
)

Definition at line 93 of file baseapi_test.cc.

93 {
95 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
97 api.SetVariable("paragraph_debug_level", "3");
98#if 0 // TODO: b622.png is missing
99 Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
100 CHECK(src_pix);
101 api.SetImage(src_pix);
102 Boxa* para_boxes =
103 api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
104 EXPECT_TRUE(para_boxes != nullptr);
105 Boxa* block_boxes =
106 api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
107 EXPECT_TRUE(block_boxes != nullptr);
108 // TODO(eger): Get paragraphs out of this page pre-text.
109 EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
110 boxaDestroy(&block_boxes);
111 boxaDestroy(&para_boxes);
112 src_pix.destroy();
113#endif
114 } else {
115 // eng.traineddata not found.
116 GTEST_SKIP();
117 }
118}
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:702

◆ TEST_F() [215/229]

tesseract::TEST_F ( TesseractTest  ,
LSTMGeometryTest   
)

Definition at line 236 of file baseapi_test.cc.

236 {
237 Image src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
239 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
240 // eng.traineddata not found.
241 GTEST_SKIP();
242 return;
243 }
244 api.SetImage(src_pix);
245 ASSERT_EQ(api.Recognize(nullptr), 0);
246
247 const PAGE_RES *page_res = api.GetPageRes();
248 PAGE_RES_IT page_res_it(const_cast<PAGE_RES *>(page_res));
249 page_res_it.restart_page();
250 BLOCK *block = page_res_it.block()->block;
251 CHECK(block);
252
253 // extract word and character boxes for each word
254 for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
255 WERD_RES *word = page_res_it.word();
256 CHECK(word);
257 CHECK(word->best_choice);
258 CHECK_GT(word->best_choice->length(), 0);
259 CHECK(word->word);
260 CHECK(word->box_word);
261 // tesseract's word box
262 TBOX tess_blob_box;
263 tess_blob_box = word->word->bounding_box();
264 tess_blob_box.rotate(block->re_rotation());
265 // verify that each of LSTM's character boxes lies close to within
266 // tesseract's word box
267 for (int i = 0; i < word->box_word->length(); ++i) {
268 TBOX lstm_blob_box = word->box_word->BlobBox(i);
269 // LSTM character box should not spill out of tesseract word box
270 // by more than a few pixels in any direction
271 EXPECT_LT(tess_blob_box.left() - lstm_blob_box.left(), 5);
272 EXPECT_LT(lstm_blob_box.right() - tess_blob_box.right(), 5);
273 EXPECT_LT(tess_blob_box.bottom() - lstm_blob_box.bottom(), 5);
274 EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
275 }
276 }
277 src_pix.destroy();
278}
#define CHECK_GT(test, value)
Definition: include_gunit.h:81
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:834
const PAGE_RES * GetPageRes() const
Definition: baseapi.h:760

◆ TEST_F() [216/229]

tesseract::TEST_F ( TesseractTest  ,
StaticTessBaseAPI   
)

Definition at line 66 of file baseapi_test.cc.

66 {
67 static tesseract::TessBaseAPI api;
68 api.End();
69}

◆ TEST_F() [217/229]

tesseract::TEST_F ( TextlineProjectionTest  ,
Rotated   
)

Definition at line 250 of file textlineprojection_test.cc.

250 {
251 VerifyBoxes("phototestrot.tif", 31);
252}

◆ TEST_F() [218/229]

tesseract::TEST_F ( TextlineProjectionTest  ,
Unrotated   
)

Definition at line 245 of file textlineprojection_test.cc.

245 {
246 VerifyBoxes("phototest.tif", 31);
247}

◆ TEST_F() [219/229]

tesseract::TEST_F ( TfileTest  ,
BigEndian   
)

Definition at line 196 of file tfile_test.cc.

196 {
197 // This test verifies that Tfile can auto-reverse big-endian data.
198 MathData m1;
199 m1.Setup();
200 std::vector<char> data;
201 TFile fpw;
202 fpw.OpenWrite(&data);
203 EXPECT_TRUE(m1.SerializeBigEndian(&fpw));
204 TFile fpr;
205 EXPECT_TRUE(fpr.Open(&data[0], data.size()));
206 fpr.set_swap(true);
207 MathData m2;
208 EXPECT_TRUE(m2.DeSerializeBigEndian(&fpr));
209 // That serialize was destructive, so test against a fresh MathData.
210 MathData m3;
211 m3.Setup();
212 m3.ExpectEq(m2);
213}
void set_swap(bool value)
Definition: serialis.h:75

◆ TEST_F() [220/229]

tesseract::TEST_F ( TfileTest  ,
FGets   
)

Definition at line 170 of file tfile_test.cc.

170 {
171 // This test verifies that Tfile can interleave FGets with binary data.
172 MathData m1;
173 std::string line_str = "This is a textline with a newline\n";
174 m1.Setup();
175 std::vector<char> data;
176 TFile fpw;
177 fpw.OpenWrite(&data);
178 EXPECT_TRUE(m1.Serialize(&fpw));
179 EXPECT_EQ(1, fpw.FWrite(line_str.data(), line_str.size(), 1));
180 EXPECT_TRUE(m1.Serialize(&fpw));
181 // Now get back the 2 copies of m1 with the line in between.
182 TFile fpr;
183 EXPECT_TRUE(fpr.Open(&data[0], data.size()));
184 MathData m2;
185 EXPECT_TRUE(m2.DeSerialize(&fpr));
186 m1.ExpectEq(m2);
187 const int kBufsize = 1024;
188 char buffer[kBufsize + 1];
189 EXPECT_EQ(buffer, fpr.FGets(buffer, kBufsize));
190 EXPECT_STREQ(line_str.c_str(), buffer);
191 MathData m3;
192 EXPECT_TRUE(m3.DeSerialize(&fpr));
193 m1.ExpectEq(m3);
194}
size_t FWrite(const void *buffer, size_t size, size_t count)
Definition: serialis.cpp:272

◆ TEST_F() [221/229]

tesseract::TEST_F ( TfileTest  ,
Serialize   
)

Definition at line 150 of file tfile_test.cc.

150 {
151 // This test verifies that Tfile can serialize a class.
152 MathData m1;
153 m1.Setup();
154 std::vector<char> data;
155 TFile fpw;
156 fpw.OpenWrite(&data);
157 EXPECT_TRUE(m1.Serialize(&fpw));
158 TFile fpr;
159 EXPECT_TRUE(fpr.Open(&data[0], data.size()));
160 MathData m2;
161 EXPECT_TRUE(m2.DeSerialize(&fpr));
162 m1.ExpectEq(m2);
163 MathData m3;
164 EXPECT_FALSE(m3.DeSerialize(&fpr));
165 fpr.Rewind();
166 EXPECT_TRUE(m3.DeSerialize(&fpr));
167 m1.ExpectEq(m3);
168}

◆ TEST_F() [222/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesChinese   
)

Definition at line 165 of file unicharcompress_test.cc.

165 {
166 LOG(INFO) << "Testing chi_tra";
167 LoadUnicharset("chi_tra.unicharset");
168 ExpectCorrect("chi_tra");
169 LOG(INFO) << "Testing chi_sim";
170 LoadUnicharset("chi_sim.unicharset");
171 ExpectCorrect("chi_sim");
172}

◆ TEST_F() [223/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesEnglish   
)

Definition at line 200 of file unicharcompress_test.cc.

200 {
201 LOG(INFO) << "Testing eng";
202 LoadUnicharset("eng.unicharset");
203 ExpectCorrect("eng");
204}

◆ TEST_F() [224/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesJapanese   
)

Definition at line 174 of file unicharcompress_test.cc.

174 {
175 LOG(INFO) << "Testing jpn";
176 LoadUnicharset("jpn.unicharset");
177 ExpectCorrect("jpn");
178}

◆ TEST_F() [225/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesKannada   
)

Definition at line 186 of file unicharcompress_test.cc.

186 {
187 LOG(INFO) << "Testing kan";
188 LoadUnicharset("kan.unicharset");
189 ExpectCorrect("kan");
190 SerializeAndUndo();
191 ExpectCorrect("kan");
192}

◆ TEST_F() [226/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesKorean   
)

Definition at line 180 of file unicharcompress_test.cc.

180 {
181 LOG(INFO) << "Testing kor";
182 LoadUnicharset("kor.unicharset");
183 ExpectCorrect("kor");
184}

◆ TEST_F() [227/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesLigaturesWithDoubles   
)

Definition at line 208 of file unicharcompress_test.cc.

208 {
209 LOG(INFO) << "Testing por with ligatures";
210 LoadUnicharset("por.unicharset");
211 ExpectCorrect("por");
212 // Check that any unichar-id that is encoded with multiple codes has the
213 // correct encoded_null_char_ in between.
214 for (int u = 0; u <= unicharset_.size(); ++u) {
215 RecodedCharID code;
216 int len = compressed_.EncodeUnichar(u, &code);
217 if (len > 1) {
218 // There should not be any null char in the code.
219 for (int i = 0; i < len; ++i) {
220 EXPECT_NE(encoded_null_char_, code(i));
221 }
222 }
223 }
224}

◆ TEST_F() [228/229]

tesseract::TEST_F ( UnicharcompressTest  ,
DoesMarathi   
)

Definition at line 194 of file unicharcompress_test.cc.

194 {
195 LOG(INFO) << "Testing mar";
196 LoadUnicharset("mar.unicharset");
197 ExpectCorrect("mar");
198}

◆ TEST_F() [229/229]

tesseract::TEST_F ( UnicharcompressTest  ,
GetEncodingAsString   
)

Definition at line 228 of file unicharcompress_test.cc.

228 {
229 LoadUnicharset("trivial.unicharset");
230 ExpectCorrect("trivial");
231 std::string encoding = compressed_.GetEncodingAsString(unicharset_);
232 std::string encoding_str(&encoding[0], encoding.length());
233 std::vector<std::string> lines = split(encoding_str, '\n');
234 EXPECT_EQ(5, lines.size());
235 // The first line is always space.
236 EXPECT_EQ("0\t ", lines[0]);
237 // Next we have i.
238 EXPECT_EQ("1\ti", lines[1]);
239 // Next we have f.
240 EXPECT_EQ("2\tf", lines[2]);
241 // Next we have the fi ligature: fi. There are no nulls in it, as there are no
242 // repeated letter ligatures in this unicharset, unlike por.unicharset above.
243 EXPECT_EQ("2,1\tfi", lines[3]);
244 // Finally the null character.
245 EXPECT_EQ("3\t<nul>", lines[4]);
246}

◆ TEST_P() [1/165]

tesseract::TEST_P ( LoadLanguage  ,
afr   
)

Definition at line 49 of file loadlang_test.cc.

49 {
50 LangLoader("afr", GetParam());
51}

◆ TEST_P() [2/165]

tesseract::TEST_P ( LoadLanguage  ,
amh   
)

Definition at line 52 of file loadlang_test.cc.

52 {
53 LangLoader("amh", GetParam());
54}

◆ TEST_P() [3/165]

tesseract::TEST_P ( LoadLanguage  ,
ara   
)

Definition at line 55 of file loadlang_test.cc.

55 {
56 LangLoader("ara", GetParam());
57}

◆ TEST_P() [4/165]

tesseract::TEST_P ( LoadLanguage  ,
asm   
)

Definition at line 58 of file loadlang_test.cc.

58 {
59 LangLoader("asm", GetParam());
60}

◆ TEST_P() [5/165]

tesseract::TEST_P ( LoadLanguage  ,
aze   
)

Definition at line 61 of file loadlang_test.cc.

61 {
62 LangLoader("aze", GetParam());
63}

◆ TEST_P() [6/165]

tesseract::TEST_P ( LoadLanguage  ,
aze_cyrl   
)

Definition at line 64 of file loadlang_test.cc.

64 {
65 LangLoader("aze_cyrl", GetParam());
66}

◆ TEST_P() [7/165]

tesseract::TEST_P ( LoadLanguage  ,
bel   
)

Definition at line 67 of file loadlang_test.cc.

67 {
68 LangLoader("bel", GetParam());
69}

◆ TEST_P() [8/165]

tesseract::TEST_P ( LoadLanguage  ,
ben   
)

Definition at line 70 of file loadlang_test.cc.

70 {
71 LangLoader("ben", GetParam());
72}

◆ TEST_P() [9/165]

tesseract::TEST_P ( LoadLanguage  ,
bod   
)

Definition at line 73 of file loadlang_test.cc.

73 {
74 LangLoader("bod", GetParam());
75}

◆ TEST_P() [10/165]

tesseract::TEST_P ( LoadLanguage  ,
bos   
)

Definition at line 76 of file loadlang_test.cc.

76 {
77 LangLoader("bos", GetParam());
78}

◆ TEST_P() [11/165]

tesseract::TEST_P ( LoadLanguage  ,
bre   
)

Definition at line 79 of file loadlang_test.cc.

79 {
80 LangLoader("bre", GetParam());
81}

◆ TEST_P() [12/165]

tesseract::TEST_P ( LoadLanguage  ,
bul   
)

Definition at line 82 of file loadlang_test.cc.

82 {
83 LangLoader("bul", GetParam());
84}

◆ TEST_P() [13/165]

tesseract::TEST_P ( LoadLanguage  ,
cat   
)

Definition at line 85 of file loadlang_test.cc.

85 {
86 LangLoader("cat", GetParam());
87}

◆ TEST_P() [14/165]

tesseract::TEST_P ( LoadLanguage  ,
ceb   
)

Definition at line 88 of file loadlang_test.cc.

88 {
89 LangLoader("ceb", GetParam());
90}

◆ TEST_P() [15/165]

tesseract::TEST_P ( LoadLanguage  ,
ces   
)

Definition at line 91 of file loadlang_test.cc.

91 {
92 LangLoader("ces", GetParam());
93}

◆ TEST_P() [16/165]

tesseract::TEST_P ( LoadLanguage  ,
chi_sim   
)

Definition at line 94 of file loadlang_test.cc.

94 {
95 LangLoader("chi_sim", GetParam());
96}

◆ TEST_P() [17/165]

tesseract::TEST_P ( LoadLanguage  ,
chi_sim_vert   
)

Definition at line 97 of file loadlang_test.cc.

97 {
98 LangLoader("chi_sim_vert", GetParam());
99}

◆ TEST_P() [18/165]

tesseract::TEST_P ( LoadLanguage  ,
chi_tra   
)

Definition at line 100 of file loadlang_test.cc.

100 {
101 LangLoader("chi_tra", GetParam());
102}

◆ TEST_P() [19/165]

tesseract::TEST_P ( LoadLanguage  ,
chi_tra_vert   
)

Definition at line 103 of file loadlang_test.cc.

103 {
104 LangLoader("chi_tra_vert", GetParam());
105}

◆ TEST_P() [20/165]

tesseract::TEST_P ( LoadLanguage  ,
chr   
)

Definition at line 106 of file loadlang_test.cc.

106 {
107 LangLoader("chr", GetParam());
108}

◆ TEST_P() [21/165]

tesseract::TEST_P ( LoadLanguage  ,
cos   
)

Definition at line 109 of file loadlang_test.cc.

109 {
110 LangLoader("cos", GetParam());
111}

◆ TEST_P() [22/165]

tesseract::TEST_P ( LoadLanguage  ,
cym   
)

Definition at line 112 of file loadlang_test.cc.

112 {
113 LangLoader("cym", GetParam());
114}

◆ TEST_P() [23/165]

tesseract::TEST_P ( LoadLanguage  ,
dan   
)

Definition at line 115 of file loadlang_test.cc.

115 {
116 LangLoader("dan", GetParam());
117}

◆ TEST_P() [24/165]

tesseract::TEST_P ( LoadLanguage  ,
deu   
)

Definition at line 118 of file loadlang_test.cc.

118 {
119 LangLoader("deu", GetParam());
120}

◆ TEST_P() [25/165]

tesseract::TEST_P ( LoadLanguage  ,
div   
)

Definition at line 121 of file loadlang_test.cc.

121 {
122 LangLoader("div", GetParam());
123}

◆ TEST_P() [26/165]

tesseract::TEST_P ( LoadLanguage  ,
dzo   
)

Definition at line 124 of file loadlang_test.cc.

124 {
125 LangLoader("dzo", GetParam());
126}

◆ TEST_P() [27/165]

tesseract::TEST_P ( LoadLanguage  ,
ell   
)

Definition at line 127 of file loadlang_test.cc.

127 {
128 LangLoader("ell", GetParam());
129}

◆ TEST_P() [28/165]

tesseract::TEST_P ( LoadLanguage  ,
eng   
)

Definition at line 130 of file loadlang_test.cc.

130 {
131 LangLoader("eng", GetParam());
132}

◆ TEST_P() [29/165]

tesseract::TEST_P ( LoadLanguage  ,
enm   
)

Definition at line 133 of file loadlang_test.cc.

133 {
134 LangLoader("enm", GetParam());
135}

◆ TEST_P() [30/165]

tesseract::TEST_P ( LoadLanguage  ,
epo   
)

Definition at line 136 of file loadlang_test.cc.

136 {
137 LangLoader("epo", GetParam());
138}

◆ TEST_P() [31/165]

tesseract::TEST_P ( LoadLanguage  ,
est   
)

Definition at line 139 of file loadlang_test.cc.

139 {
140 LangLoader("est", GetParam());
141}

◆ TEST_P() [32/165]

tesseract::TEST_P ( LoadLanguage  ,
eus   
)

Definition at line 142 of file loadlang_test.cc.

142 {
143 LangLoader("eus", GetParam());
144}

◆ TEST_P() [33/165]

tesseract::TEST_P ( LoadLanguage  ,
fao   
)

Definition at line 145 of file loadlang_test.cc.

145 {
146 LangLoader("fao", GetParam());
147}

◆ TEST_P() [34/165]

tesseract::TEST_P ( LoadLanguage  ,
fas   
)

Definition at line 148 of file loadlang_test.cc.

148 {
149 LangLoader("fas", GetParam());
150}

◆ TEST_P() [35/165]

tesseract::TEST_P ( LoadLanguage  ,
fil   
)

Definition at line 151 of file loadlang_test.cc.

151 {
152 LangLoader("fil", GetParam());
153}

◆ TEST_P() [36/165]

tesseract::TEST_P ( LoadLanguage  ,
fin   
)

Definition at line 154 of file loadlang_test.cc.

154 {
155 LangLoader("fin", GetParam());
156}

◆ TEST_P() [37/165]

tesseract::TEST_P ( LoadLanguage  ,
fra   
)

Definition at line 157 of file loadlang_test.cc.

157 {
158 LangLoader("fra", GetParam());
159}

◆ TEST_P() [38/165]

tesseract::TEST_P ( LoadLanguage  ,
frk   
)

Definition at line 160 of file loadlang_test.cc.

160 {
161 LangLoader("frk", GetParam());
162}

◆ TEST_P() [39/165]

tesseract::TEST_P ( LoadLanguage  ,
frm   
)

Definition at line 163 of file loadlang_test.cc.

163 {
164 LangLoader("frm", GetParam());
165}

◆ TEST_P() [40/165]

tesseract::TEST_P ( LoadLanguage  ,
fry   
)

Definition at line 166 of file loadlang_test.cc.

166 {
167 LangLoader("fry", GetParam());
168}

◆ TEST_P() [41/165]

tesseract::TEST_P ( LoadLanguage  ,
gla   
)

Definition at line 169 of file loadlang_test.cc.

169 {
170 LangLoader("gla", GetParam());
171}

◆ TEST_P() [42/165]

tesseract::TEST_P ( LoadLanguage  ,
gle   
)

Definition at line 172 of file loadlang_test.cc.

172 {
173 LangLoader("gle", GetParam());
174}

◆ TEST_P() [43/165]

tesseract::TEST_P ( LoadLanguage  ,
glg   
)

Definition at line 175 of file loadlang_test.cc.

175 {
176 LangLoader("glg", GetParam());
177}

◆ TEST_P() [44/165]

tesseract::TEST_P ( LoadLanguage  ,
grc   
)

Definition at line 178 of file loadlang_test.cc.

178 {
179 LangLoader("grc", GetParam());
180}

◆ TEST_P() [45/165]

tesseract::TEST_P ( LoadLanguage  ,
guj   
)

Definition at line 181 of file loadlang_test.cc.

181 {
182 LangLoader("guj", GetParam());
183}

◆ TEST_P() [46/165]

tesseract::TEST_P ( LoadLanguage  ,
hat   
)

Definition at line 184 of file loadlang_test.cc.

184 {
185 LangLoader("hat", GetParam());
186}

◆ TEST_P() [47/165]

tesseract::TEST_P ( LoadLanguage  ,
heb   
)

Definition at line 187 of file loadlang_test.cc.

187 {
188 LangLoader("heb", GetParam());
189}

◆ TEST_P() [48/165]

tesseract::TEST_P ( LoadLanguage  ,
hin   
)

Definition at line 190 of file loadlang_test.cc.

190 {
191 LangLoader("hin", GetParam());
192}

◆ TEST_P() [49/165]

tesseract::TEST_P ( LoadLanguage  ,
hrv   
)

Definition at line 193 of file loadlang_test.cc.

193 {
194 LangLoader("hrv", GetParam());
195}

◆ TEST_P() [50/165]

tesseract::TEST_P ( LoadLanguage  ,
hun   
)

Definition at line 196 of file loadlang_test.cc.

196 {
197 LangLoader("hun", GetParam());
198}

◆ TEST_P() [51/165]

tesseract::TEST_P ( LoadLanguage  ,
hye   
)

Definition at line 199 of file loadlang_test.cc.

199 {
200 LangLoader("hye", GetParam());
201}

◆ TEST_P() [52/165]

tesseract::TEST_P ( LoadLanguage  ,
iku   
)

Definition at line 202 of file loadlang_test.cc.

202 {
203 LangLoader("iku", GetParam());
204}

◆ TEST_P() [53/165]

tesseract::TEST_P ( LoadLanguage  ,
ind   
)

Definition at line 205 of file loadlang_test.cc.

205 {
206 LangLoader("ind", GetParam());
207}

◆ TEST_P() [54/165]

tesseract::TEST_P ( LoadLanguage  ,
isl   
)

Definition at line 208 of file loadlang_test.cc.

208 {
209 LangLoader("isl", GetParam());
210}

◆ TEST_P() [55/165]

tesseract::TEST_P ( LoadLanguage  ,
ita   
)

Definition at line 211 of file loadlang_test.cc.

211 {
212 LangLoader("ita", GetParam());
213}

◆ TEST_P() [56/165]

tesseract::TEST_P ( LoadLanguage  ,
ita_old   
)

Definition at line 214 of file loadlang_test.cc.

214 {
215 LangLoader("ita_old", GetParam());
216}

◆ TEST_P() [57/165]

tesseract::TEST_P ( LoadLanguage  ,
jav   
)

Definition at line 217 of file loadlang_test.cc.

217 {
218 LangLoader("jav", GetParam());
219}

◆ TEST_P() [58/165]

tesseract::TEST_P ( LoadLanguage  ,
jpn   
)

Definition at line 220 of file loadlang_test.cc.

220 {
221 LangLoader("jpn", GetParam());
222}

◆ TEST_P() [59/165]

tesseract::TEST_P ( LoadLanguage  ,
jpn_vert   
)

Definition at line 223 of file loadlang_test.cc.

223 {
224 LangLoader("jpn_vert", GetParam());
225}

◆ TEST_P() [60/165]

tesseract::TEST_P ( LoadLanguage  ,
kan   
)

Definition at line 226 of file loadlang_test.cc.

226 {
227 LangLoader("kan", GetParam());
228}

◆ TEST_P() [61/165]

tesseract::TEST_P ( LoadLanguage  ,
kat   
)

Definition at line 229 of file loadlang_test.cc.

229 {
230 LangLoader("kat", GetParam());
231}

◆ TEST_P() [62/165]

tesseract::TEST_P ( LoadLanguage  ,
kat_old   
)

Definition at line 232 of file loadlang_test.cc.

232 {
233 LangLoader("kat_old", GetParam());
234}

◆ TEST_P() [63/165]

tesseract::TEST_P ( LoadLanguage  ,
kaz   
)

Definition at line 235 of file loadlang_test.cc.

235 {
236 LangLoader("kaz", GetParam());
237}

◆ TEST_P() [64/165]

tesseract::TEST_P ( LoadLanguage  ,
khm   
)

Definition at line 238 of file loadlang_test.cc.

238 {
239 LangLoader("khm", GetParam());
240}

◆ TEST_P() [65/165]

tesseract::TEST_P ( LoadLanguage  ,
kir   
)

Definition at line 241 of file loadlang_test.cc.

241 {
242 LangLoader("kir", GetParam());
243}

◆ TEST_P() [66/165]

tesseract::TEST_P ( LoadLanguage  ,
kor   
)

Definition at line 245 of file loadlang_test.cc.

245 {
246 LangLoader("kor", GetParam());
247}

◆ TEST_P() [67/165]

tesseract::TEST_P ( LoadLanguage  ,
kor_vert   
)

Definition at line 248 of file loadlang_test.cc.

248 {
249 LangLoader("kor_vert", GetParam());
250}

◆ TEST_P() [68/165]

tesseract::TEST_P ( LoadLanguage  ,
lao   
)

Definition at line 251 of file loadlang_test.cc.

251 {
252 LangLoader("lao", GetParam());
253}

◆ TEST_P() [69/165]

tesseract::TEST_P ( LoadLanguage  ,
lat   
)

Definition at line 254 of file loadlang_test.cc.

254 {
255 LangLoader("lat", GetParam());
256}

◆ TEST_P() [70/165]

tesseract::TEST_P ( LoadLanguage  ,
lav   
)

Definition at line 257 of file loadlang_test.cc.

257 {
258 LangLoader("lav", GetParam());
259}

◆ TEST_P() [71/165]

tesseract::TEST_P ( LoadLanguage  ,
lit   
)

Definition at line 260 of file loadlang_test.cc.

260 {
261 LangLoader("lit", GetParam());
262}

◆ TEST_P() [72/165]

tesseract::TEST_P ( LoadLanguage  ,
ltz   
)

Definition at line 263 of file loadlang_test.cc.

263 {
264 LangLoader("ltz", GetParam());
265}

◆ TEST_P() [73/165]

tesseract::TEST_P ( LoadLanguage  ,
mal   
)

Definition at line 266 of file loadlang_test.cc.

266 {
267 LangLoader("mal", GetParam());
268}

◆ TEST_P() [74/165]

tesseract::TEST_P ( LoadLanguage  ,
mar   
)

Definition at line 269 of file loadlang_test.cc.

269 {
270 LangLoader("mar", GetParam());
271}

◆ TEST_P() [75/165]

tesseract::TEST_P ( LoadLanguage  ,
mkd   
)

Definition at line 272 of file loadlang_test.cc.

272 {
273 LangLoader("mkd", GetParam());
274}

◆ TEST_P() [76/165]

tesseract::TEST_P ( LoadLanguage  ,
mlt   
)

Definition at line 275 of file loadlang_test.cc.

275 {
276 LangLoader("mlt", GetParam());
277}

◆ TEST_P() [77/165]

tesseract::TEST_P ( LoadLanguage  ,
mon   
)

Definition at line 278 of file loadlang_test.cc.

278 {
279 LangLoader("mon", GetParam());
280}

◆ TEST_P() [78/165]

tesseract::TEST_P ( LoadLanguage  ,
mri   
)

Definition at line 281 of file loadlang_test.cc.

281 {
282 LangLoader("mri", GetParam());
283}

◆ TEST_P() [79/165]

tesseract::TEST_P ( LoadLanguage  ,
msa   
)

Definition at line 284 of file loadlang_test.cc.

284 {
285 LangLoader("msa", GetParam());
286}

◆ TEST_P() [80/165]

tesseract::TEST_P ( LoadLanguage  ,
mya   
)

Definition at line 287 of file loadlang_test.cc.

287 {
288 LangLoader("mya", GetParam());
289}

◆ TEST_P() [81/165]

tesseract::TEST_P ( LoadLanguage  ,
nep   
)

Definition at line 290 of file loadlang_test.cc.

290 {
291 LangLoader("nep", GetParam());
292}

◆ TEST_P() [82/165]

tesseract::TEST_P ( LoadLanguage  ,
nld   
)

Definition at line 293 of file loadlang_test.cc.

293 {
294 LangLoader("nld", GetParam());
295}

◆ TEST_P() [83/165]

tesseract::TEST_P ( LoadLanguage  ,
nor   
)

Definition at line 296 of file loadlang_test.cc.

296 {
297 LangLoader("nor", GetParam());
298}

◆ TEST_P() [84/165]

tesseract::TEST_P ( LoadLanguage  ,
oci   
)

Definition at line 299 of file loadlang_test.cc.

299 {
300 LangLoader("oci", GetParam());
301}

◆ TEST_P() [85/165]

tesseract::TEST_P ( LoadLanguage  ,
ori   
)

Definition at line 302 of file loadlang_test.cc.

302 {
303 LangLoader("ori", GetParam());
304}

◆ TEST_P() [86/165]

tesseract::TEST_P ( LoadLanguage  ,
osd   
)

Definition at line 305 of file loadlang_test.cc.

305 {
306 LangLoader("osd", GetParam());
307}

◆ TEST_P() [87/165]

tesseract::TEST_P ( LoadLanguage  ,
pan   
)

Definition at line 308 of file loadlang_test.cc.

308 {
309 LangLoader("pan", GetParam());
310}

◆ TEST_P() [88/165]

tesseract::TEST_P ( LoadLanguage  ,
pol   
)

Definition at line 311 of file loadlang_test.cc.

311 {
312 LangLoader("pol", GetParam());
313}

◆ TEST_P() [89/165]

tesseract::TEST_P ( LoadLanguage  ,
por   
)

Definition at line 314 of file loadlang_test.cc.

314 {
315 LangLoader("por", GetParam());
316}

◆ TEST_P() [90/165]

tesseract::TEST_P ( LoadLanguage  ,
pus   
)

Definition at line 317 of file loadlang_test.cc.

317 {
318 LangLoader("pus", GetParam());
319}

◆ TEST_P() [91/165]

tesseract::TEST_P ( LoadLanguage  ,
que   
)

Definition at line 320 of file loadlang_test.cc.

320 {
321 LangLoader("que", GetParam());
322}

◆ TEST_P() [92/165]

tesseract::TEST_P ( LoadLanguage  ,
ron   
)

Definition at line 323 of file loadlang_test.cc.

323 {
324 LangLoader("ron", GetParam());
325}

◆ TEST_P() [93/165]

tesseract::TEST_P ( LoadLanguage  ,
rus   
)

Definition at line 326 of file loadlang_test.cc.

326 {
327 LangLoader("rus", GetParam());
328}

◆ TEST_P() [94/165]

tesseract::TEST_P ( LoadLanguage  ,
san   
)

Definition at line 329 of file loadlang_test.cc.

329 {
330 LangLoader("san", GetParam());
331}

◆ TEST_P() [95/165]

tesseract::TEST_P ( LoadLanguage  ,
sin   
)

Definition at line 332 of file loadlang_test.cc.

332 {
333 LangLoader("sin", GetParam());
334}

◆ TEST_P() [96/165]

tesseract::TEST_P ( LoadLanguage  ,
slk   
)

Definition at line 335 of file loadlang_test.cc.

335 {
336 LangLoader("slk", GetParam());
337}

◆ TEST_P() [97/165]

tesseract::TEST_P ( LoadLanguage  ,
slv   
)

Definition at line 338 of file loadlang_test.cc.

338 {
339 LangLoader("slv", GetParam());
340}

◆ TEST_P() [98/165]

tesseract::TEST_P ( LoadLanguage  ,
snd   
)

Definition at line 341 of file loadlang_test.cc.

341 {
342 LangLoader("snd", GetParam());
343}

◆ TEST_P() [99/165]

tesseract::TEST_P ( LoadLanguage  ,
spa   
)

Definition at line 344 of file loadlang_test.cc.

344 {
345 LangLoader("spa", GetParam());
346}

◆ TEST_P() [100/165]

tesseract::TEST_P ( LoadLanguage  ,
spa_old   
)

Definition at line 347 of file loadlang_test.cc.

347 {
348 LangLoader("spa_old", GetParam());
349}

◆ TEST_P() [101/165]

tesseract::TEST_P ( LoadLanguage  ,
sqi   
)

Definition at line 350 of file loadlang_test.cc.

350 {
351 LangLoader("sqi", GetParam());
352}

◆ TEST_P() [102/165]

tesseract::TEST_P ( LoadLanguage  ,
srp   
)

Definition at line 353 of file loadlang_test.cc.

353 {
354 LangLoader("srp", GetParam());
355}

◆ TEST_P() [103/165]

tesseract::TEST_P ( LoadLanguage  ,
srp_latn   
)

Definition at line 356 of file loadlang_test.cc.

356 {
357 LangLoader("srp_latn", GetParam());
358}

◆ TEST_P() [104/165]

tesseract::TEST_P ( LoadLanguage  ,
sun   
)

Definition at line 359 of file loadlang_test.cc.

359 {
360 LangLoader("sun", GetParam());
361}

◆ TEST_P() [105/165]

tesseract::TEST_P ( LoadLanguage  ,
swa   
)

Definition at line 362 of file loadlang_test.cc.

362 {
363 LangLoader("swa", GetParam());
364}

◆ TEST_P() [106/165]

tesseract::TEST_P ( LoadLanguage  ,
swe   
)

Definition at line 365 of file loadlang_test.cc.

365 {
366 LangLoader("swe", GetParam());
367}

◆ TEST_P() [107/165]

tesseract::TEST_P ( LoadLanguage  ,
syr   
)

Definition at line 368 of file loadlang_test.cc.

368 {
369 LangLoader("syr", GetParam());
370}

◆ TEST_P() [108/165]

tesseract::TEST_P ( LoadLanguage  ,
tam   
)

Definition at line 371 of file loadlang_test.cc.

371 {
372 LangLoader("tam", GetParam());
373}

◆ TEST_P() [109/165]

tesseract::TEST_P ( LoadLanguage  ,
tat   
)

Definition at line 374 of file loadlang_test.cc.

374 {
375 LangLoader("tat", GetParam());
376}

◆ TEST_P() [110/165]

tesseract::TEST_P ( LoadLanguage  ,
tel   
)

Definition at line 377 of file loadlang_test.cc.

377 {
378 LangLoader("tel", GetParam());
379}

◆ TEST_P() [111/165]

tesseract::TEST_P ( LoadLanguage  ,
tgk   
)

Definition at line 380 of file loadlang_test.cc.

380 {
381 LangLoader("tgk", GetParam());
382}

◆ TEST_P() [112/165]

tesseract::TEST_P ( LoadLanguage  ,
tha   
)

Definition at line 383 of file loadlang_test.cc.

383 {
384 LangLoader("tha", GetParam());
385}

◆ TEST_P() [113/165]

tesseract::TEST_P ( LoadLanguage  ,
tir   
)

Definition at line 386 of file loadlang_test.cc.

386 {
387 LangLoader("tir", GetParam());
388}

◆ TEST_P() [114/165]

tesseract::TEST_P ( LoadLanguage  ,
ton   
)

Definition at line 389 of file loadlang_test.cc.

389 {
390 LangLoader("ton", GetParam());
391}

◆ TEST_P() [115/165]

tesseract::TEST_P ( LoadLanguage  ,
tur   
)

Definition at line 392 of file loadlang_test.cc.

392 {
393 LangLoader("tur", GetParam());
394}

◆ TEST_P() [116/165]

tesseract::TEST_P ( LoadLanguage  ,
uig   
)

Definition at line 395 of file loadlang_test.cc.

395 {
396 LangLoader("uig", GetParam());
397}

◆ TEST_P() [117/165]

tesseract::TEST_P ( LoadLanguage  ,
ukr   
)

Definition at line 398 of file loadlang_test.cc.

398 {
399 LangLoader("ukr", GetParam());
400}

◆ TEST_P() [118/165]

tesseract::TEST_P ( LoadLanguage  ,
urd   
)

Definition at line 401 of file loadlang_test.cc.

401 {
402 LangLoader("urd", GetParam());
403}

◆ TEST_P() [119/165]

tesseract::TEST_P ( LoadLanguage  ,
uzb   
)

Definition at line 404 of file loadlang_test.cc.

404 {
405 LangLoader("uzb", GetParam());
406}

◆ TEST_P() [120/165]

tesseract::TEST_P ( LoadLanguage  ,
uzb_cyrl   
)

Definition at line 407 of file loadlang_test.cc.

407 {
408 LangLoader("uzb_cyrl", GetParam());
409}

◆ TEST_P() [121/165]

tesseract::TEST_P ( LoadLanguage  ,
vie   
)

Definition at line 410 of file loadlang_test.cc.

410 {
411 LangLoader("vie", GetParam());
412}

◆ TEST_P() [122/165]

tesseract::TEST_P ( LoadLanguage  ,
yid   
)

Definition at line 413 of file loadlang_test.cc.

413 {
414 LangLoader("yid", GetParam());
415}

◆ TEST_P() [123/165]

tesseract::TEST_P ( LoadLanguage  ,
yor   
)

Definition at line 416 of file loadlang_test.cc.

416 {
417 LangLoader("yor", GetParam());
418}

◆ TEST_P() [124/165]

tesseract::TEST_P ( LoadScript  ,
Arabic   
)

Definition at line 430 of file loadlang_test.cc.

430 {
431 LangLoader("script/Arabic", GetParam());
432}

◆ TEST_P() [125/165]

tesseract::TEST_P ( LoadScript  ,
Armenian   
)

Definition at line 433 of file loadlang_test.cc.

433 {
434 LangLoader("script/Armenian", GetParam());
435}

◆ TEST_P() [126/165]

tesseract::TEST_P ( LoadScript  ,
Bengali   
)

Definition at line 436 of file loadlang_test.cc.

436 {
437 LangLoader("script/Bengali", GetParam());
438}

◆ TEST_P() [127/165]

tesseract::TEST_P ( LoadScript  ,
Canadian_Aboriginal   
)

Definition at line 439 of file loadlang_test.cc.

439 {
440 LangLoader("script/Canadian_Aboriginal", GetParam());
441}

◆ TEST_P() [128/165]

tesseract::TEST_P ( LoadScript  ,
Cherokee   
)

Definition at line 442 of file loadlang_test.cc.

442 {
443 LangLoader("script/Cherokee", GetParam());
444}

◆ TEST_P() [129/165]

tesseract::TEST_P ( LoadScript  ,
Cyrillic   
)

Definition at line 445 of file loadlang_test.cc.

445 {
446 LangLoader("script/Cyrillic", GetParam());
447}

◆ TEST_P() [130/165]

tesseract::TEST_P ( LoadScript  ,
Devanagari   
)

Definition at line 448 of file loadlang_test.cc.

448 {
449 LangLoader("script/Devanagari", GetParam());
450}

◆ TEST_P() [131/165]

tesseract::TEST_P ( LoadScript  ,
Ethiopic   
)

Definition at line 451 of file loadlang_test.cc.

451 {
452 LangLoader("script/Ethiopic", GetParam());
453}

◆ TEST_P() [132/165]

tesseract::TEST_P ( LoadScript  ,
Fraktur   
)

Definition at line 454 of file loadlang_test.cc.

454 {
455 LangLoader("script/Fraktur", GetParam());
456}

◆ TEST_P() [133/165]

tesseract::TEST_P ( LoadScript  ,
Georgian   
)

Definition at line 457 of file loadlang_test.cc.

457 {
458 LangLoader("script/Georgian", GetParam());
459}

◆ TEST_P() [134/165]

tesseract::TEST_P ( LoadScript  ,
Greek   
)

Definition at line 460 of file loadlang_test.cc.

460 {
461 LangLoader("script/Greek", GetParam());
462}

◆ TEST_P() [135/165]

tesseract::TEST_P ( LoadScript  ,
Gujarati   
)

Definition at line 463 of file loadlang_test.cc.

463 {
464 LangLoader("script/Gujarati", GetParam());
465}

◆ TEST_P() [136/165]

tesseract::TEST_P ( LoadScript  ,
Gurmukhi   
)

Definition at line 466 of file loadlang_test.cc.

466 {
467 LangLoader("script/Gurmukhi", GetParam());
468}

◆ TEST_P() [137/165]

tesseract::TEST_P ( LoadScript  ,
Hangul   
)

Definition at line 481 of file loadlang_test.cc.

481 {
482 LangLoader("script/Hangul", GetParam());
483}

◆ TEST_P() [138/165]

tesseract::TEST_P ( LoadScript  ,
Hangul_vert   
)

Definition at line 484 of file loadlang_test.cc.

484 {
485 LangLoader("script/Hangul_vert", GetParam());
486}

◆ TEST_P() [139/165]

tesseract::TEST_P ( LoadScript  ,
HanS   
)

Definition at line 469 of file loadlang_test.cc.

469 {
470 LangLoader("script/HanS", GetParam());
471}

◆ TEST_P() [140/165]

tesseract::TEST_P ( LoadScript  ,
HanS_vert   
)

Definition at line 472 of file loadlang_test.cc.

472 {
473 LangLoader("script/HanS_vert", GetParam());
474}

◆ TEST_P() [141/165]

tesseract::TEST_P ( LoadScript  ,
HanT   
)

Definition at line 475 of file loadlang_test.cc.

475 {
476 LangLoader("script/HanT", GetParam());
477}

◆ TEST_P() [142/165]

tesseract::TEST_P ( LoadScript  ,
HanT_vert   
)

Definition at line 478 of file loadlang_test.cc.

478 {
479 LangLoader("script/HanT_vert", GetParam());
480}

◆ TEST_P() [143/165]

tesseract::TEST_P ( LoadScript  ,
Hebrew   
)

Definition at line 487 of file loadlang_test.cc.

487 {
488 LangLoader("script/Hebrew", GetParam());
489}

◆ TEST_P() [144/165]

tesseract::TEST_P ( LoadScript  ,
Japanese   
)

Definition at line 490 of file loadlang_test.cc.

490 {
491 LangLoader("script/Japanese", GetParam());
492}

◆ TEST_P() [145/165]

tesseract::TEST_P ( LoadScript  ,
Japanese_vert   
)

Definition at line 493 of file loadlang_test.cc.

493 {
494 LangLoader("script/Japanese_vert", GetParam());
495}

◆ TEST_P() [146/165]

tesseract::TEST_P ( LoadScript  ,
Kannada   
)

Definition at line 496 of file loadlang_test.cc.

496 {
497 LangLoader("script/Kannada", GetParam());
498}

◆ TEST_P() [147/165]

tesseract::TEST_P ( LoadScript  ,
Khmer   
)

Definition at line 499 of file loadlang_test.cc.

499 {
500 LangLoader("script/Khmer", GetParam());
501}

◆ TEST_P() [148/165]

tesseract::TEST_P ( LoadScript  ,
Lao   
)

Definition at line 502 of file loadlang_test.cc.

502 {
503 LangLoader("script/Lao", GetParam());
504}

◆ TEST_P() [149/165]

tesseract::TEST_P ( LoadScript  ,
Latin   
)

Definition at line 505 of file loadlang_test.cc.

505 {
506 LangLoader("script/Latin", GetParam());
507}

◆ TEST_P() [150/165]

tesseract::TEST_P ( LoadScript  ,
Malayalam   
)

Definition at line 508 of file loadlang_test.cc.

508 {
509 LangLoader("script/Malayalam", GetParam());
510}

◆ TEST_P() [151/165]

tesseract::TEST_P ( LoadScript  ,
Myanmar   
)

Definition at line 511 of file loadlang_test.cc.

511 {
512 LangLoader("script/Myanmar", GetParam());
513}

◆ TEST_P() [152/165]

tesseract::TEST_P ( LoadScript  ,
Oriya   
)

Definition at line 514 of file loadlang_test.cc.

514 {
515 LangLoader("script/Oriya", GetParam());
516}

◆ TEST_P() [153/165]

tesseract::TEST_P ( LoadScript  ,
Sinhala   
)

Definition at line 517 of file loadlang_test.cc.

517 {
518 LangLoader("script/Sinhala", GetParam());
519}

◆ TEST_P() [154/165]

tesseract::TEST_P ( LoadScript  ,
Syriac   
)

Definition at line 520 of file loadlang_test.cc.

520 {
521 LangLoader("script/Syriac", GetParam());
522}

◆ TEST_P() [155/165]

tesseract::TEST_P ( LoadScript  ,
Tamil   
)

Definition at line 523 of file loadlang_test.cc.

523 {
524 LangLoader("script/Tamil", GetParam());
525}

◆ TEST_P() [156/165]

tesseract::TEST_P ( LoadScript  ,
Telugu   
)

Definition at line 526 of file loadlang_test.cc.

526 {
527 LangLoader("script/Telugu", GetParam());
528}

◆ TEST_P() [157/165]

tesseract::TEST_P ( LoadScript  ,
Thaana   
)

Definition at line 529 of file loadlang_test.cc.

529 {
530 LangLoader("script/Thaana", GetParam());
531}

◆ TEST_P() [158/165]

tesseract::TEST_P ( LoadScript  ,
Thai   
)

Definition at line 532 of file loadlang_test.cc.

532 {
533 LangLoader("script/Thai", GetParam());
534}

◆ TEST_P() [159/165]

tesseract::TEST_P ( LoadScript  ,
Tibetan   
)

Definition at line 535 of file loadlang_test.cc.

535 {
536 LangLoader("script/Tibetan", GetParam());
537}

◆ TEST_P() [160/165]

tesseract::TEST_P ( LoadScript  ,
Vietnamese   
)

Definition at line 538 of file loadlang_test.cc.

538 {
539 LangLoader("script/Vietnamese", GetParam());
540}

◆ TEST_P() [161/165]

tesseract::TEST_P ( MatchGroundTruth  ,
BestPhototestOCR   
)

Definition at line 89 of file apiexample_test.cc.

89 {
90 OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_best",
91 GetParam());
92}

◆ TEST_P() [162/165]

tesseract::TEST_P ( MatchGroundTruth  ,
FastPhototestOCR   
)

Definition at line 84 of file apiexample_test.cc.

84 {
85 OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_fast",
86 GetParam());
87}

◆ TEST_P() [163/165]

tesseract::TEST_P ( MatchGroundTruth  ,
TessPhototestOCR   
)

Definition at line 94 of file apiexample_test.cc.

94 {
95 OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR, GetParam());
96}

◆ TEST_P() [164/165]

tesseract::TEST_P ( OSDTest  ,
MatchOrientationDegrees   
)

Definition at line 65 of file osd_test.cc.

65 {
66#ifdef DISABLED_LEGACY_ENGINE
67 // Skip test because TessBaseAPI::DetectOrientationScript is missing.
68 GTEST_SKIP();
69#else
70 OSDTester(std::get<0>(GetParam()), std::get<1>(GetParam()), std::get<2>(GetParam()));
71#endif
72}

◆ TEST_P() [165/165]

tesseract::TEST_P ( QRSequenceGeneratorTest  ,
GeneratesValidSequence   
)

Definition at line 47 of file qrsequence_test.cc.

47 {
48 const int kRangeSize = GetParam();
49 TestableQRSequenceGenerator generator(kRangeSize);
50 std::vector<int> vals(kRangeSize);
51 CycleTimer timer;
52 timer.Restart();
53 for (int i = 0; i < kRangeSize; ++i) {
54 vals[i] = generator.GetVal();
55 }
56 LOG(INFO) << kRangeSize << "-length sequence took " << timer.GetInMs() << "ms";
57 // Sort the numbers to verify that we've covered the range without repetition.
58 std::sort(vals.begin(), vals.end());
59 for (int i = 0; i < kRangeSize; ++i) {
60 EXPECT_EQ(i, vals[i]);
61 if (i != vals[i]) {
62 LOG(INFO) << "Aborting remaining comparisons";
63 break;
64 }
65 }
66}

◆ test_underline()

bool tesseract::test_underline ( bool  testing_on,
C_BLOB * blob,
int16_t  baseline,
int16_t  xheight 
)

test_underline

Check to see if the blob is an underline. Return true if it is.

Parameters
testing_on    drawing blob
blob          blob to test
baseline      coords of baseline
xheight       height of line

Definition at line 47 of file blkocc.cpp.

52 {
53 TDimension occ;
54 STATS projection;
55
56 auto blob_box = blob->bounding_box();
57 auto blob_width = blob->bounding_box().width();
58 projection.set_range(blob_box.bottom(), blob_box.top());
59 if (testing_on) {
60 // blob->plot(to_win,GOLDENROD,GOLDENROD);
61 // line_color_index(to_win,GOLDENROD);
62 // move2d(to_win,blob_box.left(),baseline);
63 // draw2d(to_win,blob_box.right(),baseline);
64 // move2d(to_win,blob_box.left(),baseline+xheight);
65 // draw2d(to_win,blob_box.right(),baseline+xheight);
66 tprintf("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:",
67 blob->bounding_box().left(), blob->bounding_box().bottom(),
68 blob->bounding_box().right(), blob->bounding_box().top(), baseline);
69 }
70 horizontal_cblob_projection(blob, &projection);
71 int32_t desc_occ = 0;
72 for (occ = blob_box.bottom(); occ < baseline; occ++) {
73 if (occ <= blob_box.top() && projection.pile_count(occ) > desc_occ) {
74 // max in region
75 desc_occ = projection.pile_count(occ);
76 }
77 }
78 int32_t x_occ = 0;
79 for (occ = baseline; occ <= baseline + xheight; occ++) {
80 if (occ >= blob_box.bottom() && occ <= blob_box.top() && projection.pile_count(occ) > x_occ) {
81 // max in region
82 x_occ = projection.pile_count(occ);
83 }
84 }
85 int32_t asc_occ = 0;
86 for (occ = baseline + xheight + 1; occ <= blob_box.top(); occ++) {
87 if (occ >= blob_box.bottom() && projection.pile_count(occ) > asc_occ) {
88 asc_occ = projection.pile_count(occ);
89 }
90 }
91 if (testing_on) {
92 tprintf("%d %d %d\n", desc_occ, x_occ, asc_occ);
93 }
94 if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) {
95 tprintf("Bottom=%d, top=%d, base=%d, x=%d\n", blob_box.bottom(), blob_box.top(), baseline,
96 xheight);
97 projection.print();
98 }
99 if (desc_occ > x_occ + x_occ && desc_occ > blob_width * textord_underline_threshold) {
100 return true; // real underline
101 }
102 return asc_occ > x_occ + x_occ && asc_occ > blob_width * textord_underline_threshold; // overline
103 // neither
104}
double textord_underline_threshold
Definition: blkocc.cpp:32
int16_t TDimension
Definition: tesstypes.h:32

◆ TestDataNameToPath()

std::string tesseract::TestDataNameToPath ( const std::string &  name)

Definition at line 24 of file lang_model_test.cc.

24 {
25 return file::JoinPath(TESTING_DIR, name);
26}

◆ TestParagraphDetection()

void tesseract::TestParagraphDetection ( const TextAndModel correct,
int  num_rows 
)

Definition at line 191 of file paragraphs_test.cc.

191 {
192 std::vector<RowInfo> row_infos;
193 std::vector<PARA *> row_owners;
194 PARA_LIST paragraphs;
195 std::vector<ParagraphModel *> models;
196
197 MakeAsciiRowInfos(correct, num_rows, &row_infos);
198 int debug_level(3);
199 tesseract::DetectParagraphs(debug_level, &row_infos, &row_owners, &paragraphs, &models);
200 EvaluateParagraphDetection(correct, num_rows, row_owners);
201 for (auto *model : models) {
202 delete model;
203 }
204}

◆ tprintf()

TESS_API void tesseract::tprintf ( const char *  format,
  ... 
)

Definition at line 41 of file tprintf.cpp.

41 {
42 const char *debug_file_name = debug_file.c_str();
43 static FILE *debugfp = nullptr; // debug file
44
45 if (debug_file_name == nullptr) {
46 // This should not happen.
47 return;
48 }
49
50#ifdef _WIN32
51 // Replace /dev/null by nul for Windows.
52 if (strcmp(debug_file_name, "/dev/null") == 0) {
53 debug_file_name = "nul";
54 debug_file.set_value(debug_file_name);
55 }
56#endif
57
58 if (debugfp == nullptr && debug_file_name[0] != '\0') {
59 debugfp = fopen(debug_file_name, "wb");
60 } else if (debugfp != nullptr && debug_file_name[0] == '\0') {
61 fclose(debugfp);
62 debugfp = nullptr;
63 }
64
65 va_list args; // variable args
66 va_start(args, format); // variable list
67 if (debugfp != nullptr) {
68 vfprintf(debugfp, format, args);
69 } else {
70 vfprintf(stderr, format, args);
71 }
72 va_end(args);
73}

◆ TraceBlockOnReducedPix()

Image tesseract::TraceBlockOnReducedPix ( BLOCK * block,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

Definition at line 250 of file bbgrid.cpp.

250 {
251 const TBOX &box = block->pdblk.bounding_box();
252 Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
253 int wpl = pixGetWpl(pix);
254 l_uint32 *data = pixGetData(pix);
255 ICOORDELT_IT it(block->pdblk.poly_block()->points());
256 for (it.mark_cycle_pt(); !it.cycled_list();) {
257 ICOORD pos = *it.data();
258 it.forward();
259 ICOORD next_pos = *it.data();
260 ICOORD line_vector = next_pos - pos;
261 int major, minor;
262 ICOORD major_step, minor_step;
263 line_vector.setup_render(&major_step, &minor_step, &major, &minor);
264 int accumulator = major / 2;
265 while (pos != next_pos) {
266 int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
267 int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
268 SET_DATA_BIT(data + grid_y * wpl, grid_x);
269 pos += major_step;
270 accumulator += minor;
271 if (accumulator >= major) {
272 accumulator -= major;
273 pos += minor_step;
274 }
275 }
276 }
277 return pix;
278}
void setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const
Definition: points.cpp:99
ICOORDELT_LIST * points()
Definition: polyblk.h:42

◆ TraceOutlineOnReducedPix()

Image tesseract::TraceOutlineOnReducedPix ( C_OUTLINE * outline,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

Definition at line 224 of file bbgrid.cpp.

225 {
226 const TBOX &box = outline->bounding_box();
227 Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
228 int wpl = pixGetWpl(pix);
229 l_uint32 *data = pixGetData(pix);
230 int length = outline->pathlength();
231 ICOORD pos = outline->start_pos();
232 for (int i = 0; i < length; ++i) {
233 int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
234 int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
235 SET_DATA_BIT(data + grid_y * wpl, grid_x);
236 pos += outline->step(i);
237 }
238 return pix;
239}

◆ transform_to_next_perm()

void tesseract::transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 391 of file fixspace.cpp.

391 {
392 WERD_RES_IT word_it(&words);
393 WERD_RES_IT prev_word_it(&words);
394 WERD_RES *word;
395 WERD_RES *prev_word;
396 WERD_RES *combo;
397 WERD *copy_word;
398 int16_t prev_right = -INT16_MAX;
399 TBOX box;
400 int16_t gap;
401 int16_t min_gap = INT16_MAX;
402
403 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
404 word = word_it.data();
405 if (!word->part_of_combo) {
406 box = word->word->bounding_box();
407 if (prev_right > -INT16_MAX) {
408 gap = box.left() - prev_right;
409 if (gap < min_gap) {
410 min_gap = gap;
411 }
412 }
413 prev_right = box.right();
414 }
415 }
416 if (min_gap < INT16_MAX) {
417 prev_right = -INT16_MAX; // back to start
418 word_it.set_to_list(&words);
419 // Note: we can't use cycle_pt due to inserted combos at start of list.
420 for (; (prev_right == -INT16_MAX) || !word_it.at_first(); word_it.forward()) {
421 word = word_it.data();
422 if (!word->part_of_combo) {
423 box = word->word->bounding_box();
424 if (prev_right > -INT16_MAX) {
425 gap = box.left() - prev_right;
426 if (gap <= min_gap) {
427 prev_word = prev_word_it.data();
428 if (prev_word->combination) {
429 combo = prev_word;
430 } else {
431 /* Make a new combination and insert before
432 * the first word being joined. */
433 copy_word = new WERD;
434 *copy_word = *(prev_word->word);
435 // deep copy
436 combo = new WERD_RES(copy_word);
437 combo->combination = true;
438 combo->x_height = prev_word->x_height;
439 prev_word->part_of_combo = true;
440 prev_word_it.add_before_then_move(combo);
441 }
442 combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
443 if (word->combination) {
444 combo->word->join_on(word->word);
445 // Move blobs to combo
446 // old combo no longer needed
447 delete word_it.extract();
448 } else {
449 // Copy current wd to combo
450 combo->copy_on(word);
451 word->part_of_combo = true;
452 }
453 combo->done = false;
454 combo->ClearResults();
455 } else {
456 prev_word_it = word_it; // catch up
457 }
458 }
459 prev_right = box.right();
460 }
461 }
462 } else {
463 words.clear(); // signal termination
464 }
465}
void copy_on(WERD_RES *word_res)
Definition: pageres.h:667
void join_on(WERD *other)
Definition: werd.cpp:208

◆ try_block_fixed()

bool tesseract::try_block_fixed ( TO_BLOCK block,
int32_t  block_index 
)

Definition at line 502 of file topitch.cpp.

505 {
506 return false;
507}

◆ try_doc_fixed()

bool tesseract::try_doc_fixed ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient 
)

Definition at line 371 of file topitch.cpp.

375 {
376 int16_t master_x; // uniform shifts
377 int16_t pitch; // median pitch.
378 int x; // profile coord
379 int prop_blocks; // correct counts
380 int fixed_blocks;
381 int total_row_count; // total in page
382 // iterator
383 TO_BLOCK_IT block_it = port_blocks;
384 TO_BLOCK *block; // current block;
385 TO_ROW *row; // current row
386 int16_t projection_left; // edges
387 int16_t projection_right;
388 int16_t row_left; // edges of row
389 int16_t row_right;
390 float master_y; // uniform shifts
391 float shift_factor; // page skew correction
392 float final_pitch; // output pitch
393 float row_y; // baseline
394 STATS projection; // entire page
395 STATS pitches(0, MAX_ALLOWED_PITCH - 1);
396 // for median
397 float sp_sd; // space sd
398 int16_t mid_cuts; // no of cheap cuts
399 float pitch_sd; // sync rating
400
402 block_it.empty() || block_it.data()->get_rows()->empty()) {
403 return false;
404 }
405 shift_factor = gradient / (gradient * gradient + 1);
406 // row iterator
407 TO_ROW_IT row_it(block_it.data()->get_rows());
408 master_x = row_it.data()->projection_left;
409 master_y = row_it.data()->baseline.y(master_x);
410 projection_left = INT16_MAX;
411 projection_right = -INT16_MAX;
412 prop_blocks = 0;
413 fixed_blocks = 0;
414 total_row_count = 0;
415
416 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
417 block = block_it.data();
418 row_it.set_to_list(block->get_rows());
419 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
420 row = row_it.data();
421 total_row_count++;
422 if (row->fixed_pitch > 0) {
423 pitches.add(static_cast<int32_t>(row->fixed_pitch), 1);
424 }
425 // find median
426 row_y = row->baseline.y(master_x);
427 row_left = static_cast<int16_t>(row->projection_left - shift_factor * (master_y - row_y));
428 row_right = static_cast<int16_t>(row->projection_right - shift_factor * (master_y - row_y));
429 if (row_left < projection_left) {
430 projection_left = row_left;
431 }
432 if (row_right > projection_right) {
433 projection_right = row_right;
434 }
435 }
436 }
437 if (pitches.get_total() == 0) {
438 return false;
439 }
440 projection.set_range(projection_left, projection_right - 1);
441
442 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
443 block = block_it.data();
444 row_it.set_to_list(block->get_rows());
445 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
446 row = row_it.data();
447 row_y = row->baseline.y(master_x);
448 row_left = static_cast<int16_t>(row->projection_left - shift_factor * (master_y - row_y));
449 for (x = row->projection_left; x < row->projection_right; x++, row_left++) {
450 projection.add(row_left, row->projection.pile_count(x));
451 }
452 }
453 }
454
455 row_it.set_to_list(block_it.data()->get_rows());
456 row = row_it.data();
457#ifndef GRAPHICS_DISABLED
458 if (textord_show_page_cuts && to_win != nullptr) {
459 projection.plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
460 }
461#endif
462 final_pitch = pitches.ile(0.5);
463 pitch = static_cast<int16_t>(final_pitch);
464 pitch_sd = tune_row_pitch(row, &projection, projection_left, projection_right, pitch * 0.75,
465 final_pitch, sp_sd, mid_cuts, &row->char_cells, false);
466
468 tprintf(
469 "try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%"
470 "g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
471 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, pitch_sd / total_row_count,
472 pitch_sd / pitch, pitch_sd / total_row_count / pitch);
473 }
474
475#ifndef GRAPHICS_DISABLED
476 if (textord_show_page_cuts && to_win != nullptr) {
477 float row_shift; // shift for row
478 ICOORDELT_LIST *master_cells; // cells for page
479 master_cells = &row->char_cells;
480 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
481 block = block_it.data();
482 row_it.set_to_list(block->get_rows());
483 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
484 row = row_it.data();
485 row_y = row->baseline.y(master_x);
486 row_shift = shift_factor * (master_y - row_y);
487 plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
488 }
489 }
490 }
491#endif
492 row->char_cells.clear();
493 return false;
494}
#define MAX_ALLOWED_PITCH
Definition: topitch.cpp:53
bool textord_blockndoc_fixed
Definition: topitch.cpp:48

◆ try_rows_fixed()

bool tesseract::try_rows_fixed ( TO_BLOCK * block,
int32_t  block_index,
bool  testing_on 
)

Definition at line 515 of file topitch.cpp.

519 {
520 TO_ROW *row; // current row
521 int32_t row_index; // row number.
522 int32_t def_fixed = 0; // counters
523 int32_t def_prop = 0;
524 int32_t maybe_fixed = 0;
525 int32_t maybe_prop = 0;
526 int32_t dunno = 0;
527 int32_t corr_fixed = 0;
528 int32_t corr_prop = 0;
529 float lower, upper; // cluster thresholds
530 TO_ROW_IT row_it = block->get_rows();
531
532 row_index = 1;
533 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
534 row = row_it.data();
535 ASSERT_HOST(row->xheight > 0);
536 if (row->fixed_pitch > 0 && fixed_pitch_row(row, block->block, block_index)) {
537 if (row->fixed_pitch == 0) {
538 lower = row->pr_nonsp;
539 upper = row->pr_space;
540 row->space_size = upper;
541 row->kern_size = lower;
542 }
543 }
544 row_index++;
545 }
546 count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
547 dunno);
548 if (testing_on &&
550 tprintf("Initially:");
551 print_block_counts(block, block_index);
552 }
553 if (def_fixed > def_prop * textord_words_veto_power) {
555 } else if (def_prop > def_fixed * textord_words_veto_power) {
557 } else if (def_fixed > 0 || def_prop > 0) {
559 } else if (maybe_fixed > maybe_prop * textord_words_veto_power) {
561 } else if (maybe_prop > maybe_fixed * textord_words_veto_power) {
563 } else {
565 }
566 return false;
567}
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
Definition: topitch.cpp:931
void print_block_counts(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:575
PITCH_TYPE pitch_decision
Definition: blobbox.h:782

◆ tune_row_pitch()

float tesseract::tune_row_pitch ( TO_ROW * row,
STATS * projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
int16_t &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
bool  testing_on 
)

Definition at line 1097 of file topitch.cpp.

1108 {
1109 int pitch_delta; // offset pitch
1110 int16_t mid_cuts; // cheap cuts
1111 float pitch_sd; // current sd
1112 float best_sd; // best result
1113 float best_pitch; // pitch for best result
1114 float initial_sd; // starting error
1115 float sp_sd; // space sd
1116 ICOORDELT_LIST test_cells; // row cells
1117 ICOORDELT_IT best_it; // start of best list
1118
1120 return tune_row_pitch2(row, projection, projection_left, projection_right, space_size,
1121 initial_pitch, best_sp_sd,
1122 // space sd
1123 best_mid_cuts, best_cells, testing_on);
1124 }
1125 if (textord_disable_pitch_test) {
1126 best_sp_sd = initial_pitch;
1127 return initial_pitch;
1128 }
1129 initial_sd = compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1130 initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on);
1131 best_sd = initial_sd;
1132 best_pitch = initial_pitch;
1133 if (testing_on) {
1134 tprintf("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1135 }
1136 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1137 pitch_sd =
1138 compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1139 initial_pitch + pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
1140 if (testing_on) {
1141 tprintf("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, pitch_sd);
1142 }
1143 if (pitch_sd < best_sd) {
1144 best_sd = pitch_sd;
1145 best_mid_cuts = mid_cuts;
1146 best_sp_sd = sp_sd;
1147 best_pitch = initial_pitch + pitch_delta;
1148 best_cells->clear();
1149 best_it.set_to_list(best_cells);
1150 best_it.add_list_after(&test_cells);
1151 } else {
1152 test_cells.clear();
1153 }
1154 if (pitch_sd > initial_sd) {
1155 break; // getting worse
1156 }
1157 }
1158 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1159 pitch_sd =
1160 compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1161 initial_pitch - pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
1162 if (testing_on) {
1163 tprintf("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, pitch_sd);
1164 }
1165 if (pitch_sd < best_sd) {
1166 best_sd = pitch_sd;
1167 best_mid_cuts = mid_cuts;
1168 best_sp_sd = sp_sd;
1169 best_pitch = initial_pitch - pitch_delta;
1170 best_cells->clear();
1171 best_it.set_to_list(best_cells);
1172 best_it.add_list_after(&test_cells);
1173 } else {
1174 test_cells.clear();
1175 }
1176 if (pitch_sd > initial_sd) {
1177 break;
1178 }
1179 }
1180 initial_pitch = best_pitch;
1181
1183 print_pitch_sd(row, projection, projection_left, projection_right, space_size, best_pitch);
1184 }
1185
1186 return best_sd;
1187}
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1289
int textord_pitch_range
Definition: tovars.cpp:30
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
Definition: topitch.cpp:1535
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1196

◆ tune_row_pitch2()

float tesseract::tune_row_pitch2 ( TO_ROW * row,
STATS * projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
int16_t &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
bool  testing_on 
)

Definition at line 1196 of file topitch.cpp.

1207 {
1208 int pitch_delta; // offset pitch
1209 int16_t pixel; // pixel coord
1210 int16_t best_pixel; // pixel coord
1211 int16_t best_delta; // best pitch
1212 int16_t best_pitch; // best pitch
1213 int16_t start; // of good range
1214 int16_t end; // of good range
1215 int32_t best_count; // lowest sum
1216 float best_sd; // best result
1217
1218 best_sp_sd = initial_pitch;
1219
1220 best_pitch = static_cast<int>(initial_pitch);
1221 if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
1222 return initial_pitch;
1223 }
1224 std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); // summed projection
1225
1226 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
1227 sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta);
1228 }
1229 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1230 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
1231 sum_proj[textord_pitch_range + pitch_delta].add(
1232 (pixel - projection_left) % (best_pitch + pitch_delta), projection->pile_count(pixel));
1233 }
1234 }
1235 best_count = sum_proj[textord_pitch_range].pile_count(0);
1236 best_delta = 0;
1237 best_pixel = 0;
1238 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
1239 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1240 if (sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel) < best_count) {
1241 best_count = sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel);
1242 best_delta = pitch_delta;
1243 best_pixel = pixel;
1244 }
1245 }
1246 }
1247 if (testing_on) {
1248 tprintf("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", initial_pitch, best_delta,
1249 best_count);
1250 }
1251 best_pitch += best_delta;
1252 initial_pitch = best_pitch;
1253 best_count++;
1254 best_count += best_count;
1255 for (start = best_pixel - 2;
1256 start > best_pixel - best_pitch &&
1257 sum_proj[textord_pitch_range + best_delta].pile_count(start % best_pitch) <= best_count;
1258 start--) {
1259 ;
1260 }
1261 for (end = best_pixel + 2;
1262 end < best_pixel + best_pitch &&
1263 sum_proj[textord_pitch_range + best_delta].pile_count(end % best_pitch) <= best_count;
1264 end++) {
1265 ;
1266 }
1267
1268 best_sd = compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1269 initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on,
1270 start, end);
1271 if (testing_on) {
1272 tprintf("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, best_sd);
1273 }
1274
1276 print_pitch_sd(row, projection, projection_left, projection_right, space_size, initial_pitch);
1277 }
1278
1279 return best_sd;
1280}

◆ tweak_row_baseline()

void tesseract::tweak_row_baseline ( ROW row,
double  blshift_maxshift,
double  blshift_xfraction 
)

Definition at line 864 of file tordmain.cpp.

864 {
865 TBOX blob_box; // bounding box
866 C_BLOB *blob; // current blob
867 WERD *word; // current word
868 int32_t blob_count; // no of blobs
869 int32_t src_index; // source segment
870 int32_t dest_index; // destination segment
871 float ydiff; // baseline error
872 float x_centre; // centre of blob
873 // words of row
874 WERD_IT word_it = row->word_list();
875 C_BLOB_IT blob_it; // blob iterator
876
877 blob_count = 0;
878 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
879 word = word_it.data(); // current word
880 // get total blobs
881 blob_count += word->cblob_list()->length();
882 }
883 if (blob_count == 0) {
884 return;
885 }
886 // spline segments
887 std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1);
888 // spline coeffs
889 std::vector<double> coeffs((blob_count + row->baseline.segments) * 3);
890
891 src_index = 0;
892 dest_index = 0;
893 xstarts[0] = row->baseline.xcoords[0];
894 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
895 word = word_it.data(); // current word
896 // blobs in word
897 blob_it.set_to_list(word->cblob_list());
898 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
899 blob = blob_it.data();
900 blob_box = blob->bounding_box();
901 x_centre = (blob_box.left() + blob_box.right()) / 2.0;
902 ydiff = blob_box.bottom() - row->base_line(x_centre);
903 if (ydiff < 0) {
904 ydiff = -ydiff / row->x_height();
905 } else {
906 ydiff = ydiff / row->x_height();
907 }
908 if (ydiff < blshift_maxshift && blob_box.height() / row->x_height() > blshift_xfraction) {
909 if (xstarts[dest_index] >= x_centre) {
910 xstarts[dest_index] = blob_box.left();
911 }
912 coeffs[dest_index * 3] = 0;
913 coeffs[dest_index * 3 + 1] = 0;
914 coeffs[dest_index * 3 + 2] = blob_box.bottom();
915 // shift it
916 dest_index++;
917 xstarts[dest_index] = blob_box.right() + 1;
918 } else {
919 if (xstarts[dest_index] <= x_centre) {
920 while (row->baseline.xcoords[src_index + 1] <= x_centre &&
921 src_index < row->baseline.segments - 1) {
922 if (row->baseline.xcoords[src_index + 1] > xstarts[dest_index]) {
923 coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
924 coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
925 coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
926 dest_index++;
927 xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
928 }
929 src_index++;
930 }
931 coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
932 coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
933 coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
934 dest_index++;
935 xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
936 }
937 }
938 }
939 }
940 while (src_index < row->baseline.segments &&
941 row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) {
942 src_index++;
943 }
944 while (src_index < row->baseline.segments) {
945 coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
946 coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
947 coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
948 dest_index++;
949 src_index++;
950 xstarts[dest_index] = row->baseline.xcoords[src_index];
951 }
952 // turn to spline
953 row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]);
954}
float x_height() const
Definition: ocrrow.h:66
float base_line(float xpos) const
Definition: ocrrow.h:61

◆ UpdateMatchDisplay()

void tesseract::UpdateMatchDisplay ( )

This routine updates the match display window (IntMatchWindow) if one exists; otherwise it does nothing.

Globals:

  • FeatureShapes display list for features
  • ProtoShapes display list for protos

Definition at line 413 of file intproto.cpp.

413 {
414 if (IntMatchWindow != nullptr) {
415 IntMatchWindow->Update();
416 }
417} /* ClearMatchDisplay */

◆ UpdateRange() [1/2]

template<typename T1 , typename T2 >
void tesseract::UpdateRange ( const T1 x,
T2 lower_bound,
T2 upper_bound 
)
inline

Definition at line 117 of file helpers.h.

117 {
118 if (x < *lower_bound) {
119 *lower_bound = x;
120 }
121 if (x > *upper_bound) {
122 *upper_bound = x;
123 }
124}

◆ UpdateRange() [2/2]

template<typename T1 , typename T2 >
void tesseract::UpdateRange ( const T1 x_lo,
const T1 x_hi,
T2 lower_bound,
T2 upper_bound 
)
inline

Definition at line 128 of file helpers.h.

128 {
129 if (x_lo < *lower_bound) {
130 *lower_bound = x_lo;
131 }
132 if (x_hi > *upper_bound) {
133 *upper_bound = x_hi;
134 }
135}

◆ ValidBodyLine()

bool tesseract::ValidBodyLine ( const std::vector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

Definition at line 1340 of file paragraphs.cpp.

1341 {
1342 if (!StrongModel(model)) {
1343 tprintf("ValidBodyLine() should only be called with strong models!\n");
1344 }
1345 return StrongModel(model) && model->ValidBodyLine((*rows)[row].lmargin_, (*rows)[row].lindent_,
1346 (*rows)[row].rindent_, (*rows)[row].rmargin_);
1347}
bool StrongModel(const ParagraphModel *model)
bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const
Definition: ocrpara.cpp:59

◆ ValidCharDescription()

bool tesseract::ValidCharDescription ( const FEATURE_DEFS_STRUCT FeatureDefs,
CHAR_DESC_STRUCT CharDesc 
)

Definition at line 131 of file featdefs.cpp.

131 {
132 bool anything_written = false;
133 bool well_formed = true;
134 for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
135 if (CharDesc->FeatureSets[Type]) {
136 for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) {
137 FEATURE feat = CharDesc->FeatureSets[Type]->Features[i];
138 for (int p = 0; p < feat->Type->NumParams; p++) {
139 if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p])) {
140 well_formed = false;
141 } else {
142 anything_written = true;
143 }
144 }
145 }
146 } else {
147 return false;
148 }
149 }
150 return anything_written && well_formed;
151} /* ValidCharDescription */
std::array< FEATURE_SET_STRUCT *, NUM_FEATURE_TYPES > FeatureSets
Definition: featdefs.h:63
const FEATURE_DESC_STRUCT * Type
Definition: ocrfeatures.h:65

◆ ValidFirstLine()

bool tesseract::ValidFirstLine ( const std::vector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

Definition at line 1331 of file paragraphs.cpp.

1332 {
1333 if (!StrongModel(model)) {
1334 tprintf("ValidFirstLine() should only be called with strong models!\n");
1335 }
1336 return StrongModel(model) && model->ValidFirstLine((*rows)[row].lmargin_, (*rows)[row].lindent_,
1337 (*rows)[row].rindent_, (*rows)[row].rmargin_);
1338}
bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const
Definition: ocrpara.cpp:45

◆ vertical_cblob_projection()

void tesseract::vertical_cblob_projection ( C_BLOB blob,
STATS stats 
)

Definition at line 871 of file blobbox.cpp.

874 {
875 // outlines of blob
876 C_OUTLINE_IT out_it = blob->out_list();
877
878 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
879 vertical_coutline_projection(out_it.data(), stats);
880 }
881}
void vertical_coutline_projection(C_OUTLINE *outline, STATS *stats)
Definition: blobbox.cpp:890

◆ vertical_coutline_projection()

void tesseract::vertical_coutline_projection ( C_OUTLINE outline,
STATS stats 
)

Definition at line 890 of file blobbox.cpp.

893 {
894 ICOORD pos; // current point
895 ICOORD step; // edge step
896 int32_t length; // of outline
897 int16_t stepindex; // current step
898 C_OUTLINE_IT out_it = outline->child();
899
900 pos = outline->start_pos();
901 length = outline->pathlength();
902 for (stepindex = 0; stepindex < length; stepindex++) {
903 step = outline->step(stepindex);
904 if (step.x() > 0) {
905 stats->add(pos.x(), -pos.y());
906 } else if (step.x() < 0) {
907 stats->add(pos.x() - 1, pos.y());
908 }
909 pos += step;
910 }
911
912 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
913 vertical_coutline_projection(out_it.data(), stats);
914 }
915}
C_OUTLINE_LIST * child()
Definition: coutln.h:108

◆ vertical_cunderline_projection()

void tesseract::vertical_cunderline_projection ( C_OUTLINE outline,
QSPLINE baseline,
float  xheight,
float  baseline_offset,
STATS lower_proj,
STATS middle_proj,
STATS upper_proj 
)

Definition at line 202 of file underlin.cpp.

210 {
211 ICOORD pos; // current point
212 ICOORD step; // edge step
213 int16_t lower_y, upper_y; // region limits
214 int32_t length; // of outline
215 int16_t stepindex; // current step
216 C_OUTLINE_IT out_it = outline->child();
217
218 pos = outline->start_pos();
219 length = outline->pathlength();
220 for (stepindex = 0; stepindex < length; stepindex++) {
221 step = outline->step(stepindex);
222 if (step.x() > 0) {
223 lower_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + 0.5));
224 upper_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + xheight + 0.5));
225 if (pos.y() >= lower_y) {
226 lower_proj->add(pos.x(), -lower_y);
227 if (pos.y() >= upper_y) {
228 middle_proj->add(pos.x(), lower_y - upper_y);
229 upper_proj->add(pos.x(), upper_y - pos.y());
230 } else {
231 middle_proj->add(pos.x(), lower_y - pos.y());
232 }
233 } else {
234 lower_proj->add(pos.x(), -pos.y());
235 }
236 } else if (step.x() < 0) {
237 lower_y = static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + 0.5));
238 upper_y =
239 static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + xheight + 0.5));
240 if (pos.y() >= lower_y) {
241 lower_proj->add(pos.x() - 1, lower_y);
242 if (pos.y() >= upper_y) {
243 middle_proj->add(pos.x() - 1, upper_y - lower_y);
244 upper_proj->add(pos.x() - 1, pos.y() - upper_y);
245 } else {
246 middle_proj->add(pos.x() - 1, pos.y() - lower_y);
247 }
248 } else {
249 lower_proj->add(pos.x() - 1, pos.y());
250 }
251 }
252 pos += step;
253 }
254
255 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
256 vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, lower_proj,
257 middle_proj, upper_proj);
258 }
259}

◆ vertical_torow_projection()

int16_t tesseract::vertical_torow_projection ( TO_ROW row,
STATS projection 
)

◆ vigorous_noise_removal()

void tesseract::vigorous_noise_removal ( TO_BLOCK block)

Definition at line 508 of file makerow.cpp.

508 {
509 TO_ROW_IT row_it = block->get_rows();
510 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
511 TO_ROW *row = row_it.data();
512 BLOBNBOX_IT b_it = row->blob_list();
513 // Estimate the xheight on the row.
514 int max_height = 0;
515 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
516 BLOBNBOX *blob = b_it.data();
517 if (blob->bounding_box().height() > max_height) {
518 max_height = blob->bounding_box().height();
519 }
520 }
521 STATS hstats(0, max_height);
522 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
523 BLOBNBOX *blob = b_it.data();
524 int height = blob->bounding_box().height();
525 if (height >= kMinSize) {
526 hstats.add(blob->bounding_box().height(), 1);
527 }
528 }
529 float xheight = hstats.median();
530 // Delete small objects.
531 BLOBNBOX *prev = nullptr;
532 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
533 BLOBNBOX *blob = b_it.data();
534 const TBOX &box = blob->bounding_box();
535 if (box.height() < kNoiseSize * xheight) {
536 // Small so delete unless it looks like an i dot.
537 if (prev != nullptr) {
538 if (dot_of_i(blob, prev, row)) {
539 continue; // Looks OK.
540 }
541 }
542 if (!b_it.at_last()) {
543 BLOBNBOX *next = b_it.data_relative(1);
544 if (dot_of_i(blob, next, row)) {
545 continue; // Looks OK.
546 }
547 }
548 // It might be noise so get rid of it.
549 delete blob->remove_cblob();
550 delete b_it.extract();
551 } else {
552 prev = blob;
553 }
554 }
555 }
556}
const double kNoiseSize
Definition: makerow.cpp:407
const int kMinSize
Definition: makerow.cpp:408

◆ Walk()

void tesseract::Walk ( KDTREE tree,
kdwalk_proc  action,
ClusteringContext context,
KDNODE sub_tree,
int32_t  level 
)

Walk a tree, calling action once on each node.

Operation: This routine walks through the specified sub_tree and invokes action at each node as follows: action(context, data, level), where data is the data contents of the node being visited and level is the level of the node in the tree, with the root being level 0.

Parameters
tree: root of the tree being walked
action: action to be performed at every node
context: action's context
sub_tree: pointer to the root of the subtree to be walked
level: current level in the tree for this node

Definition at line 466 of file kdtree.cpp.

466 {
467 (*action)(context, sub_tree->Data, level);
468 if (sub_tree->Left != nullptr) {
469 Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
470 }
471 if (sub_tree->Right != nullptr) {
472 Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
473 }
474}

◆ within_error_margin()

bool tesseract::within_error_margin ( float  test,
float  num,
float  margin 
)
inline

Definition at line 102 of file makerow.h.

102 {
103 return (test >= num * (1 - margin) && test <= num * (1 + margin));
104}

◆ word_blob_quality()

int16_t tesseract::word_blob_quality ( WERD_RES word)

◆ word_comparator()

int tesseract::word_comparator ( const void *  word1p,
const void *  word2p 
)

word_comparator()

word comparator used to sort a word list so that words are in increasing order of left edge.

Definition at line 377 of file werd.cpp.

377 {
378 const WERD *word1 = *reinterpret_cast<const WERD *const *>(word1p);
379 const WERD *word2 = *reinterpret_cast<const WERD *const *>(word2p);
380 return word1->bounding_box().left() - word2->bounding_box().left();
381}

◆ word_contains_non_1_digit()

bool tesseract::word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)

◆ write_info()

bool tesseract::write_info ( FILE *  f,
const FontInfo fi 
)

Definition at line 157 of file fontinfo.cpp.

157 {
158 int32_t size = strlen(fi.name);
159 return tesseract::Serialize(f, &size) && tesseract::Serialize(f, &fi.name[0], size) &&
161}

◆ write_set()

bool tesseract::write_set ( FILE *  f,
const FontSet fs 
)

Definition at line 222 of file fontinfo.cpp.

222 {
223 int size = fs.size();
224 return tesseract::Serialize(f, &size) &&
225 (size > 0 ? tesseract::Serialize(f, &fs[0], size) : true);
226}

◆ write_spacing_info()

bool tesseract::write_spacing_info ( FILE *  f,
const FontInfo fi 
)

Definition at line 194 of file fontinfo.cpp.

194 {
195 int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
196 if (!tesseract::Serialize(f, &vec_size)) {
197 return false;
198 }
199 int16_t x_gap_invalid = -1;
200 for (int i = 0; i < vec_size; ++i) {
201 FontSpacingInfo *fs = fi.spacing_vec->at(i);
202 int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
203 if (fs == nullptr) {
204 // Writing two invalid x-gaps.
205 if (!tesseract::Serialize(f, &x_gap_invalid, 2) || !tesseract::Serialize(f, &kern_size)) {
206 return false;
207 }
208 } else {
209 if (!tesseract::Serialize(f, &fs->x_gap_before) ||
210 !tesseract::Serialize(f, &fs->x_gap_after) || !tesseract::Serialize(f, &kern_size)) {
211 return false;
212 }
213 }
214 if (kern_size > 0 &&
215 (!Serialize(f, fs->kerned_unichar_ids) || !Serialize(f, fs->kerned_x_gaps))) {
216 return false;
217 }
218 }
219 return true;
220}

◆ WriteAdaptedClass()

void tesseract::WriteAdaptedClass ( FILE *  File,
ADAPT_CLASS_STRUCT Class,
int  NumConfigs 
)

This routine writes a binary representation of Class to File.

Parameters
File: open file to write Class to
Class: adapted class to write to File
NumConfigs: number of configs in Class
Note
Globals: none

Definition at line 307 of file adaptive.cpp.

307 {
308 /* first write high level adapted class structure */
309 fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
310
311 /* then write out the definitions of the permanent protos and configs */
312 fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
313 fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
314
315 /* then write out the list of temporary protos */
316 uint32_t NumTempProtos = Class->TempProtos->size();
317 fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
318 auto TempProtos = Class->TempProtos;
319 iterate(TempProtos) {
320 void *proto = TempProtos->node;
321 fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
322 }
323
324 /* then write out the adapted configs */
325 fwrite(&NumConfigs, sizeof(int), 1, File);
326 for (int i = 0; i < NumConfigs; i++) {
327 if (test_bit(Class->PermConfigs, i)) {
328 WritePermConfig(File, Class->Config[i].Perm);
329 } else {
330 WriteTempConfig(File, Class->Config[i].Temp);
331 }
332 }
333
334} /* WriteAdaptedClass */
void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config)
Definition: adaptive.cpp:368
void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config)
Definition: adaptive.cpp:391

◆ WriteCharDescription()

void tesseract::WriteCharDescription ( const FEATURE_DEFS_STRUCT FeatureDefs,
CHAR_DESC_STRUCT CharDesc,
std::string &  str 
)

Appends a textual representation of CharDesc to str. The format used is to write out the number of feature sets which will be written followed by a representation of each feature set.

Each set starts with the short name for that feature followed by a description of the feature set. Feature sets which are not present are not written.

Parameters
FeatureDefs: definitions of feature types/extractors
str: string to append CharDesc to
CharDesc: character description to append to str

Definition at line 109 of file featdefs.cpp.

109 {
110 int NumSetsToWrite = 0;
111
112 for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
113 if (CharDesc->FeatureSets[Type]) {
114 NumSetsToWrite++;
115 }
116 }
117
118 str += " " + std::to_string(NumSetsToWrite);
119 str += "\n";
120 for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
121 if (CharDesc->FeatureSets[Type]) {
122 str += FeatureDefs.FeatureDesc[Type]->ShortName;
123 str += " ";
124 WriteFeatureSet(CharDesc->FeatureSets[Type], str);
125 }
126 }
127} /* WriteCharDescription */
void WriteFeatureSet(FEATURE_SET FeatureSet, std::string &str)

◆ WriteFeatureSet()

void tesseract::WriteFeatureSet ( FEATURE_SET  FeatureSet,
std::string &  str 
)

Appends a textual representation of FeatureSet to str. This representation is an integer specifying the number of features in the set, followed by a newline, followed by text representations for each feature in the set.

Parameters
FeatureSet: feature set to append to str
str: string to append the feature set to

Definition at line 129 of file ocrfeatures.cpp.

129 {
130 if (FeatureSet) {
131 str += "" + std::to_string(FeatureSet->NumFeatures);
132 str += "\n";
133 for (int i = 0; i < FeatureSet->NumFeatures; i++) {
134 WriteFeature(FeatureSet->Features[i], str);
135 }
136 }
137} /* WriteFeatureSet */

◆ WriteFile()

TESS_UNICHARSET_TRAINING_API bool tesseract::WriteFile ( const std::string &  output_dir,
const std::string &  lang,
const std::string &  suffix,
const std::vector< char > &  data,
FileWriter  writer 
)

Definition at line 40 of file lang_model_helpers.cpp.

41 {
42 if (lang.empty()) {
43 return true;
44 }
45 std::string dirname = output_dir + "/" + lang;
46 // Attempt to make the directory, but ignore errors, as it may not be a
47 // standard filesystem, and the writer will complain if not successful.
48#if defined(_WIN32)
49 _mkdir(dirname.c_str());
50#else
51 mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
52#endif
53 std::string filename = dirname + "/" + lang + suffix;
54 if (writer == nullptr) {
55 return SaveDataToFile(data, filename.c_str());
56 } else {
57 return (*writer)(data, filename.c_str());
58 }
59}
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)

◆ WriteParamDesc()

TESS_API void tesseract::WriteParamDesc ( FILE *  File,
uint16_t  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write

Definition at line 244 of file clusttool.cpp.

244 {
245 int i;
246
247 for (i = 0; i < N; i++) {
248 if (ParamDesc[i].Circular) {
249 fprintf(File, "circular ");
250 } else {
251 fprintf(File, "linear ");
252 }
253
254 if (ParamDesc[i].NonEssential) {
255 fprintf(File, "non-essential ");
256 } else {
257 fprintf(File, "essential ");
258 }
259
260 fprintf(File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
261 }
262}

◆ WritePermConfig()

void tesseract::WritePermConfig ( FILE *  File,
PERM_CONFIG_STRUCT Config 
)

This routine writes a binary representation of a permanent configuration to File.

Parameters
File: open file to write Config to
Config: permanent config to write to File
Note
Globals: none

Definition at line 368 of file adaptive.cpp.

368 {
369 uint8_t NumAmbigs = 0;
370
371 assert(Config != nullptr);
372 while (Config->Ambigs[NumAmbigs] > 0) {
373 ++NumAmbigs;
374 }
375
376 fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
377 fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
378 fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
379} /* WritePermConfig */

◆ WritePrototype()

TESS_API void tesseract::WritePrototype ( FILE *  File,
uint16_t  N,
PROTOTYPE Proto 
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out

Definition at line 271 of file clusttool.cpp.

271 {
272 int i;
273
274 if (Proto->Significant) {
275 fprintf(File, "significant ");
276 } else {
277 fprintf(File, "insignificant ");
278 }
279 WriteProtoStyle(File, static_cast<PROTOSTYLE>(Proto->Style));
280 fprintf(File, "%6d\n\t", Proto->NumSamples);
281 WriteNFloats(File, N, &Proto->Mean[0]);
282 fprintf(File, "\t");
283
284 switch (Proto->Style) {
285 case spherical:
286 WriteNFloats(File, 1, &(Proto->Variance.Spherical));
287 break;
288 case elliptical:
289 WriteNFloats(File, N, Proto->Variance.Elliptical);
290 break;
291 case mixed:
292 for (i = 0; i < N; i++) {
293 switch (Proto->Distrib[i]) {
294 case normal:
295 fprintf(File, " %9s", "normal");
296 break;
297 case uniform:
298 fprintf(File, " %9s", "uniform");
299 break;
300 case D_random:
301 fprintf(File, " %9s", "random");
302 break;
304 ASSERT_HOST(!"Distribution count not allowed!");
305 }
306 }
307 fprintf(File, "\n\t");
308 WriteNFloats(File, N, Proto->Variance.Elliptical);
309 }
310}

◆ WriteRecoder()

bool tesseract::WriteRecoder ( const UNICHARSET unicharset,
bool  pass_through,
const std::string &  output_dir,
const std::string &  lang,
FileWriter  writer,
std::string *  radical_table_data,
TessdataManager traineddata 
)

Definition at line 97 of file lang_model_helpers.cpp.

99 {
100 UnicharCompress recoder;
101 // Where the unicharset is carefully setup already to contain a good
102 // compact encoding, use a pass-through recoder that does nothing.
103 // For scripts that have a large number of unicodes (Han, Hangul) we want
104 // to use the recoder to compress the symbol space by re-encoding each
105 // unicode as multiple codes from a smaller 'alphabet' that are related to the
106 // shapes in the character. Hangul Jamo is a perfect example of this.
107 // See the Hangul Syllables section, sub-section "Equivalence" in:
108 // http://www.unicode.org/versions/Unicode10.0.0/ch18.pdf
109 if (pass_through) {
110 recoder.SetupPassThrough(unicharset);
111 } else {
112 int null_char = unicharset.has_special_codes() ? UNICHAR_BROKEN : unicharset.size();
113 tprintf("Null char=%d\n", null_char);
114 if (!recoder.ComputeEncoding(unicharset, null_char, radical_table_data)) {
115 tprintf("Creation of encoded unicharset failed!!\n");
116 return false;
117 }
118 }
119 TFile fp;
120 std::vector<char> recoder_data;
121 fp.OpenWrite(&recoder_data);
122 if (!recoder.Serialize(&fp)) {
123 return false;
124 }
125 traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0], recoder_data.size());
126 std::string encoding = recoder.GetEncodingAsString(unicharset);
127 recoder_data.resize(encoding.length(), 0);
128 memcpy(&recoder_data[0], &encoding[0], encoding.length());
129 std::string suffix;
130 suffix += ".charset_size=" + std::to_string(recoder.code_range());
131 suffix += ".txt";
132 return WriteFile(output_dir, lang, suffix.c_str(), recoder_data, writer);
133}
std::string GetEncodingAsString(const UNICHARSET &unicharset) const
void SetupPassThrough(const UNICHARSET &unicharset)
bool ComputeEncoding(const UNICHARSET &unicharset, int null_id, std::string *radical_stroke_table)
bool Serialize(TFile *fp) const
bool has_special_codes() const
Definition: unicharset.h:756

◆ WriteShapeTable()

TESS_COMMON_TRAINING_API void tesseract::WriteShapeTable ( const std::string &  file_prefix,
const ShapeTable shape_table 
)

Definition at line 170 of file commontraining.cpp.

170 {
171 std::string shape_table_file = file_prefix;
172 shape_table_file += kShapeTableFileSuffix;
173 FILE *fp = fopen(shape_table_file.c_str(), "wb");
174 if (fp != nullptr) {
175 if (!shape_table.Serialize(fp)) {
176 fprintf(stderr, "Error writing shape table: %s\n", shape_table_file.c_str());
177 }
178 fclose(fp);
179 } else {
180 fprintf(stderr, "Error creating shape table: %s\n", shape_table_file.c_str());
181 }
182}
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:250

◆ WriteTempConfig()

void tesseract::WriteTempConfig ( FILE *  File,
TEMP_CONFIG_STRUCT Config 
)

This routine writes a binary representation of a temporary configuration to File.

Parameters
File: open file to write Config to
Config: temporary config to write to File
Note
Globals: none

Definition at line 391 of file adaptive.cpp.

391 {
392 assert(Config != nullptr);
393
394 fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
395 fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
396
397} /* WriteTempConfig */

◆ WriteTrainingSamples()

void tesseract::WriteTrainingSamples ( const tesseract::FEATURE_DEFS_STRUCT FeatureDefs,
char *  Directory,
tesseract::LIST  CharList,
const char *  program_feature_type 
)

◆ WriteUnicharset()

bool tesseract::WriteUnicharset ( const UNICHARSET unicharset,
const std::string &  output_dir,
const std::string &  lang,
FileWriter  writer,
TessdataManager traineddata 
)

Definition at line 82 of file lang_model_helpers.cpp.

83 {
84 std::vector<char> unicharset_data;
85 TFile fp;
86 fp.OpenWrite(&unicharset_data);
87 if (!unicharset.save_to_file(&fp)) {
88 return false;
89 }
90 traineddata->OverwriteEntry(TESSDATA_LSTM_UNICHARSET, &unicharset_data[0],
91 unicharset_data.size());
92 return WriteFile(output_dir, lang, ".unicharset", unicharset_data, writer);
93}

◆ ZeroVector()

template<typename T >
void tesseract::ZeroVector ( unsigned  n,
T *  vec 
)
inline

Definition at line 245 of file functions.h.

245 {
246 memset(vec, 0, n * sizeof(*vec));
247}

Variable Documentation

◆ _TFNetworkModel_default_instance_

TFNetworkModelDefaultTypeInternal tesseract::_TFNetworkModel_default_instance_

Definition at line 52 of file tfnetwork.pb.h.

◆ blob_window

ScrollView * tesseract::blob_window = nullptr

Definition at line 36 of file render.cpp.

◆ case_state_table

const int tesseract::case_state_table[6][4]
Initial value:
= {
{0, 1, 5, 4},
{0, 3, 2, 4},
{0, -1, 2, -1},
{0, 3, -1, 4},
{0, -1, -1, 4},
{5, -1, 2, -1},
}

Definition at line 28 of file context.cpp.

◆ CharNormDesc

const FEATURE_DESC_STRUCT tesseract::CharNormDesc
extern

◆ classify_max_slope

double tesseract::classify_max_slope = 2.414213562

"Slope above which lines are called vertical"

Definition at line 31 of file mfx.cpp.

◆ classify_min_slope

double tesseract::classify_min_slope = 0.414213562

"Slope below which lines are called horizontal"

Definition at line 30 of file mfx.cpp.

◆ classify_norm_adj_curl

double tesseract::classify_norm_adj_curl = 2.0

"Norm adjust curl ..."

Definition at line 74 of file normmatch.cpp.

◆ classify_norm_adj_midpoint

double tesseract::classify_norm_adj_midpoint = 32.0

control knobs used to control the normalization adjustment process "Norm adjust midpoint ..."

Definition at line 73 of file normmatch.cpp.

◆ classify_pico_feature_length

double tesseract::classify_pico_feature_length = 0.05

"Pico Feature Length"

Definition at line 37 of file picofeat.cpp.

◆ color_list

ScrollView::Color tesseract::color_list[]
Initial value:
= {ScrollView::RED, ScrollView::CYAN, ScrollView::YELLOW,
ScrollView::BLUE, ScrollView::GREEN, ScrollView::WHITE}

Definition at line 38 of file render.cpp.

◆ Config

TESS_COMMON_TRAINING_API CLUSTERCONFIG tesseract::Config = {elliptical, 0.625, 0.05, 1.0, 1e-6, 0}

Definition at line 89 of file commontraining.cpp.

◆ current_block_list

BLOCK_LIST* tesseract::current_block_list
extern

◆ devanagari_split_debugimage

bool tesseract::devanagari_split_debugimage = 0

"Whether to create a debug image for split shiro-rekha process."

Definition at line 39 of file devanagari_processing.cpp.

◆ devanagari_split_debuglevel

int tesseract::devanagari_split_debuglevel = 0

"Debug level for split shiro-rekha process."

Definition at line 36 of file devanagari_processing.cpp.

◆ DotProduct

DotProductFunction tesseract::DotProduct

Definition at line 80 of file simddetect.cpp.

◆ edge_window

ScrollView * tesseract::edge_window = nullptr

Definition at line 37 of file plotedges.cpp.

◆ editor_image_blob_bb_color

int tesseract::editor_image_blob_bb_color = ScrollView::YELLOW

"Blob bounding box colour"

Definition at line 127 of file pgedit.cpp.

◆ editor_image_win_name

char* tesseract::editor_image_win_name = "EditorImage"

"Editor image window name"

Definition at line 122 of file pgedit.cpp.

◆ editor_image_word_bb_color

int tesseract::editor_image_word_bb_color = ScrollView::BLUE

"Word bounding box colour"

Definition at line 126 of file pgedit.cpp.

◆ editor_image_xpos

int tesseract::editor_image_xpos = 590

"Editor image X Pos"

Definition at line 123 of file pgedit.cpp.

◆ editor_image_ypos

int tesseract::editor_image_ypos = 10

"Editor image Y Pos"

Definition at line 124 of file pgedit.cpp.

◆ editor_word_height

int tesseract::editor_word_height = 240

"Word window height"

Definition at line 132 of file pgedit.cpp.

◆ editor_word_name

char* tesseract::editor_word_name = "BlnWords"

"BL normalized word window"

Definition at line 129 of file pgedit.cpp.

◆ editor_word_width

int tesseract::editor_word_width = 655

"Word window width"

Definition at line 133 of file pgedit.cpp.

◆ editor_word_xpos

int tesseract::editor_word_xpos = 60

"Word window X Pos"

Definition at line 130 of file pgedit.cpp.

◆ editor_word_ypos

int tesseract::editor_word_ypos = 510

"Word window Y Pos"

Definition at line 131 of file pgedit.cpp.

◆ feature_defs

TESS_COMMON_TRAINING_API FEATURE_DEFS_STRUCT tesseract::feature_defs

Definition at line 90 of file commontraining.cpp.

◆ FTable

const double tesseract::FTable[FTABLE_Y][FTABLE_X]

Definition at line 41 of file cluster.cpp.

◆ fx_win

ScrollView * tesseract::fx_win = nullptr

Definition at line 42 of file drawfx.cpp.

◆ gapmap_big_gaps

double tesseract::gapmap_big_gaps = 1.75

"xht multiplier"

Definition at line 20 of file gap_map.cpp.

◆ gapmap_debug

bool tesseract::gapmap_debug = false

"Say which blocks have tables"

Definition at line 17 of file gap_map.cpp.

◆ gapmap_no_isolated_quanta

bool tesseract::gapmap_no_isolated_quanta = false

"Ensure gaps not less than 2quanta wide"

Definition at line 19 of file gap_map.cpp.

◆ gapmap_use_ends

bool tesseract::gapmap_use_ends = false

"Use large space at start and end of rows"

Definition at line 18 of file gap_map.cpp.

◆ GeoFeatDesc

const FEATURE_DESC_STRUCT tesseract::GeoFeatDesc
extern

◆ IntFeatDesc

const FEATURE_DESC_STRUCT tesseract::IntFeatDesc
extern

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

Definition at line 35 of file weightmatrix.cpp.

◆ kAdamEpsilon

const TFloat tesseract::kAdamEpsilon = 1e-8

Definition at line 37 of file weightmatrix.cpp.

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

Definition at line 231 of file weightmatrix.cpp.

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

Definition at line 117 of file tablefind.cpp.

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

Definition at line 46 of file alignedblob.cpp.

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

Definition at line 50 of file alignedblob.cpp.

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

Definition at line 58 of file tablefind.cpp.

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

Definition at line 56 of file tablefind.cpp.

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

Definition at line 57 of file tablefind.cpp.

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

Definition at line 51 of file tablefind.cpp.

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

Definition at line 49 of file tablefind.cpp.

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

Definition at line 50 of file tablefind.cpp.

◆ kArabicText

const char tesseract::kArabicText[] = "والفكر والصراع 1234,\nوالفكر والصراع"

Definition at line 40 of file pango_font_info_test.cc.

◆ kBadlyFormedHinWords

const char * tesseract::kBadlyFormedHinWords[] = {"उपयोक्ताो", "नहीें", "प्रंात", "कहीअे", "पत्रिाका", "छह्णाीस"}

Definition at line 85 of file normstrngs_test.cc.

◆ kBadlyFormedThaiWords

const char* tesseract::kBadlyFormedThaiWords[] = {"ฤิ", "กา้ํ", "กิำ", "นำ้", "เเก"}

Definition at line 87 of file normstrngs_test.cc.

◆ kBadPriority

const double tesseract::kBadPriority = 999.0

Definition at line 39 of file split.cpp.

◆ kBatchIterations

const int tesseract::kBatchIterations = 100

Definition at line 36 of file lstm_test.h.

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

Definition at line 70 of file lstmtrainer.cpp.

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

Definition at line 47 of file colpartitiongrid.cpp.

◆ kBlameAdaption

const char tesseract::kBlameAdaption[] = "adapt"

Definition at line 46 of file blamer.cpp.

◆ kBlameChopper

const char tesseract::kBlameChopper[] = "chop"

Definition at line 40 of file blamer.cpp.

◆ kBlameClassifier

const char tesseract::kBlameClassifier[] = "cl"

Definition at line 39 of file blamer.cpp.

◆ kBlameClassLMTradeoff

const char tesseract::kBlameClassLMTradeoff[] = "cl/LM"

Definition at line 41 of file blamer.cpp.

◆ kBlameClassOldLMTradeoff

const char tesseract::kBlameClassOldLMTradeoff[] = "cl/old_LM"

Definition at line 45 of file blamer.cpp.

◆ kBlameCorrect

const char tesseract::kBlameCorrect[] = "corr"

Definition at line 38 of file blamer.cpp.

◆ kBlameNoTruth

const char tesseract::kBlameNoTruth[] = "no_tr"

Definition at line 48 of file blamer.cpp.

◆ kBlameNoTruthSplit

const char tesseract::kBlameNoTruthSplit[] = "no_tr_spl"

Definition at line 47 of file blamer.cpp.

◆ kBlamePageLayout

const char tesseract::kBlamePageLayout[] = "pglt"

Definition at line 42 of file blamer.cpp.

◆ kBlameSegsearchHeur

const char tesseract::kBlameSegsearchHeur[] = "ss_heur"

Definition at line 43 of file blamer.cpp.

◆ kBlameSegsearchPP

const char tesseract::kBlameSegsearchPP[] = "ss_pp"

Definition at line 44 of file blamer.cpp.

◆ kBlameUnknown

const char tesseract::kBlameUnknown[] = "unkn"

Definition at line 49 of file blamer.cpp.

◆ kBlnBaselineOffset

const int tesseract::kBlnBaselineOffset = 64

Definition at line 34 of file normalis.h.

◆ kBlnCellHeight

const int tesseract::kBlnCellHeight = 256

Definition at line 32 of file normalis.h.

◆ kBlnXHeight

const int tesseract::kBlnXHeight = 128

Definition at line 33 of file normalis.h.

◆ kBlocks8087_054

const PolyBlockType tesseract::kBlocks8087_054[]

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

Definition at line 31 of file boxword.cpp.

◆ kBoxReadBufSize

const int tesseract::kBoxReadBufSize = 1024

Definition at line 33 of file boxread.h.

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

Definition at line 72 of file strokewidth.cpp.

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

Definition at line 1537 of file baseapi.cpp.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected textlength assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

Definition at line 1535 of file baseapi.cpp.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

Definition at line 1529 of file baseapi.cpp.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

Definition at line 41 of file tablerecog.cpp.

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

Definition at line 40 of file tablerecog.cpp.

◆ kCenter

const ParagraphJustification tesseract::kCenter = JUSTIFICATION_CENTER

Definition at line 27 of file paragraphs_test.cc.

◆ kCenterGradeCap

const int tesseract::kCenterGradeCap = 25

Definition at line 37 of file split.cpp.

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

Definition at line 33 of file linerec.cpp.

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

Definition at line 48 of file lstmrecognizer.cpp.

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

Definition at line 66 of file strokewidth.cpp.

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

Definition at line 68 of file strokewidth.cpp.

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

Definition at line 62 of file strokewidth.cpp.

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

Definition at line 64 of file strokewidth.cpp.

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

Definition at line 60 of file strokewidth.cpp.

◆ kCNFeatureType

TESS_API const char *const tesseract::kCNFeatureType = "cn"

Definition at line 34 of file featdefs.cpp.

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

Definition at line 41 of file tabfind.h.

◆ kComplexPage1

const TextAndModel tesseract::kComplexPage1[]

Definition at line 368 of file paragraphs_test.cc.

◆ kComplexPage2

const TextAndModel tesseract::kComplexPage2[]

Definition at line 413 of file paragraphs_test.cc.

◆ kComplexShapePerimeterRatio

const double tesseract::kComplexShapePerimeterRatio = 1.5

Definition at line 48 of file blobbox.cpp.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

Definition at line 60 of file tabfind.cpp.

◆ kCosSmallAngle

const double tesseract::kCosSmallAngle = 0.866

Definition at line 44 of file blobbox.cpp.

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

Definition at line 43 of file linefind.cpp.

◆ kCrownedParagraph

const TextAndModel tesseract::kCrownedParagraph[]
Initial value:
= {
{"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
{"often not indented as the rest ", PCONT, PModel(), false, false},
{"of the paragraphs are. Nonethe-", PCONT, PModel(), false, false},
{"less it should be counted as the", PCONT, PModel(), false, false},
{"same type of paragraph. ", PCONT, PModel(), false, false},
{" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"graphs are both indented two ", PCONT, PModel(), false, false},
{"spaces. ", PCONT, PModel(), false, false},
{" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"fmt refers to as a 'crown.' ", PCONT, PModel(), false, false},
}

Definition at line 262 of file paragraphs_test.cc.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft
Initial value:
=
reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))

Definition at line 56 of file paragraphs.cpp.

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight
Initial value:
=
reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))

Definition at line 58 of file paragraphs.cpp.

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

Definition at line 58 of file pango_font_info.cpp.

◆ kDefiniteAspectRatio

const double tesseract::kDefiniteAspectRatio = 2.0

Definition at line 46 of file blobbox.cpp.

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

Definition at line 75 of file strokewidth.cpp.

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

Definition at line 78 of file strokewidth.cpp.

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

Definition at line 46 of file lstmrecognizer.cpp.

◆ kDivisibleVerticalItalic

const TPOINT tesseract::kDivisibleVerticalItalic(1, 5)

◆ kDivisibleVerticalUpright

const TPOINT tesseract::kDivisibleVerticalUpright(0, 1)

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

Definition at line 32 of file trie.cpp.

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

Definition at line 235 of file weightmatrix.cpp.

◆ kEngLigatureText

const char tesseract::kEngLigatureText[] = "ﬁdelity"

Definition at line 39 of file stringrenderer_test.cc.

◆ kEngNonLigatureText

const char tesseract::kEngNonLigatureText[] = "fidelity"

Definition at line 37 of file stringrenderer_test.cc.

◆ kEngText

const char tesseract::kEngText[] = "the quick brown fox jumps over the lazy dog"

Definition at line 81 of file normstrngs_test.cc.

◆ kErrClip

const TFloat tesseract::kErrClip = 1.0f

Definition at line 73 of file lstm.cpp.

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

Definition at line 58 of file lstmtrainer.cpp.

◆ kExpectedFontNames

const char* tesseract::kExpectedFontNames[]
Initial value:
= {"Arab",
"Arial Bold Italic",
"DejaVu Sans Ultra-Light",
"Lohit Hindi",
"Times New Roman",
"UnBatang",
"Verdana"}

Definition at line 27 of file pango_font_info_test.cc.

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

Definition at line 56 of file degradeimage.cpp.

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

Definition at line 37 of file imagedata.h.

◆ kFewCluesWithCrown

const TextAndModel tesseract::kFewCluesWithCrown[]
Initial value:
= {
{"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
{"of the page and takes two lines.", PCONT, PModel(), false, false},
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"which indicates that the first ", PCONT, PModel(), false, false},
{"paragraph is a continuation from", PCONT, PModel(), false, false},
{"a previous page, as it is ", PCONT, PModel(), false, false},
{"indented just like this second ", PCONT, PModel(), false, false},
{"paragraph. ", PCONT, PModel(), false, false},
}

Definition at line 247 of file paragraphs_test.cc.

◆ kFinalPixelTolerance

const float tesseract::kFinalPixelTolerance = 0.125f

Definition at line 37 of file normalis.cpp.

◆ kFlushLeftParagraphs

const TextAndModel tesseract::kFlushLeftParagraphs[]
Initial value:
= {
{"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0), false, false},
{"flush left paragraphs (those", PCONT, PModel(), false, false},
{"with no body indent) are not", PCONT, PModel(), false, false},
{"actually crowns. ", PCONT, PModel(), false, false},
{"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0), false, false},
{"also flush left aligned. Usual-", PCONT, PModel(), false, false},
{"ly, these paragraphs are set", PCONT, PModel(), false, false},
{"apart vertically by some white-", PCONT, PModel(), false, false},
{"space, but you can also detect", PCONT, PModel(), false, false},
{"them by observing the big empty", PCONT, PModel(), false, false},
{"space at the ends of the para-", PCONT, PModel(), false, false},
{"graphs. ", PCONT, PModel(), false, false},
}

Definition at line 279 of file paragraphs_test.cc.

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

Definition at line 50 of file mastertrainer.cpp.

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

Definition at line 34 of file trie.cpp.

◆ kGeoFeatureType

TESS_API const char *const tesseract::kGeoFeatureType = "tb"

Definition at line 36 of file featdefs.cpp.

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

Definition at line 57 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = {2, 2, 2, 2, 2, 3, 3}

Definition at line 55 of file tablerecog.cpp.

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

Definition at line 36 of file tabvector.cpp.

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

Definition at line 38 of file tabvector.cpp.

◆ kGWR2nds

const char* tesseract::kGWR2nds[]
Initial value:
= {"C", "c", "t", "", "S", "", "W", "O", "t", "h",
"S", " ", "t", "I", "9", "b", "f", ",", nullptr}

Definition at line 43 of file recodebeam_test.cc.

◆ kGWR2ndScores

const float tesseract::kGWR2ndScores[]
Initial value:
= {0.01, 0.10, 0.12, 0.42, 0.01, 0.25, 0.10, 0.01, 0.01,
0.01, 0.01, 0.05, 0.01, 0.09, 0.09, 0.09, 0.05, 0.25}

Definition at line 45 of file recodebeam_test.cc.

◆ kGWRTops

const char* tesseract::kGWRTops[]
Initial value:
= {"G", "e", "f", " ", "s", " ", "w", "o", "r", "d",
"s", "", "r", "i", "g", "h", "t", ".", nullptr}

Definition at line 39 of file recodebeam_test.cc.

◆ kGWRTopScores

const float tesseract::kGWRTopScores[]
Initial value:
= {0.99, 0.85, 0.87, 0.55, 0.99, 0.65, 0.89, 0.99, 0.99,
0.99, 0.99, 0.95, 0.99, 0.90, 0.90, 0.90, 0.95, 0.75}

Definition at line 41 of file recodebeam_test.cc.

◆ kHanRatioInJapanese

const float tesseract::kHanRatioInJapanese = 0.3

Definition at line 47 of file osdetect.cpp.

◆ kHanRatioInKorean

const float tesseract::kHanRatioInKorean = 0.7

Definition at line 46 of file osdetect.cpp.

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

Definition at line 66 of file lstmtrainer.cpp.

◆ kHinText

const char tesseract::kHinText[] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा"

Definition at line 82 of file normstrngs_test.cc.

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

Definition at line 517 of file weightmatrix.cpp.

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

Definition at line 30 of file otsuthr.h.

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

Definition at line 51 of file colfind.cpp.

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

Definition at line 34 of file tablerecog.cpp.

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

Definition at line 77 of file colpartition.cpp.

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

Definition at line 73 of file colpartition.cpp.

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

Definition at line 75 of file colpartition.cpp.

◆ kImagePadding

const int tesseract::kImagePadding = 4

Definition at line 39 of file imagedata.h.

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

Definition at line 68 of file lstmtrainer.cpp.

◆ kIncorrectResultReasonNames

const char* const tesseract::kIncorrectResultReasonNames[]
Initial value:
= {
kBlameCorrect, kBlameClassifier, kBlameChopper, kBlameClassLMTradeoff,
kBlamePageLayout, kBlameSegsearchHeur, kBlameSegsearchPP, kBlameClassOldLMTradeoff,
kBlameAdaption, kBlameNoTruthSplit, kBlameNoTruth, kBlameUnknown}

Definition at line 51 of file blamer.cpp.

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

Definition at line 966 of file mastertrainer.cpp.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

Definition at line 229 of file weightmatrix.cpp.

◆ kIntFeatureType

TESS_API const char *const tesseract::kIntFeatureType = "if"

Definition at line 35 of file featdefs.cpp.

◆ kKorText

const char tesseract::kKorText[] = "이는 것으로"

Definition at line 83 of file normstrngs_test.cc.

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

Definition at line 107 of file tablefind.cpp.

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

Definition at line 109 of file tablefind.cpp.

◆ kLatinChs

const int tesseract::kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0}

Latin chars corresponding to the unicode chars above.

Definition at line 1594 of file baseapi.cpp.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = M_SQRT1_2

Definition at line 54 of file lstmtrainer.cpp.

◆ kLeft

const ParagraphJustification tesseract::kLeft = JUSTIFICATION_LEFT

Definition at line 26 of file paragraphs_test.cc.

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

Definition at line 83 of file equationdetect.cpp.

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

Definition at line 49 of file tabvector.cpp.

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

Definition at line 44 of file tablerecog.cpp.

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

Definition at line 43 of file tablerecog.cpp.

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

Definition at line 45 of file linefind.cpp.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

Definition at line 54 of file tabfind.cpp.

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

Definition at line 99 of file strokewidth.cpp.

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

Definition at line 101 of file strokewidth.cpp.

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

Definition at line 103 of file strokewidth.cpp.

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

Definition at line 92 of file strokewidth.cpp.

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

Definition at line 94 of file strokewidth.cpp.

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

Definition at line 49 of file tablerecog.cpp.

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

Definition at line 45 of file colpartitiongrid.cpp.

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

Definition at line 78 of file equationdetect.cpp.

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

Definition at line 79 of file equationdetect.cpp.

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

Definition at line 80 of file equationdetect.cpp.

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

Definition at line 40 of file ambigs.cpp.

◆ kMaxBaselineDrift

const double tesseract::kMaxBaselineDrift = 0.0625

Definition at line 51 of file ratngs.cpp.

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

Definition at line 80 of file colpartition.cpp.

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

Definition at line 77 of file tablefind.cpp.

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

Definition at line 40 of file tablefind.cpp.

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

Definition at line 32 of file recogtraining.cpp.

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

Definition at line 66 of file tablefind.cpp.

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + UNICHAR_LEN

A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

Definition at line 1544 of file baseapi.cpp.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

Definition at line 39 of file colpartitiongrid.cpp.

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

Definition at line 69 of file fixxht.cpp.

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

Definition at line 54 of file pagesegmain.cpp.

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

Definition at line 70 of file strokewidth.cpp.

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

Definition at line 87 of file colpartition.cpp.

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

Definition at line 85 of file tablefind.cpp.

◆ kMaxCredibleResolution

constexpr int tesseract::kMaxCredibleResolution = 2400
constexpr

Maximum believable resolution.

Definition at line 38 of file publictypes.h.

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

Definition at line 84 of file strokewidth.cpp.

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

Definition at line 87 of file strokewidth.cpp.

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

Definition at line 56 of file colfind.cpp.

◆ kMaxDropCapBottom

const int tesseract::kMaxDropCapBottom = -128

Definition at line 43 of file ratngs.cpp.

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

Definition at line 45 of file tabvector.cpp.

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

Definition at line 69 of file tablefind.cpp.

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

Definition at line 49 of file tabfind.cpp.

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

Definition at line 48 of file colfind.cpp.

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

Definition at line 28 of file input.cpp.

◆ kMaxIntPairSize

const int tesseract::kMaxIntPairSize = 45

Definition at line 47 of file scrollview.cpp.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

Definition at line 44 of file ccnontextdetect.cpp.

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

Definition at line 35 of file ccnontextdetect.cpp.

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

Definition at line 63 of file colpartition.cpp.

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

Definition at line 65 of file colpartition.cpp.

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

Definition at line 45 of file ligature_table.cpp.

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

Definition at line 322 of file boxchar.cpp.

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

Definition at line 51 of file linefind.cpp.

◆ kMaxLineSizeRatio

const double tesseract::kMaxLineSizeRatio = 1.25

Definition at line 60 of file pageres.cpp.

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

Definition at line 40 of file ccnontextdetect.cpp.

◆ kMaxMediumSizeRatio

const double tesseract::kMaxMediumSizeRatio = 4.0

Definition at line 52 of file blobbox.cpp.

◆ kMaxMsgSize

const int tesseract::kMaxMsgSize = 4096

Definition at line 46 of file scrollview.cpp.

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

Definition at line 37 of file colpartitiongrid.cpp.

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

Definition at line 56 of file linefind.cpp.

◆ kMaxNumberOfScripts

const int tesseract::kMaxNumberOfScripts = 116 + 1 + 2 + 1

Definition at line 36 of file osdetect.h.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

Definition at line 31 of file intfeaturemap.cpp.

◆ kMaxOverlapDenominator

const double tesseract::kMaxOverlapDenominator = 0.125

Definition at line 45 of file ratngs.cpp.

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

Definition at line 34 of file colpartitiongrid.cpp.

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

Definition at line 126 of file tablefind.cpp.

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

Definition at line 62 of file colpartitiongrid.cpp.

◆ kMaxPerimeterWidthRatio

const double tesseract::kMaxPerimeterWidthRatio = 8.0

Definition at line 36 of file stepblob.cpp.

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

Definition at line 39 of file tabfind.cpp.

◆ kMaxReadAhead

const int tesseract::kMaxReadAhead = 8

Definition at line 42 of file imagedata.cpp.

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

Definition at line 39 of file detlinefit.cpp.

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

Definition at line 43 of file imagefind.cpp.

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

Definition at line 46 of file imagefind.cpp.

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

Definition at line 84 of file colpartition.cpp.

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

Definition at line 52 of file tablerecog.cpp.

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

Definition at line 59 of file colpartition.cpp.

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

Definition at line 61 of file colpartition.cpp.

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

Definition at line 72 of file alignedblob.cpp.

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

Definition at line 32 of file ccnontextdetect.cpp.

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

Definition at line 53 of file colpartition.cpp.

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

Definition at line 58 of file linefind.cpp.

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

Definition at line 81 of file tablefind.cpp.

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

Definition at line 56 of file colpartition.cpp.

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

Definition at line 48 of file mastertrainer.cpp.

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

Definition at line 38 of file tabfind.cpp.

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

Definition at line 38 of file tablefind.cpp.

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

Definition at line 52 of file network.cpp.

◆ kMaxWordGapRatio

const double tesseract::kMaxWordGapRatio = 2.0

Definition at line 62 of file pageres.cpp.

◆ kMaxWordSizeRatio

const double tesseract::kMaxWordSizeRatio = 1.25

Definition at line 58 of file pageres.cpp.

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

Definition at line 136 of file tablefind.cpp.

◆ kMicroFeatureType

TESS_API const char *const tesseract::kMicroFeatureType = "mf"

Definition at line 33 of file featdefs.cpp.

◆ kMinAcceptableBlobHeight

const int tesseract::kMinAcceptableBlobHeight = 10

Definition at line 42 of file osdetect.cpp.

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

Definition at line 51 of file tabvector.cpp.

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

Definition at line 62 of file alignedblob.cpp.

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

Definition at line 82 of file colpartition.cpp.

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

Definition at line 63 of file tablefind.cpp.

◆ kMinCapHeightFraction

const double tesseract::kMinCapHeightFraction = 0.05

Definition at line 58 of file unicharset.cpp.

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

Definition at line 43 of file colpartitiongrid.cpp.

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

Definition at line 41 of file colpartitiongrid.cpp.

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

Definition at line 30 of file networkio.cpp.

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

Definition at line 71 of file colpartition.cpp.

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

Definition at line 46 of file mastertrainer.cpp.

◆ kMinColumnWidth

const int tesseract::kMinColumnWidth = 2.0 / 3

Definition at line 31 of file colpartitionset.cpp.

◆ kMinCredibleResolution

constexpr int tesseract::kMinCredibleResolution = 70
constexpr

Minimum believable resolution. Used as a default if there is no other information, as it is safer to under-estimate than over-estimate.

Definition at line 36 of file publictypes.h.

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

Definition at line 81 of file strokewidth.cpp.

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

Definition at line 47 of file lstmtrainer.cpp.

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

Definition at line 56 of file tabfind.cpp.

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

Definition at line 60 of file tablerecog.cpp.

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

Definition at line 45 of file tabfind.cpp.

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

Definition at line 60 of file ccnontextdetect.cpp.

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

Definition at line 47 of file tabvector.cpp.

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

Definition at line 53 of file colfind.cpp.

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

Definition at line 48 of file imagefind.cpp.

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

Definition at line 67 of file colpartition.cpp.

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

Definition at line 44 of file ligature_table.cpp.

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Denominator by which the resolution is divided to give the minimum length, in pixels, demanded of a line.

Definition at line 41 of file linefind.cpp.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

Definition at line 41 of file tabfind.cpp.

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

Definition at line 73 of file tablefind.cpp.

◆ kMinMediumSizeRatio

const double tesseract::kMinMediumSizeRatio = 0.25

Definition at line 50 of file blobbox.cpp.

◆ kMinModeFactor

const int tesseract::kMinModeFactor = 12

Definition at line 1506 of file oldbasel.cpp.

◆ kMinModeFactorOcropus

const int tesseract::kMinModeFactorOcropus = 32

Definition at line 1505 of file oldbasel.cpp.

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

Definition at line 60 of file linefind.cpp.

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

Definition at line 97 of file tablefind.cpp.

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

Definition at line 132 of file tablefind.cpp.

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

Definition at line 36 of file detlinefit.cpp.

◆ kMinProb

const float tesseract::kMinProb = std::exp(kMinCertainty)

Definition at line 32 of file networkio.cpp.

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

Definition at line 53 of file tabvector.cpp.

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

Definition at line 60 of file alignedblob.cpp.

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

Definition at line 60 of file degradeimage.cpp.

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

Definition at line 41 of file imagefind.cpp.

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running Tesseract.

Definition at line 107 of file baseapi.cpp.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

Definition at line 112 of file tablefind.cpp.

◆ kMinSize

const int tesseract::kMinSize = 8

Definition at line 408 of file makerow.cpp.

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

Definition at line 49 of file lstmtrainer.cpp.

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

Definition at line 62 of file lstmtrainer.cpp.

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

Definition at line 69 of file colpartition.cpp.

◆ kMinSubscriptOffset

const int tesseract::kMinSubscriptOffset = 20

Definition at line 39 of file ratngs.cpp.

◆ kMinSuperscriptOffset

const int tesseract::kMinSuperscriptOffset = 20

Definition at line 41 of file ratngs.cpp.

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

Definition at line 68 of file alignedblob.cpp.

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

Definition at line 47 of file linefind.cpp.

◆ kMinVariance

const long double tesseract::kMinVariance = 1.0L / 1024

Definition at line 29 of file quadlsq.cpp.

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

Definition at line 37 of file tabfind.cpp.

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

Definition at line 51 of file network.cpp.

◆ kMinXHeightFraction

const double tesseract::kMinXHeightFraction = 0.25

Definition at line 57 of file unicharset.cpp.

◆ kMinXHeightMatch

const double tesseract::kMinXHeightMatch = 0.5

Definition at line 48 of file ratngs.cpp.

◆ kMixedText

const char tesseract::kMixedText[] = "والفكر 123 والصراع abc"

Definition at line 35 of file stringrenderer_test.cc.

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

Definition at line 97 of file strokewidth.cpp.

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

Definition at line 107 of file strokewidth.cpp.

◆ kNewZealandIndex

const TextAndModel tesseract::kNewZealandIndex[]

Definition at line 691 of file paragraphs_test.cc.

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

Definition at line 112 of file strokewidth.cpp.

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

Definition at line 109 of file strokewidth.cpp.

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

Definition at line 51 of file ccnontextdetect.cpp.

◆ kNoiseSize

const double tesseract::kNoiseSize = 0.5

Definition at line 407 of file makerow.cpp.

◆ kNonAmbiguousMargin

const float tesseract::kNonAmbiguousMargin = 1.0

Definition at line 49 of file osdetect.cpp.

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

Definition at line 56 of file lstmtrainer.cpp.

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers output for each box (the usual 4 and a page number.)

Definition at line 1524 of file baseapi.cpp.

◆ kNumChars

const int tesseract::kNumChars = 100

Definition at line 28 of file recodebeam_test.cc.

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

Definition at line 30 of file detlinefit.cpp.

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

Definition at line 60 of file lstmtrainer.cpp.

◆ kOldManAndSea

const TextAndModel tesseract::kOldManAndSea[]

Definition at line 607 of file paragraphs_test.cc.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

Definition at line 47 of file ccnontextdetect.cpp.

◆ kPadding

const int tesseract::kPadding = 64

Definition at line 30 of file recodebeam_test.cc.

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

Definition at line 122 of file tablefind.cpp.

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

Definition at line 54 of file ccnontextdetect.cpp.

◆ kPointsPerInch

constexpr int tesseract::kPointsPerInch = 72
constexpr

Number of printers' points in an inch. The unit of the pointsize return.

Definition at line 31 of file publictypes.h.

◆ kPrime1

const int tesseract::kPrime1 = 17

Definition at line 41 of file trainingsampleset.cpp.

◆ kPrime2

const int tesseract::kPrime2 = 13

Definition at line 42 of file trainingsampleset.cpp.

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

Definition at line 31 of file unicharcompress.cpp.

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

Definition at line 48 of file alignedblob.cpp.

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

Definition at line 52 of file alignedblob.cpp.

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

Definition at line 51 of file tabfind.cpp.

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

Definition at line 36 of file trainingsample.cpp.

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

Definition at line 36 of file errorcounter.cpp.

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

Definition at line 47 of file tablerecog.cpp.

◆ kResolutionEstimationFactor

constexpr int tesseract::kResolutionEstimationFactor = 10
constexpr

Ratio between median blob size and likely resolution. Used to estimate resolution when none is provided. This is basically 1/usual text size in inches.

Definition at line 43 of file publictypes.h.

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

Definition at line 33 of file trie.cpp.

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

Definition at line 36 of file colpartition.h.

◆ kRight

const ParagraphJustification tesseract::kRight = JUSTIFICATION_RIGHT

Definition at line 28 of file paragraphs_test.cc.

◆ kRightAligned

const TextAndModel tesseract::kRightAligned[]
Initial value:
= {
{"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0), false, false},
{" uncommon in Left-to-Right", PCONT, PModel(), false, false},
{" languages, but they do", PCONT, PModel(), false, false},
{" exist.", PCONT, PModel(), false, false},
{" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0), false, false},
{" horribly tiny paragraphs in", PCONT, PModel(), false, false},
{" tables on which we have no", PCONT, PModel(), false, false},
{" chance anyways.", PCONT, PModel(), false, false},
}
const ParagraphJustification kRight

Definition at line 331 of file paragraphs_test.cc.

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

Definition at line 54 of file degradeimage.cpp.

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

Definition at line 58 of file degradeimage.cpp.

◆ kScaleFactor

constexpr TFloat tesseract::kScaleFactor = 256.0
constexpr

Definition at line 37 of file functions.h.

◆ kScriptAcceptRatio

const float tesseract::kScriptAcceptRatio = 1.3

Definition at line 44 of file osdetect.cpp.

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

Definition at line 82 of file equationdetect.cpp.

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

Definition at line 102 of file tablefind.cpp.

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

Definition at line 43 of file tabvector.cpp.

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

Definition at line 40 of file tabvector.cpp.

◆ kSingleFullPageContinuation

const TextAndModel tesseract::kSingleFullPageContinuation[]
Initial value:
= {
{"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
{"continuation. It flows from", PCONT, PModel(), false, false},
{"line to line, using the full", PCONT, PModel(), false, false},
{"column width with no clear", PCONT, PModel(), false, false},
{"paragraph break, because it", PCONT, PModel(), false, false},
{"actually doesn't have one. It", PCONT, PModel(), false, false},
{"is the middle of one monster", PCONT, PModel(), false, false},
{"paragraph continued from the", PCONT, PModel(), false, false},
{"previous page and continuing", PCONT, PModel(), false, false},
{"onto the next page. There-", PCONT, PModel(), false, false},
{"fore, it ends up getting", PCONT, PModel(), false, false},
{"marked as a crown and then", PCONT, PModel(), false, false},
{"getting re-marked as any ex-", PCONT, PModel(), false, false},
{"isting model. Not great, but", PCONT, PModel(), false, false},
}

Definition at line 298 of file paragraphs_test.cc.

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)
inline

Definition at line 19040 of file universalambigs.h.

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

Definition at line 41 of file osdetect.cpp.

◆ kSloppyTolerance

const int tesseract::kSloppyTolerance = 4

Definition at line 35 of file normalis.cpp.

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

Definition at line 106 of file tablefind.cpp.

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

Definition at line 65 of file colpartitiongrid.cpp.

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

Definition at line 44 of file tablefind.cpp.

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

Definition at line 39 of file trainingsampleset.cpp.

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

Definition at line 64 of file lstmtrainer.cpp.

◆ kStandardFeatureLength

const double tesseract::kStandardFeatureLength = 64.0 / 5

Definition at line 44 of file intfx.h.

◆ kStateClip

const TFloat tesseract::kStateClip = 100.0

Definition at line 71 of file lstm.cpp.

◆ kStrings8087_054

const char* tesseract::kStrings8087_054[] = {"dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", nullptr}

Definition at line 50 of file layout_test.cc.

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

Definition at line 57 of file strokewidth.cpp.

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

Definition at line 141 of file tablefind.cpp.

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

Definition at line 140 of file tablefind.cpp.

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

Definition at line 56 of file strokewidth.cpp.

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

Definition at line 49 of file strokewidth.cpp.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

Definition at line 54 of file strokewidth.cpp.

◆ kSubtleCrown

const TextAndModel tesseract::kSubtleCrown[]
Initial value:
= {
{"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false},
{"often not indented as the rest ", PCONT, PModel(), false, false},
{"of the paragraphs are. Nonethe-", PCONT, PModel(), false, false},
{"less it should be counted as the", PCONT, PModel(), false, false},
{"same type of paragraph. ", PCONT, PModel(), false, false},
{" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"should suffice. ", PCONT, PModel(), false, false},
{" 1235 ", PNONE, PModel(), false, false},
}

Definition at line 455 of file paragraphs_test.cc.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

Definition at line 52 of file lstmtrainer.cpp.

◆ kSvPort

const int tesseract::kSvPort = 8461

Definition at line 45 of file scrollview.cpp.

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

Definition at line 89 of file tablefind.cpp.

◆ kTableOfContents

const TextAndModel tesseract::kTableOfContents[]
Initial value:
= {
{"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
{" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false},
}

Definition at line 552 of file paragraphs_test.cc.

◆ kTableSize

constexpr int tesseract::kTableSize = 4096
constexpr

Definition at line 35 of file functions.h.

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

Definition at line 35 of file tabfind.cpp.

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

Definition at line 73 of file lstmtrainer.cpp.

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

Definition at line 74 of file lstmtrainer.cpp.

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract could not recognize the input as anything.

Definition at line 109 of file baseapi.cpp.

◆ kTestChar

const int tesseract::kTestChar = -1

Definition at line 37 of file trainingsampleset.cpp.

◆ kTestData

const int tesseract::kTestData[] = {2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1}

Definition at line 19 of file stats_test.cc.

◆ kTextWithSourceCode

const TextAndModel tesseract::kTextWithSourceCode[]
Initial value:
= {
{" A typical page of a programming book may contain", PSTART, PModel(kLeft, 0, 20, 0, 0),
false, false},
{"examples of source code to exemplify an algorithm ", PCONT, PModel(), false, false},
{"being described in prose. Such examples should be", PCONT, PModel(), false, false},
{"rendered as lineated text, meaning text with ", PCONT, PModel(), false, false},
{"explicit line breaks but without extra inter-line ", PCONT, PModel(), false, false},
{"spacing. Accidentally finding stray paragraphs in", PCONT, PModel(), false, false},
{"source code would lead to a bad reading experience", PCONT, PModel(), false, false},
{"when the text is re-flowed. ", PCONT, PModel(), false, false},
{" Let's show this by describing the function fact-", PSTART, PModel(kLeft, 0, 20, 0, 0),
false, false},
{"orial. Factorial is a simple recursive function ", PCONT, PModel(), false, false},
{"which grows very quickly. So quickly, in fact, ", PCONT, PModel(), false, false},
{"that the typical C implementation will only work ", PCONT, PModel(), false, false},
{"for values less than about 12: ", PCONT, PModel(), false, false},
{" ", PNONE, PModel(), false, false},
{" # Naive implementation in C ", PCONT, PModel(), false, false},
{" int factorial(int n) { ", PCONT, PModel(), false, false},
{" if (n < 2) ", PCONT, PModel(), false, false},
{" return 1; ", PCONT, PModel(), false, false},
{" return n * factorial(n - 1); ", PCONT, PModel(), false, false},
{" } ", PCONT, PModel(), false, false},
{" ", PCONT, PModel(), false, false},
{" The C programming language does not have built- ", PSTART, PModel(kLeft, 0, 20, 0, 0),
false, false},
{"in support for detecting integer overflow, so this", PCONT, PModel(), false, false},
{"naive implementation simply returns random values ", PCONT, PModel(), false, false},
{"if even a moderate sized n is provided. ", PCONT, PModel(), false, false},
}

Definition at line 572 of file paragraphs_test.cc.

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

Definition at line 54 of file linefind.cpp.

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Denominator by which the resolution is divided to give the maximum width, in pixels, allowed for a thin line.

Definition at line 39 of file linefind.cpp.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

Definition at line 49 of file colpartitiongrid.cpp.

◆ kTinyParagraphs

const TextAndModel tesseract::kTinyParagraphs[]
Initial value:
= {
{" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"obvious paragraph text, you might", PCONT, PModel(), false, false},
{"find short exchanges of dialogue ", PCONT, PModel(), false, false},
{"between characters. ", PCONT, PModel(), false, false},
{" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"mark a new paragraph whenever one", PCONT, PModel(), false, false},
{"of the statistics (left, right or", PCONT, PModel(), false, false},
{"center) changes from one text-", PCONT, PModel(), false, false},
{"line to the next. Such an", PCONT, PModel(), false, false},
{"approach would misclassify the", PCONT, PModel(), false, false},
{"tiny paragraphs above as a single", PCONT, PModel(), false, false},
{"paragraph. ", PCONT, PModel(), false, false},
}

Definition at line 346 of file paragraphs_test.cc.

◆ kTrainerIterations

const int tesseract::kTrainerIterations = 600

Definition at line 34 of file lstm_test.h.

◆ kTruthTextLine

const char* tesseract::kTruthTextLine = "Tosimpleburnrunningofgoodslately.\n"

Definition at line 24 of file applybox_test.cc.

◆ kTruthTextWords

const char* tesseract::kTruthTextWords = "To simple burn running of goods lately.\n"

Definition at line 23 of file applybox_test.cc.

◆ kTwoSimpleParagraphs

const TextAndModel tesseract::kTwoSimpleParagraphs[]
Initial value:
= {
{" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"This paragraph starts at the top", PCONT, PModel(), false, false},
{"of the page and takes 3 lines. ", PCONT, PModel(), false, false},
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false},
{"which indicates that the first ", PCONT, PModel(), false, false},
{"paragraph is not a continuation ", PCONT, PModel(), false, false},
{"from a previous page, as it is ", PCONT, PModel(), false, false},
{"indented just like this second ", PCONT, PModel(), false, false},
{"paragraph. ", PCONT, PModel(), false, false},
}

Definition at line 231 of file paragraphs_test.cc.

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

Definition at line 81 of file equationdetect.cpp.

◆ kUniChs

const int tesseract::kUniChs[] = {0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0}

Conversion table for non-latin characters. Maps characters out of the latin set into the latin set. TODO(rays) incorporate this translation into unicharset.

Definition at line 1592 of file baseapi.cpp.

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile[]
inline

Definition at line 27 of file universalambigs.h.

◆ kUnknown

const ParagraphJustification tesseract::kUnknown = JUSTIFICATION_UNKNOWN

Definition at line 29 of file paragraphs_test.cc.

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

Definition at line 111 of file baseapi.cpp.

◆ kUnlvRep3AO

const TextAndModel tesseract::kUnlvRep3AO[]

Definition at line 474 of file paragraphs_test.cc.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

Definition at line 113 of file baseapi.cpp.

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

Definition at line 37 of file tablerecog.cpp.

◆ kVi2nds

const char* tesseract::kVi2nds[] = {"V", "a", "v", "", "l", "o", "", nullptr}

Definition at line 55 of file recodebeam_test.cc.

◆ kVi2ndScores

const float tesseract::kVi2ndScores[] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}

Definition at line 56 of file recodebeam_test.cc.

◆ kViTops

const char* tesseract::kViTops[] = {"v", "ậ", "y", " ", "t", "ộ", "i", nullptr}

Definition at line 53 of file recodebeam_test.cc.

◆ kViTopScores

const float tesseract::kViTopScores[] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.97}

Definition at line 54 of file recodebeam_test.cc.

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

Definition at line 54 of file alignedblob.cpp.

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

Definition at line 56 of file alignedblob.cpp.

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 300

Definition at line 64 of file alignedblob.cpp.

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

Definition at line 58 of file alignedblob.cpp.

◆ kWidthErrorWeighting

const double tesseract::kWidthErrorWeighting = 0.125

Weight of width variance against height and vertical position.

Definition at line 76 of file normmatch.cpp.

◆ kWordrecMaxNumJoinChunks

const int tesseract::kWordrecMaxNumJoinChunks = 4

Definition at line 55 of file pageres.cpp.

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

Definition at line 35 of file linerec.cpp.

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

Definition at line 54 of file network.cpp.

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

Definition at line 55 of file network.cpp.

◆ kZH2nds

const char* tesseract::kZH2nds[] = {"学", "储", "投", "生", "学", "生", "实", nullptr}

Definition at line 50 of file recodebeam_test.cc.

◆ kZH2ndScores

const float tesseract::kZH2ndScores[] = {0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01}

Definition at line 51 of file recodebeam_test.cc.

◆ kZHTops

const char* tesseract::kZHTops[] = {"实", "学", "储", "啬", "投", "学", "生", nullptr}

Definition at line 48 of file recodebeam_test.cc.

◆ kZHTopScores

const float tesseract::kZHTopScores[] = {0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98}

Definition at line 49 of file recodebeam_test.cc.

◆ log_level

int tesseract::log_level = INT_MAX

"Logging level"

Definition at line 36 of file tprintf.cpp.

◆ LogisticTable

const TFloat tesseract::LogisticTable

Definition at line 4102 of file functions.cpp.

◆ MF_SCALE_FACTOR

const float tesseract::MF_SCALE_FACTOR = 0.5f / kBlnXHeight

Definition at line 61 of file mfoutline.h.

◆ MicroFeatureDesc

const FEATURE_DESC_STRUCT tesseract::MicroFeatureDesc
extern

Global Data Definitions and Declarations

◆ of

EndParamDesc tesseract::of

Definition at line 78 of file featdefs.cpp.

◆ OutlineFeatDesc

const FEATURE_DESC_STRUCT tesseract::OutlineFeatDesc
extern

◆ par1

const int tesseract::par1 = 4500 / (approx_dist * approx_dist)

Definition at line 43 of file polyaprx.cpp.

◆ par2

const int tesseract::par2 = 6750 / (approx_dist * approx_dist)

Definition at line 44 of file polyaprx.cpp.

◆ PicoFeatDesc

TESS_API const FEATURE_DESC_STRUCT tesseract::PicoFeatDesc
extern

◆ PicoFeatureLength

TESS_API float tesseract::PicoFeatureLength
extern

Global Data Definitions and Declarations

◆ pitsync_joined_edge

double tesseract::pitsync_joined_edge = 0.75

"Dist inside big blob for chopping"

Definition at line 27 of file pitsync1.cpp.

◆ pitsync_linear_version

int tesseract::pitsync_linear_version = 6

"Use new fast algorithm"

Definition at line 26 of file pitsync1.cpp.

◆ pitsync_offset_freecut_fraction

double tesseract::pitsync_offset_freecut_fraction = 0.25

"Fraction of cut for free cuts"

Definition at line 28 of file pitsync1.cpp.

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[] = {kDoNotReverse, kReverseIfHasRTL, kForceReverse}

Definition at line 36 of file trie.cpp.

◆ TanhTable

const TFloat tesseract::TanhTable

Definition at line 4 of file functions.cpp.

◆ test_data

int tesseract::test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}

Definition at line 23 of file heap_test.cc.

◆ textord_ascheight_mode_fraction

double tesseract::textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 87 of file makerow.cpp.

◆ textord_ascx_ratio_max

double tesseract::textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 90 of file makerow.cpp.

◆ textord_ascx_ratio_min

double tesseract::textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 89 of file makerow.cpp.

◆ textord_balance_factor

double tesseract::textord_balance_factor = 1.0

"Ding rate for unbalanced char cells"

Definition at line 50 of file topitch.cpp.

◆ textord_blockndoc_fixed

bool tesseract::textord_blockndoc_fixed = false

"Attempt whole doc/block fixed pitch"

Definition at line 48 of file topitch.cpp.

◆ textord_blocksall_fixed

bool tesseract::textord_blocksall_fixed = false

"Moan about prop blocks"

Definition at line 26 of file tovars.cpp.

◆ textord_blocksall_prop

bool tesseract::textord_blocksall_prop = false

"Moan about fixed pitch blocks"

Definition at line 27 of file tovars.cpp.

◆ textord_chop_width

double tesseract::textord_chop_width = 1.5

"Max width before chopping"

Definition at line 76 of file makerow.cpp.

◆ textord_chopper_test

bool tesseract::textord_chopper_test = false

"Chopper is being tested."

Definition at line 42 of file wordseg.cpp.

◆ textord_debug_blob

bool tesseract::textord_debug_blob = false

"Print test blob information"

Definition at line 96 of file makerow.cpp.

◆ textord_debug_block

int tesseract::textord_debug_block = 0

"Block to do debug on"

Definition at line 29 of file tovars.cpp.

◆ textord_debug_bugs

int tesseract::textord_debug_bugs = 0

"Turn on output related to bugs in tab finding"

Definition at line 30 of file alignedblob.cpp.

◆ textord_debug_pitch_metric

bool tesseract::textord_debug_pitch_metric = false

"Write full metric stuff"

Definition at line 45 of file topitch.cpp.

◆ textord_debug_pitch_test

bool tesseract::textord_debug_pitch_test = false

"Debug on fixed pitch test"

Definition at line 42 of file topitch.cpp.

◆ textord_debug_printable

bool tesseract::textord_debug_printable = false

"Make debug windows printable"

Definition at line 43 of file alignedblob.cpp.

◆ textord_debug_tabfind

int tesseract::textord_debug_tabfind = 0

"Debug tab finding"

Definition at line 29 of file alignedblob.cpp.

◆ textord_debug_xheights

bool tesseract::textord_debug_xheights = false

"Test xheight algorithms"

Definition at line 59 of file makerow.cpp.

◆ textord_descx_ratio_max

double tesseract::textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 92 of file makerow.cpp.

◆ textord_descx_ratio_min

double tesseract::textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 91 of file makerow.cpp.

◆ textord_dotmatrix_gap

int tesseract::textord_dotmatrix_gap = 3

"Max pixel gap for broken fixed pitch"

Definition at line 28 of file tovars.cpp.

◆ textord_excess_blobsize

double tesseract::textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 81 of file makerow.cpp.

◆ textord_fast_pitch_test

bool tesseract::textord_fast_pitch_test = false

"Do even faster pitch algorithm"

Definition at line 44 of file topitch.cpp.

◆ textord_fix_makerow_bug

bool tesseract::textord_fix_makerow_bug = true

"Prevent multiple baselines"

Definition at line 58 of file makerow.cpp.

◆ textord_fix_xheight_bug

bool tesseract::textord_fix_xheight_bug = true

"Use spline baseline"

Definition at line 57 of file makerow.cpp.

◆ textord_force_make_prop_words

bool tesseract::textord_force_make_prop_words = false

"Force proportional word segmentation on all rows"

Definition at line 41 of file wordseg.cpp.

◆ textord_fp_chop_error

int tesseract::textord_fp_chop_error = 2

"Max allowed bending of chop cells"

Definition at line 34 of file fpchop.cpp.

◆ textord_fpiqr_ratio

double tesseract::textord_fpiqr_ratio = 1.5

"Pitch IQR/Gap IQR threshold"

Definition at line 53 of file tovars.cpp.

◆ textord_heavy_nr

bool tesseract::textord_heavy_nr = false

"Vigorously remove noise"

Definition at line 46 of file makerow.cpp.

◆ textord_linespace_iqrlimit

double tesseract::textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 74 of file makerow.cpp.

◆ textord_lms_line_trials

int tesseract::textord_lms_line_trials = 12

"Number of line fits to do"

Definition at line 94 of file makerow.cpp.

◆ textord_max_pitch_iqr

double tesseract::textord_max_pitch_iqr = 0.20

"Xh fraction noise in pitch"

Definition at line 54 of file tovars.cpp.

◆ textord_min_blob_height_fraction

double tesseract::textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 85 of file makerow.cpp.

◆ textord_min_blobs_in_row

int tesseract::textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 66 of file makerow.cpp.

◆ textord_min_linesize

double tesseract::textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 80 of file makerow.cpp.

◆ textord_min_xheight

int tesseract::textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 70 of file makerow.cpp.

◆ textord_minxh

double tesseract::textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 79 of file makerow.cpp.

◆ textord_new_initial_xheight

bool tesseract::textord_new_initial_xheight = true

"Use test xheight mechanism"

Definition at line 95 of file makerow.cpp.

◆ textord_occupancy_threshold

double tesseract::textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 82 of file makerow.cpp.

◆ textord_old_baselines

bool tesseract::textord_old_baselines = true

"Use old baseline algorithm"

Definition at line 55 of file makerow.cpp.

◆ textord_old_xheight

bool tesseract::textord_old_xheight = false

"Use old xheight algorithm"

Definition at line 56 of file makerow.cpp.

◆ textord_oldbl_debug

bool tesseract::textord_oldbl_debug = false

"Debug old baseline generation"

Definition at line 43 of file oldbasel.cpp.

◆ textord_parallel_baselines

bool tesseract::textord_parallel_baselines = true

"Force parallel baselines"

Definition at line 53 of file makerow.cpp.

◆ textord_pitch_range

int tesseract::textord_pitch_range = 2

"Max range test on pitch"

Definition at line 30 of file tovars.cpp.

◆ textord_pitch_rowsimilarity

double tesseract::textord_pitch_rowsimilarity = 0.08

"Fraction of xheight for sameness"

Definition at line 44 of file tovars.cpp.

◆ textord_pitch_scalebigwords

bool tesseract::textord_pitch_scalebigwords = false

"Scale scores on big words"

Definition at line 45 of file tovars.cpp.

◆ textord_projection_scale

double tesseract::textord_projection_scale = 0.200

"Ding rate for mid-cuts"

Definition at line 49 of file topitch.cpp.

◆ textord_restore_underlines

bool tesseract::textord_restore_underlines = true

"Chop underlines & put back"

Definition at line 24 of file underlin.cpp.

◆ textord_show_expanded_rows

bool tesseract::textord_show_expanded_rows = false

"Display rows after expanding"

Definition at line 49 of file makerow.cpp.

◆ textord_show_final_blobs

bool tesseract::textord_show_final_blobs = false

"Display blob bounds after pre-ass"

Definition at line 51 of file makerow.cpp.

◆ textord_show_final_rows

bool tesseract::textord_show_final_rows = false

"Display rows after final fitting"

Definition at line 50 of file makerow.cpp.

◆ textord_show_fixed_cuts

bool tesseract::textord_show_fixed_cuts = false

"Draw fixed pitch cell boundaries"

Definition at line 35 of file drawtord.cpp.

◆ textord_show_initial_rows

bool tesseract::textord_show_initial_rows = false

"Display row accumulation"

Definition at line 47 of file makerow.cpp.

◆ textord_show_initial_words

bool tesseract::textord_show_initial_words = false

"Display separate words"

Definition at line 25 of file tovars.cpp.

◆ textord_show_page_cuts

bool tesseract::textord_show_page_cuts = false

"Draw page-level cuts"

Definition at line 47 of file topitch.cpp.

◆ textord_show_parallel_rows

bool tesseract::textord_show_parallel_rows = false

"Display page correlated rows"

Definition at line 48 of file makerow.cpp.

◆ textord_show_row_cuts

bool tesseract::textord_show_row_cuts = false

"Draw row-level cuts"

Definition at line 46 of file topitch.cpp.

◆ textord_skew_ile

double tesseract::textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 72 of file makerow.cpp.

◆ textord_skew_lag

double tesseract::textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 73 of file makerow.cpp.

◆ textord_spacesize_ratioprop

double tesseract::textord_spacesize_ratioprop = 2.0

"Min ratio space/nonspace"

Definition at line 52 of file tovars.cpp.

◆ textord_spline_medianwin

int tesseract::textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 68 of file makerow.cpp.

◆ textord_spline_minblobs

int tesseract::textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 67 of file makerow.cpp.

◆ textord_spline_shift_fraction

double tesseract::textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 71 of file makerow.cpp.

◆ textord_straight_baselines

bool tesseract::textord_straight_baselines = false

"Force straight baselines"

Definition at line 54 of file makerow.cpp.

◆ textord_tabvector_vertical_box_ratio

double tesseract::textord_tabvector_vertical_box_ratio = 0.5

"Fraction of box matches required to declare a line vertical"

Definition at line 60 of file tabvector.cpp.

◆ textord_tabvector_vertical_gap_fraction

double tesseract::textord_tabvector_vertical_gap_fraction = 0.5

"max fraction of mean blob width allowed for vertical gaps in vertical text"

Definition at line 57 of file tabvector.cpp.

◆ textord_test_landscape

bool tesseract::textord_test_landscape = false

"Tests refer to land/port"

Definition at line 52 of file makerow.cpp.

◆ textord_test_x

int tesseract::textord_test_x = -INT32_MAX

"coord of test pt"

Definition at line 64 of file makerow.cpp.

◆ textord_test_y

int tesseract::textord_test_y = -INT32_MAX

"coord of test pt"

Definition at line 65 of file makerow.cpp.

◆ textord_underline_offset

double tesseract::textord_underline_offset = 0.1

"Fraction of x to ignore"

Definition at line 23 of file underlin.cpp.

◆ textord_underline_threshold

double tesseract::textord_underline_threshold = 0.5

"Fraction of width occupied"

Definition at line 32 of file blkocc.cpp.

◆ textord_underline_width

double tesseract::textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 83 of file makerow.cpp.

◆ textord_width_limit

double tesseract::textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 75 of file makerow.cpp.

◆ textord_words_def_fixed

double tesseract::textord_words_def_fixed = 0.016

"Threshold for definite fixed"

Definition at line 41 of file tovars.cpp.

◆ textord_words_def_prop

double tesseract::textord_words_def_prop = 0.090

"Threshold for definite prop"

Definition at line 42 of file tovars.cpp.

◆ textord_words_default_maxspace

double tesseract::textord_words_default_maxspace = 3.5

"Max believable third space"

Definition at line 33 of file tovars.cpp.

◆ textord_words_default_minspace

double tesseract::textord_words_default_minspace = 0.6

"Fraction of xheight"

Definition at line 34 of file tovars.cpp.

◆ textord_words_default_nonspace

double tesseract::textord_words_default_nonspace = 0.2

"Fraction of xheight"

Definition at line 36 of file tovars.cpp.

◆ textord_words_definite_spread

double tesseract::textord_words_definite_spread = 0.30

"Non-fuzzy spacing region"

Definition at line 51 of file tovars.cpp.

◆ textord_words_initial_lower

double tesseract::textord_words_initial_lower = 0.25

"Max initial cluster size"

Definition at line 37 of file tovars.cpp.

◆ textord_words_initial_upper

double tesseract::textord_words_initial_upper = 0.15

"Min initial cluster spacing"

Definition at line 38 of file tovars.cpp.

◆ textord_words_maxspace

double tesseract::textord_words_maxspace = 4.0

"Multiple of xheight"

Definition at line 32 of file tovars.cpp.

◆ textord_words_min_minspace

double tesseract::textord_words_min_minspace = 0.3

"Fraction of xheight"

Definition at line 35 of file tovars.cpp.

◆ textord_words_minlarge

double tesseract::textord_words_minlarge = 0.75

"Fraction of valid gaps needed"

Definition at line 39 of file tovars.cpp.

◆ textord_words_pitchsd_threshold

double tesseract::textord_words_pitchsd_threshold = 0.040

"Pitch sync threshold"

Definition at line 40 of file tovars.cpp.

◆ textord_words_veto_power

int tesseract::textord_words_veto_power = 5

"Rows required to outvote a veto"

Definition at line 43 of file tovars.cpp.

◆ textord_wordstats_smooth_factor

double tesseract::textord_wordstats_smooth_factor = 0.05

"Smoothing gap stats"

Definition at line 31 of file tovars.cpp.

◆ textord_xheight_error_margin

double tesseract::textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 93 of file makerow.cpp.

◆ textord_xheight_mode_fraction

double tesseract::textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 86 of file makerow.cpp.

◆ to_debug

FILE* tesseract::to_debug
extern

◆ to_win

ScrollView * tesseract::to_win = nullptr

Definition at line 37 of file drawtord.cpp.

◆ wordrec_blob_pause

bool tesseract::wordrec_blob_pause = 0

"Blob pause"

Definition at line 43 of file render.cpp.

◆ wordrec_display_all_blobs

bool tesseract::wordrec_display_all_blobs = 0

"Display Blobs"

Definition at line 41 of file render.cpp.

◆ wordrec_display_splits

bool tesseract::wordrec_display_splits = 0

"Display splits"

Definition at line 41 of file split.cpp.

◆ words_default_fixed_limit

double tesseract::words_default_fixed_limit = 0.6

"Allowed size variance"

Definition at line 50 of file tovars.cpp.

◆ words_default_fixed_space

double tesseract::words_default_fixed_space = 0.75

"Fraction of xheight"

Definition at line 49 of file tovars.cpp.

◆ words_default_prop_nonspace

double tesseract::words_default_prop_nonspace = 0.25

"Fraction of xheight"

Definition at line 48 of file tovars.cpp.

◆ words_initial_lower

double tesseract::words_initial_lower = 0.5

"Max initial cluster size"

Definition at line 46 of file tovars.cpp.

◆ words_initial_upper

double tesseract::words_initial_upper = 0.15

"Min initial cluster spacing"

Definition at line 47 of file tovars.cpp.