tesseract v5.3.3.20231005
tesseract::Wordrec Class Reference

#include <wordrec.h>

Inheritance diagram for tesseract::Wordrec:
tesseract::Classify tesseract::CCStruct tesseract::CCUtil tesseract::Tesseract

Public Member Functions

 BOOL_VAR_H (merge_fragments_in_matrix)
 
 BOOL_VAR_H (wordrec_enable_assoc)
 
 BOOL_VAR_H (force_word_assoc)
 
 INT_VAR_H (repair_unchopped_blobs)
 
 double_VAR_H (tessedit_certainty_threshold)
 
 INT_VAR_H (chop_debug)
 
 BOOL_VAR_H (chop_enable)
 
 BOOL_VAR_H (chop_vertical_creep)
 
 INT_VAR_H (chop_split_length)
 
 INT_VAR_H (chop_same_distance)
 
 INT_VAR_H (chop_min_outline_points)
 
 INT_VAR_H (chop_seam_pile_size)
 
 BOOL_VAR_H (chop_new_seam_pile)
 
 INT_VAR_H (chop_inside_angle)
 
 INT_VAR_H (chop_min_outline_area)
 
 double_VAR_H (chop_split_dist_knob)
 
 double_VAR_H (chop_overlap_knob)
 
 double_VAR_H (chop_center_knob)
 
 INT_VAR_H (chop_centered_maxwidth)
 
 double_VAR_H (chop_sharpness_knob)
 
 double_VAR_H (chop_width_change_knob)
 
 double_VAR_H (chop_ok_split)
 
 double_VAR_H (chop_good_split)
 
 INT_VAR_H (chop_x_y_weight)
 
 BOOL_VAR_H (assume_fixed_pitch_char_segment)
 
 INT_VAR_H (wordrec_debug_level)
 
 INT_VAR_H (wordrec_max_join_chunks)
 
 BOOL_VAR_H (wordrec_skip_no_truth_words)
 
 BOOL_VAR_H (wordrec_debug_blamer)
 
 BOOL_VAR_H (wordrec_run_blamer)
 
 INT_VAR_H (segsearch_debug_level)
 
 INT_VAR_H (segsearch_max_pain_points)
 
 INT_VAR_H (segsearch_max_futile_classifications)
 
 double_VAR_H (segsearch_max_char_wh_ratio)
 
 BOOL_VAR_H (save_alt_choices)
 
 Wordrec ()
 
 ~Wordrec () override=default
 
void SaveAltChoices (const LIST &best_choices, WERD_RES *word)
 
void FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams)
 
void choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
 
void combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
 
SEAM * pick_good_seam (TBLOB *blob)
 
void try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
void try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
PRIORITY grade_split_length (SPLIT *split)
 
PRIORITY grade_sharpness (SPLIT *split)
 
bool near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
 
virtual BLOB_CHOICE_LIST * classify_piece (const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
 
program_editup

Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models.

void program_editup (const std::string &textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
 
cc_recog

Recognize a word.

void cc_recog (WERD_RES *word)
 
program_editdown

This function holds any necessary post processing for the Wise Owl program.

void program_editdown (int32_t elasped_time)
 
set_pass1

Get ready to do some pass 1 stuff.

void set_pass1 ()
 
set_pass2

Get ready to do some pass 2 stuff.

void set_pass2 ()
 
end_recog

Cleanup and exit the recog program.

int end_recog ()
 
call_matcher

Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification.

BLOB_CHOICE_LIST * call_matcher (TBLOB *blob)
 
dict_word()

Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary.

int dict_word (const WERD_CHOICE &word)
 
classify_blob

Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.

Parameters
blob: Current blob
string: The string to display in ScrollView
color: The colour to use when displayed with ScrollView
BLOB_CHOICE_LIST * classify_blob (TBLOB *blob, const char *string, ScrollView::Color color, BlamerBundle *blamer_bundle)
 
point_priority

Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT.

PRIORITY point_priority (EDGEPT *point)
 
add_point_to_list

Add an edge point to a POINT_GROUP containing a list of other points.

void add_point_to_list (PointHeap *point_heap, EDGEPT *point)
 
bool is_inside_angle (EDGEPT *pt)
 
angle_change

Return the change in angle (degrees) of the line segments between points one and two, and two and three.

int angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
 
pick_close_point

Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point.

EDGEPT * pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
 
prioritize_points

Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order.

void prioritize_points (TESSLINE *outline, PointHeap *points)
 
new_min_point

Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to nullptr.

void new_min_point (EDGEPT *local_min, PointHeap *points)
 
new_max_point

Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to nullptr.

void new_max_point (EDGEPT *local_max, PointHeap *points)
 
vertical_projection_point

For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list.

void vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
 
attempt_blob_chop

Try to split this blob after this one. Check to make sure that it was successful.

SEAM * attempt_blob_chop (TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
 
SEAM * chop_numbered_blob (TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
 
SEAM * chop_overlapping_blob (const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
 
improve_one_blob

Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition.

SEAM * improve_one_blob (const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
 
chop_one_blob

Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing chopper.

SEAM * chop_one_blob (const std::vector< TBOX > &boxes, const std::vector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, unsigned *blob_number)
 
chop_word_main

Classify the blobs in this word and permute the results. Find the worst blob in the word and chop it up. Continue this process until a good answer has been found or all the blobs have been chopped up enough. The results are returned in the WERD_RES.

void chop_word_main (WERD_RES *word)
 
improve_by_chopping

Repeatedly chops the worst blob, classifying the new blobs, fixing up all the data, and incrementally runs the segmentation search until a good word is found, or no more chops can be found.

void improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
 
int select_blob_to_split (const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
 
int select_blob_to_split_from_fixpt (DANGERR *fixpt)
 
- Public Member Functions inherited from tesseract::Classify
 Classify ()
 
 ~Classify () override
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
int GetFontinfoId (ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, std::vector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
 
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates (TFile *File)
 
void ConvertProto (PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class)
 
INT_TEMPLATES_STRUCT * CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS_STRUCT *Class, ADAPT_TEMPLATES_STRUCT *Templates)
 
void AmbigClassifier (const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES_STRUCT *templates, ADAPT_CLASS_STRUCT **classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES_STRUCT *templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS_STRUCT **classes, int debug, int matcher_multiplier, const TBOX &blob_box, const std::vector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS_STRUCT **classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS_STRUCT *IClass, ADAPT_CLASS_STRUCT *Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
std::string ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, std::vector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES_STRUCT *adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES_STRUCT *templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG_STRUCT *config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES_STRUCT * ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES_STRUCT *Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const std::string &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const char *filename)
 
 BOOL_VAR_H (allow_blob_division)
 
 BOOL_VAR_H (prioritize_division)
 
 BOOL_VAR_H (classify_enable_learning)
 
 INT_VAR_H (classify_debug_level)
 
 INT_VAR_H (classify_norm_method)
 
 double_VAR_H (classify_char_norm_range)
 
 double_VAR_H (classify_max_rating_ratio)
 
 double_VAR_H (classify_max_certainty_margin)
 
 BOOL_VAR_H (tess_cn_matching)
 
 BOOL_VAR_H (tess_bn_matching)
 
 BOOL_VAR_H (classify_enable_adaptive_matcher)
 
 BOOL_VAR_H (classify_use_pre_adapted_templates)
 
 BOOL_VAR_H (classify_save_adapted_templates)
 
 BOOL_VAR_H (classify_enable_adaptive_debugger)
 
 BOOL_VAR_H (classify_nonlinear_norm)
 
 INT_VAR_H (matcher_debug_level)
 
 INT_VAR_H (matcher_debug_flags)
 
 INT_VAR_H (classify_learning_debug_level)
 
 double_VAR_H (matcher_good_threshold)
 
 double_VAR_H (matcher_reliable_adaptive_result)
 
 double_VAR_H (matcher_perfect_threshold)
 
 double_VAR_H (matcher_bad_match_pad)
 
 double_VAR_H (matcher_rating_margin)
 
 double_VAR_H (matcher_avg_noise_size)
 
 INT_VAR_H (matcher_permanent_classes_min)
 
 INT_VAR_H (matcher_min_examples_for_prototyping)
 
 INT_VAR_H (matcher_sufficient_examples_for_prototyping)
 
 double_VAR_H (matcher_clustering_max_angle_delta)
 
 double_VAR_H (classify_misfit_junk_penalty)
 
 double_VAR_H (rating_scale)
 
 double_VAR_H (tessedit_class_miss_scale)
 
 double_VAR_H (classify_adapted_pruning_factor)
 
 double_VAR_H (classify_adapted_pruning_threshold)
 
 INT_VAR_H (classify_adapt_proto_threshold)
 
 INT_VAR_H (classify_adapt_feature_threshold)
 
 BOOL_VAR_H (disable_character_fragments)
 
 double_VAR_H (classify_character_fragments_garbage_certainty_threshold)
 
 BOOL_VAR_H (classify_debug_character_fragments)
 
 BOOL_VAR_H (matcher_debug_separate_windows)
 
 STRING_VAR_H (classify_learn_debug_str)
 
 INT_VAR_H (classify_class_pruner_threshold)
 
 INT_VAR_H (classify_class_pruner_multiplier)
 
 INT_VAR_H (classify_cp_cutoff_strength)
 
 INT_VAR_H (classify_integer_matcher_multiplier)
 
 BOOL_VAR_H (classify_bln_numeric_mode)
 
 double_VAR_H (speckle_large_max_size)
 
 double_VAR_H (speckle_rating_penalty)
 
float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const std::string &argv0, const std::string &basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 
 INT_VAR_H (ambigs_debug_level)
 
 BOOL_VAR_H (use_ambigs_for_adaption)
 

Public Attributes

std::unique_ptr< LanguageModel > language_model_
 
PRIORITY pass2_ok_split
 
WERD_CHOICE * prev_word_best_choice_
 
void(Wordrec::* fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
- Public Attributes inherited from tesseract::Classify
INT_TEMPLATES_STRUCT * PreTrainedTemplates = nullptr
 
ADAPT_TEMPLATES_STRUCT * AdaptedTemplates = nullptr
 
ADAPT_TEMPLATES_STRUCT * BackupAdaptedTemplates = nullptr
 
BIT_VECTOR AllProtosOn = nullptr
 
BIT_VECTOR AllConfigsOn = nullptr
 
BIT_VECTOR AllConfigsOff = nullptr
 
BIT_VECTOR TempProtoMask = nullptr
 
NORM_PROTOS * NormProtos = nullptr
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
bool EnableLearning = true
 
- Public Attributes inherited from tesseract::CCUtil
std::string datadir
 
std::string imagebasename
 
std::string lang
 
std::string language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
std::string imagefile
 
std::string directory
 

Protected Member Functions

bool SegSearchDone (int num_futile_classifications)
 
void UpdateSegSearchNodes (float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
 
void ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
 
void InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, std::string &blamer_debug)
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::Classify
static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, std::vector< INT_FEATURE_STRUCT > *bl_features, std::vector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, std::vector< int > *outline_cn_counts)
 
- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 
- Protected Attributes inherited from tesseract::Classify
IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_ = nullptr
 

Detailed Description

Definition at line 186 of file wordrec.h.

Constructor & Destructor Documentation

◆ Wordrec()

tesseract::Wordrec::Wordrec ( )

Definition at line 46 of file wordrec.cpp.

47 : // control parameters
48 BOOL_MEMBER(merge_fragments_in_matrix, true,
49 "Merge the fragments in the ratings matrix and delete them"
50 " after merging",
51 params())
52 , BOOL_MEMBER(wordrec_enable_assoc, true, "Associator Enable", params())
53 , BOOL_MEMBER(force_word_assoc, false,
54 "force associator to run regardless of what enable_assoc is."
55 " This is used for CJK where component grouping is necessary.",
56 CCUtil::params())
57 , INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", params())
58 , double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", params())
59 , INT_MEMBER(chop_debug, 0, "Chop debug", params())
60 , BOOL_MEMBER(chop_enable, 1, "Chop enable", params())
61 , BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params())
62 , INT_MEMBER(chop_split_length, 10000, "Split Length", params())
63 , INT_MEMBER(chop_same_distance, 2, "Same distance", params())
64 , INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", params())
65 , INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", params())
66 , BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params())
67 , INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params())
68 , INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params())
69 , double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", params())
70 , double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", params())
71 , double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", params())
72 , INT_MEMBER(chop_centered_maxwidth, 90,
73 "Width of (smaller) chopped blobs "
74 "above which we don't care that a chop is not near the center.",
75 params())
76 , double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", params())
77 , double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", params())
78 , double_MEMBER(chop_ok_split, 100.0, "OK split limit", params())
79 , double_MEMBER(chop_good_split, 50.0, "Good split limit", params())
80 , INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params())
81 , BOOL_MEMBER(assume_fixed_pitch_char_segment, false,
82 "include fixed-pitch heuristics in char segmentation", params())
83 , INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params())
84 , INT_MEMBER(wordrec_max_join_chunks, 4, "Max number of broken pieces to associate", params())
85 , BOOL_MEMBER(wordrec_skip_no_truth_words, false,
86 "Only run OCR for words that had truth recorded in BlamerBundle", params())
87 , BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", params())
88 , BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", params())
89 , INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params())
90 , INT_MEMBER(segsearch_max_pain_points, 2000,
91 "Maximum number of pain points stored in the queue", params())
92 , INT_MEMBER(segsearch_max_futile_classifications, 20,
93 "Maximum number of pain point classifications per chunk that"
94 " did not result in finding a better word choice.",
95 params())
96 , double_MEMBER(segsearch_max_char_wh_ratio, 2.0, "Maximum character width-to-height ratio",
97 params())
98 , BOOL_MEMBER(save_alt_choices, true,
99 "Save alternative paths found during chopping"
100 " and segmentation search",
101 params())
102 , pass2_ok_split(0.0f) {
103 prev_word_best_choice_ = nullptr;
104 language_model_ = std::make_unique<LanguageModel>(&get_fontinfo_table(), &(getDict()));
105 fill_lattice_ = nullptr;
106}
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:369
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:375
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:371
ParamsVectors * params()
Definition: ccutil.h:53
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324
virtual Dict & getDict()
Definition: classify.h:98
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:390
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:387
PRIORITY pass2_ok_split
Definition: wordrec.h:383
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:382

◆ ~Wordrec()

tesseract::Wordrec::~Wordrec ( )
override default

Member Function Documentation

◆ add_point_to_list()

void tesseract::Wordrec::add_point_to_list ( PointHeap * point_heap,
EDGEPT * point 
)

Definition at line 73 of file chop.cpp.

73 {
74 if (point_heap->size() < MAX_NUM_POINTS - 2) {
75 PointPair pair(point_priority(point), point);
76 point_heap->Push(&pair);
77 }
78
79#ifndef GRAPHICS_DISABLED
80 if (chop_debug > 2) {
81 mark_outline(point);
82 }
83#endif
84}
#define MAX_NUM_POINTS
Definition: chop.h:28
void mark_outline(EDGEPT *edgept)
Definition: plotedges.cpp:83
KDPairInc< float, EDGEPT * > PointPair
Definition: chop.h:31
PRIORITY point_priority(EDGEPT *point)
Definition: chop.cpp:64

◆ add_seam_to_queue()

void tesseract::Wordrec::add_seam_to_queue ( float  new_priority,
SEAM * new_seam,
SeamQueue * seams 
)

Definition at line 64 of file findseam.cpp.

64 {
65 if (new_seam == nullptr) {
66 return;
67 }
68 if (chop_debug) {
69 tprintf("Pushing new seam with priority %g :", new_priority);
70 new_seam->Print("seam: ");
71 }
72 if (seams->size() >= MAX_NUM_SEAMS) {
73 SeamPair old_pair(0, nullptr);
74 if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75 if (chop_debug) {
76 tprintf("Old seam staying with priority %g\n", old_pair.key());
77 }
78 delete new_seam;
79 seams->Push(&old_pair);
80 return;
81 } else if (chop_debug) {
82 tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority,
83 old_pair.key());
84 }
85 }
86 SeamPair new_pair(new_priority, new_seam);
87 seams->Push(&new_pair);
88}
#define MAX_NUM_SEAMS
Definition: findseam.cpp:47
KDPtrPairInc< float, SEAM > SeamPair
Definition: findseam.h:30
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ angle_change()

int tesseract::Wordrec::angle_change ( EDGEPT * point1,
EDGEPT * point2,
EDGEPT * point3 
)

Definition at line 98 of file chop.cpp.

98 {
99 VECTOR vector1;
100 VECTOR vector2;
101
102 int angle;
103
104 /* Compute angle */
105 vector1.x = point2->pos.x - point1->pos.x;
106 vector1.y = point2->pos.y - point1->pos.y;
107 vector2.x = point3->pos.x - point2->pos.x;
108 vector2.y = point3->pos.y - point2->pos.y;
109 /* Use cross product */
110 float length = std::sqrt(static_cast<float>(vector1.length()) * vector2.length());
111 if (static_cast<int>(length) == 0) {
112 return (0);
113 }
114 angle = static_cast<int>(floor(std::asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5));
115
116 /* Use dot product */
117 if (vector1.dot(vector2) < 0) {
118 angle = 180 - angle;
119 }
120 /* Adjust angle */
121 if (angle > 180) {
122 angle -= 360;
123 }
124 if (angle <= -180) {
125 angle += 360;
126 }
127 return (angle);
128}
TPOINT VECTOR
Definition: blobs.h:93
TDimension x
Definition: blobs.h:89

◆ attempt_blob_chop()

SEAM * tesseract::Wordrec::attempt_blob_chop ( TWERD * word,
TBLOB * blob,
int32_t  blob_number,
bool  italic_blob,
const std::vector< SEAM * > &  seams 
)

Definition at line 207 of file chopper.cpp.

208 {
209 if (repair_unchopped_blobs) {
210 preserve_outline_tree(blob->outlines);
211 }
212 TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
213 // Insert it into the word.
214 word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
215
216 SEAM *seam = nullptr;
217 if (prioritize_division) {
218 TPOINT location;
219 if (divisible_blob(blob, italic_blob, &location)) {
220 seam = new SEAM(0.0f, location);
221 }
222 }
223 if (seam == nullptr) {
224 seam = pick_good_seam(blob);
225 }
226 if (chop_debug) {
227 if (seam != nullptr) {
228 seam->Print("Good seam picked=");
229 } else {
230 tprintf("\n** no seam picked *** \n");
231 }
232 }
233 if (seam) {
234 seam->ApplySeam(italic_blob, blob, other_blob);
235 }
236
237 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
238 if (seam == nullptr) {
239 if (repair_unchopped_blobs) {
240 restore_outline_tree(blob->outlines);
241 }
242 if (allow_blob_division && !prioritize_division) {
243 // If the blob can simply be divided into outlines, then do that.
244 TPOINT location;
245 if (divisible_blob(blob, italic_blob, &location)) {
246 other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
247 word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
248 seam = new SEAM(0.0f, location);
249 seam->ApplySeam(italic_blob, blob, other_blob);
250 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
251 }
252 }
253 }
254 if (seam != nullptr) {
255 // Make sure this seam doesn't get chopped again.
256 seam->Finalize();
257 }
258 return seam;
259}
@ TPOINT
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:923
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:342
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:214

◆ BOOL_VAR_H() [1/11]

tesseract::Wordrec::BOOL_VAR_H ( assume_fixed_pitch_char_segment  )

◆ BOOL_VAR_H() [2/11]

tesseract::Wordrec::BOOL_VAR_H ( chop_enable  )

◆ BOOL_VAR_H() [3/11]

tesseract::Wordrec::BOOL_VAR_H ( chop_new_seam_pile  )

◆ BOOL_VAR_H() [4/11]

tesseract::Wordrec::BOOL_VAR_H ( chop_vertical_creep  )

◆ BOOL_VAR_H() [5/11]

tesseract::Wordrec::BOOL_VAR_H ( force_word_assoc  )

◆ BOOL_VAR_H() [6/11]

tesseract::Wordrec::BOOL_VAR_H ( merge_fragments_in_matrix  )

◆ BOOL_VAR_H() [7/11]

tesseract::Wordrec::BOOL_VAR_H ( save_alt_choices  )

◆ BOOL_VAR_H() [8/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_debug_blamer  )

◆ BOOL_VAR_H() [9/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_enable_assoc  )

◆ BOOL_VAR_H() [10/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_run_blamer  )

◆ BOOL_VAR_H() [11/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_skip_no_truth_words  )

◆ call_matcher()

BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher ( TBLOB * blob)

Definition at line 132 of file tface.cpp.

132 {
133 // Rotate the blob for classification if necessary.
134 TBLOB *rotated_blob = tessblob->ClassifyNormalizeIfNeeded();
135 if (rotated_blob == nullptr) {
136 rotated_blob = tessblob;
137 }
138 auto *ratings = new BLOB_CHOICE_LIST(); // matcher result
139 AdaptiveClassifier(rotated_blob, ratings);
140 if (rotated_blob != tessblob) {
141 delete rotated_blob;
142 }
143 return ratings;
144}
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:202

◆ CallFillLattice()

void tesseract::Wordrec::CallFillLattice ( const MATRIX & ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET & unicharset,
BlamerBundle * blamer_bundle 
)
inline

Definition at line 240 of file wordrec.h.

241 {
242 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
243 }
UNICHARSET unicharset
Definition: ccutil.h:61

◆ cc_recog()

void tesseract::Wordrec::cc_recog ( WERD_RES * word)

Definition at line 119 of file tface.cpp.

119 {
120 getDict().reset_hyphen_vars(word->word->flag(W_EOL));
121 chop_word_main(word);
122 word->DebugWordChoices(getDict().stopper_debug_level >= 1, getDict().word_to_debug.c_str());
123 ASSERT_HOST(word->StatesAllValid());
124}
#define ASSERT_HOST(x)
Definition: errcode.h:54
@ W_EOL
end of line
Definition: werd.h:35
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:27
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:385

◆ choose_best_seam()

void tesseract::Wordrec::choose_best_seam ( SeamQueue * seam_queue,
const SPLIT * split,
PRIORITY  priority,
SEAM **  seam_result,
TBLOB * blob,
SeamPile * seam_pile 
)

Definition at line 103 of file findseam.cpp.

104 {
105 SEAM *seam;
106 float my_priority;
107 /* Add seam of split */
108 my_priority = priority;
109 if (split != nullptr) {
110 TPOINT split_point = split->point1->pos;
111 split_point += split->point2->pos;
112 split_point /= 2;
113 seam = new SEAM(my_priority, split_point, *split);
114 if (chop_debug > 1) {
115 seam->Print("Partial priority ");
116 }
117 add_seam_to_queue(my_priority, seam, seam_queue);
118
119 if (my_priority > chop_good_split) {
120 return;
121 }
122 }
123
124 TBOX bbox = blob->bounding_box();
125 /* Queue loop */
126 while (!seam_queue->empty()) {
127 SeamPair seam_pair;
128 seam_queue->Pop(&seam_pair);
129 seam = seam_pair.extract_data();
130 /* Set full priority */
131 my_priority =
132 seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
133 chop_center_knob, chop_width_change_knob);
134 if (chop_debug) {
135 char str[80];
136 snprintf(str, sizeof(str), "Full my_priority %0.0f, ", my_priority);
137 seam->Print(str);
138 }
139
140 if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
141 my_priority < chop_ok_split) {
142 /* No crossing */
143 if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) {
144 delete *seam_result;
145 *seam_result = new SEAM(*seam);
146 (*seam_result)->set_priority(my_priority);
147 } else {
148 delete seam;
149 seam = nullptr;
150 my_priority = BAD_PRIORITY;
151 }
152 }
153
154 if (my_priority < chop_good_split) {
155 delete seam;
156 return; /* Made good answer */
157 }
158
159 if (seam) {
160 /* Combine with others */
161 if (seam_pile->size() < chop_seam_pile_size) {
162 combine_seam(*seam_pile, seam, seam_queue);
163 SeamDecPair pair(seam_pair.key(), seam);
164 seam_pile->Push(&pair);
165 } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size &&
166 seam_pile->PeekTop().key() > seam_pair.key()) {
167 combine_seam(*seam_pile, seam, seam_queue);
168 SeamDecPair pair;
169 seam_pile->Pop(&pair); // pop the worst.
170 // Replace the seam in pair (deleting the old one) with
171 // the new seam and score, then push back into the heap.
172 pair.set_key(seam_pair.key());
173 pair.set_data(seam);
174 seam_pile->Push(&pair);
175 } else {
176 delete seam;
177 }
178 }
179
180 my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key();
181 if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) {
182 return;
183 }
184 }
185}
#define NO_FULL_PRIORITY
Definition: findseam.cpp:49
#define BAD_PRIORITY
Definition: findseam.cpp:51
@ TBOX
KDPtrPairDec< float, SEAM > SeamDecPair
Definition: findseam.h:33
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:43
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:64
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:194

◆ chop_numbered_blob()

SEAM * tesseract::Wordrec::chop_numbered_blob ( TWERD * word,
int32_t  blob_number,
bool  italic_blob,
const std::vector< SEAM * > &  seams 
)

Definition at line 261 of file chopper.cpp.

262 {
263 return attempt_blob_chop(word, word->blobs[blob_number], blob_number, italic_blob, seams);
264}
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:207

◆ chop_one_blob()

SEAM * tesseract::Wordrec::chop_one_blob ( const std::vector< TBOX > &  boxes,
const std::vector< BLOB_CHOICE * > &  blob_choices,
WERD_RES * word_res,
unsigned *  blob_number 
)

Definition at line 367 of file chopper.cpp.

369 {
370 if (prioritize_division) {
371 return chop_overlapping_blob(boxes, true, word_res, blob_number);
372 } else {
373 return improve_one_blob(blob_choices, nullptr, false, true, word_res, blob_number);
374 }
375}
SEAM * improve_one_blob(const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
Definition: chopper.cpp:320
SEAM * chop_overlapping_blob(const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
Definition: chopper.cpp:266

◆ chop_overlapping_blob()

SEAM * tesseract::Wordrec::chop_overlapping_blob ( const std::vector< TBOX > &  boxes,
bool  italic_blob,
WERD_RES * word_res,
unsigned *  blob_number 
)

Definition at line 266 of file chopper.cpp.

267 {
268 TWERD *word = word_res->chopped_word;
269 for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
270 TBLOB *blob = word->blobs[*blob_number];
271 TPOINT topleft, botright;
272 topleft.x = blob->bounding_box().left();
273 topleft.y = blob->bounding_box().top();
274 botright.x = blob->bounding_box().right();
275 botright.y = blob->bounding_box().bottom();
276
277 TPOINT original_topleft, original_botright;
278 word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
279 word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
280
281 TBOX original_box =
282 TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y);
283
284 bool almost_equal_box = false;
285 int num_overlap = 0;
286 for (auto &&boxe : boxes) {
287 if (original_box.overlap_fraction(boxe) > 0.125) {
288 num_overlap++;
289 }
290 if (original_box.almost_equal(boxe, 3)) {
291 almost_equal_box = true;
292 }
293 }
294
295 TPOINT location;
296 if (divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) {
297 SEAM *seam = attempt_blob_chop(word, blob, *blob_number, italic_blob, word_res->seam_array);
298 if (seam != nullptr) {
299 return seam;
300 }
301 }
302 }
303
304 *blob_number = UINT_MAX;
305 return nullptr;
306}

◆ chop_word_main()

void tesseract::Wordrec::chop_word_main ( WERD_RES * word)

Definition at line 385 of file chopper.cpp.

385 {
386 int num_blobs = word->chopped_word->NumBlobs();
387 if (word->ratings == nullptr) {
388 word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
389 }
390 if (word->ratings->get(0, 0) == nullptr) {
391 // Run initial classification.
392 for (int b = 0; b < num_blobs; ++b) {
393 BLOB_CHOICE_LIST *choices = classify_piece(
394 word->seam_array, b, b, "Initial:", word->chopped_word, word->blamer_bundle);
395 word->ratings->put(b, b, choices);
396 }
397 } else {
398 // Blobs have been pre-classified. Set matrix cell for all blob choices
399 for (int col = 0; col < word->ratings->dimension(); ++col) {
400 for (int row = col;
401 row < word->ratings->dimension() && row < col + word->ratings->bandwidth(); ++row) {
402 BLOB_CHOICE_LIST *choices = word->ratings->get(col, row);
403 if (choices != nullptr) {
404 BLOB_CHOICE_IT bc_it(choices);
405 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
406 bc_it.data()->set_matrix_cell(col, row);
407 }
408 }
409 }
410 }
411 }
412
413 // Run Segmentation Search.
414 BestChoiceBundle best_choice_bundle(word->ratings->dimension());
415 SegSearch(word, &best_choice_bundle, word->blamer_bundle);
416
417 if (word->best_choice == nullptr) {
418 // SegSearch found no valid paths, so just use the leading diagonal.
419 word->FakeWordFromRatings(TOP_CHOICE_PERM);
420 }
421 word->RebuildBestState();
422 // If we finished without a hyphen at the end of the word, let the next word
423 // be found in the dictionary.
424 if (word->word->flag(W_EOL) && !getDict().has_hyphen_end(*word->best_choice)) {
425 getDict().reset_hyphen_vars(true);
426 }
427
428 if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
429 CallFillLattice(*word->ratings, word->best_choices, *word->uch_set, word->blamer_bundle);
430 }
431 if (wordrec_debug_level > 0) {
432 tprintf("Final Ratings Matrix:\n");
433 word->ratings->print(getDict().getUnicharset());
434 }
435 word->FilterWordChoices(getDict().stopper_debug_level);
436}
@ TOP_CHOICE_PERM
Definition: ratngs.h:238
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:240
virtual BLOB_CHOICE_LIST * classify_piece(const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:49
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:33

◆ classify_blob()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob ( TBLOB * blob,
const char *  string,
ScrollView::Color  color,
BlamerBundle * blamer_bundle 
)

Definition at line 50 of file wordclass.cpp.

51 {
52#ifndef GRAPHICS_DISABLED
53 if (wordrec_display_all_blobs) {
54 display_blob(blob, color);
55 }
56#endif
57 // TODO(rays) collapse with call_matcher and move all to wordrec.cpp.
58 BLOB_CHOICE_LIST *choices = call_matcher(blob);
59 // If a blob with the same bounding box as one of the truth character
60 // bounding boxes is not classified as the corresponding truth character
61 // blame character classifier for incorrect answer.
62 if (blamer_bundle != nullptr) {
63 blamer_bundle->BlameClassifier(getDict().getUnicharset(), blob->bounding_box(), *choices,
64 wordrec_debug_blamer);
65 }
66#ifndef GRAPHICS_DISABLED
67 if (classify_debug_level && string) {
68 print_ratings_list(string, choices, getDict().getUnicharset());
69 }
70
71 if (wordrec_blob_pause) {
72 blob_window->Wait();
73 }
74#endif
75
76 return choices;
77}
ScrollView * blob_window
Definition: render.cpp:36
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
bool wordrec_display_all_blobs
Definition: render.cpp:41
bool wordrec_blob_pause
Definition: render.cpp:43
void display_blob(TBLOB *blob, ScrollView::Color color)
Definition: render.cpp:54
BLOB_CHOICE_LIST * call_matcher(TBLOB *blob)
Definition: tface.cpp:132

◆ classify_piece()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece ( const std::vector< SEAM * > &  seams,
int16_t  start,
int16_t  end,
const char *  description,
TWERD * word,
BlamerBundle * blamer_bundle 
)
virtual

Definition at line 49 of file pieces.cpp.

51 {
52 if (end > start) {
53 SEAM::JoinPieces(seams, word->blobs, start, end);
54 }
55 BLOB_CHOICE_LIST *choices =
56 classify_blob(word->blobs[start], description, ScrollView::WHITE, blamer_bundle);
57 // Set the matrix_cell_ entries in all the BLOB_CHOICES.
58 BLOB_CHOICE_IT bc_it(choices);
59 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
60 bc_it.data()->set_matrix_cell(start, end);
61 }
62
63 if (end > start) {
64 SEAM::BreakPieces(seams, word->blobs, start, end);
65 }
66
67 return (choices);
68}
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:204
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:181
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const char *string, ScrollView::Color color, BlamerBundle *blamer_bundle)
Definition: wordclass.cpp:50

◆ combine_seam()

void tesseract::Wordrec::combine_seam ( const SeamPile & seam_pile,
const SEAM * seam,
SeamQueue * seam_queue 
)

Definition at line 194 of file findseam.cpp.

194 {
195 for (int x = 0; x < seam_pile.size(); ++x) {
196 const SEAM *this_one = seam_pile.get(x).data();
197 if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
198 SEAM *new_one = new SEAM(*seam);
199 new_one->CombineWith(*this_one);
200 if (chop_debug > 1) {
201 new_one->Print("Combo priority ");
202 }
203 add_seam_to_queue(new_one->priority(), new_one, seam_queue);
204 }
205 }
206}
#define SPLIT_CLOSENESS
Definition: findseam.cpp:45

◆ dict_word()

int tesseract::Wordrec::dict_word ( const WERD_CHOICE & word)

Definition at line 86 of file tface.cpp.

86 {
87 return getDict().valid_word(word);
88}
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:801

◆ double_VAR_H() [1/9]

tesseract::Wordrec::double_VAR_H ( chop_center_knob  )

◆ double_VAR_H() [2/9]

tesseract::Wordrec::double_VAR_H ( chop_good_split  )

◆ double_VAR_H() [3/9]

tesseract::Wordrec::double_VAR_H ( chop_ok_split  )

◆ double_VAR_H() [4/9]

tesseract::Wordrec::double_VAR_H ( chop_overlap_knob  )

◆ double_VAR_H() [5/9]

tesseract::Wordrec::double_VAR_H ( chop_sharpness_knob  )

◆ double_VAR_H() [6/9]

tesseract::Wordrec::double_VAR_H ( chop_split_dist_knob  )

◆ double_VAR_H() [7/9]

tesseract::Wordrec::double_VAR_H ( chop_width_change_knob  )

◆ double_VAR_H() [8/9]

tesseract::Wordrec::double_VAR_H ( segsearch_max_char_wh_ratio  )

◆ double_VAR_H() [9/9]

tesseract::Wordrec::double_VAR_H ( tessedit_certainty_threshold  )

◆ end_recog()

int tesseract::Wordrec::end_recog ( )

Definition at line 61 of file tface.cpp.

61 {
62 program_editdown(0);
63
64 return (0);
65}
void program_editdown(int32_t elasped_time)
Definition: tface.cpp:73

◆ FillLattice()

void tesseract::Wordrec::FillLattice ( const MATRIX & ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET & unicharset,
BlamerBundle * blamer_bundle 
)

◆ grade_sharpness()

PRIORITY tesseract::Wordrec::grade_sharpness ( SPLIT * split)

Definition at line 67 of file gradechop.cpp.

67 {
68 PRIORITY grade;
69
70 grade = point_priority(split->point1) + point_priority(split->point2);
71
72 if (grade < -360.0) {
73 grade = 0;
74 } else {
75 grade += 360.0;
76 }
77
78 grade *= chop_sharpness_knob; /* Values 0 to -360 */
79
80 return (grade);
81}
float PRIORITY
Definition: seam.h:31

◆ grade_split_length()

PRIORITY tesseract::Wordrec::grade_split_length ( SPLIT * split)

Definition at line 45 of file gradechop.cpp.

45 {
46 PRIORITY grade;
47 float split_length;
48
49 split_length = split->point1->WeightedDistance(*split->point2, chop_x_y_weight);
50
51 if (split_length <= 0) {
52 grade = 0;
53 } else {
54 grade = std::sqrt(split_length) * chop_split_dist_knob;
55 }
56
57 return (std::max(0.0f, grade));
58}

◆ improve_by_chopping()

void tesseract::Wordrec::improve_by_chopping ( float  rating_cert_scale,
WERD_RES * word,
BestChoiceBundle * best_choice_bundle,
BlamerBundle * blamer_bundle,
LMPainPoints * pain_points,
std::vector< SegSearchPending > *  pending 
)

Definition at line 445 of file chopper.cpp.

448 {
449 unsigned blob_number;
450 do { // improvement loop.
451 // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
452 // one to chop.
453 std::vector<BLOB_CHOICE *> blob_choices;
454 int num_blobs = word->ratings->dimension();
455 for (int i = 0; i < num_blobs; ++i) {
456 BLOB_CHOICE_LIST *choices = word->ratings->get(i, i);
457 if (choices == nullptr || choices->empty()) {
458 blob_choices.push_back(nullptr);
459 } else {
460 BLOB_CHOICE_IT bc_it(choices);
461 blob_choices.push_back(bc_it.data());
462 }
463 }
464 SEAM *seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt, false, false, word,
465 &blob_number);
466 if (seam == nullptr) {
467 break;
468 }
469 // A chop has been made. We have to correct all the data structures to
470 // take into account the extra bottom-level blob.
471 // Put the seam into the seam_array and correct everything else on the
472 // word: ratings matrix (including matrix location in the BLOB_CHOICES),
473 // states in WERD_CHOICEs, and blob widths.
474 word->InsertSeam(blob_number, seam);
475 // Insert a new entry in the beam array.
476 best_choice_bundle->beam.insert(best_choice_bundle->beam.begin() + blob_number, new LanguageModelState);
477 // Fixpts are outdated, but will get recalculated.
478 best_choice_bundle->fixpt.clear();
479 // Remap existing pain points.
480 pain_points->RemapForSplit(blob_number);
481 // Insert a new pending at the chop point.
482 pending->insert(pending->begin() + blob_number, SegSearchPending());
483
484 // Classify the two newly created blobs using ProcessSegSearchPainPoint,
485 // as that updates the pending correctly and adds new pain points.
486 MATRIX_COORD pain_point(blob_number, blob_number);
487 ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word, pain_points, blamer_bundle);
488 pain_point.col = blob_number + 1;
489 pain_point.row = blob_number + 1;
490 ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word, pain_points, blamer_bundle);
491 if (language_model_->language_model_ngram_on) {
492 // N-gram evaluation depends on the number of blobs in a chunk, so we
493 // have to re-evaluate everything in the word.
494 ResetNGramSearch(word, best_choice_bundle, *pending);
495 blob_number = 0;
496 }
497 // Run language model incrementally. (Except with the n-gram model on.)
498 UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, word, pain_points,
499 best_choice_bundle, blamer_bundle);
500 } while (!language_model_->AcceptableChoiceFound() && word->ratings->dimension() < kMaxNumChunks);
501
502 // If after running only the chopper best_choice is incorrect and no blame
503 // has been yet set, blame the classifier if best_choice is classifier's
504 // top choice and is a dictionary word (i.e. language model could not have
505 // helped). Otherwise blame the tradeoff between the classifier and
506 // the old language model (permuters).
507 if (word->blamer_bundle != nullptr &&
508 word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT &&
509 !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
510 bool valid_permuter = word->best_choice != nullptr &&
511 Dict::valid_word_permuter(word->best_choice->permuter(), false);
512 word->blamer_bundle->BlameClassifierOrLangModel(word, getDict().getUnicharset(), valid_permuter,
513 wordrec_debug_blamer);
514 }
515}
@ IRR_CORRECT
Definition: blamer.h:58
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:437
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:162
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:222
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
Definition: segsearch.cpp:279

◆ improve_one_blob()

SEAM * tesseract::Wordrec::improve_one_blob ( const std::vector< BLOB_CHOICE * > &  blob_choices,
DANGERR * fixpt,
bool  split_next_to_fragment,
bool  italic_blob,
WERD_RES * word,
unsigned *  blob_number 
)

Definition at line 320 of file chopper.cpp.

322 {
323 float rating_ceiling = FLT_MAX;
324 SEAM *seam = nullptr;
325 do {
326 auto blob = select_blob_to_split_from_fixpt(fixpt);
327 if (chop_debug) {
328 tprintf("blob_number from fixpt = %d\n", blob);
329 }
330 bool split_point_from_dict = (blob != -1);
331 if (split_point_from_dict) {
332 fixpt->clear();
333 } else {
334 blob = select_blob_to_split(blob_choices, rating_ceiling, split_next_to_fragment);
335 }
336 if (chop_debug) {
337 tprintf("blob_number = %d\n", blob);
338 }
339 *blob_number = blob;
340 if (blob == -1) {
341 return nullptr;
342 }
343
344 // TODO(rays) it may eventually help to allow italic_blob to be true,
345 seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, word->seam_array);
346 if (seam != nullptr) {
347 return seam; // Success!
348 }
349 if (blob_choices[*blob_number] == nullptr) {
350 return nullptr;
351 }
352 if (!split_point_from_dict) {
353 // We chopped the worst rated blob, try something else next time.
354 rating_ceiling = blob_choices[*blob_number]->rating();
355 }
356 } while (true);
357 return seam;
358}
int select_blob_to_split(const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:523
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:261
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:605

◆ InitBlamerForSegSearch()

void tesseract::Wordrec::InitBlamerForSegSearch ( WERD_RES * word_res,
LMPainPoints * pain_points,
BlamerBundle * blamer_bundle,
std::string &  blamer_debug 
)
protected

Definition at line 296 of file segsearch.cpp.

297 {
298 pain_points->Clear(); // Clear pain points heap.
299 blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings, getDict().WildcardID(),
300 wordrec_debug_blamer, blamer_debug, pain_points,
301 segsearch_max_char_wh_ratio, word_res);
302}

◆ InitialSegSearch()

void tesseract::Wordrec::InitialSegSearch ( WERD_RES * word_res,
LMPainPoints * pain_points,
std::vector< SegSearchPending > *  pending,
BestChoiceBundle * best_choice_bundle,
BlamerBundle * blamer_bundle 
)

Definition at line 120 of file segsearch.cpp.

122 {
123 if (segsearch_debug_level > 0) {
124 tprintf("Starting SegSearch on ratings matrix%s:\n",
125 wordrec_enable_assoc ? " (with assoc)" : "");
126 word_res->ratings->print(getDict().getUnicharset());
127 }
128
129 pain_points->GenerateInitial(word_res);
130
131 // Compute scaling factor that will help us recover blob outline length
132 // from classifier rating and certainty for the blob.
133 float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
134
135 language_model_->InitForWord(prev_word_best_choice_, assume_fixed_pitch_char_segment,
136 segsearch_max_char_wh_ratio, rating_cert_scale);
137
138 // Initialize blamer-related information: map character boxes recorded in
139 // blamer_bundle->norm_truth_word to the corresponding i,j indices in the
140 // ratings matrix. We expect this step to succeed, since when running the
141 // chopper we checked that the correct chops are present.
142 if (blamer_bundle != nullptr) {
143 blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word, wordrec_debug_blamer);
144 }
145
146 // pending[col] tells whether there is update work to do to combine
147 // best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
148 // As the language model state is updated, pending entries are modified to
149 // minimize duplication of work. It is important that during the update the
150 // children are considered in the non-decreasing order of their column, since
151 // this guarantees that all the parents would be up to date before an update
152 // of a child is done.
153 pending->clear();
154 pending->resize(word_res->ratings->dimension(), SegSearchPending());
155
156 // Search the ratings matrix for the initial best path.
157 (*pending)[0].SetColumnClassified();
158 UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res, pain_points, best_choice_bundle,
159 blamer_bundle);
160}

◆ INT_VAR_H() [1/15]

tesseract::Wordrec::INT_VAR_H ( chop_centered_maxwidth  )

◆ INT_VAR_H() [2/15]

tesseract::Wordrec::INT_VAR_H ( chop_debug  )

◆ INT_VAR_H() [3/15]

tesseract::Wordrec::INT_VAR_H ( chop_inside_angle  )

◆ INT_VAR_H() [4/15]

tesseract::Wordrec::INT_VAR_H ( chop_min_outline_area  )

◆ INT_VAR_H() [5/15]

tesseract::Wordrec::INT_VAR_H ( chop_min_outline_points  )

◆ INT_VAR_H() [6/15]

tesseract::Wordrec::INT_VAR_H ( chop_same_distance  )

◆ INT_VAR_H() [7/15]

tesseract::Wordrec::INT_VAR_H ( chop_seam_pile_size  )

◆ INT_VAR_H() [8/15]

tesseract::Wordrec::INT_VAR_H ( chop_split_length  )

◆ INT_VAR_H() [9/15]

tesseract::Wordrec::INT_VAR_H ( chop_x_y_weight  )

◆ INT_VAR_H() [10/15]

tesseract::Wordrec::INT_VAR_H ( repair_unchopped_blobs  )

◆ INT_VAR_H() [11/15]

tesseract::Wordrec::INT_VAR_H ( segsearch_debug_level  )

◆ INT_VAR_H() [12/15]

tesseract::Wordrec::INT_VAR_H ( segsearch_max_futile_classifications  )

◆ INT_VAR_H() [13/15]

tesseract::Wordrec::INT_VAR_H ( segsearch_max_pain_points  )

◆ INT_VAR_H() [14/15]

tesseract::Wordrec::INT_VAR_H ( wordrec_debug_level  )

◆ INT_VAR_H() [15/15]

tesseract::Wordrec::INT_VAR_H ( wordrec_max_join_chunks  )

◆ is_inside_angle()

bool tesseract::Wordrec::is_inside_angle ( EDGEPT * pt)

Definition at line 88 of file chop.cpp.

88 {
89 return angle_change(pt->prev, pt, pt->next) < chop_inside_angle;
90}
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
Definition: chop.cpp:98

◆ near_point()

bool tesseract::Wordrec::near_point ( EDGEPT * point,
EDGEPT * line_pt_0,
EDGEPT * line_pt_1,
EDGEPT **  near_pt 
)

Definition at line 36 of file outlines.cpp.

36 {
37 TPOINT p;
38
39 float slope;
40 float intercept;
41
42 float x0 = line_pt_0->pos.x;
43 float x1 = line_pt_1->pos.x;
44 float y0 = line_pt_0->pos.y;
45 float y1 = line_pt_1->pos.y;
46
47 if (x0 == x1) {
48 /* Handle vertical line */
49 p.x = static_cast<int16_t>(x0);
50 p.y = point->pos.y;
51 } else {
52 /* Slope and intercept */
53 slope = (y0 - y1) / (x0 - x1);
54 intercept = y1 - x1 * slope;
55
56 /* Find perpendicular */
57 p.x = static_cast<int16_t>((point->pos.x + (point->pos.y - intercept) * slope) /
58 (slope * slope + 1));
59 p.y = static_cast<int16_t>(slope * p.x + intercept);
60 }
61
62 if (is_on_line(p, line_pt_0->pos, line_pt_1->pos) && (!same_point(p, line_pt_0->pos)) &&
63 (!same_point(p, line_pt_1->pos))) {
64 /* Intersection on line */
65 *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0);
66 return true;
67 } else { /* Intersection not on line */
68 *near_pt = closest(point, line_pt_0, line_pt_1);
69 return false;
70 }
71}
#define same_point(p1, p2)
Definition: outlines.h:44
#define is_on_line(p, p0, p1)
Definition: outlines.h:103
#define closest(test_p, p1, p2)
Definition: outlines.h:63
const char * p
EDGEPT * make_edgept(TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:138

◆ new_max_point()

void tesseract::Wordrec::new_max_point ( EDGEPT * local_max,
PointHeap * points 
)

Definition at line 249 of file chop.cpp.

249 {
250 int16_t dir;
251
252 dir = direction(local_max);
253
254 if (dir > 0) {
255 add_point_to_list(points, local_max);
256 return;
257 }
258
259 if (dir == 0 && point_priority(local_max) < 0) {
260 add_point_to_list(points, local_max);
261 return;
262 }
263}
void add_point_to_list(PointHeap *point_heap, EDGEPT *point)
Definition: chop.cpp:73

◆ new_min_point()

void tesseract::Wordrec::new_min_point ( EDGEPT * local_min,
PointHeap * points 
)

Definition at line 226 of file chop.cpp.

226 {
227 int16_t dir;
228
229 dir = direction(local_min);
230
231 if (dir < 0) {
232 add_point_to_list(points, local_min);
233 return;
234 }
235
236 if (dir == 0 && point_priority(local_min) < 0) {
237 add_point_to_list(points, local_min);
238 return;
239 }
240}

◆ pick_close_point()

EDGEPT * tesseract::Wordrec::pick_close_point ( EDGEPT * critical_point,
EDGEPT * vertical_point,
int *  best_dist 
)

Definition at line 136 of file chop.cpp.

136 {
137 EDGEPT *best_point = nullptr;
138 int this_distance;
139 bool found_better;
140
141 do {
142 found_better = false;
143
144 this_distance = edgept_dist(critical_point, vertical_point);
145 if (this_distance <= *best_dist) {
146 if (!(same_point(critical_point->pos, vertical_point->pos) ||
147 same_point(critical_point->pos, vertical_point->next->pos) ||
148 (best_point && same_point(best_point->pos, vertical_point->pos)) ||
149 is_exterior_point(critical_point, vertical_point))) {
150 *best_dist = this_distance;
151 best_point = vertical_point;
152 if (chop_vertical_creep) {
153 found_better = true;
154 }
155 }
156 }
157 vertical_point = vertical_point->next;
158 } while (found_better == true);
159
160 return (best_point);
161}
#define edgept_dist(p1, p2)
Definition: outlines.h:74
#define is_exterior_point(edge, point)
Definition: outlines.h:83

◆ pick_good_seam()

SEAM * tesseract::Wordrec::pick_good_seam ( TBLOB * blob)

Definition at line 214 of file findseam.cpp.

214 {
215 SeamPile seam_pile(chop_seam_pile_size);
216 EDGEPT *points[MAX_NUM_POINTS];
217 EDGEPT_CLIST new_points;
218 SEAM *seam = nullptr;
219 TESSLINE *outline;
220 int16_t num_points = 0;
221
222#ifndef GRAPHICS_DISABLED
223 if (chop_debug > 2) {
224 wordrec_display_splits.set_value(true);
225 }
226
227 draw_blob_edges(blob);
228#endif
229
230 PointHeap point_heap(MAX_NUM_POINTS);
231 for (outline = blob->outlines; outline; outline = outline->next) {
232 prioritize_points(outline, &point_heap);
233 }
234
235 while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236 points[num_points++] = point_heap.PeekTop().data();
237 point_heap.Pop(nullptr);
238 }
239
240 /* Initialize queue */
241 SeamQueue seam_queue(MAX_NUM_SEAMS);
242
243 try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244 try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob);
245
246 if (seam == nullptr) {
247 choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
248 } else if (seam->priority() > chop_good_split) {
249 choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile);
250 }
251
252 EDGEPT_C_IT it(&new_points);
253 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
254 EDGEPT *inserted_point = it.data();
255 if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
256 for (outline = blob->outlines; outline; outline = outline->next) {
257 if (outline->loop == inserted_point) {
258 outline->loop = outline->loop->next;
259 }
260 }
261 remove_edgept(inserted_point);
262 }
263 }
264
265 if (seam) {
266 if (seam->priority() > chop_ok_split) {
267 delete seam;
268 seam = nullptr;
269 }
270#ifndef GRAPHICS_DISABLED
271 else if (wordrec_display_splits) {
272 seam->Mark(edge_window);
273 if (chop_debug > 2) {
274 edge_window->Update();
275 edge_window->Wait();
276 }
277 }
278#endif
279 }
280
281 if (chop_debug) {
282 wordrec_display_splits.set_value(false);
283 }
284
285 return (seam);
286}
GenericHeap< SeamPair > SeamQueue
Definition: findseam.h:31
void remove_edgept(EDGEPT *point)
Definition: split.cpp:199
GenericHeap< PointPair > PointHeap
Definition: chop.h:32
ScrollView * edge_window
Definition: plotedges.cpp:37
bool wordrec_display_splits
Definition: split.cpp:41
GenericHeap< SeamDecPair > SeamPile
Definition: findseam.h:34
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:67
static void Update()
Definition: scrollview.cpp:700
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:170
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:295
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:327
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:103

◆ point_priority()

PRIORITY tesseract::Wordrec::point_priority ( EDGEPT *  point)

Definition at line 64 of file chop.cpp.

64 {
65 return static_cast<PRIORITY>(angle_change(point->prev, point, point->next));
66}

◆ prioritize_points()

void tesseract::Wordrec::prioritize_points ( TESSLINE *  outline,
PointHeap *  points 
)

Definition at line 170 of file chop.cpp.

170 {
171 EDGEPT *this_point;
172 EDGEPT *local_min = nullptr;
173 EDGEPT *local_max = nullptr;
174
175 this_point = outline->loop;
176 local_min = this_point;
177 local_max = this_point;
178 do {
179 if (this_point->vec.y < 0) {
180 /* Look for minima */
181 if (local_max != nullptr) {
182 new_max_point(local_max, points);
183 } else if (is_inside_angle(this_point)) {
184 add_point_to_list(points, this_point);
185 }
186 local_max = nullptr;
187 local_min = this_point->next;
188 } else if (this_point->vec.y > 0) {
189 /* Look for maxima */
190 if (local_min != nullptr) {
191 new_min_point(local_min, points);
192 } else if (is_inside_angle(this_point)) {
193 add_point_to_list(points, this_point);
194 }
195 local_min = nullptr;
196 local_max = this_point->next;
197 } else {
198 /* Flat area */
199 if (local_max != nullptr) {
200 if (local_max->prev->vec.y != 0) {
201 new_max_point(local_max, points);
202 }
203 local_max = this_point->next;
204 local_min = nullptr;
205 } else {
206 if (local_min->prev->vec.y != 0) {
207 new_min_point(local_min, points);
208 }
209 local_min = this_point->next;
210 local_max = nullptr;
211 }
212 }
213
214 /* Next point */
215 this_point = this_point->next;
216 } while (this_point != outline->loop);
217}
bool is_inside_angle(EDGEPT *pt)
Definition: chop.cpp:88
void new_min_point(EDGEPT *local_min, PointHeap *points)
Definition: chop.cpp:226
void new_max_point(EDGEPT *local_max, PointHeap *points)
Definition: chop.cpp:249

◆ ProcessSegSearchPainPoint()

void tesseract::Wordrec::ProcessSegSearchPainPoint ( float  pain_point_priority,
const MATRIX_COORD pain_point,
const char *  pain_point_type,
std::vector< SegSearchPending > *  pending,
WERD_RES *  word_res,
LMPainPoints *  pain_points,
BlamerBundle *  blamer_bundle 
)
protected

Definition at line 222 of file segsearch.cpp.

226 {
227 if (segsearch_debug_level > 0) {
228 tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n", pain_point_type,
229 pain_point_priority, pain_point.col, pain_point.row);
230 }
231 ASSERT_HOST(pain_points != nullptr);
232 MATRIX *ratings = word_res->ratings;
233 // Classify blob [pain_point.col pain_point.row]
234 if (!pain_point.Valid(*ratings)) {
235 ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col);
236 }
237 ASSERT_HOST(pain_point.Valid(*ratings));
238 BLOB_CHOICE_LIST *classified =
239 classify_piece(word_res->seam_array, pain_point.col, pain_point.row, pain_point_type,
240 word_res->chopped_word, blamer_bundle);
241 BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row);
242 if (lst == nullptr) {
243 ratings->put(pain_point.col, pain_point.row, classified);
244 } else {
245 // We cannot delete old BLOB_CHOICEs, since they might contain
246 // ViterbiStateEntries that are parents of other "active" entries.
247 // Thus if the matrix cell already contains classifications we add
248 // the new ones to the beginning of the list.
249 BLOB_CHOICE_IT it(lst);
250 it.add_list_before(classified);
251 delete classified; // safe to delete, since empty after add_list_before()
252 classified = nullptr;
253 }
254
255 if (segsearch_debug_level > 0) {
256 print_ratings_list("Updated ratings matrix with a new entry:",
257 ratings->get(pain_point.col, pain_point.row), getDict().getUnicharset());
258 ratings->print(getDict().getUnicharset());
259 }
260
261 // Insert initial "pain points" to join the newly classified blob
262 // with its left and right neighbors.
263 if (classified != nullptr && !classified->empty()) {
264 if (pain_point.col > 0) {
265 pain_points->GeneratePainPoint(pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0, true,
266 segsearch_max_char_wh_ratio, word_res);
267 }
268 if (pain_point.row + 1 < ratings->dimension()) {
269 pain_points->GeneratePainPoint(pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0, true,
270 segsearch_max_char_wh_ratio, word_res);
271 }
272 }
273 (*pending)[pain_point.col].SetBlobClassified(pain_point.row);
274}

◆ program_editdown()

void tesseract::Wordrec::program_editdown ( int32_t  elasped_time)

Definition at line 73 of file tface.cpp.

73 {
74#ifndef DISABLED_LEGACY_ENGINE
75 EndAdaptiveClassifier();
76#endif // ndef DISABLED_LEGACY_ENGINE
77 getDict().End();
78}
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:464
void End()
Definition: dict.cpp:379

◆ program_editup()

void tesseract::Wordrec::program_editup ( const std::string &  textbase,
TessdataManager *  init_classifier,
TessdataManager *  init_dict 
)

Definition at line 39 of file tface.cpp.

40 {
41 if (!textbase.empty()) {
42 imagefile = textbase;
43 }
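44#ifndef DISABLED_LEGACY_ENGINE
45 InitFeatureDefs(&feature_defs_);
46 InitAdaptiveClassifier(init_classifier);
47 if (init_dict) {
48 getDict().SetupForLoad(Dict::GlobalDawgCache());
49 getDict().Load(lang, init_dict);
50 getDict().FinishLoad();
51 }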
52 pass2_ok_split = chop_ok_split;
53#endif // ndef DISABLED_LEGACY_ENGINE
54}
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:87
std::string imagefile
Definition: ccutil.h:65
std::string lang
Definition: ccutil.h:59
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:446
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:172
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:180
bool FinishLoad()
Definition: dict.cpp:357
void Load(const std::string &lang, TessdataManager *data_file)
Definition: dict.cpp:200

◆ ResetNGramSearch()

void tesseract::Wordrec::ResetNGramSearch ( WERD_RES *  word_res,
BestChoiceBundle *  best_choice_bundle,
std::vector< SegSearchPending > &  pending 
)
protected

Definition at line 279 of file segsearch.cpp.

280 {
281 // TODO(rays) More refactoring required here.
282 // Delete existing viterbi states.
283 for (auto &col : best_choice_bundle->beam) {
284 col->Clear();
285 }
286 // Reset best_choice_bundle.
287 word_res->ClearWordChoices();
288 best_choice_bundle->best_vse = nullptr;
289 // Clear out all existing pendings and add a new one for the first column.
290 pending[0].SetColumnClassified();
291 for (auto &data : pending) {
292 data.Clear();
293 }
294}

◆ SaveAltChoices()

void tesseract::Wordrec::SaveAltChoices ( const LIST &  best_choices,
WERD_RES *  word 
)

◆ SegSearch()

void tesseract::Wordrec::SegSearch ( WERD_RES *  word_res,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)

Definition at line 33 of file segsearch.cpp.

34 {
35 LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
36 assume_fixed_pitch_char_segment, &getDict(), segsearch_debug_level);
37 // Compute scaling factor that will help us recover blob outline length
38 // from classifier rating and certainty for the blob.
39 float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
40 std::vector<SegSearchPending> pending;
41 InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle, blamer_bundle);
42
43 if (!SegSearchDone(0)) { // find a better choice
44 if (chop_enable && word_res->chopped_word != nullptr) {
45 improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle, blamer_bundle,
46 &pain_points, &pending);
47 }
48 if (chop_debug) {
49 SEAM::PrintSeams("Final seam list:", word_res->seam_array);
50 }
51
52 if (blamer_bundle != nullptr && !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
53 blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer);
54 }
55 }
56 // Keep trying to find a better path by fixing the "pain points".
57
58 MATRIX_COORD pain_point;
59 float pain_point_priority;
60 int num_futile_classifications = 0;
61 std::string blamer_debug;
62 while (wordrec_enable_assoc &&
63 (!SegSearchDone(num_futile_classifications) ||
64 (blamer_bundle != nullptr && blamer_bundle->GuidedSegsearchStillGoing()))) {
65 // Get the next valid "pain point".
66 bool found_nothing = true;
67 LMPainPointsType pp_type;
68 while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) != LM_PPTYPE_NUM) {
69 if (!pain_point.Valid(*word_res->ratings)) {
70 word_res->ratings->IncreaseBandSize(pain_point.row - pain_point.col + 1);
71 }
72 if (pain_point.Valid(*word_res->ratings) &&
73 !word_res->ratings->Classified(pain_point.col, pain_point.row, getDict().WildcardID())) {
74 found_nothing = false;
75 break;
76 }
77 }
78 if (found_nothing) {
79 if (segsearch_debug_level > 0) {
80 tprintf("Pain points queue is empty\n");
81 }
82 break;
83 }
84 ProcessSegSearchPainPoint(pain_point_priority, pain_point,
85 LMPainPoints::PainPointDescription(pp_type), &pending, word_res,
86 &pain_points, blamer_bundle);
87
88 UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending, word_res, &pain_points,
89 best_choice_bundle, blamer_bundle);
90 if (!best_choice_bundle->updated) {
91 ++num_futile_classifications;
92 }
93
94 if (segsearch_debug_level > 0) {
95 tprintf("num_futile_classifications %d\n", num_futile_classifications);
96 }
97
98 best_choice_bundle->updated = false; // reset updated
99
100 // See if it's time to terminate SegSearch or time for starting a guided
101 // search for the true path to find the blame for the incorrect best_choice.
102 if (SegSearchDone(num_futile_classifications) && blamer_bundle != nullptr &&
103 blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
104 InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, blamer_debug);
105 }
106 } // end while loop exploring alternative paths
107 if (blamer_bundle != nullptr) {
108 blamer_bundle->FinishSegSearch(word_res->best_choice, wordrec_debug_blamer, blamer_debug);
109 }
110
111 if (segsearch_debug_level > 0) {
112 tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n",
113 language_model_->AcceptableChoiceFound());
114 }
115}
static void PrintSeams(const char *label, const std::vector< SEAM * > &seams)
Definition: seam.cpp:158
static const char * PainPointDescription(LMPainPointsType type)
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
Definition: chopper.cpp:445
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, std::string &blamer_debug)
Definition: segsearch.cpp:296
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:120
bool SegSearchDone(int num_futile_classifications)
Definition: wordrec.h:394

◆ SegSearchDone()

bool tesseract::Wordrec::SegSearchDone ( int  num_futile_classifications)
inline protected

Definition at line 394 of file wordrec.h.

394 {
395 return (language_model_->AcceptableChoiceFound() ||
396 num_futile_classifications >= segsearch_max_futile_classifications);
397 }

◆ select_blob_to_split()

int tesseract::Wordrec::select_blob_to_split ( const std::vector< BLOB_CHOICE * > &  blob_choices,
float  rating_ceiling,
bool  split_next_to_fragment 
)

Definition at line 523 of file chopper.cpp.

524 {
525 BLOB_CHOICE *blob_choice;
526 float worst = -FLT_MAX;
527 int worst_index = -1;
528 float worst_near_fragment = -FLT_MAX;
529 int worst_index_near_fragment = -1;
530 std::vector<const CHAR_FRAGMENT *> fragments;
531
532 if (chop_debug) {
533 if (rating_ceiling < FLT_MAX) {
534 tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
535 } else {
536 tprintf("rating_ceiling = No Limit\n");
537 }
538 }
539
540 if (split_next_to_fragment && blob_choices.size() > 0) {
541 fragments.resize(blob_choices.size());
542 if (blob_choices[0] != nullptr) {
543 fragments[0] = getDict().getUnicharset().get_fragment(blob_choices[0]->unichar_id());
544 } else {
545 fragments[0] = nullptr;
546 }
547 }
548
549 for (unsigned x = 0; x < blob_choices.size(); ++x) {
550 if (blob_choices[x] == nullptr) {
551 return x;
552 } else {
553 blob_choice = blob_choices[x];
554 // Populate fragments for the following position.
555 if (split_next_to_fragment && x + 1 < blob_choices.size()) {
556 if (blob_choices[x + 1] != nullptr) {
557 fragments[x + 1] =
558 getDict().getUnicharset().get_fragment(blob_choices[x + 1]->unichar_id());
559 } else {
560 fragments[x + 1] = nullptr;
561 }
562 }
563 if (blob_choice->rating() < rating_ceiling &&
564 blob_choice->certainty() < tessedit_certainty_threshold) {
565 // Update worst and worst_index.
566 if (blob_choice->rating() > worst) {
567 worst_index = x;
568 worst = blob_choice->rating();
569 }
570 if (split_next_to_fragment) {
571 // Update worst_near_fragment and worst_index_near_fragment.
572 bool expand_following_fragment =
573 (x + 1 < blob_choices.size() && fragments[x + 1] != nullptr &&
574 !fragments[x + 1]->is_beginning());
575 bool expand_preceding_fragment =
576 (x > 0 && fragments[x - 1] != nullptr && !fragments[x - 1]->is_ending());
577 if ((expand_following_fragment || expand_preceding_fragment) &&
578 blob_choice->rating() > worst_near_fragment) {
579 worst_index_near_fragment = x;
580 worst_near_fragment = blob_choice->rating();
581 if (chop_debug) {
582 tprintf(
583 "worst_index_near_fragment=%d"
584 " expand_following_fragment=%d"
585 " expand_preceding_fragment=%d\n",
586 worst_index_near_fragment, expand_following_fragment, expand_preceding_fragment);
587 }
588 }
589 }
590 }
591 }
592 }
593 // TODO(daria): maybe a threshold of badness for
594 // worst_near_fragment would be useful.
595 return worst_index_near_fragment != -1 ? worst_index_near_fragment : worst_index;
596}
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:768
const UNICHARSET & getUnicharset() const
Definition: dict.h:104

◆ select_blob_to_split_from_fixpt()

int tesseract::Wordrec::select_blob_to_split_from_fixpt ( DANGERR *  fixpt)

Definition at line 605 of file chopper.cpp.

605 {
606 if (!fixpt) {
607 return -1;
608 }
609 for (auto &i : *fixpt) {
610 if (i.begin + 1 == i.end && i.dangerous && i.correct_is_ngram) {
611 return i.begin;
612 }
613 }
614 return -1;
615}

◆ set_pass1()

void tesseract::Wordrec::set_pass1 ( )

Definition at line 97 of file tface.cpp.

97 {
98 chop_ok_split.set_value(70.0);
99 language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS1);
100 SettupPass1();
101}

◆ set_pass2()

void tesseract::Wordrec::set_pass2 ( )

Definition at line 108 of file tface.cpp.

108 {
109 chop_ok_split.set_value(pass2_ok_split);
110 language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS2);
111 SettupPass2();
112}

◆ try_point_pairs()

void tesseract::Wordrec::try_point_pairs ( EDGEPT *  points[MAX_NUM_POINTS],
int16_t  num_points,
SeamQueue *  seam_queue,
SeamPile *  seam_pile,
SEAM **  seam,
TBLOB *  blob 
)

Definition at line 295 of file findseam.cpp.

297 {
298 int16_t x;
299 int16_t y;
300 PRIORITY priority;
301
302 for (x = 0; x < num_points; x++) {
303 for (y = x + 1; y < num_points; y++) {
304 if (points[y] &&
305 points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length &&
306 points[x] != points[y]->next && points[y] != points[x]->next &&
307 !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) {
308 SPLIT split(points[x], points[y]);
309 priority = partial_split_priority(&split);
310
311 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
312 }
313 }
314 }
315}
#define partial_split_priority(split)
Definition: findseam.cpp:40
const double y
def next(obj)
Definition: ast.py:56

◆ try_vertical_splits()

void tesseract::Wordrec::try_vertical_splits ( EDGEPT *  points[MAX_NUM_POINTS],
int16_t  num_points,
EDGEPT_CLIST *  new_points,
SeamQueue *  seam_queue,
SeamPile *  seam_pile,
SEAM **  seam,
TBLOB *  blob 
)

Definition at line 327 of file findseam.cpp.

329 {
330 EDGEPT *vertical_point = nullptr;
331 int16_t x;
332 PRIORITY priority;
333 TESSLINE *outline;
334
335 for (x = 0; x < num_points; x++) {
336 vertical_point = nullptr;
337 for (outline = blob->outlines; outline; outline = outline->next) {
338 vertical_projection_point(points[x], outline->loop, &vertical_point, new_points);
339 }
340
341 if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next &&
342 points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) {
343 SPLIT split(points[x], vertical_point);
344 priority = partial_split_priority(&split);
345 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
346 }
347 }
348}
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:277

◆ UpdateSegSearchNodes()

void tesseract::Wordrec::UpdateSegSearchNodes ( float  rating_cert_scale,
int  starting_col,
std::vector< SegSearchPending > *  pending,
WERD_RES *  word_res,
LMPainPoints *  pain_points,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)
protected

Definition at line 162 of file segsearch.cpp.

165 {
166 MATRIX *ratings = word_res->ratings;
167 ASSERT_HOST(static_cast<unsigned>(ratings->dimension()) == pending->size());
168 ASSERT_HOST(static_cast<unsigned>(ratings->dimension()) == best_choice_bundle->beam.size());
169 for (int col = starting_col; col < ratings->dimension(); ++col) {
170 if (!(*pending)[col].WorkToDo()) {
171 continue;
172 }
173 int first_row = col;
174 int last_row = std::min(ratings->dimension() - 1, col + ratings->bandwidth() - 1);
175 if ((*pending)[col].SingleRow() >= 0) {
176 first_row = last_row = (*pending)[col].SingleRow();
177 }
178 if (segsearch_debug_level > 0) {
179 tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n", col, first_row,
180 last_row, (*pending)[col].IsRowJustClassified(INT32_MAX));
181 }
182 // Iterate over the pending list for this column.
183 for (int row = first_row; row <= last_row; ++row) {
184 // Update language model state of this child+parent pair.
185 BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
186 LanguageModelState *parent_node = col == 0 ? nullptr : best_choice_bundle->beam[col - 1];
187 if (current_node != nullptr &&
188 language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), col, row,
189 current_node, parent_node, pain_points, word_res,
190 best_choice_bundle, blamer_bundle) &&
191 row + 1 < ratings->dimension()) {
192 // Since the language model state of this entry changed, process all
193 // the child column.
194 (*pending)[row + 1].RevisitWholeColumn();
195 if (segsearch_debug_level > 0) {
196 tprintf("Added child col=%d to pending\n", row + 1);
197 }
198 } // end if UpdateState.
199 } // end for row.
200 } // end for col.
201 if (best_choice_bundle->best_vse != nullptr) {
202 ASSERT_HOST(word_res->StatesAllValid());
203 if (best_choice_bundle->best_vse->updated) {
204 pain_points->GenerateFromPath(rating_cert_scale, best_choice_bundle->best_vse, word_res);
205 if (!best_choice_bundle->fixpt.empty()) {
206 pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt, best_choice_bundle->best_vse,
207 word_res);
208 }
209 }
210 }
211 // The segsearch is completed. Reset all updated flags on all VSEs and reset
212 // all pendings.
213 for (unsigned col = 0; col < pending->size(); ++col) {
214 (*pending)[col].Clear();
215 ViterbiStateEntry_IT vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries);
216 for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
217 vse_it.data()->updated = false;
218 }
219 }
220}

◆ vertical_projection_point()

void tesseract::Wordrec::vertical_projection_point ( EDGEPT *  split_point,
EDGEPT *  target_point,
EDGEPT **  best_point,
EDGEPT_CLIST *  new_points 
)

Definition at line 277 of file chop.cpp.

278 {
279 EDGEPT *p; /* Iterator */
280 EDGEPT *this_edgept; /* Iterator */
281 EDGEPT_C_IT new_point_it(new_points);
282 int x = split_point->pos.x; /* X value of vertical */
283 int best_dist = LARGE_DISTANCE; /* Best point found */
284
285 if (*best_point != nullptr) {
286 best_dist = edgept_dist(split_point, *best_point);
287 }
288
289 p = target_point;
290 /* Look at each edge point */
291 do {
292 if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) &&
293 !same_point(split_point->pos, p->pos) && !same_point(split_point->pos, p->next->pos) &&
294 !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) {
295 if (near_point(split_point, p, p->next, &this_edgept)) {
296 new_point_it.add_before_then_move(this_edgept);
297 }
298
299 if (*best_point == nullptr) {
300 best_dist = edgept_dist(split_point, this_edgept);
301 }
302
303 this_edgept = pick_close_point(split_point, this_edgept, &best_dist);
304 if (this_edgept) {
305 *best_point = this_edgept;
306 }
307 }
308
309 p = p->next;
310 } while (p != target_point);
311}
#define LARGE_DISTANCE
Definition: outlines.h:31
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
Definition: outlines.cpp:36
EDGEPT * pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
Definition: chop.cpp:136

Member Data Documentation

◆ fill_lattice_

void(Wordrec::* tesseract::Wordrec::fill_lattice_) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

Definition at line 390 of file wordrec.h.

◆ language_model_

std::unique_ptr<LanguageModel> tesseract::Wordrec::language_model_

Definition at line 382 of file wordrec.h.

◆ pass2_ok_split

PRIORITY tesseract::Wordrec::pass2_ok_split

Definition at line 383 of file wordrec.h.

◆ prev_word_best_choice_

WERD_CHOICE* tesseract::Wordrec::prev_word_best_choice_

Definition at line 387 of file wordrec.h.


The documentation for this class was generated from the following files: