tesseract v5.3.3.20231005
tesseract::FPAnalyzer Class Reference

Public Member Functions

 FPAnalyzer (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
 ~FPAnalyzer ()=default
 
void Pass1Analyze ()
 
void EstimatePitch (bool pass1)
 
bool maybe_fixed_pitch ()
 
void MergeFragments ()
 
void FinalizeLargeChars ()
 
bool Pass2Analyze ()
 
void OutputEstimations ()
 
void DebugOutputResult ()
 
size_t num_rows ()
 
unsigned max_iteration ()
 

Detailed Description

Definition at line 951 of file cjkpitch.cpp.

Constructor & Destructor Documentation

◆ FPAnalyzer()

tesseract::FPAnalyzer::FPAnalyzer ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 1031 of file cjkpitch.cpp.

1032 : page_tr_(page_tr)
1033 , num_tall_rows_(0)
1034 , num_bad_rows_(0)
1035 , num_empty_rows_(0)
1036 , max_chars_per_row_(0) {
1037 TO_BLOCK_IT block_it(port_blocks);
1038
1039 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1040 TO_BLOCK *block = block_it.data();
1041 if (!block->get_rows()->empty()) {
1042 ASSERT_HOST(block->xheight > 0);
1043 find_repeated_chars(block, false);
1044 }
1045 }
1046
1047 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1048 TO_ROW_IT row_it = block_it.data()->get_rows();
1049 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1050 FPRow row;
1051 row.Init(row_it.data());
1052 rows_.push_back(row);
1053 size_t num_chars = rows_.back().num_chars();
1054 if (num_chars <= 1) {
1055 num_empty_rows_++;
1056 }
1057 if (num_chars > max_chars_per_row_) {
1058 max_chars_per_row_ = num_chars;
1059 }
1060 }
1061 }
1062 }
#define ASSERT_HOST(x)
Definition: errcode.h:54
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1660

◆ ~FPAnalyzer()

tesseract::FPAnalyzer::~FPAnalyzer ( )
default

Member Function Documentation

◆ DebugOutputResult()

void tesseract::FPAnalyzer::DebugOutputResult ( )
inline

Definition at line 1003 of file cjkpitch.cpp.

1003 {
1004 tprintf("FPAnalyzer: final result\n");
1005 for (size_t i = 0; i < rows_.size(); i++) {
1006 rows_[i].DebugOutputResult(i);
1007 }
1008 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ EstimatePitch()

void tesseract::FPAnalyzer::EstimatePitch ( bool  pass1)

Definition at line 1064 of file cjkpitch.cpp.

1064 {
1065 LocalCorrelation pitch_height_stats;
1066
1067 num_tall_rows_ = 0;
1068 num_bad_rows_ = 0;
1069 pitch_height_stats.Clear();
1070 for (auto &row : rows_) {
1071 row.EstimatePitch(pass1);
1072 if (row.good_pitches()) {
1073 pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches());
1074 if (row.height_pitch_ratio() > 1.1) {
1075 num_tall_rows_++;
1076 }
1077 } else {
1078 num_bad_rows_++;
1079 }
1080 }
1081
1082 pitch_height_stats.Finish();
1083 for (auto &row : rows_) {
1084 if (row.good_pitches() >= 5) {
1085 // We have enough evidence. Just use the pitch estimation
1086 // from this row.
1087 row.set_estimated_pitch(row.pitch());
1088 } else if (row.num_chars() > 1) {
1089 float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
1090 // CJK characters are more likely to be fragmented than poorly
1091 // chopped. So trust the page-level estimation of character
1092 // pitch only if it's larger than the row-level estimation or the
1093 // row-level estimation is too large (more than twice the row height).
1094 if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) {
1095 row.set_estimated_pitch(estimated_pitch);
1096 } else {
1097 row.set_estimated_pitch(row.pitch());
1098 }
1099 }
1100 }
1101 }

◆ FinalizeLargeChars()

void tesseract::FPAnalyzer::FinalizeLargeChars ( )
inline

Definition at line 980 of file cjkpitch.cpp.

980 {
981 for (auto &row : rows_) {
982 row.FinalizeLargeChars();
983 }
984 }

◆ max_iteration()

unsigned tesseract::FPAnalyzer::max_iteration ( )
inline

Definition at line 1015 of file cjkpitch.cpp.

1015 {
1016 // We're fixing at least one character per iteration. So basically
1017 // we shouldn't require more than max_chars_per_row_ iterations.
1018 return max_chars_per_row_ + 100;
1019 }

◆ maybe_fixed_pitch()

bool tesseract::FPAnalyzer::maybe_fixed_pitch ( )
inline

Definition at line 967 of file cjkpitch.cpp.

967 {
968 if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) {
969 return false;
970 }
971 return true;
972 }

◆ MergeFragments()

void tesseract::FPAnalyzer::MergeFragments ( )
inline

Definition at line 974 of file cjkpitch.cpp.

974 {
975 for (auto &row : rows_) {
976 row.MergeFragments();
977 }
978 }

◆ num_rows()

size_t tesseract::FPAnalyzer::num_rows ( )
inline

Definition at line 1010 of file cjkpitch.cpp.

1010 {
1011 return rows_.size();
1012 }

◆ OutputEstimations()

void tesseract::FPAnalyzer::OutputEstimations ( )
inline

Definition at line 996 of file cjkpitch.cpp.

996 {
997 for (auto &row : rows_) {
998 row.OutputEstimations();
999 }
1000 // Don't we need page-level estimation of gaps/spaces?
1001 }

◆ Pass1Analyze()

void tesseract::FPAnalyzer::Pass1Analyze ( )
inline

Definition at line 956 of file cjkpitch.cpp.

956 {
957 for (auto &row : rows_) {
958 row.Pass1Analyze();
959 }
960 }

◆ Pass2Analyze()

bool tesseract::FPAnalyzer::Pass2Analyze ( )
inline

Definition at line 986 of file cjkpitch.cpp.

986 {
987 bool changed = false;
988 for (auto &row : rows_) {
989 if (row.Pass2Analyze()) {
990 changed = true;
991 }
992 }
993 return changed;
994 }

The documentation for this class was generated from the following file: