tesseract v5.3.3.20231005
tesseract::TextlineProjectionTest Class Reference
Inheritance diagram for tesseract::TextlineProjectionTest:
testing::Test

Protected Member Functions

std::string OutputNameToPath (const std::string &name)
 
 TextlineProjectionTest ()
 
 ~TextlineProjectionTest () override
 
void SetImage (const char *filename)
 
void SetupProjection ()
 
void EvaluateBox (const TBOX &box, bool greater_or_equal, int target_value, const char *text, const char *message)
 
void EvaluateDistance (const TBOX &box, const TBOX &true_box, const TBOX &false_box, const char *text, const char *message)
 
void VerifyBoxes (const char *imagefile, int line_height)
 
- Protected Member Functions inherited from testing::Test
 Test ()
 
virtual void SetUp ()
 
virtual void TearDown ()
 

Protected Attributes

Image src_pix_
 
Image bin_pix_
 
BLOCK_LIST blocks_
 
std::string ocr_text_
 
tesseract::TessBaseAPI api_
 
std::unique_ptr< Tesseract > tesseract_
 
ColumnFinder * finder_
 
const DENORM * denorm_
 
const TextlineProjection * projection_
 

Additional Inherited Members

- Public Member Functions inherited from testing::Test
virtual ~Test ()
 
- Static Public Member Functions inherited from testing::Test
static void SetUpTestSuite ()
 
static void TearDownTestSuite ()
 
static void TearDownTestCase ()
 
static void SetUpTestCase ()
 
static bool HasFatalFailure ()
 
static bool HasNonfatalFailure ()
 
static bool IsSkipped ()
 
static bool HasFailure ()
 
static void RecordProperty (const std::string &key, const std::string &value)
 
static void RecordProperty (const std::string &key, int value)
 

Detailed Description

Definition at line 33 of file textlineprojection_test.cc.

Constructor & Destructor Documentation

◆ TextlineProjectionTest()

tesseract::TextlineProjectionTest::TextlineProjectionTest ( )
inline protected

Definition at line 40 of file textlineprojection_test.cc.

◆ ~TextlineProjectionTest()

tesseract::TextlineProjectionTest::~TextlineProjectionTest ( )
inline override protected

Definition at line 47 of file textlineprojection_test.cc.

47 {
50 delete finder_;
51 }
void destroy()
Definition: image.cpp:32

Member Function Documentation

◆ EvaluateBox()

void tesseract::TextlineProjectionTest::EvaluateBox ( const TBOX &  box,
bool  greater_or_equal,
int  target_value,
const char *  text,
const char *  message 
)
inline protected

Definition at line 112 of file textlineprojection_test.cc.

113 {
114 int value = projection_->EvaluateBox(box, denorm_, false);
115 if (greater_or_equal != (value > target_value)) {
116 LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
117 << ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
118 box.print();
119 value = projection_->EvaluateBox(box, denorm_, true);
120 } else {
121 LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
122 }
123 if (greater_or_equal) {
124 EXPECT_GE(value, target_value);
125 } else {
126 EXPECT_LT(value, target_value);
127 }
128 }
@ LOG
@ INFO
Definition: log.h:28
int value
#define EXPECT_GE(val1, val2)
Definition: gtest.h:2051
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const

◆ EvaluateDistance()

void tesseract::TextlineProjectionTest::EvaluateDistance ( const TBOX &  box,
const TBOX &  true_box,
const TBOX &  false_box,
const char *  text,
const char *  message 
)
inline protected

Definition at line 132 of file textlineprojection_test.cc.

133 {
134 int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
135 int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
136 if (false_dist <= true_dist) {
137 LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
138 << " for " << message << " word '" << text << "' at:";
139 true_box.print();
140 projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
141 projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
142 } else {
143 LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
144 << ") for " << message << " word '" << text << "'";
145 }
146 }
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const

◆ OutputNameToPath()

std::string tesseract::TextlineProjectionTest::OutputNameToPath ( const std::string &  name)
inline protected

Definition at line 35 of file textlineprojection_test.cc.

35 {
37 return file::JoinPath(FLAGS_test_tmpdir, name);
38 }
static void MakeTmpdir()
Definition: include_gunit.h:38
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65

◆ SetImage()

void tesseract::TextlineProjectionTest::SetImage ( const char *  filename)
inline protected

Definition at line 53 of file textlineprojection_test.cc.

53 {
55 src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
56 api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
59 }
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:264
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:511
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:368
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:576

◆ SetupProjection()

void tesseract::TextlineProjectionTest::SetupProjection ( )
inline protected

Definition at line 68 of file textlineprojection_test.cc.

68 {
70 auto osd_tess = std::make_unique<Tesseract>();
71 OSResults osr;
72 EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, "", "osd", tesseract::OEM_TESSERACT_ONLY,
73 nullptr, 0, nullptr, nullptr, false, &mgr),
74 0);
75 tesseract_ = std::make_unique<Tesseract>();
76 EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, "", "eng", tesseract::OEM_TESSERACT_ONLY,
77 nullptr, 0, nullptr, nullptr, false, &mgr),
78 0);
80 *tesseract_->mutable_pix_binary() = bin_pix_.clone();
81 osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
82 tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
83 int width = pixGetWidth(bin_pix_);
84 int height = pixGetHeight(bin_pix_);
85 // First make a single block covering the whole image.
86 auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
87 block->set_right_to_left(false);
88 BLOCK_LIST src_blocks;
89 BLOCK_IT block_it(&src_blocks);
90 block_it.add_to_end(block);
91 Image photomask_pix = nullptr;
92 // The blocks made by the ColumnFinder. Moved to blocks before return.
93 BLOCK_LIST found_blocks;
94 TO_BLOCK_LIST temp_blocks;
95 finder_ =
96 tesseract_->SetupPageSegAndDetectOrientation(tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess.get(),
97 &osr, &temp_blocks, &photomask_pix, nullptr);
98 TO_BLOCK_IT to_block_it(&temp_blocks);
99 TO_BLOCK *to_block = to_block_it.data();
101 TO_BLOCK_LIST to_blocks;
102 BLOBNBOX_LIST diacritic_blobs;
103 EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, photomask_pix, nullptr,
104 nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks),
105 0);
107 photomask_pix.destroy();
108 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:162
Tesseract * tesseract() const
Definition: baseapi.h:711
Pix * GetThresholdedImage()
Definition: baseapi.cpp:631
int source_resolution() const
Image clone() const
Definition: image.cpp:24
const TextlineProjection * projection() const
Definition: colfind.h:70
const DENORM * denorm() const
Definition: colfind.h:67
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
Definition: colfind.cpp:286
std::unique_ptr< Tesseract > tesseract_

◆ VerifyBoxes()

void tesseract::TextlineProjectionTest::VerifyBoxes ( const char *  imagefile,
int  line_height 
)
inline protected

Definition at line 150 of file textlineprojection_test.cc.

150 {
151 SetImage(imagefile);
152 api_.Recognize(nullptr);
154 MutableIterator *it = api_.GetMutableIterator();
155 do {
156 char *text = it->GetUTF8Text(tesseract::RIL_WORD);
157 const PAGE_RES_IT *pr_it = it->PageResIt();
158 WERD_RES *word = pr_it->word();
159 // The word_box refers to the internal, possibly rotated, coords.
160 TBOX word_box = word->word->bounding_box();
161 bool small_word = word_box.height() * 1.5 < line_height;
162 bool tall_word = word_box.height() * 1.125 > line_height;
163 // We pad small and tall words differently because ascenders and
164 // descenders affect the position and size of the upper/lower boxes.
165 int padding;
166 if (small_word) {
167 padding = word_box.height();
168 } else if (tall_word) {
169 padding = word_box.height() / 3;
170 } else {
171 padding = word_box.height() / 2;
172 }
173 // Test that the word box gets a good score.
174 EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");
175
176 // Now test a displaced box, both above and below the word.
177 TBOX upper_box(word_box);
178 upper_box.set_bottom(word_box.top());
179 upper_box.set_top(word_box.top() + padding);
180 EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
181 EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
182 TBOX lower_box = word_box;
183 lower_box.set_top(word_box.bottom());
184 lower_box.set_bottom(word_box.bottom() - padding);
185 if (tall_word) {
186 lower_box.move(ICOORD(0, padding / 2));
187 }
188 EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
189 EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
190
191 // Since some words have no text below and some words have no text above
192 // check that at least one of the boxes satisfies BoxOutOfTextline.
193 bool upper_or_lower_out_of_textline =
194 projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
195 projection_->BoxOutOfHTextline(lower_box, denorm_, false);
196 if (!upper_or_lower_out_of_textline) {
197 projection_->BoxOutOfHTextline(upper_box, denorm_, true);
198 projection_->BoxOutOfHTextline(lower_box, denorm_, true);
199 }
200 EXPECT_TRUE(upper_or_lower_out_of_textline);
201
202 // Now test DistanceOfBoxFromBox by faking a challenger word, and asking
203 // that each pad box be nearer to its true textline than the
204 // challenger. Due to the tight spacing of latin text, getting
205 // the right position and size of these test boxes is quite fiddly.
206 padding = line_height / 4;
207 upper_box.set_top(upper_box.bottom() + padding);
208 TBOX target_box(word_box);
209 if (!small_word) {
210 upper_box.move(ICOORD(0, -padding * 3 / 2));
211 }
212 target_box.set_top(upper_box.bottom());
213 TBOX upper_challenger(upper_box);
214 upper_challenger.set_bottom(upper_box.top());
215 upper_challenger.set_top(upper_box.top() + word_box.height());
216 EvaluateDistance(upper_box, target_box, upper_challenger, text, "Upper Word");
217 if (tall_word) {
218 lower_box.move(ICOORD(0, padding / 2));
219 }
220 lower_box.set_bottom(lower_box.top() - padding);
221 target_box = word_box;
222 target_box.set_bottom(lower_box.top());
223 TBOX lower_challenger(lower_box);
224 lower_challenger.set_top(lower_box.bottom());
225 lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
226 EvaluateDistance(lower_box, target_box, lower_challenger, text, "Lower Word");
227
228 delete[] text;
229 } while (it->Next(tesseract::RIL_WORD));
230 delete it;
231 }
@ TBOX
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
const int kMinStrongTextValue
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:834
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1354
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box, const char *text, const char *message)
void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text, const char *message)

Member Data Documentation

◆ api_

tesseract::TessBaseAPI tesseract::TextlineProjectionTest::api_
protected

Definition at line 237 of file textlineprojection_test.cc.

◆ bin_pix_

Image tesseract::TextlineProjectionTest::bin_pix_
protected

Definition at line 234 of file textlineprojection_test.cc.

◆ blocks_

BLOCK_LIST tesseract::TextlineProjectionTest::blocks_
protected

Definition at line 235 of file textlineprojection_test.cc.

◆ denorm_

const DENORM* tesseract::TextlineProjectionTest::denorm_
protected

Definition at line 240 of file textlineprojection_test.cc.

◆ finder_

ColumnFinder* tesseract::TextlineProjectionTest::finder_
protected

Definition at line 239 of file textlineprojection_test.cc.

◆ ocr_text_

std::string tesseract::TextlineProjectionTest::ocr_text_
protected

Definition at line 236 of file textlineprojection_test.cc.

◆ projection_

const TextlineProjection* tesseract::TextlineProjectionTest::projection_
protected

Definition at line 241 of file textlineprojection_test.cc.

◆ src_pix_

Image tesseract::TextlineProjectionTest::src_pix_
protected

Definition at line 233 of file textlineprojection_test.cc.

◆ tesseract_

std::unique_ptr<Tesseract> tesseract::TextlineProjectionTest::tesseract_
protected

Definition at line 238 of file textlineprojection_test.cc.


The documentation for this class was generated from the following file: