tesseract v5.3.3.20231005
tesseract::BLOCK Class Reference

#include <ocrblock.h>

Inheritance diagram for tesseract::BLOCK:
tesseract::ELIST_LINK

Public Member Functions

 BLOCK ()
 
 BLOCK (const char *name, bool prop, int16_t kern, int16_t space, TDimension xmin, TDimension ymin, TDimension xmax, TDimension ymax)
 
 ~BLOCK ()=default
 
void set_stats (bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
 
void set_xheight (int32_t height)
 set char size More...
 
void set_font_class (int16_t font)
 set font class More...
 
bool prop () const
 return proportional More...
 
bool right_to_left () const
 
void set_right_to_left (bool value)
 
int32_t fixed_pitch () const
 return pitch More...
 
int16_t kern () const
 return kerning More...
 
int16_t font () const
 return font class More...
 
int16_t space () const
 return spacing More...
 
const char * name () const
 return filename More...
 
int32_t x_height () const
 return xheight More...
 
float cell_over_xheight () const
 
void set_cell_over_xheight (float ratio)
 
ROW_LIST * row_list ()
 get rows More...
 
void compute_row_margins ()
 
PARA_LIST * para_list ()
 
C_BLOB_LIST * blob_list ()
 get blobs More...
 
C_BLOB_LIST * reject_blobs ()
 
FCOORD re_rotation () const
 
void set_re_rotation (const FCOORD &rotation)
 
FCOORD classify_rotation () const
 
void set_classify_rotation (const FCOORD &rotation)
 
FCOORD skew () const
 
void set_skew (const FCOORD &skew)
 
const ICOORD & median_size () const
 
void set_median_size (int x, int y)
 
Image render_mask (TBOX *mask_box)
 
TBOX restricted_bounding_box (bool upper_dots, bool lower_dots) const
 
void reflect_polygon_in_y_axis ()
 
void rotate (const FCOORD &rotation)
 
void sort_rows ()
 decreasing y order More...
 
void compress ()
 shrink white space More...
 
void check_pitch ()
 check proportional More...
 
void compress (const ICOORD vec)
 shrink white space and move by vector More...
 
void print (FILE *fp, bool dump)
 dump whole table More...
 
BLOCK & operator= (const BLOCK &source)
 
- Public Member Functions inherited from tesseract::ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Public Attributes

PDBLK pdblk
 Page Description Block. More...
 

Friends

class BLOCK_RECT_IT
 

Detailed Description

Definition at line 32 of file ocrblock.h.

Constructor & Destructor Documentation

◆ BLOCK() [1/2]

tesseract::BLOCK::BLOCK ( )
inline

Definition at line 37 of file ocrblock.h.

37: re_rotation_(1.0f, 0.0f), classify_rotation_(1.0f, 0.0f), skew_(1.0f, 0.0f) {}

◆ BLOCK() [2/2]

tesseract::BLOCK::BLOCK ( const char *  name,
bool  prop,
int16_t  kern,
int16_t  space,
TDimension  xmin,
TDimension  ymin,
TDimension  xmax,
TDimension  ymax 
)

BLOCK::BLOCK

Constructor for a simple rectangular block.

Parameters
name  filename
prop  proportional
kern  kerning
space  spacing
xmin  bottom left
xmax  top right

Definition at line 34 of file ocrblock.cpp.

42 : pdblk(xmin, ymin, xmax, ymax)
43 , filename(name)
44 , re_rotation_(1.0f, 0.0f)
45 , classify_rotation_(1.0f, 0.0f)
46 , skew_(1.0f, 0.0f) {
47 ICOORDELT_IT left_it = &pdblk.leftside;
48 ICOORDELT_IT right_it = &pdblk.rightside;
49
50 proportional = prop;
51 kerning = kern;
52 spacing = space;
53 font_class = -1; // not assigned
54 cell_over_xheight_ = 2.0f;
55 pdblk.hand_poly = nullptr;
56 left_it.set_to_list(&pdblk.leftside);
57 right_it.set_to_list(&pdblk.rightside);
58 // make default box
59 left_it.add_to_end(new ICOORDELT(xmin, ymin));
60 left_it.add_to_end(new ICOORDELT(xmin, ymax));
61 right_it.add_to_end(new ICOORDELT(xmax, ymin));
62 right_it.add_to_end(new ICOORDELT(xmax, ymax));
63}
const char * name() const
return filename
Definition: ocrblock.h:97
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
bool prop() const
return proportional
Definition: ocrblock.h:71
int16_t kern() const
return kerning
Definition: ocrblock.h:85
int16_t space() const
return spacing
Definition: ocrblock.h:93
ICOORDELT_LIST rightside
right side vertices
Definition: pdblock.h:111
POLY_BLOCK * hand_poly
weird as well
Definition: pdblock.h:109
ICOORDELT_LIST leftside
left side vertices
Definition: pdblock.h:110

◆ ~BLOCK()

tesseract::BLOCK::~BLOCK ( )
default

Member Function Documentation

◆ blob_list()

C_BLOB_LIST * tesseract::BLOCK::blob_list ( )
inline

get blobs

Definition at line 123 of file ocrblock.h.

123 {
124 return &c_blobs;
125 }

◆ cell_over_xheight()

float tesseract::BLOCK::cell_over_xheight ( ) const
inline

Definition at line 104 of file ocrblock.h.

104 {
105 return cell_over_xheight_;
106 }

◆ check_pitch()

void tesseract::BLOCK::check_pitch ( )

check proportional

BLOCK::check_pitch

Check whether the block is fixed or prop, set the flag, and set the pitch if it is fixed.

Definition at line 164 of file ocrblock.cpp.

164 { // check prop
165 // tprintf("Missing FFT fixed pitch stuff!\n");
166 pitch = -1;
167}

◆ classify_rotation()

FCOORD tesseract::BLOCK::classify_rotation ( ) const
inline

Definition at line 135 of file ocrblock.h.

135 {
136 return classify_rotation_; // Apply this before classifying.
137 }

◆ compress() [1/2]

void tesseract::BLOCK::compress ( )

shrink white space

BLOCK::compress

Delete space between the rows. (And maybe one day, compress the rows) Fill space of block from top down, left aligning rows.

Definition at line 128 of file ocrblock.cpp.

128 { // squash it up
129#define ROW_SPACING 5
130
131 ROW_IT row_it(&rows);
132 ROW *row;
133 ICOORD row_spacing(0, ROW_SPACING);
134
135 ICOORDELT_IT icoordelt_it;
136
137 sort_rows();
138
141 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
142 row = row_it.data();
143 row->move(pdblk.box.botleft() - row_spacing - row->bounding_box().topleft());
144 pdblk.box += row->bounding_box();
145 }
146
147 pdblk.leftside.clear();
148 icoordelt_it.set_to_list(&pdblk.leftside);
149 icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.bottom()));
150 icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.top()));
151 pdblk.rightside.clear();
152 icoordelt_it.set_to_list(&pdblk.rightside);
153 icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.bottom()));
154 icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.top()));
155}
#define ROW_SPACING
@ TBOX
void sort_rows()
decreasing y order
Definition: ocrblock.cpp:115
TBOX box
bounding box
Definition: pdblock.h:112
TDimension left() const
Definition: rect.h:82
void move_bottom_edge(const TDimension y)
Definition: rect.h:150
TDimension top() const
Definition: rect.h:68
const ICOORD & botleft() const
Definition: rect.h:102
ICOORD topleft() const
Definition: rect.h:110
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75

◆ compress() [2/2]

void tesseract::BLOCK::compress ( const ICOORD  vec)

shrink white space and move by vector

BLOCK::compress

Compress and move in a single operation.

Definition at line 175 of file ocrblock.cpp.

177 {
178 pdblk.box.move(vec);
179 compress();
180}
void compress()
shrink white space
Definition: ocrblock.cpp:128
void move(const ICOORD vec)
Definition: rect.h:170

◆ compute_row_margins()

void tesseract::BLOCK::compute_row_margins ( )

Definition at line 330 of file ocrblock.cpp.

330 {
331 if (row_list()->empty() || row_list()->singleton()) {
332 return;
333 }
334
335 // If Layout analysis was not called, default to this.
336 POLY_BLOCK rect_block(pdblk.bounding_box(), PT_FLOWING_TEXT);
337 POLY_BLOCK *pblock = &rect_block;
338 if (pdblk.poly_block() != nullptr) {
339 pblock = pdblk.poly_block();
340 }
341
342 // Step One: Determine if there is a drop-cap.
343 // TODO(eger): Fix up drop cap code for RTL languages.
344 ROW_IT r_it(row_list());
345 ROW *first_row = r_it.data();
346 ROW *second_row = r_it.data_relative(1);
347
348 // initialize the bottom of a fictitious drop cap far above the first line.
349 int drop_cap_bottom = first_row->bounding_box().top() + first_row->bounding_box().height();
350 int drop_cap_right = first_row->bounding_box().left();
351 int mid_second_line = second_row->bounding_box().top() - second_row->bounding_box().height() / 2;
352 WERD_IT werd_it(r_it.data()->word_list()); // words of line one
353 if (!werd_it.empty()) {
354 C_BLOB_IT cblob_it(werd_it.data()->cblob_list());
355 for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
356 TBOX bbox = cblob_it.data()->bounding_box();
357 if (bbox.bottom() <= mid_second_line) {
358 // we found a real drop cap
359 first_row->set_has_drop_cap(true);
360 if (drop_cap_bottom > bbox.bottom()) {
361 drop_cap_bottom = bbox.bottom();
362 }
363 if (drop_cap_right < bbox.right()) {
364 drop_cap_right = bbox.right();
365 }
366 }
367 }
368 }
369
370 // Step Two: Calculate the margin from the text of each row to the block
371 // (or drop-cap) boundaries.
372 PB_LINE_IT lines(pblock);
373 r_it.set_to_list(row_list());
374 for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
375 ROW *row = r_it.data();
376 TBOX row_box = row->bounding_box();
377 int left_y = row->base_line(row_box.left()) + row->x_height();
378 int left_margin;
379 const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(lines.get_line(left_y));
380 LeftMargin(segments_left.get(), row_box.left(), &left_margin);
381
382 if (row_box.top() >= drop_cap_bottom) {
383 int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
384 if (drop_cap_distance < 0) {
385 drop_cap_distance = 0;
386 }
387 if (drop_cap_distance < left_margin) {
388 left_margin = drop_cap_distance;
389 }
390 }
391
392 int right_y = row->base_line(row_box.right()) + row->x_height();
393 int right_margin;
394 const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(lines.get_line(right_y));
395 RightMargin(segments_right.get(), row_box.right(), &right_margin);
396 row->set_lmargin(left_margin);
397 row->set_rmargin(right_margin);
398 }
399}
@ PT_FLOWING_TEXT
Definition: publictypes.h:53
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:111
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67

◆ fixed_pitch()

int32_t tesseract::BLOCK::fixed_pitch ( ) const
inline

return pitch

Definition at line 81 of file ocrblock.h.

81 {
82 return pitch;
83 }

◆ font()

int16_t tesseract::BLOCK::font ( ) const
inline

return font class

Definition at line 89 of file ocrblock.h.

89 {
90 return font_class;
91 }

◆ kern()

int16_t tesseract::BLOCK::kern ( ) const
inline

return kerning

Definition at line 85 of file ocrblock.h.

85 {
86 return kerning;
87 }

◆ median_size()

const ICOORD & tesseract::BLOCK::median_size ( ) const
inline

Definition at line 147 of file ocrblock.h.

147 {
148 return median_size_;
149 }

◆ name()

const char * tesseract::BLOCK::name ( ) const
inline

return filename

Definition at line 97 of file ocrblock.h.

97 {
98 return filename.c_str();
99 }

◆ operator=()

BLOCK & tesseract::BLOCK::operator= ( const BLOCK &  source)

BLOCK::operator=

Assignment - duplicate the block structure, but with an EMPTY row list.

Definition at line 222 of file ocrblock.cpp.

224 {
225 this->ELIST_LINK::operator=(source);
226 pdblk = source.pdblk;
227 proportional = source.proportional;
228 kerning = source.kerning;
229 spacing = source.spacing;
230 filename = source.filename; // STRINGs assign ok
231 if (!rows.empty()) {
232 rows.clear();
233 }
234 re_rotation_ = source.re_rotation_;
235 classify_rotation_ = source.classify_rotation_;
236 skew_ = source.skew_;
237 return *this;
238}
void operator=(const ELIST_LINK &)
Definition: elst.h:100

◆ para_list()

PARA_LIST * tesseract::BLOCK::para_list ( )
inline

Definition at line 119 of file ocrblock.h.

119 {
120 return &paras_;
121 }

◆ print()

void tesseract::BLOCK::print ( FILE *  fp,
bool  dump 
)

dump whole table

BLOCK::print

Print the info on a block

Parameters
fp  file to print on
dump  print full detail

Definition at line 188 of file ocrblock.cpp.

191 {
192 ICOORDELT_IT it = &pdblk.leftside; // iterator
193
194 pdblk.box.print();
195 tprintf("Proportional= %s\n", proportional ? "TRUE" : "FALSE");
196 tprintf("Kerning= %d\n", kerning);
197 tprintf("Spacing= %d\n", spacing);
198 tprintf("Fixed_pitch=%d\n", pitch);
199 tprintf("Filename= %s\n", filename.c_str());
200
201 if (dump) {
202 tprintf("Left side coords are:\n");
203 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
204 tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
205 }
206 tprintf("\n");
207 tprintf("Right side coords are:\n");
208 it.set_to_list(&pdblk.rightside);
209 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
210 tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
211 }
212 tprintf("\n");
213 }
214}
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void print() const
Definition: rect.h:289

◆ prop()

bool tesseract::BLOCK::prop ( ) const
inline

return proportional

Definition at line 71 of file ocrblock.h.

71 {
72 return proportional;
73 }

◆ re_rotation()

FCOORD tesseract::BLOCK::re_rotation ( ) const
inline

Definition at line 129 of file ocrblock.h.

129 {
130 return re_rotation_; // How to transform coords back to image.
131 }

◆ reflect_polygon_in_y_axis()

void tesseract::BLOCK::reflect_polygon_in_y_axis ( )

BLOCK::reflect_polygon_in_y_axis

Reflects the polygon in the y-axis and recomputes the bounding_box. Does nothing to any contained rows/words/blobs etc.

Definition at line 104 of file ocrblock.cpp.

104 {
107}
TBOX * bounding_box()
Definition: polyblk.h:38

◆ reject_blobs()

C_BLOB_LIST * tesseract::BLOCK::reject_blobs ( )
inline

Definition at line 126 of file ocrblock.h.

126 {
127 return &rej_blobs;
128 }

◆ render_mask()

Image tesseract::BLOCK::render_mask ( TBOX *  mask_box)
inline

Definition at line 155 of file ocrblock.h.

155 {
156 return pdblk.render_mask(re_rotation_, mask_box);
157 }
Image render_mask(const FCOORD &rerotation, TBOX *mask_box)
Definition: pdblock.cpp:137

◆ restricted_bounding_box()

TBOX tesseract::BLOCK::restricted_bounding_box ( bool  upper_dots,
bool  lower_dots 
) const

Definition at line 88 of file ocrblock.cpp.

88 {
89 TBOX box;
90 // This is a read-only iteration of the rows in the block.
91 ROW_IT it(const_cast<ROW_LIST *>(&rows));
92 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
93 box += it.data()->restricted_bounding_box(upper_dots, lower_dots);
94 }
95 return box;
96}

◆ right_to_left()

bool tesseract::BLOCK::right_to_left ( ) const
inline

Definition at line 74 of file ocrblock.h.

74 {
75 return right_to_left_;
76 }

◆ rotate()

void tesseract::BLOCK::rotate ( const FCOORD &  rotation)

BLOCK::rotate

Rotate the polygon by the given rotation and recompute the bounding_box.

Definition at line 81 of file ocrblock.cpp.

81 {
82 pdblk.poly_block()->rotate(rotation);
84}
void rotate(FCOORD rotation)
Definition: polyblk.cpp:191

◆ row_list()

ROW_LIST * tesseract::BLOCK::row_list ( )
inline

get rows

Definition at line 111 of file ocrblock.h.

111 {
112 return &rows;
113 }

◆ set_cell_over_xheight()

void tesseract::BLOCK::set_cell_over_xheight ( float  ratio)
inline

Definition at line 107 of file ocrblock.h.

107 {
108 cell_over_xheight_ = ratio;
109 }

◆ set_classify_rotation()

void tesseract::BLOCK::set_classify_rotation ( const FCOORD &  rotation)
inline

Definition at line 138 of file ocrblock.h.

138 {
139 classify_rotation_ = rotation;
140 }

◆ set_font_class()

void tesseract::BLOCK::set_font_class ( int16_t  font)
inline

set font class

Definition at line 67 of file ocrblock.h.

67 {
68 font_class = font;
69 }
int16_t font() const
return font class
Definition: ocrblock.h:89

◆ set_median_size()

void tesseract::BLOCK::set_median_size ( int  x,
int  y 
)
inline

Definition at line 150 of file ocrblock.h.

150 {
151 median_size_.set_x(x);
152 median_size_.set_y(y);
153 }
const double y
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
void set_y(TDimension yin)
rewrite function
Definition: points.h:71

◆ set_re_rotation()

void tesseract::BLOCK::set_re_rotation ( const FCOORD &  rotation)
inline

Definition at line 132 of file ocrblock.h.

132 {
133 re_rotation_ = rotation;
134 }

◆ set_right_to_left()

void tesseract::BLOCK::set_right_to_left ( bool  value)
inline

Definition at line 77 of file ocrblock.h.

77 {
78 right_to_left_ = value;
79 }
int value

◆ set_skew()

void tesseract::BLOCK::set_skew ( const FCOORD &  skew)
inline

Definition at line 144 of file ocrblock.h.

144 {
145 skew_ = skew;
146 }
FCOORD skew() const
Definition: ocrblock.h:141

◆ set_stats()

void tesseract::BLOCK::set_stats ( bool  prop,
int16_t  kern,
int16_t  space,
int16_t  ch_pitch 
)
inline

set space size etc.

Parameters
prop  proportional
kern  inter char size
space  inter word size
ch_pitch  pitch if fixed

Definition at line 56 of file ocrblock.h.

56 {
57 proportional = prop;
58 kerning = static_cast<int8_t>(kern);
59 spacing = space;
60 pitch = ch_pitch;
61 }

◆ set_xheight()

void tesseract::BLOCK::set_xheight ( int32_t  height)
inline

set char size

Definition at line 63 of file ocrblock.h.

63 {
64 xheight = height;
65 }

◆ skew()

FCOORD tesseract::BLOCK::skew ( ) const
inline

Definition at line 141 of file ocrblock.h.

141 {
142 return skew_; // Direction of true horizontal.
143 }

◆ sort_rows()

void tesseract::BLOCK::sort_rows ( )

decreasing y order

BLOCK::sort_rows

Order rows so that they are in order of decreasing Y coordinate

Definition at line 115 of file ocrblock.cpp.

115 { // order on "top"
116 ROW_IT row_it(&rows);
117
118 row_it.sort(decreasing_top_order);
119}

◆ space()

int16_t tesseract::BLOCK::space ( ) const
inline

return spacing

Definition at line 93 of file ocrblock.h.

93 {
94 return spacing;
95 }

◆ x_height()

int32_t tesseract::BLOCK::x_height ( ) const
inline

return xheight

Definition at line 101 of file ocrblock.h.

101 {
102 return xheight;
103 }

Friends And Related Function Documentation

◆ BLOCK_RECT_IT

friend class BLOCK_RECT_IT
friend

Definition at line 35 of file ocrblock.h.

Member Data Documentation

◆ pdblk

PDBLK tesseract::BLOCK::pdblk

Page Description Block.

Definition at line 185 of file ocrblock.h.


The documentation for this class was generated from the following files: