tesseract v5.3.3.20231005
strokewidth.h
Go to the documentation of this file.
1
2// File: strokewidth.h
3// Description: Subclass of BBGrid to find uniformity of strokewidth.
4// Author: Ray Smith
5//
6// (C) Copyright 2008, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_
20#define TESSERACT_TEXTORD_STROKEWIDTH_H_
21
22#include "blobbox.h" // BlobNeighbourDir.
23#include "blobgrid.h" // Base class.
24#include "colpartitiongrid.h"
25#include "textlineprojection.h"
26
27class DENORM;
28class ScrollView;
29class TO_BLOCK;
30
31namespace tesseract {
32
33class ColPartition_LIST;
34class TabFind;
35class TextlineProjection;
36
37// Misc enums to clarify bool arguments for direction-controlling args.
39
// Result code returned by FindInitialPartitions. Reports whether severe
// skew or noise was detected while building the initial partitions.
enum PartitionFindResult {
  PFR_OK,   // Everything is OK.
  PFR_SKEW, // Skew was detected and rotated.
  PFR_NOISE // Noise was detected and removed.
};
47
53class StrokeWidth : public BlobGrid {
54public:
55 StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright);
56 ~StrokeWidth() override;
57
58 // Sets the neighbours member of the medium-sized blobs in the block.
59 // Searches on 4 sides of each blob for similar-sized, similar-strokewidth
60 // blobs and sets pointers to the good neighbours.
62
63 // Sets the neighbour/textline writing direction members of the medium
64 // and large blobs with optional repair of broken CJK characters first.
65 // Repair of broken CJK is needed here because broken CJK characters
66 // can fool the textline direction detection algorithm.
67 void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge,
68 TO_BLOCK *input_block);
69
70 // To save computation, the process of generating partitions is broken
71 // into the following 4 steps:
72 // TestVerticalTextDirection
73 // CorrectForRotation (used only if a rotation is to be applied)
74 // FindLeaderPartitions
75 // GradeBlobsIntoPartitions.
76 // These functions are all required, in sequence, except for
77 // CorrectForRotation, which is not needed if no rotation is applied.
78
79 // Types all the blobs as vertical or horizontal text or unknown and
80 // returns true if the majority are vertical.
81 // If the blobs are rotated, it is necessary to call CorrectForRotation
82 // after rotating everything, otherwise the work done here will be enough.
83 // If osd_blobs is not null, a list of blobs from the dominant textline
84 // direction are returned for use in orientation and script detection.
85 // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
86 bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block,
87 BLOBNBOX_CLIST *osd_blobs);
88
89 // Corrects the data structures for the given rotation.
90 void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid);
91
92 // Finds leader partitions and inserts them into the given grid.
93 void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid);
94
95 // Finds those blobs that look like bits of vertical lines and marks them
96 // as noise, since they would otherwise disrupt layout analysis.
97 void RemoveLineResidue(ColPartition_LIST *big_part_list);
98
99 // Types all the blobs as vertical text or horizontal text or unknown and
100 // puts them into initial ColPartitions in the supplied part_grid.
101 // rerotation determines how to get back to the image coordinates from the
102 // blob coordinates (since they may have been rotated for vertical text).
103 // block is the single block for the whole page or rectangle to be OCRed.
104 // nontext_pix (full-size), is a binary mask used to prevent merges across
105 // photo/text boundaries. It is not kept beyond this function.
106 // denorm provides a mapping back to the image from the current blob
107 // coordinate space.
108 // projection provides a measure of textline density over the image and
109 // provides functions to assist with diacritic detection. It should be a
110 // pointer to a new TextlineProjection, and will be setup here.
111 // part_grid is the output grid of textline partitions.
112 // Large blobs that cause overlap are put in separate partitions and added
113 // to the big_parts list.
114 void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block,
115 Image nontext_pix, const DENORM *denorm, bool cjk_script,
116 TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs,
117 ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
118
119 // Handles a click event in a display window.
120 void HandleClick(int x, int y) override;
121
122private:
123 // Computes the noise_density_ by summing the number of elements in a
124 // neighbourhood of each grid cell.
125 void ComputeNoiseDensity(TO_BLOCK *block, TabFind *line_grid);
126
127 // Detects and marks leader dots/dashes.
128 // Leaders are horizontal chains of small or noise blobs that look
129 // monospace according to ColPartition::MarkAsLeaderIfMonospaced().
130 // Detected leaders become the only occupants of the block->small_blobs list.
131 // Non-leader small blobs get moved to the blobs list.
132 // Non-leader noise blobs remain singletons in the noise list.
133 // All small and noise blobs in high density regions are marked BTFT_NONTEXT.
134 // block is the single block for the whole page or rectangle to be OCRed.
135 // leader_parts is the output.
136 void FindLeadersAndMarkNoise(TO_BLOCK *block, ColPartition_LIST *leader_parts);
137
140 void InsertBlobs(TO_BLOCK *block);
141
142 // Fix broken CJK characters, using the fake joined blobs mechanism.
143 // Blobs are really merged, ie the master takes all the outlines and the
144 // others are deleted.
145 // Returns true if sufficient blobs are merged that it may be worth running
146 // again, due to a better estimate of character size.
147 bool FixBrokenCJK(TO_BLOCK *block);
148
149 // Collect blobs that overlap or are within max_dist of the input bbox.
150 // Return them in the list of blobs and expand the bbox to be the union
151 // of all the boxes. not_this is excluded from the search, as are blobs
152 // that cause the merged box to exceed max_size in either dimension.
153 void AccumulateOverlaps(const BLOBNBOX *not_this, bool debug, int max_size, int max_dist,
154 TBOX *bbox, BLOBNBOX_CLIST *blobs);
155
156 // For each blob in this grid, finds the textline direction to be horizontal
157 // or vertical according to distance to neighbours and 1st and 2nd order
158 // neighbours. Non-text tends to end up without a definite direction.
159 // Result is setting of the neighbours and vert_possible/horz_possible
160 // flags in the BLOBNBOXes currently in this grid.
161 // This function is called more than once if page orientation is uncertain,
162 // so display_if_debugging is true on the final call to display the results.
163 void FindTextlineFlowDirection(PageSegMode pageseg_mode, bool display_if_debugging);
164
165 // Sets the neighbours and good_stroke_neighbours members of the blob by
166 // searching close on all 4 sides.
167 // When finding leader dots/dashes, there is a slightly different rule for
168 // what makes a good neighbour.
169 // If activate_line_trap, then line-like objects are found and isolated.
170 void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX *blob);
171
172 // Sets the good_stroke_neighbours member of the blob if it has a
173 // GoodNeighbour on the given side.
174 // Also sets the neighbour in the blob, whether or not a good one is found.
175 // Return value is the number of neighbours in the line trap size range.
176 // Leaders get extra special lenient treatment.
177 int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX *blob);
178
179 // Makes the blob to be only horizontal or vertical where evidence
180 // is clear based on gaps of 2nd order neighbours.
181 void SetNeighbourFlows(BLOBNBOX *blob);
182
183 // Nullify the neighbours in the wrong directions where the direction
184 // is clear-cut based on a distance margin. Good for isolating vertical
185 // text from neighbouring horizontal text.
186 void SimplifyObviousNeighbours(BLOBNBOX *blob);
187
188 // Smoothes the vertical/horizontal type of the blob based on the
189 // 2nd-order neighbours. If desperate is true, then all blobs are
190 // changed. Otherwise, only ambiguous blobs are processed.
191 void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, BLOBNBOX *blob);
192
193 // Checks the left or right side of the given leader partition and sets the
194 // (opposite) leader_on_right or leader_on_left flags for blobs
195 // that are next to the given side of the given leader partition.
196 void MarkLeaderNeighbours(const ColPartition *part, LeftOrRight side);
197
198 // Partition creation. Accumulates vertical and horizontal text chains,
199 // puts the remaining blobs in as unknowns, and then merges/splits to
200 // minimize overlap and smoothes the types with neighbours and the color
201 // image if provided. rerotation is used to rotate the coordinate space
202 // back to the nontext_map_ image.
203 // If find_problems is true, detects possible noise pollution by the amount
204 // of partition overlap that is created by the diacritics. If excessive, the
205 // noise is separated out into diacritic blobs, and PFR_NOISE is returned.
206 // [TODO(rays): if the partition overlap is caused by heavy skew, deskews
207 // the components, saves the skew_angle and returns PFR_SKEW.] If the return
208 // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
209 // called again after cleaning up the partly done work.
210 PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation,
211 bool find_problems, TO_BLOCK *block,
212 BLOBNBOX_LIST *diacritic_blobs,
213 ColPartitionGrid *part_grid,
214 ColPartition_LIST *big_parts, FCOORD *skew_angle);
215 // Detects noise by a significant increase in partition overlap from
216 // pre_overlap to now, and removes noise from the union of all the overlapping
217 // partitions, placing the blobs in diacritic_blobs. Returns true if any noise
218 // was found and removed.
219 bool DetectAndRemoveNoise(int pre_overlap, const TBOX &grid_box, TO_BLOCK *block,
220 ColPartitionGrid *part_grid, BLOBNBOX_LIST *diacritic_blobs);
221 // Finds vertical chains of text-like blobs and puts them in ColPartitions.
222 void FindVerticalTextChains(ColPartitionGrid *part_grid);
223 // Finds horizontal chains of text-like blobs and puts them in ColPartitions.
224 void FindHorizontalTextChains(ColPartitionGrid *part_grid);
225 // Finds diacritics and saves their base character in the blob.
226 void TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block);
227 // Searches this grid for an appropriately close and sized neighbour of the
228 // given [small] blob. If such a blob is found, the diacritic base is saved
229 // in the blob and true is returned.
230 // The small_grid is a secondary grid that contains the small/noise objects
231 // that are not in this grid, but may be useful for determining a connection
232 // between blob and its potential base character. (See DiacriticXGapFilled.)
233 bool DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob);
234 // Returns true if there is no gap between the base char and the diacritic
235 // bigger than a fraction of the height of the base char:
236 // Eg: line end.....'
237 // The quote is a long way from the end of the line, yet it needs to be a
238 // diacritic. To determine that the quote is not part of an image, or
239 // a different text block, we check for other marks in the gap between
240 // the base char and the diacritic.
241 // '<--Diacritic
242 // |---------|
243 // | |<-toobig-gap->
244 // | Base |<ok gap>
245 // |---------| x<-----Dot occupying gap
246 // The grid is const really.
247 bool DiacriticXGapFilled(BlobGrid *grid, const TBOX &diacritic_box, const TBOX &base_box);
248 // Merges diacritics with the ColPartition of the base character blob.
249 void MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid);
250 // Any blobs on the large_blobs list of block that are still unowned by a
251 // ColPartition, are probably drop-cap or vertically touching so the blobs
252 // are removed to the big_parts list and treated separately.
253 void RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid,
254 ColPartition_LIST *big_parts);
255
256 // All remaining unused blobs are put in individual ColPartitions.
257 void PartitionRemainingBlobs(PageSegMode pageseg_mode, ColPartitionGrid *part_grid);
258
259 // If combine, put all blobs in the cell_list into a single partition,
260 // otherwise put each one into its own partition.
261 void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
262 ColPartitionGrid *part_grid, BLOBNBOX_CLIST *cell_list);
263
264 // Helper function to finish setting up a ColPartition and insert into
265 // part_grid.
266 void CompletePartition(PageSegMode pageseg_mode, ColPartition *part, ColPartitionGrid *part_grid);
267
268 // Helper returns true if we are looking only for vertical textlines,
269 // taking into account any rotation that has been done.
270 bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
271 if (rerotation_.y() == 0.0f) {
272 return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
273 }
274 return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
275 }
276 // Helper returns true if we are looking only for horizontal textlines,
277 // taking into account any rotation that has been done.
278 bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
279 if (rerotation_.y() == 0.0f) {
280 return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
281 }
282 return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
283 }
284
285 // Merge partitions where the merge appears harmless.
286 void EasyMerges(ColPartitionGrid *part_grid);
287
288 // Compute a search box based on the orientation of the partition.
289 // Returns true if a suitable box can be calculated.
290 // Callback for EasyMerges.
291 bool OrientationSearchBox(ColPartition *part, TBOX *box);
292
293 // Merge confirmation callback for EasyMerges.
294 bool ConfirmEasyMerge(const ColPartition *p1, const ColPartition *p2);
295
296 // Returns true if there is no significant noise in between the boxes.
297 bool NoNoiseInBetween(const TBOX &box1, const TBOX &box2) const;
298
299#ifndef GRAPHICS_DISABLED
300 // Displays the blobs colored according to the number of good neighbours
301 // and the vertical/horizontal flow.
302 ScrollView *DisplayGoodBlobs(const char *window_name, int x, int y);
303
304 // Displays blobs colored according to whether or not they are diacritics.
305 ScrollView *DisplayDiacritics(const char *window_name, int x, int y, TO_BLOCK *block);
306#endif
307
308private:
309 // Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
310 Image nontext_map_;
311 // Textline projection map. Borrowed pointer.
312 TextlineProjection *projection_;
313 // DENORM used by projection_ to get back to image coords. Borrowed pointer.
314 const DENORM *denorm_;
315 // Bounding box of the grid.
316 TBOX grid_box_;
317 // Rerotation to get back to the original image.
318 FCOORD rerotation_;
319#ifndef GRAPHICS_DISABLED
320 // Windows for debug display.
321 ScrollView *leaders_win_ = nullptr;
322 ScrollView *initial_widths_win_ = nullptr;
323 ScrollView *widths_win_ = nullptr;
324 ScrollView *chains_win_ = nullptr;
325 ScrollView *diacritics_win_ = nullptr;
326 ScrollView *textlines_win_ = nullptr;
327 ScrollView *smoothed_win_ = nullptr;
328#endif
329};
330
331} // namespace tesseract.
332
333#endif // TESSERACT_TEXTORD_STROKEWIDTH_H_
const double y
PartitionFindResult
Definition: strokewidth.h:42
@ PSM_SINGLE_BLOCK_VERT_TEXT
Definition: publictypes.h:164
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
Definition: publictypes.h:189
BlobNeighbourDir
Definition: blobbox.h:89
integer coordinate
Definition: points.h:36
float y() const
Definition: points.h:209
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
void HandleClick(int x, int y) override
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)