tesseract v5.3.3.20231005
findseam.cpp
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * File: findseam.cpp (Formerly findseam.c)
4 * Author: Mark Seaman, OCR Technology
5 *
6 * (c) Copyright 1987, Hewlett-Packard Company.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 *****************************************************************************/
18/*----------------------------------------------------------------------
19 I n c l u d e s
20----------------------------------------------------------------------*/
21#include "findseam.h"
22#include "outlines.h"
23#include "plotedges.h"
24#include "seam.h"
25#include "wordrec.h"
26
27// Include automatically generated configuration file if running autoconf.
28#ifdef HAVE_CONFIG_H
29# include "config_auto.h"
30#endif
31
32/**********************************************************************
33 * partial_split_priority
34 *
35 * Assign a priority to this split based on the features that it has.
36 * Grade it according to the different rating schemes and return the
37 * value of its goodness.
38 **********************************************************************/
39
40#define partial_split_priority(split) (grade_split_length(split) + grade_sharpness(split))
41
42/*----------------------------------------------------------------------
43 T y p e s
44----------------------------------------------------------------------*/
45#define SPLIT_CLOSENESS 20 /* Difference in x value */
46 /* How many to keep */
47#define MAX_NUM_SEAMS 150
48/* How many to keep */
49#define NO_FULL_PRIORITY (-1) // Special marker for pri.
50 /* Evaluate right away */
51#define BAD_PRIORITY 9999.0
52
53/*----------------------------------------------------------------------
54 F u n c t i o n s
55----------------------------------------------------------------------*/
56namespace tesseract {
57
58/**********************************************************************
59 * add_seam_to_queue
60 *
61 * Adds the given new_seam to the seams priority queue, unless it is full
62 * and the new seam is worse than the worst.
63 **********************************************************************/
64void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams) {
65 if (new_seam == nullptr) {
66 return;
67 }
68 if (chop_debug) {
69 tprintf("Pushing new seam with priority %g :", new_priority);
70 new_seam->Print("seam: ");
71 }
72 if (seams->size() >= MAX_NUM_SEAMS) {
73 SeamPair old_pair(0, nullptr);
74 if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75 if (chop_debug) {
76 tprintf("Old seam staying with priority %g\n", old_pair.key());
77 }
78 delete new_seam;
79 seams->Push(&old_pair);
80 return;
81 } else if (chop_debug) {
82 tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority,
83 old_pair.key());
84 }
85 }
86 SeamPair new_pair(new_priority, new_seam);
87 seams->Push(&new_pair);
88}
89
90/**********************************************************************
91 * choose_best_seam
92 *
93 * Choose the best seam that can be created by assembling this a
94 * collection of splits. A queue of all the possible seams is
95 * maintained. Each new split received is placed in that queue with
96 * its partial priority value. These values in the seam queue are
97 * evaluated and combined until a good enough seam is found. If no
98 * further good seams are being found then this function returns to the
99 * caller, who will send more splits. If this function is called with
100 * a split of nullptr, then no further splits can be supplied by the
101 * caller.
102 **********************************************************************/
103void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority,
104 SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) {
105 SEAM *seam;
106 float my_priority;
107 /* Add seam of split */
108 my_priority = priority;
109 if (split != nullptr) {
110 TPOINT split_point = split->point1->pos;
111 split_point += split->point2->pos;
112 split_point /= 2;
113 seam = new SEAM(my_priority, split_point, *split);
114 if (chop_debug > 1) {
115 seam->Print("Partial priority ");
116 }
117 add_seam_to_queue(my_priority, seam, seam_queue);
118
119 if (my_priority > chop_good_split) {
120 return;
121 }
122 }
123
124 TBOX bbox = blob->bounding_box();
125 /* Queue loop */
126 while (!seam_queue->empty()) {
127 SeamPair seam_pair;
128 seam_queue->Pop(&seam_pair);
129 seam = seam_pair.extract_data();
130 /* Set full priority */
131 my_priority =
132 seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
133 chop_center_knob, chop_width_change_knob);
134 if (chop_debug) {
135 char str[80];
136 snprintf(str, sizeof(str), "Full my_priority %0.0f, ", my_priority);
137 seam->Print(str);
138 }
139
140 if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
141 my_priority < chop_ok_split) {
142 /* No crossing */
143 if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) {
144 delete *seam_result;
145 *seam_result = new SEAM(*seam);
146 (*seam_result)->set_priority(my_priority);
147 } else {
148 delete seam;
149 seam = nullptr;
150 my_priority = BAD_PRIORITY;
151 }
152 }
153
154 if (my_priority < chop_good_split) {
155 delete seam;
156 return; /* Made good answer */
157 }
158
159 if (seam) {
160 /* Combine with others */
161 if (seam_pile->size() < chop_seam_pile_size) {
162 combine_seam(*seam_pile, seam, seam_queue);
163 SeamDecPair pair(seam_pair.key(), seam);
164 seam_pile->Push(&pair);
165 } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size &&
166 seam_pile->PeekTop().key() > seam_pair.key()) {
167 combine_seam(*seam_pile, seam, seam_queue);
168 SeamDecPair pair;
169 seam_pile->Pop(&pair); // pop the worst.
170 // Replace the seam in pair (deleting the old one) with
171 // the new seam and score, then push back into the heap.
172 pair.set_key(seam_pair.key());
173 pair.set_data(seam);
174 seam_pile->Push(&pair);
175 } else {
176 delete seam;
177 }
178 }
179
180 my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key();
181 if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) {
182 return;
183 }
184 }
185}
186
187/**********************************************************************
188 * combine_seam
189 *
190 * Find other seams to combine with this one. The new seams that result
191 * from this union should be added to the seam queue. The return value
192 * tells whether or not any additional seams were added to the queue.
193 **********************************************************************/
194void Wordrec::combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) {
195 for (int x = 0; x < seam_pile.size(); ++x) {
196 const SEAM *this_one = seam_pile.get(x).data();
197 if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
198 SEAM *new_one = new SEAM(*seam);
199 new_one->CombineWith(*this_one);
200 if (chop_debug > 1) {
201 new_one->Print("Combo priority ");
202 }
203 add_seam_to_queue(new_one->priority(), new_one, seam_queue);
204 }
205 }
206}
207
208/**********************************************************************
209 * pick_good_seam
210 *
211 * Find and return a good seam that will split this blob into two pieces.
212 * Work from the outlines provided.
213 **********************************************************************/
215 SeamPile seam_pile(chop_seam_pile_size);
216 EDGEPT *points[MAX_NUM_POINTS];
217 EDGEPT_CLIST new_points;
218 SEAM *seam = nullptr;
219 TESSLINE *outline;
220 int16_t num_points = 0;
221
222#ifndef GRAPHICS_DISABLED
223 if (chop_debug > 2) {
224 wordrec_display_splits.set_value(true);
225 }
226
227 draw_blob_edges(blob);
228#endif
229
230 PointHeap point_heap(MAX_NUM_POINTS);
231 for (outline = blob->outlines; outline; outline = outline->next) {
232 prioritize_points(outline, &point_heap);
233 }
234
235 while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236 points[num_points++] = point_heap.PeekTop().data();
237 point_heap.Pop(nullptr);
238 }
239
240 /* Initialize queue */
241 SeamQueue seam_queue(MAX_NUM_SEAMS);
242
243 try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244 try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob);
245
246 if (seam == nullptr) {
247 choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
248 } else if (seam->priority() > chop_good_split) {
249 choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile);
250 }
251
252 EDGEPT_C_IT it(&new_points);
253 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
254 EDGEPT *inserted_point = it.data();
255 if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
256 for (outline = blob->outlines; outline; outline = outline->next) {
257 if (outline->loop == inserted_point) {
258 outline->loop = outline->loop->next;
259 }
260 }
261 remove_edgept(inserted_point);
262 }
263 }
264
265 if (seam) {
266 if (seam->priority() > chop_ok_split) {
267 delete seam;
268 seam = nullptr;
269 }
270#ifndef GRAPHICS_DISABLED
271 else if (wordrec_display_splits) {
272 seam->Mark(edge_window);
273 if (chop_debug > 2) {
275 edge_window->Wait();
276 }
277 }
278#endif
279 }
280
281 if (chop_debug) {
282 wordrec_display_splits.set_value(false);
283 }
284
285 return (seam);
286}
287
288/**********************************************************************
289 * try_point_pairs
290 *
291 * Try all the splits that are produced by pairing critical points
292 * together. See if any of them are suitable for use. Use a seam
293 * queue and seam pile that have already been initialized and used.
294 **********************************************************************/
295void Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points,
296 SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam,
297 TBLOB *blob) {
298 int16_t x;
299 int16_t y;
300 PRIORITY priority;
301
302 for (x = 0; x < num_points; x++) {
303 for (y = x + 1; y < num_points; y++) {
304 if (points[y] &&
305 points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length &&
306 points[x] != points[y]->next && points[y] != points[x]->next &&
307 !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) {
308 SPLIT split(points[x], points[y]);
309 priority = partial_split_priority(&split);
310
311 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
312 }
313 }
314 }
315}
316
317/**********************************************************************
318 * try_vertical_splits
319 *
320 * Try all the splits that are produced by vertical projection to see
321 * if any of them are suitable for use. Use a seam queue and seam pile
322 * that have already been initialized and used.
323 * Return in new_points a collection of points that were inserted into
324 * the blob while examining vertical splits and which may safely be
325 * removed once a seam is chosen if they are not part of the seam.
326 **********************************************************************/
327void Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points,
328 EDGEPT_CLIST *new_points, SeamQueue *seam_queue,
329 SeamPile *seam_pile, SEAM **seam, TBLOB *blob) {
330 EDGEPT *vertical_point = nullptr;
331 int16_t x;
332 PRIORITY priority;
333 TESSLINE *outline;
334
335 for (x = 0; x < num_points; x++) {
336 vertical_point = nullptr;
337 for (outline = blob->outlines; outline; outline = outline->next) {
338 vertical_projection_point(points[x], outline->loop, &vertical_point, new_points);
339 }
340
341 if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next &&
342 points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) {
343 SPLIT split(points[x], vertical_point);
344 priority = partial_split_priority(&split);
345 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
346 }
347 }
348}
349
350} // namespace tesseract
#define MAX_NUM_POINTS
Definition: chop.h:28
#define SPLIT_CLOSENESS
Definition: findseam.cpp:45
#define NO_FULL_PRIORITY
Definition: findseam.cpp:49
#define BAD_PRIORITY
Definition: findseam.cpp:51
#define partial_split_priority(split)
Definition: findseam.cpp:40
#define MAX_NUM_SEAMS
Definition: findseam.cpp:47
#define is_exterior_point(edge, point)
Definition: outlines.h:83
const double y
void remove_edgept(EDGEPT *point)
Definition: split.cpp:199
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
ScrollView * edge_window
Definition: plotedges.cpp:37
float PRIORITY
Definition: seam.h:31
bool wordrec_display_splits
Definition: split.cpp:41
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:43
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:67
def next(obj)
Definition: ast.py:56
int WeightedDistance(const EDGEPT &other, int x_factor) const
Definition: blobs.h:118
EDGEPT * next
Definition: blobs.h:200
EDGEPT * loop
Definition: blobs.h:287
TESSLINE * next
Definition: blobs.h:288
TBOX bounding_box() const
Definition: blobs.cpp:466
TESSLINE * outlines
Definition: blobs.h:404
TDimension left() const
Definition: rect.h:82
TDimension right() const
Definition: rect.h:89
void CombineWith(const SEAM &other)
Definition: seam.h:69
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:91
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:238
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.h:60
float priority() const
Definition: seam.h:46
void Mark(ScrollView *window) const
Definition: seam.cpp:171
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:44
void Print(const char *label) const
Definition: seam.cpp:144
bool empty() const
Definition: genericheap.h:68
bool PopWorst(Pair *entry)
Definition: genericheap.h:144
const Pair & PeekTop() const
Definition: genericheap.h:108
bool Pop(Pair *entry)
Definition: genericheap.h:120
const Pair & get(int index) const
Definition: genericheap.h:87
void Push(Pair *entry)
Definition: genericheap.h:95
void set_data(Data *new_data)
Definition: kdpair.h:138
const Key & key() const
Definition: kdpair.h:128
void set_key(const Key &new_key)
Definition: kdpair.h:131
Data * extract_data()
Definition: kdpair.h:143
static void Update()
Definition: scrollview.cpp:700
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:64
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:170
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:295
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:327
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:194
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:277
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:214
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:103