tesseract v5.3.3.20231005
polyaprx.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: polyaprx.cpp
3 * Description: Code for polygonal approximation from old edgeprog.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1993, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#include "polyaprx.h"
20
21#include "blobs.h" // for EDGEPT, TPOINT, VECTOR, TESSLINE
22#include "coutln.h" // for C_OUTLINE
23#include "errcode.h" // for ASSERT_HOST
24#include "mod128.h" // for DIR128
25#include "params.h" // for BoolParam, BOOL_VAR
26#include "points.h" // for ICOORD
27#include "rect.h" // for TBOX
28#include "tprintf.h" // for tprintf
29
30#include <cstdint> // for INT16_MAX, int8_t
31
32namespace tesseract {
33
34#define FASTEDGELENGTH 256
35
36static BOOL_VAR(poly_debug, false, "Debug old poly");
37static BOOL_VAR(poly_wide_objects_better, true,
38 "More accurate approx on wide things");
39
40#define fixed_dist 20 // really an int_variable
41#define approx_dist 15 // really an int_variable
42
43const int par1 = 4500 / (approx_dist * approx_dist);
44const int par2 = 6750 / (approx_dist * approx_dist);
45
46/**********************************************************************
47 *cutline(first,last,area) straightens out a line by partitioning
48 *and joining the ends by a straight line*
49 **********************************************************************/
50
51static void cutline( // recursive refine
52 EDGEPT *first, // ends of line
53 EDGEPT *last, int area // area of object
54) {
55 EDGEPT *edge; // current edge
56 TPOINT vecsum; // vector sum
57 int vlen; // approx length of vecsum
58 TPOINT vec; // accumulated vector
59 EDGEPT *maxpoint; // worst point
60 int maxperp; // max deviation
61 int perp; // perp distance
62 int ptcount; // no of points
63 int squaresum; // sum of perps
64
65 edge = first; // start of line
66 if (edge->next == last) {
67 return; // simple line
68 }
69
70 // vector sum
71 vecsum.x = last->pos.x - edge->pos.x;
72 vecsum.y = last->pos.y - edge->pos.y;
73 if (vecsum.x == 0 && vecsum.y == 0) {
74 // special case
75 vecsum.x = -edge->prev->vec.x;
76 vecsum.y = -edge->prev->vec.y;
77 }
78 // absolute value
79 vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
80 if (vecsum.y > vlen) {
81 vlen = vecsum.y; // maximum
82 } else if (-vecsum.y > vlen) {
83 vlen = -vecsum.y; // absolute value
84 }
85
86 vec.x = edge->vec.x; // accumulated vector
87 vec.y = edge->vec.y;
88 maxperp = 0; // none yet
89 squaresum = ptcount = 0;
90 edge = edge->next; // move to actual point
91 maxpoint = edge; // in case there isn't one
92 do {
93 perp = vec.cross(vecsum); // get perp distance
94 if (perp != 0) {
95 perp *= perp; // squared deviation
96 }
97 squaresum += perp; // sum squares
98 ptcount++; // count points
99 if (poly_debug) {
100 tprintf("Cutline:Final perp=%d\n", perp);
101 }
102 if (perp > maxperp) {
103 maxperp = perp;
104 maxpoint = edge; // find greatest deviation
105 }
106 vec.x += edge->vec.x; // accumulate vectors
107 vec.y += edge->vec.y;
108 edge = edge->next;
109 } while (edge != last); // test all line
110
111 perp = vecsum.length();
112 ASSERT_HOST(perp != 0);
113
114 if (maxperp < 256 * INT16_MAX) {
115 maxperp <<= 8;
116 maxperp /= perp; // true max perp
117 } else {
118 maxperp /= perp;
119 maxperp <<= 8; // avoid overflow
120 }
121 if (squaresum < 256 * INT16_MAX) {
122 // mean squared perp
123 perp = (squaresum << 8) / (perp * ptcount);
124 } else {
125 // avoid overflow
126 perp = (squaresum / perp << 8) / ptcount;
127 }
128
129 if (poly_debug) {
130 tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area,
131 maxperp / 256.0, maxperp * 200.0 / area, perp / 256.0,
132 perp * 300.0 / area);
133 }
134 if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
135 maxpoint->fixed = true;
136 // partitions
137 cutline(first, maxpoint, area);
138 cutline(maxpoint, last, area);
139 }
140}
141
142/**********************************************************************
143 * edgesteps_to_edgepts
144 *
145 * Convert a C_OUTLINE to EDGEPTs.
146 **********************************************************************/
147
148static EDGEPT *edgesteps_to_edgepts( // convert outline
149 C_OUTLINE *c_outline, // input
150 EDGEPT edgepts[] // output is array
151) {
152 int32_t length; // steps in path
153 ICOORD pos; // current coords
154 int32_t stepindex; // current step
155 int32_t stepinc; // increment
156 int32_t epindex; // current EDGEPT
157 ICOORD vec; // for this 8 step
158 ICOORD prev_vec;
159 int8_t epdir; // of this step
160 DIR128 prevdir; // previous dir
161 DIR128 dir; // of this step
162
163 pos = c_outline->start_pos(); // start of loop
164 length = c_outline->pathlength();
165 stepindex = 0;
166 epindex = 0;
167 prevdir = -1;
168 // repeated steps
169 uint32_t count = 0;
170 int prev_stepindex = 0;
171 do {
172 dir = c_outline->step_dir(stepindex);
173 vec = c_outline->step(stepindex);
174 if (stepindex < length - 1 &&
175 c_outline->step_dir(stepindex + 1) - dir == -32) {
176 dir += 128 - 16;
177 vec += c_outline->step(stepindex + 1);
178 stepinc = 2;
179 } else {
180 stepinc = 1;
181 }
182 if (count == 0) {
183 prevdir = dir;
184 prev_vec = vec;
185 }
186 if (prevdir.get_dir() != dir.get_dir()) {
187 edgepts[epindex].pos.x = pos.x();
188 edgepts[epindex].pos.y = pos.y();
189 prev_vec *= count;
190 edgepts[epindex].vec.x = prev_vec.x();
191 edgepts[epindex].vec.y = prev_vec.y();
192 pos += prev_vec;
193 edgepts[epindex].runlength = count;
194 edgepts[epindex].prev = &edgepts[epindex - 1];
195 // TODO: reset is_hidden, too?
196 edgepts[epindex].fixed = false;
197 edgepts[epindex].next = &edgepts[epindex + 1];
198 prevdir += 64;
199 epdir = DIR128(0) - prevdir;
200 epdir >>= 4;
201 epdir &= 7;
202 edgepts[epindex].dir = epdir;
203 edgepts[epindex].src_outline = c_outline;
204 edgepts[epindex].start_step = prev_stepindex;
205 edgepts[epindex].step_count = stepindex - prev_stepindex;
206 epindex++;
207 prevdir = dir;
208 prev_vec = vec;
209 count = 1;
210 prev_stepindex = stepindex;
211 } else {
212 count++;
213 }
214 stepindex += stepinc;
215 } while (stepindex < length);
216 edgepts[epindex].pos.x = pos.x();
217 edgepts[epindex].pos.y = pos.y();
218 prev_vec *= count;
219 edgepts[epindex].vec.x = prev_vec.x();
220 edgepts[epindex].vec.y = prev_vec.y();
221 pos += prev_vec;
222 edgepts[epindex].runlength = count;
223 // TODO: reset is_hidden, too?
224 edgepts[epindex].fixed = false;
225 edgepts[epindex].src_outline = c_outline;
226 edgepts[epindex].start_step = prev_stepindex;
227 edgepts[epindex].step_count = stepindex - prev_stepindex;
228 edgepts[epindex].prev = &edgepts[epindex - 1];
229 edgepts[epindex].next = &edgepts[0];
230 prevdir += 64;
231 epdir = DIR128(0) - prevdir;
232 epdir >>= 4;
233 epdir &= 7;
234 edgepts[epindex].dir = epdir;
235 edgepts[0].prev = &edgepts[epindex];
236 ASSERT_HOST(pos.x() == c_outline->start_pos().x() &&
237 pos.y() == c_outline->start_pos().y());
238 return &edgepts[0];
239}
240
241/**********************************************************************
242 *fix2(start,area) fixes points on the outline according to a trial method*
243 **********************************************************************/
244
245static void fix2( // polygonal approx
246 EDGEPT *start, // loop to approximate
247 int area) {
248 EDGEPT *edgept; // current point
249 EDGEPT *edgept1;
250 EDGEPT *loopstart; // modified start of loop
251 EDGEPT *linestart; // start of line segment
252 int fixed_count; // no of fixed points
253 int8_t dir;
254 int d01, d12, d23, gapmin;
255 TPOINT d01vec, d12vec, d23vec;
256 EDGEPT *edgefix, *startfix;
257 EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
258
259 edgept = start; // start of loop
260 while (((edgept->dir - edgept->prev->dir + 1) & 7) < 3 &&
261 (dir = (edgept->prev->dir - edgept->next->dir) & 7) != 2 && dir != 6) {
262 edgept = edgept->next; // find suitable start
263 }
264 loopstart = edgept; // remember start
265
266 // completed flag
267 bool stopped = false;
268 edgept->fixed = true; // fix it
269 do {
270 linestart = edgept; // possible start of line
271 auto dir1 = edgept->dir; // first direction
272 // length of dir1
273 auto sum1 = edgept->runlength;
274 edgept = edgept->next;
275 auto dir2 = edgept->dir; // 2nd direction
276 // length in dir2
277 auto sum2 = edgept->runlength;
278 if (((dir1 - dir2 + 1) & 7) < 3) {
279 while (edgept->prev->dir == edgept->next->dir) {
280 edgept = edgept->next; // look at next
281 if (edgept->dir == dir1) {
282 // sum lengths
283 sum1 += edgept->runlength;
284 } else {
285 sum2 += edgept->runlength;
286 }
287 }
288
289 if (edgept == loopstart) {
290 // finished
291 stopped = true;
292 }
293 if (sum2 + sum1 > 2 && linestart->prev->dir == dir2 &&
294 (linestart->prev->runlength > linestart->runlength || sum2 > sum1)) {
295 // start is back one
296 linestart = linestart->prev;
297 linestart->fixed = true;
298 }
299
300 if (((edgept->next->dir - edgept->dir + 1) & 7) >= 3 ||
301 (edgept->dir == dir1 && sum1 >= sum2) ||
302 ((edgept->prev->runlength < edgept->runlength ||
303 (edgept->dir == dir2 && sum2 >= sum1)) &&
304 linestart->next != edgept)) {
305 edgept = edgept->next;
306 }
307 }
308 // sharp bend
309 edgept->fixed = true;
310 }
311 // do whole loop
312 while (edgept != loopstart && !stopped);
313
314 edgept = start;
315 do {
316 if (((edgept->runlength >= 8) && (edgept->dir != 2) &&
317 (edgept->dir != 6)) ||
318 ((edgept->runlength >= 8) &&
319 ((edgept->dir == 2) || (edgept->dir == 6)))) {
320 edgept->fixed = true;
321 edgept1 = edgept->next;
322 edgept1->fixed = true;
323 }
324 edgept = edgept->next;
325 } while (edgept != start);
326
327 edgept = start;
328 do {
329 // single fixed step
330 if (edgept->fixed &&
331 edgept->runlength == 1
332 // and neighbours free
333 && edgept->next->fixed &&
334 !edgept->prev->fixed
335 // same pair of dirs
336 && !edgept->next->next->fixed &&
337 edgept->prev->dir == edgept->next->dir &&
338 edgept->prev->prev->dir == edgept->next->next->dir &&
339 ((edgept->prev->dir - edgept->dir + 1) & 7) < 3) {
340 // unfix it
341 edgept->fixed = false;
342 edgept->next->fixed = false;
343 }
344 edgept = edgept->next; // do all points
345 } while (edgept != start); // until finished
346
347 stopped = false;
348 if (area < 450) {
349 area = 450;
350 }
351
352 gapmin = area * fixed_dist * fixed_dist / 44000;
353
354 edgept = start;
355 fixed_count = 0;
356 do {
357 if (edgept->fixed) {
358 fixed_count++;
359 }
360 edgept = edgept->next;
361 } while (edgept != start);
362 while (!edgept->fixed) {
363 edgept = edgept->next;
364 }
365 edgefix0 = edgept;
366
367 edgept = edgept->next;
368 while (!edgept->fixed) {
369 edgept = edgept->next;
370 }
371 edgefix1 = edgept;
372
373 edgept = edgept->next;
374 while (!edgept->fixed) {
375 edgept = edgept->next;
376 }
377 edgefix2 = edgept;
378
379 edgept = edgept->next;
380 while (!edgept->fixed) {
381 edgept = edgept->next;
382 }
383 edgefix3 = edgept;
384
385 startfix = edgefix2;
386
387 do {
388 if (fixed_count <= 3) {
389 break; // already too few
390 }
391 d12vec.diff(edgefix1->pos, edgefix2->pos);
392 d12 = d12vec.length();
393 // TODO(rays) investigate this change:
394 // Only unfix a point if it is part of a low-curvature section
395 // of outline and the total angle change of the outlines is
396 // less than 90 degrees, ie the scalar product is positive.
397 // if (d12 <= gapmin && edgefix0->vec.dot(edgefix2->vec) > 0) {
398 if (d12 <= gapmin) {
399 d01vec.diff(edgefix0->pos, edgefix1->pos);
400 d01 = d01vec.length();
401 d23vec.diff(edgefix2->pos, edgefix3->pos);
402 d23 = d23vec.length();
403 if (d01 > d23) {
404 edgefix2->fixed = false;
405 fixed_count--;
406 } else {
407 edgefix1->fixed = false;
408 fixed_count--;
409 edgefix1 = edgefix2;
410 }
411 } else {
412 edgefix0 = edgefix1;
413 edgefix1 = edgefix2;
414 }
415 edgefix2 = edgefix3;
416 edgept = edgept->next;
417 while (!edgept->fixed) {
418 if (edgept == startfix) {
419 stopped = true;
420 }
421 edgept = edgept->next;
422 }
423 edgefix3 = edgept;
424 edgefix = edgefix2;
425 } while ((edgefix != startfix) && (!stopped));
426}
427
428/**********************************************************************
429 *poly2(startpt,area,path) applies a second approximation to the outline
430 *using the points which have been fixed by the first approximation*
431 **********************************************************************/
432
433static EDGEPT *poly2( // second poly
434 EDGEPT *startpt, // start of loop
435 int area // area of blob box
436) {
437 EDGEPT *edgept; // current outline point
438 EDGEPT *loopstart; // starting point
439 EDGEPT *linestart; // start of line
440 int edgesum; // correction count
441
442 if (area < 1200) {
443 area = 1200; // minimum value
444 }
445
446 loopstart = nullptr; // not found it yet
447 edgept = startpt; // start of loop
448
449 do {
450 // current point fixed and next not
451 if (edgept->fixed && !edgept->next->fixed) {
452 loopstart = edgept; // start of repoly
453 break;
454 }
455 edgept = edgept->next; // next point
456 } while (edgept != startpt); // until found or finished
457
458 if (loopstart == nullptr && !startpt->fixed) {
459 // fixed start of loop
460 startpt->fixed = true;
461 loopstart = startpt; // or start of loop
462 }
463 if (loopstart) {
464 do {
465 edgept = loopstart; // first to do
466 do {
467 linestart = edgept;
468 edgesum = 0; // sum of lengths
469 do {
470 // sum lengths
471 edgesum += edgept->runlength;
472 edgept = edgept->next; // move on
473 } while (!edgept->fixed && edgept != loopstart && edgesum < 126);
474 if (poly_debug) {
475 tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
476 linestart->pos.x, linestart->pos.y, linestart->dir,
477 linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
478 edgept->pos.y);
479 }
480 // reapproximate
481 cutline(linestart, edgept, area);
482
483 while (edgept->next->fixed && edgept != loopstart) {
484 edgept = edgept->next; // look for next non-fixed
485 }
486 }
487 // do all the loop
488 while (edgept != loopstart);
489 edgesum = 0;
490 do {
491 if (edgept->fixed) {
492 edgesum++;
493 }
494 edgept = edgept->next;
495 }
496 // count fixed pts
497 while (edgept != loopstart);
498 if (edgesum < 3) {
499 area /= 2; // must have 3 pts
500 }
501 } while (edgesum < 3);
502 do {
503 linestart = edgept;
504 do {
505 edgept = edgept->next;
506 } while (!edgept->fixed);
507 linestart->next = edgept;
508 edgept->prev = linestart;
509 linestart->vec.x = edgept->pos.x - linestart->pos.x;
510 linestart->vec.y = edgept->pos.y - linestart->pos.y;
511 } while (edgept != loopstart);
512 } else {
513 edgept = startpt; // start of loop
514 }
515
516 loopstart = edgept; // new start
517 return loopstart; // correct exit
518}
519
520/**********************************************************************
521 * tesspoly_outline
522 *
523 * Approximate an outline from chain codes form using the old tess algorithm.
524 * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
525 * contain pointers to the input C_OUTLINEs that enable higher-resolution
526 * feature extraction that does not use the polygonal approximation.
527 **********************************************************************/
528
529TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
530 EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path
531 EDGEPT *edgepts = stack_edgepts;
532
533 // Use heap memory if the stack buffer is not big enough.
534 if (c_outline->pathlength() > FASTEDGELENGTH) {
535 edgepts = new EDGEPT[c_outline->pathlength()];
536 }
537
538 // bounding box
539 const auto &loop_box = c_outline->bounding_box();
540 int32_t area = loop_box.height();
541 if (!poly_wide_objects_better && loop_box.width() > area) {
542 area = loop_box.width();
543 }
544 area *= area;
545 edgesteps_to_edgepts(c_outline, edgepts);
546 fix2(edgepts, area);
547 EDGEPT *edgept = poly2(edgepts, area); // 2nd approximation.
548 EDGEPT *startpt = edgept;
549 EDGEPT *result = nullptr;
550 EDGEPT *prev_result = nullptr;
551 do {
552 auto *new_pt = new EDGEPT;
553 new_pt->pos = edgept->pos;
554 new_pt->prev = prev_result;
555 if (prev_result == nullptr) {
556 result = new_pt;
557 } else {
558 prev_result->next = new_pt;
559 new_pt->prev = prev_result;
560 }
561 if (allow_detailed_fx) {
562 new_pt->src_outline = edgept->src_outline;
563 new_pt->start_step = edgept->start_step;
564 new_pt->step_count = edgept->step_count;
565 }
566 prev_result = new_pt;
567 edgept = edgept->next;
568 } while (edgept != startpt);
569 prev_result->next = result;
570 result->prev = prev_result;
571 if (edgepts != stack_edgepts) {
572 delete[] edgepts;
573 }
574 return TESSLINE::BuildFromOutlineList(result);
575}
576
577} // namespace tesseract
#define BOOL_VAR(name, val, comment)
Definition: params.h:360
#define ASSERT_HOST(x)
Definition: errcode.h:54
#define fixed_dist
Definition: polyaprx.cpp:40
#define FASTEDGELENGTH
Definition: polyaprx.cpp:34
#define approx_dist
Definition: polyaprx.cpp:41
@ TPOINT
int * count
LIST last(LIST var_list)
Definition: oldlist.cpp:153
const int par2
Definition: polyaprx.cpp:44
TESSLINE * ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline)
Definition: polyaprx.cpp:529
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const int par1
Definition: polyaprx.cpp:43
TDimension x
Definition: blobs.h:89
int cross(const TPOINT &other) const
Definition: blobs.h:75
int length() const
Definition: blobs.h:85
TDimension y
Definition: blobs.h:90
EDGEPT * next
Definition: blobs.h:200
EDGEPT * prev
Definition: blobs.h:201
TPOINT pos
Definition: blobs.h:194
VECTOR vec
Definition: blobs.h:195
C_OUTLINE * src_outline
Definition: blobs.h:202
static TESSLINE * BuildFromOutlineList(EDGEPT *outline)
Definition: blobs.cpp:91
int32_t pathlength() const
Definition: coutln.h:134
const TBOX & bounding_box() const
Definition: coutln.h:113
TDimension height() const
Definition: rect.h:118