21# include "config_auto.h"
// Tunable parameters for this module. Each one is declared through a
// BOOL_VAR / INT_VAR / double_VAR macro taking (name, default value,
// description string).

// Checked in capture_children() before the call to outline_complexity();
// count_children() is the other counting call visible there.
38static BOOL_VAR(edges_use_new_outline_complexity,
false,
39 "Use the new outline complexity module");
// Compared against child_count in the complexity check; when exceeded the
// check returns max_count + child_count (i.e. a value above max_count).
40static INT_VAR(edges_max_children_per_outline, 10,
41 "Max number of children inside a character outline");
// Recursion depth limit: once ++depth exceeds this, the complexity check
// returns max_count + depth.
42static INT_VAR(edges_max_children_layers, 5,
43 "Max layers of nested children inside a character outline");
44static BOOL_VAR(edges_debug,
false,
"turn on debugging for this module");
// Used as a multiplier when accumulating grandchild_count and as a divisor of
// the remaining child budget (max_count - child_count).
// NOTE(review): a value of 0 would make that division divide by zero —
// confirm the parameter is range-checked somewhere.
46static INT_VAR(edges_children_per_grandchild, 10,
47 "Importance ratio for chucking outlines");
// Passed as max_count to outline_complexity() / count_children() from
// capture_children(), which then compares the returned count against it.
48static INT_VAR(edges_children_count_limit, 45,
"Max holes allowed in blob");
// When set, the child checks additionally require the child's bounding-box
// height to exceed edges_min_nonhole, and (parent_area - child_area) is
// compared against max_parent_area.
49static BOOL_VAR(edges_children_fix,
false,
50 "Remove boxy parents of char-like children");
// Threshold for the child's bounding-box height (only consulted when
// edges_children_fix is set).
51static INT_VAR(edges_min_nonhole, 12,
"Min pixels for potential char in box");
// A child triggers a discard when child_length * child_length exceeds
// child_area * edges_patharea_ratio.
52static INT_VAR(edges_patharea_ratio, 40,
53 "Max lensq/area for acceptable child outline");
// A child triggers a discard when child_area is less than its bounding-box
// area multiplied by this fraction.
54static double_VAR(edges_childarea, 0.5,
"Min area fraction of child outline");
56 "Min area fraction of grandchild for box");
68 buckets(bxdim * bydim),
95 it = std::find_if(in_it, buckets.end(), [](
auto &&b) { return !b.empty(); });
96 if (it == buckets.end())
129 int32_t grandchild_count;
130 C_OUTLINE_IT child_it;
138 grandchild_count = 0;
139 if (++depth > edges_max_children_layers) {
140 return max_count + depth;
143 for (
auto yindex = ymin; yindex <= ymax; yindex++) {
144 for (
auto xindex = xmin; xindex <= xmax; xindex++) {
145 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
146 if (child_it.empty()) {
149 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
150 child_it.forward()) {
151 child = child_it.data();
152 if (
child == outline || !(*
child < *outline)) {
157 if (child_count > edges_max_children_per_outline) {
160 "Discard outline on child_count=%d > "
161 "max_children_per_outline=%d\n",
163 static_cast<int32_t
>(edges_max_children_per_outline));
165 return max_count + child_count;
169 int32_t remaining_count = max_count - child_count - grandchild_count;
170 if (remaining_count > 0) {
171 grandchild_count += edges_children_per_grandchild *
174 if (child_count + grandchild_count > max_count) {
177 "Discard outline on child_count=%d + grandchild_count=%d "
179 child_count, grandchild_count, max_count);
181 return child_count + grandchild_count;
186 return child_count + grandchild_count;
204 int32_t grandchild_count;
206 float max_parent_area;
208 int32_t child_length;
210 C_OUTLINE_IT child_it;
218 grandchild_count = 0;
222 for (
auto yindex = ymin; yindex <= ymax; yindex++) {
223 for (
auto xindex = xmin; xindex <= xmax; xindex++) {
224 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
225 if (child_it.empty()) {
228 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
229 child_it.forward()) {
230 child = child_it.data();
233 if (child_count <= max_count) {
235 (max_count - child_count) / edges_children_per_grandchild;
238 edges_children_per_grandchild;
243 if (child_count + grandchild_count > max_count) {
245 tprintf(
"Discarding parent with child count=%d, gc=%d\n",
246 child_count, grandchild_count);
248 return child_count + grandchild_count;
250 if (parent_area == 0) {
252 if (parent_area < 0) {
253 parent_area = -parent_area;
256 if (parent_area < max_parent_area) {
261 (!edges_children_fix ||
262 child->bounding_box().height() > edges_min_nonhole)) {
263 child_area =
child->outer_area();
264 if (child_area < 0) {
265 child_area = -child_area;
267 if (edges_children_fix) {
268 if (parent_area - child_area < max_parent_area) {
272 if (grandchild_count > 0) {
275 "Discarding parent of area %d, child area=%d, max%g "
277 parent_area, child_area, max_parent_area,
280 return max_count + 1;
282 child_length =
child->pathlength();
283 if (child_length * child_length >
284 child_area * edges_patharea_ratio) {
287 "Discarding parent of area %d, child area=%d, max%g "
288 "with child length=%d\n",
289 parent_area, child_area, max_parent_area, child_length);
291 return max_count + 1;
294 if (child_area < child->bounding_box().area() * edges_childarea) {
297 "Discarding parent of area %d, child area=%d, max%g "
298 "with child rect=%d\n",
299 parent_area, child_area, max_parent_area,
300 child->bounding_box().area());
302 return max_count + 1;
309 return child_count + grandchild_count;
325 C_OUTLINE_IT child_it;
332 for (
auto yindex = ymin; yindex <= ymax; yindex++) {
333 for (
auto xindex = xmin; xindex <= xmax; xindex++) {
334 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
335 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
336 child_it.forward()) {
337 if (*child_it.data() < *outline) {
338 it->add_after_then_move(child_it.extract());
349 C_OUTLINE_LIST outlines;
350 C_OUTLINE_IT out_it = &outlines;
362static void fill_buckets(C_OUTLINE_LIST *outlines,
365 C_OUTLINE_IT out_it = outlines;
366 C_OUTLINE_IT bucket_it;
368 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
369 auto outline = out_it.extract();
371 const TBOX &ol_box(outline->bounding_box());
372 bucket_it.set_to_list((*buckets)(ol_box.left(), ol_box.bottom()));
373 bucket_it.add_to_end(outline);
385static bool capture_children(OL_BUCKETS *buckets,
386 C_BLOB_IT *reject_it,
387 C_OUTLINE_IT *blob_it
390 auto outline = blob_it->data();
393 if (edges_use_new_outline_complexity) {
395 buckets->outline_complexity(outline, edges_children_count_limit, 0);
397 child_count = buckets->count_children(outline, edges_children_count_limit);
399 if (child_count > edges_children_count_limit) {
403 if (child_count > 0) {
404 buckets->extract_children(outline, blob_it);
415static void empty_buckets(BLOCK *block,
418 C_OUTLINE_LIST outlines;
420 C_OUTLINE_IT out_it = &outlines;
421 auto start_scan = buckets->start_scan();
422 if (start_scan ==
nullptr) {
425 C_OUTLINE_IT bucket_it = start_scan;
426 C_BLOB_IT good_blobs = block->blob_list();
427 C_BLOB_IT junk_blobs = block->reject_blobs();
429 while (!bucket_it.empty()) {
430 out_it.set_to_list(&outlines);
431 C_OUTLINE_IT parent_it;
433 parent_it = bucket_it;
436 }
while (!bucket_it.at_first() &&
437 !(*parent_it.data() < *bucket_it.data()));
438 }
while (!bucket_it.at_first());
441 out_it.add_after_then_move(parent_it.extract());
443 bool good_blob = capture_children(buckets, &junk_blobs, &out_it);
447 if (
auto l = buckets->scan_next())
448 bucket_it.set_to_list(l);
462 ICOORD bleft,
ICOORD tright, C_OUTLINE_LIST *outlines) {
466 fill_buckets(outlines, &buckets);
467 empty_buckets(block, &buckets);
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
#define double_VAR(name, val, comment)
void tprintf(const char *format,...)
void block_edges(Image t_pix, PDBLK *block, C_OUTLINE_IT *outline_it)
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
void extract_edges(Image pix, BLOCK *block)
const TBOX & bounding_box() const
int32_t outer_area() const
PDBLK pdblk
Page Description Block.
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
TDimension y() const
access function
TDimension x() const
access function
TDimension bottom() const
static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list, C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it)
void extract_children(C_OUTLINE *outline, C_OUTLINE_IT *it)
OL_BUCKETS(ICOORD bleft, ICOORD tright)
C_OUTLINE_LIST * start_scan()
C_OUTLINE_LIST * scan_next()
int32_t outline_complexity(C_OUTLINE *outline, int32_t max_count, int16_t depth)
int32_t count_children(C_OUTLINE *outline, int32_t max_count)
C_OUTLINE_LIST * operator()(TDimension x, TDimension y)