21# include "config_auto.h"
// File-scope tunable parameters for the old baseline / x-height fitting code.
// Each *_VAR(name, default, description) invocation declares one parameter;
// the string literal is the parameter's own description.

// When set, old_first_xheight() is called for the row's x-height estimate.
42static BOOL_VAR(textord_really_old_xheight,
false,
"Use original wiseowl xheight");
// Gates the tprintf() diagnostics emitted throughout this file.
44static BOOL_VAR(textord_debug_baselines,
false,
"Debug baseline generation");
// Checked in the first-baseline code when the supplied spline is null, has
// fewer than 3 segments, or does not reach far enough across the row.
45static BOOL_VAR(textord_oldbl_paradef,
true,
"Use para default mechanism");
// Part of the loop condition that keeps re-fitting the row baseline.
46static BOOL_VAR(textord_oldbl_split_splines,
true,
"Split stepped splines");
// When set, partition_line() calls merge_oldbl_parts() after picking the
// biggest partition.
47static BOOL_VAR(textord_oldbl_merge_parts,
true,
"Merge suspect partitions");
// When set, correlate_with_stats() additionally requires xcount >= fullcount
// and desccount >= rowcount / 2 before averaging the respective heights.
48static BOOL_VAR(oldbl_corrfix,
true,
"Improve correlation of heights");
// When set (or when oldbl_dot_error_size > 1), the mode threshold is taken as
// 10% of mode_count instead of 10% of blobcount.
49static BOOL_VAR(oldbl_xhfix,
false,
"Fix bug in modes threshold for xheights");
// When set, blobs whose bottom is within kBaselineTouch of the baseline get
// extra weight in the height statistics, and pick_x_height() requires the
// left/right extents of the candidate modes to overlap.
50static BOOL_VAR(textord_ocropus_mode,
false,
"Make baselines for ocropus");
// NOTE(review): no use site is visible near these declarations - confirm
// where this fraction is applied.
51static double_VAR(oldbl_xhfract, 0.4,
"Fraction of est allowed in calc");
// holed_line is set when the largest run of lost blobs exceeds this count.
52static INT_VAR(oldbl_holed_losscount, 10,
"Max lost before fallback line used");
// Bound on height/width and width/height when testing whether a blob is
// roughly square; also compared against 1 when choosing the mode threshold.
53static double_VAR(oldbl_dot_error_size, 1.26,
"Max aspect ratio of a dot");
// jumplimit is computed as lineheight * textord_oldbl_jumplimit.
54static double_VAR(textord_oldbl_jumplimit, 0.15,
"X fraction for new partition");
// Numeric tuning constants (height fractions, variance limit, bucket count).
// NOTE(review): presumably used by the x-height heuristics - confirm each
// against its use site before changing a value.
57#define X_HEIGHT_FRACTION 0.7
58#define DESCENDER_FRACTION 0.5
59#define MIN_ASC_FRACTION 0.20
60#define MIN_DESC_FRACTION 0.25
62#define MAXHEIGHTVARIANCE 0.15
66#define HEIGHTBUCKETS 200
// Absolute value of x. As a function-like macro it evaluates its argument
// more than once, so only pass expressions without side effects.
71#define ABS(x) ((x) < 0 ? (-(x)) : (x))
79void Textord::make_old_baselines(TO_BLOCK *block,
82 QSPLINE *prev_baseline;
84 TO_ROW_IT row_it = block->get_rows();
87 prev_baseline =
nullptr;
88 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
90 find_textlines(block, row, 2,
nullptr);
91 if (row->xheight <= 0 && prev_baseline !=
nullptr) {
92 find_textlines(block, row, 2, prev_baseline);
94 if (row->xheight > 0) {
95 prev_baseline = &row->baseline;
97 prev_baseline =
nullptr;
98 blob_it.set_to_list(row->blob_list());
99 if (textord_debug_baselines) {
100 tprintf(
"Row baseline generation failed on row at (%d,%d)\n",
101 blob_it.data()->bounding_box().left(), blob_it.data()->bounding_box().bottom());
105 correlate_lines(block, gradient);
106 block->block->set_xheight(block->xheight);
117void Textord::correlate_lines(TO_BLOCK *block,
float gradient) {
121 TO_ROW_IT row_it = block->get_rows();
123 rowcount = row_it.length();
126 block->xheight = block->line_size;
130 std::vector<TO_ROW *> rows(rowcount);
132 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
134 rows[rowindex++] = row_it.data();
138 correlate_neighbours(block, &rows[0], rowcount);
141 block->xheight =
static_cast<float>(correlate_with_stats(&rows[0], rowcount, block));
142 if (block->xheight <= 0) {
159void Textord::correlate_neighbours(TO_BLOCK *block,
169 for (rowindex = 0; rowindex < rowcount; rowindex++) {
170 row = rows[rowindex];
171 if (row->xheight < 0) {
173 for (otherrow = rowindex - 2;
174 otherrow >= 0 && (rows[otherrow]->xheight < 0.0 ||
179 for (otherrow = rowindex + 1;
180 otherrow < rowcount && (rows[otherrow]->xheight < 0.0 ||
186 find_textlines(block, row, 2, &rows[upperrow]->
baseline);
188 if (row->xheight < 0 && lowerrow < rowcount) {
189 find_textlines(block, row, 2, &rows[lowerrow]->
baseline);
191 if (row->xheight < 0) {
193 find_textlines(block, row, 1, &rows[upperrow]->
baseline);
194 }
else if (lowerrow < rowcount) {
195 find_textlines(block, row, 1, &rows[lowerrow]->
baseline);
201 for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
202 row = rows[rowindex];
203 if (row->xheight < 0) {
205 row->xheight = -row->xheight;
207 biggest = std::max(biggest, row->xheight);
218int Textord::correlate_with_stats(TO_ROW **rows,
234 xcount = fullcount = desccount = 0;
235 lineheight = ascheight = fullheight = descheight = 0.0;
236 for (rowindex = 0; rowindex < rowcount; rowindex++) {
237 row = rows[rowindex];
238 if (row->ascrise > 0.0) {
239 lineheight += row->xheight;
240 ascheight += row->ascrise;
243 fullheight += row->xheight;
246 if (row->descdrop < 0.0) {
248 descheight += row->descdrop;
253 if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
254 lineheight /= xcount;
256 fullheight = lineheight + ascheight / xcount;
262 fullheight /= fullcount;
266 if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) {
267 descheight /= desccount;
273 if (lineheight > 0.0f) {
274 block->block->set_cell_over_xheight((fullheight - descheight) / lineheight);
279 for (rowindex = 0; rowindex < rowcount; rowindex++) {
280 row = rows[rowindex];
281 row->all_caps =
false;
286 row->ascrise = fullheight - lineheight;
288 row->xheight = lineheight;
292 row->ascrise = row->xheight - lineheight;
294 row->xheight = lineheight;
295 row->all_caps =
true;
297 row->ascrise = (fullheight - lineheight) * row->xheight / fullheight;
299 row->xheight -= row->ascrise;
300 row->all_caps =
true;
302 if (row->ascrise < minascheight) {
306 if (row->descdrop > mindescheight) {
310 row->descdrop = descheight;
316 return static_cast<int>(lineheight);
325void Textord::find_textlines(TO_BLOCK *block,
330 bool holed_line =
false;
341 blobcount = row->blob_list()->length();
343 std::vector<char> partids(blobcount);
345 std::vector<int> xcoords(blobcount);
347 std::vector<int> ycoords(blobcount);
349 std::vector<TBOX> blobcoords(blobcount);
351 std::vector<float> ydiffs(blobcount);
353 lineheight =
get_blob_coords(row,
static_cast<int>(block->line_size), &blobcoords[0], holed_line,
356 jumplimit = lineheight * textord_oldbl_jumplimit;
362 tprintf(
"\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", block->line_size,
363 lineheight, jumplimit);
368 make_first_baseline(&blobcoords[0], blobcount, &xcoords[0], &ycoords[0], spline, &row->baseline,
371#ifndef GRAPHICS_DISABLED
377 bestpart =
partition_line(&blobcoords[0], blobcount, &partcount, &partids[0], partsizes,
378 &row->baseline, jumplimit, &ydiffs[0]);
379 pointcount =
partition_coords(&blobcoords[0], blobcount, &partids[0], bestpart, &xcoords[0],
381 segments =
segment_spline(&blobcoords[0], blobcount, &xcoords[0], &ycoords[0], degree,
382 pointcount, xstarts);
385 row->baseline = QSPLINE(xstarts, segments, &xcoords[0], &ycoords[0], pointcount, degree);
386 }
while (textord_oldbl_split_splines &&
389 find_lesser_parts(row, &blobcoords[0], blobcount, &partids[0], partsizes, partcount, bestpart);
392 row->xheight = -1.0f;
393 row->descdrop = 0.0f;
396 row->baseline.extrapolate(row->line_m(), block->block->pdblk.bounding_box().left(),
397 block->block->pdblk.bounding_box().right());
399 if (textord_really_old_xheight) {
400 old_first_xheight(row, &blobcoords[0], lineheight, blobcount, &row->baseline, jumplimit);
402 make_first_xheight(row, &blobcoords[0], lineheight,
static_cast<int>(block->line_size),
403 blobcount, &row->baseline, jumplimit);
405 compute_row_xheight(row, block->block->classify_rotation(), row->line_m(), block->line_size);
431 if (blob_it.empty()) {
436 blob_it.mark_cycle_pt();
440 if (blobcoords[blobindex].height() > lineheight * 0.25) {
441 heightstat.
add(blobcoords[blobindex].height(), 1);
443 if (blobindex == 0 || blobcoords[blobindex].height() > lineheight * 0.25 ||
444 blob_it.cycled_list()) {
448 if (blobcoords[blobindex].height() < blobcoords[blobindex].width() * oldbl_dot_error_size &&
449 blobcoords[blobindex].width() < blobcoords[blobindex].height() * oldbl_dot_error_size) {
455 if (losscount > maxlosscount) {
457 maxlosscount = losscount;
461 }
while (!blob_it.cycled_list());
463 holed_line = maxlosscount > oldbl_holed_losscount;
464 outcount = blobindex;
468 return static_cast<int>(heightstat.
ile(0.25));
470 return blobcoords[0].
height();
495 float prevy, thisy, nexty;
497 float maxmax, minmin;
508 leftedge = blobcoords[0].
left();
510 rightedge = blobcoords[blobcount - 1].
right();
511 if (spline ==
nullptr
512 || spline->segments < 3
514 || spline->xcoords[1] > leftedge +
MAXOVERLAP * (rightedge - leftedge) ||
515 spline->xcoords[spline->segments - 1] < rightedge -
MAXOVERLAP * (rightedge - leftedge)) {
516 if (textord_oldbl_paradef) {
519 xstarts[0] = blobcoords[0].
left() - 1;
520 for (blobindex = 0; blobindex < blobcount; blobindex++) {
521 xcoords[blobindex] = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
522 ycoords[blobindex] = blobcoords[blobindex].
bottom();
524 xstarts[1] = blobcoords[blobcount - 1].
right() + 1;
528 *
baseline =
QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
530 if (blobcount >= 3) {
534 maxmax = minmin = 0.0f;
535 thisy = ycoords[0] -
baseline->y(xcoords[0]);
536 nexty = ycoords[1] -
baseline->y(xcoords[1]);
537 for (blobindex = 2; blobindex < blobcount; blobindex++) {
540 nexty = ycoords[blobindex] -
baseline->y(xcoords[blobindex]);
542 if (
ABS(thisy - prevy) < jumplimit &&
ABS(thisy - nexty) < jumplimit) {
548 if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
550 || (y1 > y2 && y2 <= y3))) {
553 xturns[segment] = x2;
554 yturns[segment] = y2;
559 maxmax = minmin = y3;
569 x2 = blobcoords[blobindex - 1].
right();
575 if (maxmax - minmin > jumplimit) {
577 for (blobindex = 0, segment = 1; blobindex < ycount; blobindex++) {
578 if (yturns[blobindex] > minmin + jumplimit || yturns[blobindex] < maxmax - jumplimit) {
580 if (segment == 1 || yturns[blobindex] > prevy + jumplimit ||
581 yturns[blobindex] < prevy - jumplimit) {
583 xstarts[segment] = xturns[blobindex];
585 prevy = yturns[blobindex];
588 else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
590 || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
591 xstarts[segment - 1] = xturns[blobindex];
593 prevy = yturns[blobindex];
597 xstarts[segment] = blobcoords[blobcount - 1].
right() + 1;
600 *
baseline =
QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
606 ICOORD(0,
static_cast<int16_t
>(blobcoords[0].bottom() - spline->
y(blobcoords[0].
right())));
638 leftedge = blobcoords[0].
left();
640 rightedge = blobcoords[blobcount - 1].
right();
641 for (blobindex = 0; blobindex < blobcount; blobindex++) {
642 lms.
Add(
ICOORD((blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2,
643 blobcoords[blobindex].bottom()));
646 xstarts[0] = leftedge;
647 xstarts[1] = rightedge;
649 coeffs[1] = gradient;
652 if (spline !=
nullptr
653 && spline->segments >= 3
655 && spline->xcoords[1] <= leftedge +
MAXOVERLAP * (rightedge - leftedge) &&
656 spline->xcoords[spline->segments - 1] >= rightedge -
MAXOVERLAP * (rightedge - leftedge)) {
658 x = (leftedge + rightedge) / 2.0;
659 shift =
ICOORD(0,
static_cast<int16_t
>(gradient *
x + c - spline->
y(
x)));
690 for (bestpart = 0; bestpart <
MAXPARTS; bestpart++) {
691 partsizes[bestpart] = 0;
694 startx =
get_ydiffs(blobcoords, blobcount, spline, ydiffs);
698 float last_delta = 0.0f;
699 for (blobindex = startx; blobindex < blobcount; blobindex++) {
701 diff = ydiffs[blobindex];
703 tprintf(
"%d(%d,%d), ", blobindex, blobcoords[blobindex].left(),
704 blobcoords[blobindex].bottom());
707 choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts);
709 partids[blobindex] = bestpart;
710 partsizes[bestpart]++;
718 for (blobindex = startx; blobindex >= 0; blobindex--) {
719 diff = ydiffs[blobindex];
721 tprintf(
"%d(%d,%d), ", blobindex, blobcoords[blobindex].left(),
722 blobcoords[blobindex].bottom());
725 choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts);
727 partids[blobindex] = bestpart;
728 partsizes[bestpart]++;
731 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) {
732 if (partsizes[bestpart] >= partsizes[biggestpart]) {
733 biggestpart = bestpart;
736 if (textord_oldbl_merge_parts) {
737 merge_oldbl_parts(blobcoords, blobcount, partids, partsizes, biggestpart, jumplimit);
769 prevpart = biggestpart;
772 for (blobindex = 0; blobindex < blobcount; blobindex++) {
773 if (partids[blobindex] != prevpart) {
778 if (prevpart != biggestpart && runlength >
MAXBADRUN) {
780 for (test_blob = startx; test_blob < blobindex; test_blob++) {
781 coord =
FCOORD((blobcoords[test_blob].left() + blobcoords[test_blob].right()) / 2.0,
782 blobcoords[test_blob].bottom());
783 stats.
add(coord.
x(), coord.
y());
789 tprintf(
"Fitted line y=%g x + %g\n", m, c);
794 !found_one && (startx - test_blob >= 0 || blobindex + test_blob <= blobcount);
796 if (startx - test_blob >= 0 && partids[startx - test_blob] == biggestpart) {
799 (blobcoords[startx - test_blob].left() + blobcoords[startx - test_blob].right()) /
801 blobcoords[startx - test_blob].bottom());
802 diff = m * coord.
x() + c - coord.
y();
804 tprintf(
"Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.
x(),
807 if (diff < jumplimit && -diff < jumplimit) {
811 if (blobindex + test_blob <= blobcount &&
812 partids[blobindex + test_blob - 1] == biggestpart) {
814 coord =
FCOORD((blobcoords[blobindex + test_blob - 1].left() +
815 blobcoords[blobindex + test_blob - 1].right()) /
817 blobcoords[blobindex + test_blob - 1].bottom());
818 diff = m * coord.
x() + c - coord.
y();
820 tprintf(
"Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.
x(),
823 if (diff < jumplimit && -diff < jumplimit) {
831 "Merged %d blobs back into part %d from %d starting at "
833 runlength, biggestpart, prevpart, blobcoords[startx].left(),
834 blobcoords[startx].bottom());
837 partsizes[prevpart] -= runlength;
838 for (test_blob = startx; test_blob < blobindex; test_blob++) {
839 partids[test_blob] = biggestpart;
843 prevpart = partids[blobindex];
877 bestsum =
static_cast<float>(INT32_MAX);
879 lastx = blobcoords[0].
left();
881 for (blobindex = 0; blobindex < blobcount; blobindex++) {
883 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) >> 1;
885 drift += spline->
step(lastx, xcentre);
887 diff = blobcoords[blobindex].
bottom();
888 diff -= spline->
y(xcentre);
890 ydiffs[blobindex] = diff;
893 diffsum -=
ABS(ydiffs[blobindex - 3]);
895 diffsum +=
ABS(diff);
896 if (blobindex >= 2 && diffsum < bestsum) {
898 bestindex = blobindex - 1;
915 float *drift,
float *lastdelta,
int *partcount
929 delta = diff - partdiffs[lastpart] - *drift;
931 tprintf(
"Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
933 if (
ABS(delta) > jumplimit / 2) {
935 bestdelta = diff - partdiffs[0] - *drift;
937 for (partition = 1; partition < *partcount; partition++) {
938 delta = diff - partdiffs[partition] - *drift;
939 if (
ABS(delta) <
ABS(bestdelta)) {
941 bestpart = partition;
946 if (
ABS(bestdelta) > jumplimit && *partcount <
MAXPARTS) {
947 bestpart = (*partcount)++;
949 partdiffs[bestpart] = diff - *drift;
956 if (bestpart == lastpart &&
957 (
ABS(delta - *lastdelta) < jumplimit / 2 ||
ABS(delta) < jumplimit / 2)) {
959 *drift = (3 * *drift + delta) / 3;
989 for (blobindex = 0; blobindex < blobcount; blobindex++) {
990 if (partids[blobindex] == bestpart) {
992 xcoords[pointcount] = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) >> 1;
993 ycoords[pointcount++] = blobcoords[blobindex].
bottom();
1011 int degree,
int pointcount,
1016 int lastmin, lastmax;
1021 xstarts[0] = xcoords[0] - 1;
1022 max_x = xcoords[pointcount - 1] + 1;
1027 if (pointcount > 3) {
1029 lastmax = lastmin = 0;
1030 while (ptindex < pointcount - 1 && turncount <
SPLINESIZE - 1) {
1032 if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1033 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT) {
1034 if (turncount == 0 || turnpoints[turncount - 1] != lastmax) {
1036 turnpoints[turncount++] = lastmax;
1039 }
else if (ycoords[ptindex] < ycoords[lastmin]) {
1045 if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1046 if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT) {
1047 if (turncount == 0 || turnpoints[turncount - 1] != lastmin) {
1049 turnpoints[turncount++] = lastmin;
1052 }
else if (ycoords[ptindex] > ycoords[lastmax]) {
1059 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT &&
1060 (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1063 turnpoints[turncount++] = lastmax;
1066 turnpoints[turncount++] = ptindex;
1068 }
else if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT
1070 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1073 turnpoints[turncount++] = lastmin;
1076 turnpoints[turncount++] = ptindex;
1078 }
else if (turncount > 0 && turnpoints[turncount - 1] == lastmin &&
1080 if (ycoords[ptindex] > ycoords[lastmax]) {
1081 turnpoints[turncount++] = ptindex;
1083 turnpoints[turncount++] = lastmax;
1085 }
else if (turncount > 0 && turnpoints[turncount - 1] == lastmax &&
1087 if (ycoords[ptindex] < ycoords[lastmin]) {
1088 turnpoints[turncount++] = ptindex;
1090 turnpoints[turncount++] = lastmin;
1096 tprintf(
"First turn is %d at (%d,%d)\n", turnpoints[0], xcoords[turnpoints[0]],
1097 ycoords[turnpoints[0]]);
1099 for (segment = 1; segment < turncount; segment++) {
1101 lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1104 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) {
1106 for (ptindex = turnpoints[segment - 1] + 1;
1107 ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++) {
1111 for (ptindex = turnpoints[segment - 1] + 1;
1112 ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++) {
1117 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] + xcoords[turnpoints[segment - 1]] +
1118 xcoords[turnpoints[segment]] + 2) /
1122 tprintf(
"Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", segment,
1123 turnpoints[segment], xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1124 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
1128 xstarts[segment] = max_x;
1148 int startindex, centreindex, endindex;
1149 float leftcoord, rightcoord;
1150 int leftindex, rightindex;
1155 for (segment = 1; segment < segments - 1; segment++) {
1156 step =
baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1157 (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1161 if (step > jumplimit) {
1162 while (xcoords[startindex] < xstarts[segment - 1]) {
1165 centreindex = startindex;
1166 while (xcoords[centreindex] < xstarts[segment]) {
1169 endindex = centreindex;
1170 while (xcoords[endindex] < xstarts[segment + 1]) {
1174 if (textord_debug_baselines) {
1175 tprintf(
"Too many segments to resegment spline!!\n");
1184 leftindex = (startindex + startindex + centreindex) / 3;
1185 rightindex = (centreindex + endindex + endindex) / 3;
1186 leftcoord = (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1187 rightcoord = (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1188 while (xcoords[leftindex] > leftcoord &&
1192 while (xcoords[leftindex] < leftcoord &&
1196 if (xcoords[leftindex] - leftcoord > leftcoord - xcoords[leftindex - 1]) {
1199 while (xcoords[rightindex] > rightcoord &&
1203 while (xcoords[rightindex] < rightcoord &&
1207 if (xcoords[rightindex] - rightcoord > rightcoord - xcoords[rightindex - 1]) {
1210 if (textord_debug_baselines) {
1211 tprintf(
"Splitting spline at %d with step %g at (%d,%d)\n", xstarts[segment],
1212 baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1213 (xstarts[segment] + xstarts[segment + 1]) / 2.0),
1214 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1215 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1218 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2, segments);
1220 }
else if (textord_debug_baselines) {
1221 tprintf(
"Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", startindex,
1243 int coord2,
int &segments
1247 for (index = segments; index > segment; index--) {
1248 xstarts[index + 1] = xstarts[index];
1251 xstarts[segment] = coord1;
1252 xstarts[segment + 1] = coord2;
1282 for (partition = 0; partition < partcount; partition++) {
1283 partsteps[partition] = 0.0;
1285 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1286 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) >> 1;
1288 int part_id =
static_cast<int>(
static_cast<unsigned char>(partids[blobindex]));
1289 if (part_id != bestpart) {
1291 if (runlength > biggestrun) {
1292 biggestrun = runlength;
1294 partsteps[part_id] += blobcoords[blobindex].
bottom() - row->
baseline.
y(xcentre);
1304 poscount = negcount = 0;
1306 for (partition = 0; partition < partcount; partition++) {
1307 if (partition != bestpart) {
1309 if (partsizes[partition] == 0) {
1310 partsteps[partition] = 0;
1312 partsteps[partition] /= partsizes[partition];
1316 if (partsteps[partition] >=
MINASCRISE && partsizes[partition] > poscount) {
1317 poscount = partsizes[partition];
1319 if (partsteps[partition] <= -
MINASCRISE && partsizes[partition] > negcount) {
1321 bestneg = partsteps[partition];
1323 negcount = partsizes[partition];
1328 partsteps[bestpart] /= blobcount;
1360 if (blobcount > 1) {
1361 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1362 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
1364 height =
static_cast<int>(blobcoords[blobindex].
top() -
baseline->y(xcentre) + 0.5);
1366 heightstat.
add(height, 1);
1370 lineheight =
static_cast<int>(heightstat.
ile(0.25));
1371 if (lineheight <= 0) {
1372 lineheight =
static_cast<int>(heightstat.
ile(0.5));
1375 lineheight = initialheight;
1379 static_cast<int>(blobcoords[0].
top() -
1380 baseline->y((blobcoords[0].left() + blobcoords[0].
right()) / 2) + 0.5);
1385 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1386 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
1387 diff = blobcoords[blobindex].
top() -
baseline->y(xcentre);
1389 if (diff > lineheight + jumplimit) {
1392 }
else if (diff > lineheight - jumplimit) {
1400 xsum =
static_cast<float>(lineheight);
1404 row->
ascrise = ascenders / asccount - xsum;
1425 int init_lineheight,
1438 const int kBaselineTouch = 2;
1439 const int kGoodStrength = 8;
1440 const float kMinHeight = 0.25;
1442 sign_bit = row->
xheight > 0 ? 1 : -1;
1447 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1448 int xcenter = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
1450 float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom());
1451 int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1452 int height =
static_cast<int>(blobcoords[blobindex].
top() - base + 0.5);
1453 if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) {
1455 heightstat.
add(height, strength);
1457 if (xcenter > rights[height]) {
1458 rights[height] = xcenter;
1460 if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) {
1461 lefts[height] = xcenter;
1465 mode_count += strength;
1469 mode_threshold =
static_cast<int>(blobcount * 0.1);
1470 if (oldbl_dot_error_size > 1 || oldbl_xhfix) {
1471 mode_threshold =
static_cast<int>(mode_count * 0.1);
1475 tprintf(
"blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, mode_threshold);
1479 for (blobindex = 0; blobindex <
MODENUM; blobindex++) {
1480 tprintf(
"mode[%d]=%d ", blobindex, modelist[blobindex]);
1484 pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
1511 int modelist[],
int modenum
1515 int last_max = INT32_MAX;
1521 for (mode_count = 0; mode_count < modenum; mode_count++) {
1523 for (
i = 0;
i < statnum;
i++) {
1526 ((stats->
pile_count(
i) == last_max) && (
i > last_i))) {
1533 total_max += last_max;
1534 if (last_max <= total_max / mode_factor) {
1537 modelist[mode_count] = mode;
1548 int modelist[],
int lefts[],
int rights[],
STATS *heightstat,
1549 int mode_threshold) {
1554 int found_one_bigger =
false;
1555 int best_x_height = 0;
1562 if (modelist[
x] && modelist[
y] && heightstat->
pile_count(modelist[
x]) > mode_threshold &&
1563 (!textord_ocropus_mode || std::min(rights[modelist[
x]], rights[modelist[
y]]) >
1564 std::max(lefts[modelist[
x]], lefts[modelist[
y]]))) {
1565 ratio =
static_cast<float>(modelist[
y]) /
static_cast<float>(modelist[
x]);
1566 if (1.2 < ratio && ratio < 1.8) {
1568 best_x_height = modelist[
x];
1569 num_in_best = heightstat->
pile_count(modelist[
x]);
1573 found_one_bigger =
false;
1575 if (modelist[
z] == best_x_height + 1 &&
1576 (!textord_ocropus_mode || std::min(rights[modelist[
x]], rights[modelist[
y]]) >
1577 std::max(lefts[modelist[
x]], lefts[modelist[
y]]))) {
1578 ratio =
static_cast<float>(modelist[
y]) /
static_cast<float>(modelist[
z]);
1579 if ((1.2 < ratio && ratio < 1.8) &&
1581 heightstat->
pile_count(modelist[
z]) > num_in_best * 0.5) {
1583 found_one_bigger =
true;
1588 }
while (found_one_bigger);
1592 best_asc = modelist[
y];
1593 num_in_best = heightstat->
pile_count(modelist[
y]);
1597 found_one_bigger =
false;
1599 if (modelist[
z] > best_asc &&
1600 (!textord_ocropus_mode || std::min(rights[modelist[
x]], rights[modelist[
y]]) >
1601 std::max(lefts[modelist[
x]], lefts[modelist[
y]]))) {
1602 ratio =
static_cast<float>(modelist[
z]) /
static_cast<float>(best_x_height);
1603 if ((1.2 < ratio && ratio < 1.8) &&
1605 heightstat->
pile_count(modelist[
z]) > num_in_best * 0.5) {
1606 best_asc = modelist[
z];
1607 found_one_bigger =
true;
1612 }
while (found_one_bigger);
1614 row->
xheight =
static_cast<float>(best_x_height);
1615 row->
ascrise =
static_cast<float>(best_asc) - best_x_height;
1622 best_x_height = modelist[0];
1623 num_in_best = heightstat->
pile_count(best_x_height);
1626 found_one_bigger =
false;
1629 if ((modelist[
z] == best_x_height + 1) &&
1630 (heightstat->
pile_count(modelist[
z]) > num_in_best * 0.5)) {
1632 found_one_bigger =
true;
1636 }
while (found_one_bigger);
1639 row->
xheight =
static_cast<float>(best_x_height);
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
#define double_VAR(name, val, comment)
#define DESCENDER_FRACTION
#define X_HEIGHT_FRACTION
#define MIN_DESC_FRACTION
#define MAXHEIGHTVARIANCE
int segment_spline(TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
int get_ydiffs(TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
bool textord_show_final_rows
void make_first_baseline(TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
void find_top_modes(STATS *stats, int statnum, int modelist[], int modenum)
void tprintf(const char *format,...)
void pick_x_height(TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)
void insert_spline_point(int xstarts[], int segment, int coord1, int coord2, int &segments)
int textord_spline_medianwin
void old_first_xheight(TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
int partition_coords(TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])
int choose_partition(float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
void make_holed_baseline(TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
const int kMinModeFactorOcropus
bool split_stepped_spline(QSPLINE *baseline, float jumplimit, int *xcoords, int *xstarts, int &segments)
int partition_line(TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
void find_lesser_parts(TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
void make_first_xheight(TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
void merge_oldbl_parts(TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
int get_blob_coords(TO_ROW *row, int32_t lineheight, TBOX *blobcoords, bool &holed_line, int &outcount)
BLOBNBOX_LIST * blob_list()
static const double kXHeightFraction
void Add(const ICOORD &pt)
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
void add(double x, double y)
double step(double x1, double x2)
TDimension height() const
TDimension bottom() const
void add(int32_t value, int32_t count)
int32_t pile_count(int32_t value) const
int32_t get_total() const
double ile(double frac) const
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
void compute_block_xheight(TO_BLOCK *block, float gradient)