25# include "config_auto.h"
66 TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;
79 int b0a1xb0b1 = b0a1.
cross(b0b1);
80 int b0b1xb0a0 = b0b1.
cross(b0a0);
81 int a1b1xa1a0 = a1b1.
cross(a1a0);
84 int a1a0xa1b0 = -a1a0.
cross(b0a1);
86 return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) &&
87 ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
93 result->loop = outline;
107 }
while (pt != outline);
109 result->SetupFromPos();
120 if (src.
loop !=
nullptr) {
125 newpt =
new EDGEPT(*srcpt);
126 if (prevpt ==
nullptr) {
129 newpt->
prev = prevpt;
130 prevpt->
next = newpt;
134 }
while (srcpt != src.
loop);
142 if (
loop ==
nullptr) {
150 this_edge = next_edge;
151 }
while (this_edge !=
loop);
161 }
while (pt !=
loop);
169 int tmp =
static_cast<int>(floor(pt->
pos.
x * rot.
x() - pt->
pos.
y * rot.
y() + 0.5));
170 pt->
pos.
y =
static_cast<int>(floor(pt->
pos.
y * rot.
x() + pt->
pos.
x * rot.
y() + 0.5));
173 }
while (pt !=
loop);
181 pt->
pos.
x += vec.
x();
182 pt->
pos.
y += vec.
y();
184 }
while (pt !=
loop);
192 pt->
pos.
x =
static_cast<int>(floor(pt->
pos.
x * factor + 0.5));
193 pt->
pos.
y =
static_cast<int>(floor(pt->
pos.
y * factor + 0.5));
195 }
while (pt !=
loop);
206 }
while (pt !=
loop);
213 int minx = INT32_MAX;
214 int miny = INT32_MAX;
215 int maxx = -INT32_MAX;
216 int maxy = -INT32_MAX;
223 if (this_edge->
pos.
x < minx) {
224 minx = this_edge->
pos.
x;
226 if (this_edge->
pos.
y < miny) {
227 miny = this_edge->
pos.
y;
229 if (this_edge->
pos.
x > maxx) {
230 maxx = this_edge->
pos.
x;
232 if (this_edge->
pos.
y > maxy) {
233 maxy = this_edge->
pos.
y;
236 this_edge = this_edge->
next;
237 }
while (this_edge !=
loop);
256 int product = this_edge->
pos.
cross(vec);
259 this_edge = this_edge->
next;
260 }
while (this_edge !=
loop);
267#ifndef GRAPHICS_DISABLED
270 window->
Pen(child_color);
284 }
while (pt !=
loop);
313static TESSLINE **ApproximateOutlineList(
bool allow_detailed_fx, C_OUTLINE_LIST *outlines,
315 C_OUTLINE_IT ol_it(outlines);
316 for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
322 tail = &tessline->
next;
324 if (!outline->
child()->empty()) {
325 tail = ApproximateOutlineList(allow_detailed_fx, outline->
child(),
true, tail);
336 auto *tblob =
new TBLOB;
337 ApproximateOutlineList(allow_detailed_fx, src->
out_list(),
false, &tblob->outlines);
343 auto *blob =
new TBLOB;
344 blob->denorm_ = src.denorm_;
354 TBLOB *rotated_blob =
nullptr;
359 int x_middle = (box.
left() + box.
right()) / 2;
360 int y_middle = (box.
top() + box.
bottom()) / 2;
361 rotated_blob =
new TBLOB(*
this);
367 rotated_blob->
Normalize(
nullptr, &rotation, &denorm_, x_middle, y_middle, 1.0f, 1.0f, 0.0f,
378 auto *new_outline =
new TESSLINE(*srcline);
382 prev_outline->
next = new_outline;
384 prev_outline = new_outline;
386 denorm_ = src.denorm_;
401 float x_origin,
float y_origin,
float x_scale,
float y_scale,
402 float final_xshift,
float final_yshift,
bool inverse,
Image pix) {
403 denorm_.
SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale,
404 final_xshift, final_yshift);
415 for (
TESSLINE* outline =
outlines; outline !=
nullptr; outline = outline->next) {
416 outline->Normalize(denorm_);
425 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
426 outline->Rotate(rotation);
432 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
439 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
440 outline->Scale(factor);
446 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
447 outline->ComputeBoundingBox();
454 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
468 return TBOX(0, 0, 0, 0);
472 for (outline = outline->
next; outline !=
nullptr; outline = outline->
next) {
481 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
483 for (
TESSLINE *other_outline = outline->next; other_outline !=
nullptr;
484 last_outline = other_outline, other_outline = other_outline->
next) {
485 if (outline->SameBox(*other_outline)) {
486 last_outline->
next = other_outline->
next;
488 other_outline->
loop =
nullptr;
489 delete other_outline;
490 other_outline = last_outline;
492 outline->is_hole =
false;
502 TBOX next_box =
next->bounding_box();
508#ifndef GRAPHICS_DISABLED
510 for (
TESSLINE *outline =
outlines; outline !=
nullptr; outline = outline->next) {
511 outline->
plot(window, color, child_color);
525 CollectEdges(box,
nullptr, &accumulator,
nullptr,
nullptr);
536 second_moments->
set_x(x2nd);
537 second_moments->
set_y(y2nd);
538 return accumulator.
count();
545 *precise_box =
TBOX();
546 CollectEdges(box, precise_box,
nullptr,
nullptr,
nullptr);
560 std::vector<std::vector<int>> &y_coords)
const {
562 x_coords.resize(box.
height());
564 y_coords.resize(box.
width());
565 CollectEdges(box,
nullptr,
nullptr, &x_coords, &y_coords);
567 for (
auto &coord : x_coords) {
568 std::sort(coord.begin(), coord.end());
570 for (
auto &coord : y_coords) {
571 std::sort(coord.begin(), coord.end());
577static void SegmentLLSQ(
const FCOORD &pt1,
const FCOORD &pt2,
LLSQ *accumulator) {
584 if (xstart == xend && ystart == yend) {
587 double weight = step.length() / (xend - xstart + yend - ystart);
589 for (
int x = xstart;
x < xend; ++
x) {
590 double y = pt1.
y() + step.y() * (
x + 0.5 - pt1.
x()) / step.x();
591 accumulator->
add(
x + 0.5,
y, weight);
594 for (
int y = ystart;
y < yend; ++
y) {
595 double x = pt1.
x() + step.x() * (
y + 0.5 - pt1.
y()) / step.y();
596 accumulator->
add(
x,
y + 0.5, weight);
605static void SegmentCoords(
const FCOORD &pt1,
const FCOORD &pt2,
int x_limit,
int y_limit,
606 std::vector<std::vector<int>> *x_coords,
607 std::vector<std::vector<int>> *y_coords) {
612 for (
int x = start;
x < end; ++
x) {
613 int y =
IntCastRounded(pt1.y() + step.y() * (
x + 0.5 - pt1.x()) / step.x());
614 (*y_coords)[
x].push_back(
y);
618 for (
int y = start;
y < end; ++
y) {
619 int x =
IntCastRounded(pt1.x() + step.x() * (
y + 0.5 - pt1.y()) / step.y());
620 (*x_coords)[
y].push_back(
x);
627static void SegmentBBox(
const FCOORD &pt1,
const FCOORD &pt2,
TBOX *bbox) {
633 int y1 =
IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / step.x());
634 int y2 =
IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / step.x());
635 TBOX point(x1, std::min(y1, y2), x2, std::max(y1, y2));
641 int x1 =
IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / step.y());
642 int x2 =
IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / step.y());
643 TBOX point(std::min(x1, x2), y1, std::max(x1, x2), y2);
658static void CollectEdgesOfRun(
const EDGEPT *startpt,
const EDGEPT *lastpt,
const DENORM &denorm,
659 const TBOX &box,
TBOX *bounding_box, LLSQ *accumulator,
660 std::vector<std::vector<int>> *x_coords,
661 std::vector<std::vector<int>> *y_coords) {
662 const C_OUTLINE *outline = startpt->src_outline;
663 int x_limit = box.width() - 1;
664 int y_limit = box.height() - 1;
665 if (outline !=
nullptr) {
670 const DENORM *root_denorm = denorm.RootDenorm();
672 int start_index = startpt->start_step;
679 int end_index = lastpt->start_step + lastpt->step_count;
680 if (end_index <= start_index) {
681 end_index += step_length;
684 ICOORD pos = outline->position_at_index(start_index);
685 FCOORD origin(box.left(), box.bottom());
688 FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index);
693 denorm.NormTransform(root_denorm, f_pos, &prev_normed);
694 prev_normed -= origin;
695 for (
int index = start_index; index < end_index; ++index) {
696 ICOORD step = outline->step(index % step_length);
704 if (outline->edge_strength_at_index(index % step_length) > 0) {
705 FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, index % step_length);
707 denorm.NormTransform(root_denorm, f_pos, &pos_normed);
708 pos_normed -= origin;
710 if (bounding_box !=
nullptr) {
711 SegmentBBox(pos_normed, prev_normed, bounding_box);
713 if (accumulator !=
nullptr) {
714 SegmentLLSQ(pos_normed, prev_normed, accumulator);
716 if (x_coords !=
nullptr && y_coords !=
nullptr) {
717 SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords, y_coords);
719 prev_normed = pos_normed;
725 const EDGEPT *endpt = lastpt->next;
726 const EDGEPT *pt = startpt;
728 FCOORD next_pos(pt->next->pos.x - box.left(), pt->next->pos.y - box.bottom());
729 FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom());
730 if (bounding_box !=
nullptr) {
731 SegmentBBox(next_pos, pos, bounding_box);
733 if (accumulator !=
nullptr) {
734 SegmentLLSQ(next_pos, pos, accumulator);
736 if (x_coords !=
nullptr && y_coords !=
nullptr) {
737 SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords);
739 }
while ((pt = pt->next) != endpt);
748void TBLOB::CollectEdges(
const TBOX &box,
TBOX *bounding_box, LLSQ *llsq,
749 std::vector<std::vector<int>> *x_coords,
750 std::vector<std::vector<int>> *y_coords)
const {
752 for (
const TESSLINE *ol =
outlines; ol !=
nullptr; ol = ol->
next) {
754 EDGEPT *loop_pt = ol->FindBestStartPt();
755 EDGEPT *pt = loop_pt;
760 if (pt->IsHidden()) {
764 EDGEPT *last_pt = pt;
766 last_pt = last_pt->next;
767 }
while (last_pt != loop_pt && !last_pt->IsHidden() &&
768 last_pt->src_outline == pt->src_outline);
769 last_pt = last_pt->prev;
770 CollectEdgesOfRun(pt, last_pt, denorm_, box,
bounding_box, llsq, x_coords, y_coords);
772 }
while ((pt = pt->next) != loop_pt);
779 auto *tessword =
new TWERD;
782 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
783 C_BLOB *blob = b_it.data();
785 tessword->blobs.push_back(tblob);
796 if (norm_box !=
nullptr) {
797 word_box = *norm_box;
799 float word_middle = (word_box.
left() + word_box.
right()) / 2.0f;
800 float input_y_offset = 0.0f;
803 if (row ==
nullptr) {
804 word_middle = word_box.
left();
805 input_y_offset = word_box.
bottom();
806 final_y_offset = 0.0f;
808 input_y_offset = row->
base_line(word_middle) + baseline_shift;
810 for (
auto blob :
blobs) {
811 TBOX blob_box = blob->bounding_box();
812 float mid_x = (blob_box.
left() + blob_box.
right()) / 2.0f;
814 float blob_scale = scale;
818 }
else if (row !=
nullptr) {
827 blob->Normalize(block,
nullptr,
nullptr, word_middle,
baseline, blob_scale, blob_scale, 0.0f,
828 final_y_offset, inverse, pix);
830 if (word_denorm !=
nullptr) {
831 word_denorm->
SetupNormalization(block,
nullptr,
nullptr, word_middle, input_y_offset, scale,
832 scale, 0.0f, final_y_offset);
842 for (
auto blob : src.
blobs) {
843 auto *new_blob =
new TBLOB(*blob);
844 blobs.push_back(new_blob);
850 for (
auto blob :
blobs) {
858 for (
auto &blob :
blobs) {
859 blob->ComputeBoundingBoxes();
865 for (
auto blob :
blobs) {
866 TBOX box = blob->bounding_box();
875 if (end >
blobs.size()) {
882 for (
auto i = start + 1;
i < end; ++
i) {
885 if (outline ==
nullptr) {
887 outline =
blobs[start]->outlines;
889 while (outline->
next !=
nullptr) {
890 outline = outline->
next;
901 for (
auto i = start + 1;
i < end && start + 1 <
blobs.size(); ++
i) {
906#ifndef GRAPHICS_DISABLED
909 for (
auto &blob :
blobs) {
930 if (outline1->is_hole) {
933 TPOINT mid_pt1((outline1->topleft.x + outline1->botright.x) / 2,
934 (outline1->topleft.y + outline1->botright.y) / 2);
935 int mid_prod1 = mid_pt1.
cross(vertical);
936 int min_prod1, max_prod1;
937 outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
938 for (
TESSLINE *outline2 = outline1->
next; outline2 !=
nullptr; outline2 = outline2->
next) {
939 if (outline2->is_hole) {
942 TPOINT mid_pt2((outline2->topleft.x + outline2->botright.x) / 2,
943 (outline2->topleft.y + outline2->botright.y) / 2);
944 int mid_prod2 = mid_pt2.
cross(vertical);
945 int min_prod2, max_prod2;
946 outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2);
947 int mid_gap = abs(mid_prod2 - mid_prod1);
948 int overlap = std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2);
949 if (mid_gap - overlap / 4 > max_gap) {
950 max_gap = mid_gap - overlap / 4;
952 *location += mid_pt2;
959 return max_gap > vertical.
y;
977 int location_prod = location.
cross(vertical);
979 while (outline !=
nullptr) {
982 int mid_prod = mid_pt.
cross(vertical);
983 if (mid_prod < location_prod) {
986 outline1->
next = outline;
994 outline2->
next = outline;
1000 outline = outline->
next;
1004 outline1->
next =
nullptr;
1007 outline2->
next =
nullptr;
@ W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
TESSLINE * ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline)
const TPOINT kDivisibleVerticalUpright(0, 1)
const TPOINT kDivisibleVerticalItalic(1, 5)
int IntCastRounded(double x)
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
const int kBlnBaselineOffset
int cross(const TPOINT &other) const
static bool IsCrossed(const TPOINT &a0, const TPOINT &a1, const TPOINT &b0, const TPOINT &b1)
void Move(const ICOORD vec)
void MinMaxCrossProduct(const TPOINT vec, int *min_xp, int *max_xp) const
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
TBOX bounding_box() const
EDGEPT * FindBestStartPt() const
void ComputeBoundingBox()
void CopyFrom(const TESSLINE &src)
void Rotate(const FCOORD rotation)
static TESSLINE * BuildFromOutlineList(EDGEPT *outline)
void Normalize(const DENORM &denorm)
TBOX bounding_box() const
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
void Move(const ICOORD vec)
void ComputeBoundingBoxes()
static TBLOB * ShallowCopy(const TBLOB &src)
void GetPreciseBoundingBox(TBOX *precise_box) const
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Image pix)
void GetEdgeCoords(const TBOX &box, std::vector< std::vector< int > > &x_coords, std::vector< std::vector< int > > &y_coords) const
void EliminateDuplicateOutlines()
void CorrectBlobOrder(TBLOB *next)
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
TBLOB * ClassifyNormalizeIfNeeded() const
void Rotate(const FCOORD rotation)
void CopyFrom(const TBLOB &src)
void CopyFrom(const TWERD &src)
TBOX bounding_box() const
void ComputeBoundingBoxes()
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
std::vector< TBLOB * > blobs
void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
void plot(ScrollView *window)
void MergeBlobs(unsigned start, unsigned end)
int32_t pathlength() const
void plot(ScrollView *window, ScrollView::Color colour) const
void add(double x, double y)
double x_variance() const
FCOORD mean_point() const
double y_variance() const
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
void set_inverse(bool value)
void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const
void LocalNormBlob(TBLOB *blob) const
const BLOCK * block() const
FCOORD classify_rotation() const
float base_line(float xpos) const
TDimension y() const
access_function
TDimension x() const
access function
void set_y(float yin)
rewrite function
void set_x(float xin)
rewrite function
TDimension height() const
void move(const ICOORD vec)
const ICOORD & botleft() const
TDimension bottom() const
C_OUTLINE_LIST * out_list()
bool flag(WERD_FLAGS mask) const
static ScrollView::Color NextColor(ScrollView::Color colour)
C_BLOB_LIST * cblob_list()
void SetCursor(int x, int y)
void DrawTo(int x, int y)