tesseract v5.3.3.20231005
mergenf.cpp
Go to the documentation of this file.
1/******************************************************************************
2** Filename: MergeNF.c
3** Purpose: Program for merging similar nano-feature protos
4** Author: Dan Johnson
5**
6** (c) Copyright Hewlett-Packard Company, 1988.
7** Licensed under the Apache License, Version 2.0 (the "License");
8** you may not use this file except in compliance with the License.
9** You may obtain a copy of the License at
10** http://www.apache.org/licenses/LICENSE-2.0
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16******************************************************************************/
17
18#define _USE_MATH_DEFINES // for M_PI
19#include <algorithm>
20#include <cfloat> // for FLT_MAX
21#include <cmath> // for M_PI
22#include <cstdio>
23#include <cstring>
24
25#include "cluster.h"
26#include "clusttool.h"
27#include "featdefs.h"
28#include "intproto.h"
29#include "mergenf.h"
30#include "ocrfeatures.h"
31#include "oldlist.h"
32#include "params.h"
33#include "protos.h"
34
35using namespace tesseract;
36
37/*-------------------once in subfeat---------------------------------*/
38static double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ...");
39
40static double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ...");
41
42static double_VAR(training_similarity_curl, 2.0, "Similarity Curl ...");
43
44/*-----------------------------once in
45 * fasttrain----------------------------------*/
46static double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ...");
47
48static double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ...");
49
50static double_VAR(training_angle_pad, 45.0, "Angle pad ...");
51
67 float WorstEvidence = WORST_EVIDENCE;
68 float Evidence;
69 float Angle, Length;
70
71 /* if p1 and p2 are not close in length, don't let them match */
72 Length = std::fabs(p1->Length - p2->Length);
73 if (Length > MAX_LENGTH_MISMATCH) {
74 return (0.0);
75 }
76
77 /* create a dummy pico-feature to be used for comparisons */
78 auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
79 Feature->Params[PicoFeatDir] = p1->Angle;
80
81 /* convert angle to radians */
82 Angle = p1->Angle * 2.0 * M_PI;
83
84 /* find distance from center of p1 to 1/2 picofeat from end */
85 Length = p1->Length / 2.0 - GetPicoFeatureLength() / 2.0;
86 if (Length < 0) {
87 Length = 0;
88 }
89
90 /* set the dummy pico-feature at one end of p1 and match it to p2 */
91 Feature->Params[PicoFeatX] = p1->X + std::cos(Angle) * Length;
92 Feature->Params[PicoFeatY] = p1->Y + std::sin(Angle) * Length;
93 if (DummyFastMatch(Feature, p2)) {
94 Evidence = SubfeatureEvidence(Feature, p2);
95 if (Evidence < WorstEvidence) {
96 WorstEvidence = Evidence;
97 }
98 } else {
99 delete Feature;
100 return 0.0;
101 }
102
103 /* set the dummy pico-feature at the other end of p1 and match it to p2 */
104 Feature->Params[PicoFeatX] = p1->X - std::cos(Angle) * Length;
105 Feature->Params[PicoFeatY] = p1->Y - std::sin(Angle) * Length;
106 if (DummyFastMatch(Feature, p2)) {
107 Evidence = SubfeatureEvidence(Feature, p2);
108 if (Evidence < WorstEvidence) {
109 WorstEvidence = Evidence;
110 }
111 } else {
112 delete Feature;
113 return 0.0;
114 }
115
116 delete Feature;
117 return (WorstEvidence);
118
119} /* CompareProtos */
120
130void ComputeMergedProto(PROTO_STRUCT *p1, PROTO_STRUCT *p2, float w1, float w2, PROTO_STRUCT *MergedProto) {
131 float TotalWeight;
132
133 TotalWeight = w1 + w2;
134 w1 /= TotalWeight;
135 w2 /= TotalWeight;
136
137 MergedProto->X = p1->X * w1 + p2->X * w2;
138 MergedProto->Y = p1->Y * w1 + p2->Y * w2;
139 MergedProto->Length = p1->Length * w1 + p2->Length * w2;
140 MergedProto->Angle = p1->Angle * w1 + p2->Angle * w2;
141 FillABC(MergedProto);
142} /* ComputeMergedProto */
143
158int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], PROTOTYPE *Prototype) {
159 PROTO_STRUCT NewProto;
160 PROTO_STRUCT MergedProto;
161 int Pid;
162 PROTO_STRUCT *Proto;
163 int BestProto;
164 float BestMatch;
165 float Match, OldMatch, NewMatch;
166
167 MakeNewFromOld(&NewProto, Prototype);
168
169 BestProto = NO_PROTO;
170 BestMatch = WORST_MATCH_ALLOWED;
171 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
172 Proto = ProtoIn(Class, Pid);
173 ComputeMergedProto(Proto, &NewProto, static_cast<float>(NumMerged[Pid]), 1.0, &MergedProto);
174 OldMatch = CompareProtos(Proto, &MergedProto);
175 NewMatch = CompareProtos(&NewProto, &MergedProto);
176 Match = std::min(OldMatch, NewMatch);
177 if (Match > BestMatch) {
178 BestProto = Pid;
179 BestMatch = Match;
180 }
181 }
182 return BestProto;
183} /* FindClosestExistingProto */
184
195 New->X = CenterX(Old->Mean);
196 New->Y = CenterY(Old->Mean);
197 New->Length = LengthOf(Old->Mean);
198 New->Angle = OrientationOf(Old->Mean);
199 FillABC(New);
200} /* MakeNewFromOld */
201
202/*-------------------once in subfeat---------------------------------*/
203
210 float Distance;
211 float Dangle;
212
213 Dangle = Proto->Angle - Feature->Params[PicoFeatDir];
214 if (Dangle < -0.5) {
215 Dangle += 1.0;
216 }
217 if (Dangle > 0.5) {
218 Dangle -= 1.0;
219 }
220 Dangle *= training_angle_match_scale;
221
222 Distance =
223 Proto->A * Feature->Params[PicoFeatX] + Proto->B * Feature->Params[PicoFeatY] + Proto->C;
224
225 return (EvidenceOf(Distance * Distance + Dangle * Dangle));
226}
227
236double EvidenceOf(double Similarity) {
237 Similarity /= training_similarity_midpoint;
238
239 if (training_similarity_curl == 3) {
240 Similarity = Similarity * Similarity * Similarity;
241 } else if (training_similarity_curl == 2) {
242 Similarity = Similarity * Similarity;
243 } else {
244 Similarity = pow(Similarity, training_similarity_curl);
245 }
246
247 return (1.0 / (1.0 + Similarity));
248}
249
263bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto) {
264 FRECT BoundingBox;
265 float MaxAngleError;
266 float AngleError;
267
268 MaxAngleError = training_angle_pad / 360.0;
269 AngleError = std::fabs(Proto->Angle - Feature->Params[PicoFeatDir]);
270 if (AngleError > 0.5) {
271 AngleError = 1.0 - AngleError;
272 }
273
274 if (AngleError > MaxAngleError) {
275 return false;
276 }
277
278 ComputePaddedBoundingBox(Proto, training_tangent_bbox_pad * GetPicoFeatureLength(),
279 training_orthogonal_bbox_pad * GetPicoFeatureLength(), &BoundingBox);
280
281 return PointInside(&BoundingBox, Feature->Params[PicoFeatX], Feature->Params[PicoFeatY]);
282} /* DummyFastMatch */
283
295void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float OrthogonalPad,
296 FRECT *BoundingBox) {
297 float Length = Proto->Length / 2.0 + TangentPad;
298 float Angle = Proto->Angle * 2.0 * M_PI;
299 float CosOfAngle = fabs(std::cos(Angle));
300 float SinOfAngle = fabs(std::sin(Angle));
301
302 float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad);
303 BoundingBox->MinX = Proto->X - Pad;
304 BoundingBox->MaxX = Proto->X + Pad;
305
306 Pad = std::max(SinOfAngle * Length, CosOfAngle * OrthogonalPad);
307 BoundingBox->MinY = Proto->Y - Pad;
308 BoundingBox->MaxY = Proto->Y + Pad;
309
310} /* ComputePaddedBoundingBox */
311
319bool PointInside(FRECT *Rectangle, float X, float Y) {
320 return (X >= Rectangle->MinX) && (X <= Rectangle->MaxX) && (Y >= Rectangle->MinY) &&
321 (Y <= Rectangle->MaxY);
322} /* PointInside */
#define double_VAR(name, val, comment)
Definition: params.h:366
#define NO_PROTO
Definition: matchdefs.h:41
#define CenterX(M)
Definition: mergenf.h:48
#define MAX_LENGTH_MISMATCH
Definition: mergenf.h:31
#define CenterY(M)
Definition: mergenf.h:49
#define WORST_MATCH_ALLOWED
Definition: mergenf.h:29
#define LengthOf(M)
Definition: mergenf.h:50
#define WORST_EVIDENCE
Definition: mergenf.h:30
#define OrientationOf(M)
Definition: mergenf.h:51
bool PointInside(FRECT *Rectangle, float X, float Y)
Definition: mergenf.cpp:319
double EvidenceOf(double Similarity)
Definition: mergenf.cpp:236
bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto)
Definition: mergenf.cpp:263
float SubfeatureEvidence(FEATURE Feature, PROTO_STRUCT *Proto)
Definition: mergenf.cpp:209
void ComputeMergedProto(PROTO_STRUCT *p1, PROTO_STRUCT *p2, float w1, float w2, PROTO_STRUCT *MergedProto)
Definition: mergenf.cpp:130
int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], PROTOTYPE *Prototype)
Definition: mergenf.cpp:158
void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float OrthogonalPad, FRECT *BoundingBox)
Definition: mergenf.cpp:295
float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2)
Definition: mergenf.cpp:66
void MakeNewFromOld(PROTO_STRUCT *New, PROTOTYPE *Old)
Definition: mergenf.cpp:194
#define ProtoIn(Class, Pid)
Definition: protos.h:70
#define GetPicoFeatureLength()
Definition: picofeat.h:56
@ PicoFeatDir
Definition: picofeat.h:43
@ PicoFeatX
Definition: picofeat.h:43
@ PicoFeatY
Definition: picofeat.h:43
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
void FillABC(PROTO_STRUCT *Proto)
Definition: protos.cpp:103
std::vector< float > Mean
Definition: cluster.h:83
std::vector< float > Params
Definition: ocrfeatures.h:66
Definition: mergenf.h:41
float MaxY
Definition: mergenf.h:42
float MinX
Definition: mergenf.h:42
float MinY
Definition: mergenf.h:42
float MaxX
Definition: mergenf.h:42