tesseract v5.3.3.20231005
picofeat.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: picofeat.cpp
3 ** Purpose: Definition of pico-features.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18#include "picofeat.h"
19
20#include "classify.h"
21#include "featdefs.h"
22#include "fpoint.h"
23#include "mfoutline.h"
24#include "ocrfeatures.h"
25#include "params.h"
26#include "trainingsample.h"
27
28#include <cmath>
29#include <cstdio>
30
31namespace tesseract {
32
33/*---------------------------------------------------------------------------
34 Variables
35----------------------------------------------------------------------------*/
36
37double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
38
39/*---------------------------------------------------------------------------
40 Private Function Prototypes
41----------------------------------------------------------------------------*/
/* Appends the pico-features for the single segment Start->End to FeatureSet
   (defined later in this file). */
42void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet);
43
/* Walks the points of Outline and converts each non-hidden edge into
   pico-features via ConvertSegmentToPicoFeat (defined later in this file). */
44void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
45
/* Subtracts the mean PicoFeatX value from every feature in FeatureSet
   (defined later in this file). */
46void NormalizePicoX(FEATURE_SET FeatureSet);
47
48/*----------------------------------------------------------------------------
49 Public Code
50----------------------------------------------------------------------------*/
51/*---------------------------------------------------------------------------*/
61 auto FeatureSet = new FEATURE_SET_STRUCT(MAX_PICO_FEATURES);
62 auto Outlines = ConvertBlob(Blob);
63 float XScale, YScale;
64 NormalizeOutlines(Outlines, &XScale, &YScale);
65 auto RemainingOutlines = Outlines;
66 iterate(RemainingOutlines) {
67 auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
68 ConvertToPicoFeatures2(Outline, FeatureSet);
69 }
70 if (classify_norm_method == baseline) {
71 NormalizePicoX(FeatureSet);
72 }
73 FreeOutlines(Outlines);
74 return (FeatureSet);
75
76} /* ExtractPicoFeatures */
77
78/*----------------------------------------------------------------------------
79 Private Code
80----------------------------------------------------------------------------*/
81/*---------------------------------------------------------------------------*/
95void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) {
96 float Angle;
97 float Length;
98 int NumFeatures;
99 FPOINT Center;
100 FPOINT Delta;
101 int i;
102
103 Angle = NormalizedAngleFrom(Start, End, 1.0);
104 Length = DistanceBetween(*Start, *End);
105 NumFeatures = static_cast<int>(floor(Length / classify_pico_feature_length + 0.5));
106 if (NumFeatures < 1) {
107 NumFeatures = 1;
108 }
109
110 /* compute vector for one pico feature */
111 Delta.x = XDelta(*Start, *End) / NumFeatures;
112 Delta.y = YDelta(*Start, *End) / NumFeatures;
113
114 /* compute position of first pico feature */
115 Center.x = Start->x + Delta.x / 2.0;
116 Center.y = Start->y + Delta.y / 2.0;
117
118 /* compute each pico feature in segment and add to feature set */
119 for (i = 0; i < NumFeatures; i++) {
120 auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
121 Feature->Params[PicoFeatDir] = Angle;
122 Feature->Params[PicoFeatX] = Center.x;
123 Feature->Params[PicoFeatY] = Center.y;
124 AddFeature(FeatureSet, Feature);
125
126 Center.x += Delta.x;
127 Center.y += Delta.y;
128 }
129} /* ConvertSegmentToPicoFeat */
130
131/*---------------------------------------------------------------------------*/
145 MFOUTLINE Next;
146 MFOUTLINE First;
147 MFOUTLINE Current;
148
149 if (DegenerateOutline(Outline)) {
150 return;
151 }
152
153 First = Outline;
154 Current = First;
155 Next = NextPointAfter(Current);
156 do {
157 /* note that an edge is hidden if the ending point of the edge is
158 marked as hidden. This situation happens because the order of
159 the outlines is reversed when they are converted from the old
160 format. In the old format, a hidden edge is marked by the
161 starting point for that edge. */
162 if (!(PointAt(Next)->Hidden)) {
163 ConvertSegmentToPicoFeat(&(PointAt(Current)->Point), &(PointAt(Next)->Point), FeatureSet);
164 }
165
166 Current = Next;
167 Next = NextPointAfter(Current);
168 } while (Current != First);
169
170} /* ConvertToPicoFeatures2 */
171
172/*---------------------------------------------------------------------------*/
181void NormalizePicoX(FEATURE_SET FeatureSet) {
182 int i;
183 FEATURE Feature;
184 float Origin = 0.0;
185
186 for (i = 0; i < FeatureSet->NumFeatures; i++) {
187 Feature = FeatureSet->Features[i];
188 Origin += Feature->Params[PicoFeatX];
189 }
190 Origin /= FeatureSet->NumFeatures;
191
192 for (i = 0; i < FeatureSet->NumFeatures; i++) {
193 Feature = FeatureSet->Features[i];
194 Feature->Params[PicoFeatX] -= Origin;
195 }
196} /* NormalizePicoX */
197
198/*---------------------------------------------------------------------------*/
205 INT_FX_RESULT_STRUCT local_fx_info(fx_info);
206 std::vector<INT_FEATURE_STRUCT> bl_features;
208 tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
209 if (sample == nullptr) {
210 return nullptr;
211 }
212
213 uint32_t num_features = sample->num_features();
214 const INT_FEATURE_STRUCT *features = sample->features();
215 auto feature_set = new FEATURE_SET_STRUCT(num_features);
216 for (uint32_t f = 0; f < num_features; ++f) {
217 auto feature = new FEATURE_STRUCT(&IntFeatDesc);
218 feature->Params[IntX] = features[f].X;
219 feature->Params[IntY] = features[f].Y;
220 feature->Params[IntDir] = features[f].Theta;
221 AddFeature(feature_set, feature);
222 }
223 delete sample;
224
225 return feature_set;
226} /* ExtractIntCNFeatures */
227
228/*---------------------------------------------------------------------------*/
235 const INT_FX_RESULT_STRUCT &fx_info) {
236 INT_FX_RESULT_STRUCT local_fx_info(fx_info);
237 std::vector<INT_FEATURE_STRUCT> bl_features;
239 tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
240 if (sample == nullptr) {
241 return nullptr;
242 }
243
244 auto feature_set = new FEATURE_SET_STRUCT(1);
245 auto feature = new FEATURE_STRUCT(&IntFeatDesc);
246
247 feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
248 feature->Params[GeoTop] = sample->geo_feature(GeoTop);
249 feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
250 AddFeature(feature_set, feature);
251 delete sample;
252
253 return feature_set;
254} /* ExtractIntGeoFeatures */
255
256} // namespace tesseract.
#define double_VAR(name, val, comment)
Definition: params.h:366
#define iterate(l)
Definition: oldlist.h:91
#define XDelta(A, B)
Definition: fpoint.h:38
#define YDelta(A, B)
Definition: fpoint.h:39
float DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:29
float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale)
Definition: fpoint.cpp:44
#define MAX_PICO_FEATURES
Definition: picofeat.h:45
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:151
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:95
@ PicoFeatDir
Definition: picofeat.h:43
@ PicoFeatX
Definition: picofeat.h:43
@ PicoFeatY
Definition: picofeat.h:43
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, std::vector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:79
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
@ baseline
Definition: mfoutline.h:53
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:34
const FEATURE_DESC_STRUCT IntFeatDesc
@ GeoTop
Definition: picofeat.h:37
@ GeoWidth
Definition: picofeat.h:38
@ GeoBottom
Definition: picofeat.h:36
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:40
@ IntDir
Definition: picofeat.h:31
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:144
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:181
double classify_pico_feature_length
Definition: picofeat.cpp:37
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:204
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:60
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:249
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:234
Definition: fpoint.h:29
float y
Definition: fpoint.h:30
float x
Definition: fpoint.h:30
std::vector< float > Params
Definition: ocrfeatures.h:66
std::vector< FEATURE_STRUCT * > Features
Definition: ocrfeatures.h:85
const INT_FEATURE_STRUCT * features() const
uint32_t num_features() const
int geo_feature(int index) const