tesseract v5.3.3.20231005
adaptive.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: adaptive.c
3 ** Purpose: Adaptive matcher.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18#include "adaptive.h"
19
20#include "classify.h"
21
22#include <cassert>
23#include <cstdio>
24
25namespace tesseract {
26
27/*----------------------------------------------------------------------------
28 Public Code
29----------------------------------------------------------------------------*/
30/*---------------------------------------------------------------------------*/
// AddAdaptedClass body (the signature line is absent from this listing).
// Installs Class as the adapted class for ClassId in Templates and adds a
// freshly allocated INT_CLASS_STRUCT for the same id to Templates->Templates.
// All preconditions are enforced with assert only: non-null arguments, a legal
// and still unused ClassId, and a class that has no permanent configs yet.
42 assert(Templates != nullptr);
43 assert(Class != nullptr);
44 assert(LegalClassId(ClassId));
45 assert(UnusedClassIdIn(Templates->Templates, ClassId));
46 assert(Class->NumPermConfigs == 0);
47
// Create the integer-class counterpart and add it under ClassId.
48 auto IntClass = new INT_CLASS_STRUCT(1, 1);
49 AddIntClass(Templates->Templates, ClassId, IntClass);
50
// The adapted-class slot must still be empty before Class is stored in it.
51 assert(Templates->Class[ClassId] == nullptr);
52 Templates->Class[ClassId] = Class;
53
54} /* AddAdaptedClass */
55
56/*---------------------------------------------------------------------------*/
57
// Tail of a definition whose header is absent from this listing; it releases
// the Ambigs array with delete[]. Presumably the PERM_CONFIG_STRUCT destructor,
// since ReadPermConfig allocates Config->Ambigs with new[] -- TODO confirm.
59 delete[] Ambigs;
60}
61
66
// Body of a member function whose header is absent from this listing
// (presumably the ADAPT_CLASS_STRUCT constructor -- TODO confirm).
// Allocates the PermProtos and PermConfigs bit vectors, clears every bit in
// both, and resets all MAX_NUM_CONFIGS temp-config slots to nullptr.
67 PermProtos = NewBitVector(MAX_NUM_PROTOS);
68 PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
69 zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
70 zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS));
71
72 for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
73 TempConfigFor(this, i) = nullptr;
74 }
75}
76
// Body of a member function whose header is absent from this listing
// (presumably the ADAPT_CLASS_STRUCT destructor -- TODO confirm).
// Deletes every non-null config, reading it through PermConfigFor when the
// config is permanent and through TempConfigFor otherwise, frees both bit
// vectors, and finally deletes each TEMP_PROTO_STRUCT held in TempProtos.
78 for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
79 if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) {
80 delete PermConfigFor(this, i);
81 } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) {
82 delete TempConfigFor(this, i);
83 }
84 }
85 FreeBitVector(PermProtos);
86 FreeBitVector(PermConfigs);
87 auto list = TempProtos;
// Walk the list until it is exhausted (nullptr), deleting each payload.
// NOTE(review): pop() is assumed to drop the head node and return the rest of
// the list -- confirm in oldlist.cpp.
88 while (list != nullptr) {
89 delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node);
90 list = pop(list);
91 }
92}
93
100
// Body of a member function whose header and leading statements are absent
// from this listing (presumably the ADAPT_TEMPLATES_STRUCT constructor --
// TODO confirm). Every one of the MAX_NUM_CLASSES slots is first set to nullptr.
101 /* Insert an empty class for each unichar id in unicharset */
102 for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
103 Class[i] = nullptr;
// NOTE(review): the statement inside this `if` (listing line 105) is absent
// from this view, so what is done for ids below unicharset.size() is not
// visible here.
104 if (i < unicharset.size()) {
106 }
107 }
108}
109
// Body of a member function whose header is absent from this listing
// (presumably the ADAPT_TEMPLATES_STRUCT destructor -- TODO confirm).
// Deletes the adapted class stored for each of the first
// Templates->NumClasses ids, then deletes the Templates object itself.
111 for (unsigned i = 0; i < (Templates)->NumClasses; i++) {
112 delete Class[i];
113 }
114 delete Templates;
115}
116
117// Returns FontinfoId of the given config of the given adapted class.
118int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) {
119 return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
120 : TempConfigFor(Class, ConfigId)->FontinfoId);
121}
122
127TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) {
128 int NumProtos = maxProtoId + 1;
129
130 Protos = NewBitVector(NumProtos);
131
132 NumTimesSeen = 1;
133 MaxProtoId = maxProtoId;
134 ProtoVectorSize = WordsInVectorOfSize(NumProtos);
135 zero_all_bits(Protos, ProtoVectorSize);
136 FontinfoId = fontinfoId;
137}
138
// Tail of a definition whose header is absent from this listing; it frees the
// Protos bit vector. Presumably the TEMP_CONFIG_STRUCT destructor, matching
// the NewBitVector allocation in the TEMP_CONFIG_STRUCT constructor -- TODO
// confirm.
140 FreeBitVector(Protos);
141}
142
143/*---------------------------------------------------------------------------*/
// PrintAdaptedTemplates body (the signature line is absent from this listing).
// Writes a human-readable summary of Templates to File: a heading with the
// NumNonEmptyClasses and NumPermClasses counters, a column header, and one row
// for every class for which IsEmptyAdaptedClass is false.
154 INT_CLASS_STRUCT *IClass;
155 ADAPT_CLASS_STRUCT *AClass;
156
157 fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
158 fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
159 Templates->NumPermClasses);
160 fprintf(File, " Id NC NPC NP NPP\n");
161 fprintf(File, "------------------------\n");
162
// Integer class and adapted class are looked up under the same index i.
163 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
164 IClass = Templates->Templates->Class[i];
165 AClass = Templates->Class[i];
166 if (!IsEmptyAdaptedClass(AClass)) {
// Row: id, unichar text, NumConfigs, NumPermConfigs, NumProtos, and NumProtos
// minus the number of entries in the TempProtos list.
167 fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
168 AClass->NumPermConfigs, IClass->NumProtos,
169 IClass->NumProtos - AClass->TempProtos->size());
170 }
171 }
172 fprintf(File, "\n");
173
174} /* PrintAdaptedTemplates */
175
176/*---------------------------------------------------------------------------*/
// ReadAdaptedClass body (the signature line is absent from this listing).
// Reads one adapted class from fp and returns it as a newly allocated
// ADAPT_CLASS_STRUCT. Read order: the raw struct bytes, the PermProtos and
// PermConfigs bit vectors, a count plus that many TEMP_PROTO_STRUCT records,
// and a count plus that many configs.
// NOTE(review): FRead results are never checked, and NumTempProtos and
// NumConfigs are taken from the file without a range check, although Config
// only has MAX_NUM_CONFIGS entries -- confirm the input is trusted.
187 int NumTempProtos;
188 int NumConfigs;
189 int i;
190 ADAPT_CLASS_STRUCT *Class;
191
192 /* first read high level adapted class structure */
193 Class = new ADAPT_CLASS_STRUCT;
// The raw read also overwrites the pointer members with bytes from the file;
// PermProtos, PermConfigs and TempProtos are reassigned below, the Config
// entries only for indices below NumConfigs.
194 fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
195
196 /* then read in the definitions of the permanent protos and configs */
197 Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
198 Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
199 fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
200 fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
201
202 /* then read in the list of temporary protos */
203 fp->FRead(&NumTempProtos, sizeof(int), 1);
204 Class->TempProtos = NIL_LIST;
// Each proto is appended with push_last, so the list keeps the file order.
205 for (i = 0; i < NumTempProtos; i++) {
206 auto TempProto = new TEMP_PROTO_STRUCT;
207 fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
208 Class->TempProtos = push_last(Class->TempProtos, TempProto);
209 }
210
211 /* then read in the adapted configs */
212 fp->FRead(&NumConfigs, sizeof(int), 1);
// The PermConfigs bit for index i selects which kind of config is read.
213 for (i = 0; i < NumConfigs; i++) {
214 if (test_bit(Class->PermConfigs, i)) {
215 Class->Config[i].Perm = ReadPermConfig(fp);
216 } else {
217 Class->Config[i].Temp = ReadTempConfig(fp);
218 }
219 }
220
221 return (Class);
222
223} /* ReadAdaptedClass */
224
225/*---------------------------------------------------------------------------*/
// ReadAdaptedTemplates body (the signature line is absent from this listing).
// Reads a complete set of adapted templates from fp and returns it as a newly
// allocated ADAPT_TEMPLATES_STRUCT: the raw struct bytes first, then the
// integer templates via ReadIntTemplates, then one adapted class per integer
// class via ReadAdaptedClass.
236 auto Templates = new ADAPT_TEMPLATES_STRUCT;
237
238 /* first read the high level adaptive template struct */
// The raw read also overwrites the pointer members with bytes from the file;
// Templates->Templates and the first NumClasses Class entries are replaced
// below.
239 fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
240
241 /* then read in the basic integer templates */
242 Templates->Templates = ReadIntTemplates(fp);
243
244 /* then read in the adaptive info for each class */
245 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
246 Templates->Class[i] = ReadAdaptedClass(fp);
247 }
248 return (Templates);
249
250} /* ReadAdaptedTemplates */
251
252/*---------------------------------------------------------------------------*/
// ReadPermConfig body (the signature line is absent from this listing).
// Reads a permanent config from fp and returns it as a newly allocated
// PERM_CONFIG_STRUCT. File layout: a one-byte ambig count, that many
// UNICHAR_ID values, then the FontinfoId as an int.
263 auto Config = new PERM_CONFIG_STRUCT;
264 uint8_t NumAmbigs;
265 fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
// One extra slot is allocated so the array can be terminated with -1.
266 Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
267 fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
268 Config->Ambigs[NumAmbigs] = -1;
269 fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
270
271 return (Config);
272
273} /* ReadPermConfig */
274
275/*---------------------------------------------------------------------------*/
// ReadTempConfig body (the signature line is absent from this listing).
// Reads a temporary config from fp and returns it as a newly allocated
// TEMP_CONFIG_STRUCT: the raw struct bytes first, then ProtoVectorSize 32-bit
// words of proto bits.
286 auto Config = new TEMP_CONFIG_STRUCT;
// The raw read also overwrites the Protos pointer; it is replaced right below.
287 fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
288
// NOTE(review): ProtoVectorSize comes straight from the file and sizes the
// allocation without validation -- confirm the input is trusted.
289 Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
290 fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
291
292 return (Config);
293
294} /* ReadTempConfig */
295
296/*---------------------------------------------------------------------------*/
307void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
308 /* first write high level adapted class structure */
309 fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
310
311 /* then write out the definitions of the permanent protos and configs */
312 fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
313 fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
314
315 /* then write out the list of temporary protos */
316 uint32_t NumTempProtos = Class->TempProtos->size();
317 fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
318 auto TempProtos = Class->TempProtos;
319 iterate(TempProtos) {
320 void *proto = TempProtos->node;
321 fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
322 }
323
324 /* then write out the adapted configs */
325 fwrite(&NumConfigs, sizeof(int), 1, File);
326 for (int i = 0; i < NumConfigs; i++) {
327 if (test_bit(Class->PermConfigs, i)) {
328 WritePermConfig(File, Class->Config[i].Perm);
329 } else {
330 WriteTempConfig(File, Class->Config[i].Temp);
331 }
332 }
333
334} /* WriteAdaptedClass */
335
336/*---------------------------------------------------------------------------*/
// WriteAdaptedTemplates body (the signature line is absent from this listing).
// Writes Templates to File: the raw struct bytes, the integer templates, then
// one WriteAdaptedClass record per integer class.
346 /* first write the high level adaptive template struct */
347 fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
348
// NOTE(review): the statement that performs this step (listing line 350) is
// absent from this view.
349 /* then write out the basic integer templates */
351
352 /* then write out the adaptive info for each class */
// The config count passed for each class is taken from the integer class
// stored under the same index.
353 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
354 WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
355 }
356} /* WriteAdaptedTemplates */
357
358/*---------------------------------------------------------------------------*/
// WritePermConfig body (the signature line is absent from this listing).
// Writes a permanent config to File: a one-byte ambig count, that many
// UNICHAR_ID values, then the FontinfoId as an int.
369 uint8_t NumAmbigs = 0;
370
371 assert(Config != nullptr);
// Count the leading entries of Ambigs that are greater than zero.
// NOTE(review): ReadPermConfig terminates the array with -1, yet this loop
// also stops at an id of 0 -- confirm 0 can never be a stored ambig. The
// counter is a uint8_t and would wrap if more than 255 entries preceded the
// terminator.
372 while (Config->Ambigs[NumAmbigs] > 0) {
373 ++NumAmbigs;
374 }
375
376 fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
377 fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
378 fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
379} /* WritePermConfig */
380
381/*---------------------------------------------------------------------------*/
// WriteTempConfig body (the signature line is absent from this listing).
// Writes a temporary config to File: the raw bytes of the TEMP_CONFIG_STRUCT
// followed by ProtoVectorSize 32-bit words of its Protos bit vector.
// The results of fwrite are not checked.
392 assert(Config != nullptr);
393
394 fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
395 fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
396
397} /* WriteTempConfig */
398
399} // namespace tesseract
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define test_bit(array, bit)
Definition: bitvec.h:59
const size_t BITSINLONG
Definition: bitvec.h:31
#define iterate(l)
Definition: oldlist.h:91
#define NIL_LIST
Definition: oldlist.h:75
#define UnusedClassIdIn(T, c)
Definition: intproto.h:155
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define LegalClassId(c)
Definition: intproto.h:154
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:83
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:85
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:93
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:91
void AddIntClass(INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:220
void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs)
Definition: adaptive.cpp:307
ADAPT_CLASS_STRUCT * ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:186
void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config)
Definition: adaptive.cpp:368
PERM_CONFIG_STRUCT * ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:262
CLUSTERCONFIG Config
int UNICHAR_ID
Definition: unichar.h:34
void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId)
Definition: adaptive.cpp:41
TEMP_CONFIG_STRUCT * ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:285
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:192
LIST pop(LIST list)
Definition: oldlist.cpp:166
void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config)
Definition: adaptive.cpp:391
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
UNICHARSET unicharset
Definition: ccutil.h:61
size_t FRead(void *buffer, size_t size, size_t count)
Definition: serialis.cpp:221
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
size_t size() const
Definition: unicharset.h:355
PERM_CONFIG_STRUCT * Perm
Definition: adaptive.h:52
TEMP_CONFIG_STRUCT * Temp
Definition: adaptive.h:51
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:64
ADAPT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: adaptive.h:75
INT_TEMPLATES_STRUCT * Templates
Definition: adaptive.h:72
void WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:919
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptive.cpp:345
INT_TEMPLATES_STRUCT * ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:629
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:235
int GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
Definition: adaptive.cpp:118
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptive.cpp:153
INT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: intproto.h:111