tesseract  4.00.00dev
commontraining.h File Reference
#include "cluster.h"
#include "commandlineflags.h"
#include "featdefs.h"
#include "intproto.h"
#include "oldlist.h"

Go to the source code of this file.

Classes

struct  LABELEDLISTNODE
 
struct  MERGE_CLASS_NODE
 

Namespaces

 tesseract
 

Typedefs

typedef struct LABELEDLISTNODE * LABELEDLIST
 
typedef MERGE_CLASS_NODE * MERGE_CLASS
 

Functions

void ParseArguments (int *argc, char ***argv)
 
ShapeTable * tesseract::LoadShapeTable (const STRING &file_prefix)
 
void tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
const char * GetNextFilename (int argc, const char *const *argv)
 
LABELEDLIST FindList (LIST List, char *Label)
 
LABELEDLIST NewLabeledList (const char *Label)
 
void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
 
void WriteTrainingSamples (const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type)
 
void FreeTrainingSamples (LIST CharList)
 
void FreeLabeledList (LABELEDLIST LabeledList)
 
void FreeLabeledClassList (LIST ClassListList)
 
CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type)
 
LIST RemoveInsignificantProtos (LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N)
 
void CleanUpUnusedData (LIST ProtoList)
 
void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
 
MERGE_CLASS FindClass (LIST List, const char *Label)
 
MERGE_CLASS NewLabeledClass (const char *Label)
 
CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)
 
void Normalize (float *Values)
 
void FreeNormProtoList (LIST CharList)
 
void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName)
 
int NumberOfProtos (LIST ProtoList, BOOL8 CountSigProtos, BOOL8 CountInsigProtos)
 
void allocNormProtos ()
 

Variables

FEATURE_DEFS_STRUCT feature_defs
 
CLUSTERCONFIG Config
 

Typedef Documentation

◆ LABELEDLIST

typedef struct LABELEDLISTNODE * LABELEDLIST

◆ MERGE_CLASS

typedef MERGE_CLASS_NODE * MERGE_CLASS

Definition at line 56 of file commontraining.h.

Function Documentation

◆ AddToNormProtosList()

void AddToNormProtosList ( LIST *  NormProtoList,
LIST  ProtoList,
char *  CharName 
)

Definition at line 805 of file commontraining.cpp.

809 {
810  PROTOTYPE* Proto;
811  LABELEDLIST LabeledProtoList;
812 
813  LabeledProtoList = NewLabeledList(CharName);
814  iterate(ProtoList)
815  {
816  Proto = (PROTOTYPE *) first_node (ProtoList);
817  LabeledProtoList->List = push(LabeledProtoList->List, Proto);
818  }
819  *NormProtoList = push(*NormProtoList, LabeledProtoList);
820 }
LABELEDLIST NewLabeledList(const char *Label)
LIST push(LIST list, void *element)
Definition: oldlist.cpp:288
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ allocNormProtos()

void allocNormProtos ( )

◆ CleanUpUnusedData()

void CleanUpUnusedData ( LIST  ProtoList)

Definition at line 575 of file commontraining.cpp.

577 {
578  PROTOTYPE* Prototype;
579 
580  iterate(ProtoList)
581  {
582  Prototype = (PROTOTYPE *) first_node (ProtoList);
583  free(Prototype->Variance.Elliptical);
584  Prototype->Variance.Elliptical = nullptr;
585  free(Prototype->Magnitude.Elliptical);
586  Prototype->Magnitude.Elliptical = nullptr;
587  free(Prototype->Weight.Elliptical);
588  Prototype->Weight.Elliptical = nullptr;
589  }
590 }
FLOAT32 * Elliptical
Definition: cluster.h:64
FLOATUNION Magnitude
Definition: cluster.h:82
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
FLOATUNION Variance
Definition: cluster.h:81
FLOATUNION Weight
Definition: cluster.h:83

◆ FindClass()

MERGE_CLASS FindClass ( LIST  List,
const char *  Label 
)

Definition at line 658 of file commontraining.cpp.

658  {
659  MERGE_CLASS MergeClass;
660 
661  iterate (List)
662  {
663  MergeClass = (MERGE_CLASS) first_node (List);
664  if (strcmp (MergeClass->Label, Label) == 0)
665  return (MergeClass);
666  }
667  return (nullptr);
668 
669 } /* FindClass */
MERGE_CLASS_NODE * MERGE_CLASS
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ FindList()

LABELEDLIST FindList ( LIST  List,
char *  Label 
)

This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.

Parameters
List: list to search
Label: label to search for
Returns
Labeled list with the specified label or nullptr.
Note
Globals: none
Exceptions: none
History: Fri Aug 18 15:57:41 1989, DSJ, Created.

Definition at line 305 of file commontraining.cpp.

305  {
306  LABELEDLIST LabeledList;
307 
308  iterate (List)
309  {
310  LabeledList = (LABELEDLIST) first_node (List);
311  if (strcmp (LabeledList->Label, Label) == 0)
312  return (LabeledList);
313  }
314  return (nullptr);
315 
316 } /* FindList */
struct LABELEDLISTNODE * LABELEDLIST
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ FreeLabeledClassList()

void FreeLabeledClassList ( LIST  ClassList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
ClassList: list of all fonts in document
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 693 of file commontraining.cpp.

693  {
694  MERGE_CLASS MergeClass;
695 
696  LIST nodes = ClassList;
697  iterate(ClassList) /* iterate through all of the fonts */
698  {
699  MergeClass = (MERGE_CLASS) first_node (ClassList);
700  free (MergeClass->Label);
701  FreeClass(MergeClass->Class);
702  delete MergeClass;
703  }
704  destroy(nodes);
705 
706 } /* FreeLabeledClassList */
LIST destroy(LIST list)
Definition: oldlist.cpp:175
CLASS_TYPE Class
void FreeClass(CLASS_TYPE Class)
Definition: protos.cpp:214
MERGE_CLASS_NODE * MERGE_CLASS
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ FreeLabeledList()

void FreeLabeledList ( LABELEDLIST  LabeledList)

This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.

Parameters
LabeledList: labeled list to be freed
Note
Globals: none
Returns
none
Note
Exceptions: none
History: Fri Aug 18 17:52:45 1989, DSJ, Created.

Definition at line 456 of file commontraining.cpp.

456  {
457  destroy(LabeledList->List);
458  free(LabeledList->Label);
459  free(LabeledList);
460 } /* FreeLabeledList */
LIST destroy(LIST list)
Definition: oldlist.cpp:175

◆ FreeNormProtoList()

void FreeNormProtoList ( LIST  CharList)

Definition at line 789 of file commontraining.cpp.

791 {
792  LABELEDLIST char_sample;
793 
794  LIST nodes = CharList;
795  iterate(CharList) /* iterate through all of the fonts */
796  {
797  char_sample = (LABELEDLIST) first_node (CharList);
798  FreeLabeledList (char_sample);
799  }
800  destroy(nodes);
801 
802 } // FreeNormProtoList
struct LABELEDLISTNODE * LABELEDLIST
void FreeLabeledList(LABELEDLIST LabeledList)
LIST destroy(LIST list)
Definition: oldlist.cpp:175
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ FreeTrainingSamples()

void FreeTrainingSamples ( LIST  CharList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
CharList: list of all fonts in document
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 427 of file commontraining.cpp.

427  {
428  LABELEDLIST char_sample;
429  FEATURE_SET FeatureSet;
430  LIST FeatureList;
431 
432  LIST nodes = CharList;
433  iterate(CharList) { /* iterate through all of the fonts */
434  char_sample = (LABELEDLIST) first_node(CharList);
435  FeatureList = char_sample->List;
436  iterate(FeatureList) { /* iterate through all of the classes */
437  FeatureSet = (FEATURE_SET) first_node(FeatureList);
438  FreeFeatureSet(FeatureSet);
439  }
440  FreeLabeledList(char_sample);
441  }
442  destroy(nodes);
443 } /* FreeTrainingSamples */
struct LABELEDLISTNODE * LABELEDLIST
void FreeLabeledList(LABELEDLIST LabeledList)
LIST destroy(LIST list)
Definition: oldlist.cpp:175
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:74
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:69
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ GetNextFilename()

const char* GetNextFilename ( int  argc,
const char *const *  argv 
)

This routine returns the next command line argument. If there are no remaining command line arguments, it returns nullptr. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.

Globals:

  • tessoptind defined by tessopt sys call

Returns
Next command line argument or nullptr.
Note
Exceptions: none
History: Fri Aug 18 09:34:12 1989, DSJ, Created.

Definition at line 286 of file commontraining.cpp.

286  {
287  if (tessoptind < argc)
288  return argv[tessoptind++];
289  else
290  return nullptr;
291 } /* GetNextFilename */
int tessoptind
Definition: tessopt.cpp:24

◆ MergeInsignificantProtos()

void MergeInsignificantProtos ( LIST  ProtoList,
const char *  label,
CLUSTERER *  Clusterer,
CLUSTERCONFIG *  Config 
)

Definition at line 510 of file commontraining.cpp.

511  {
512  PROTOTYPE* Prototype;
513  bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
514 
515  LIST pProtoList = ProtoList;
516  iterate(pProtoList) {
517  Prototype = (PROTOTYPE *) first_node (pProtoList);
518  if (Prototype->Significant || Prototype->Merged)
519  continue;
520  FLOAT32 best_dist = 0.125;
521  PROTOTYPE* best_match = nullptr;
522  // Find the nearest alive prototype.
523  LIST list_it = ProtoList;
524  iterate(list_it) {
525  PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
526  if (test_p != Prototype && !test_p->Merged) {
527  FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
528  Clusterer->ParamDesc,
529  Prototype->Mean, test_p->Mean);
530  if (dist < best_dist) {
531  best_match = test_p;
532  best_dist = dist;
533  }
534  }
535  }
536  if (best_match != nullptr && !best_match->Significant) {
537  if (debug)
538  tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
539  best_match->NumSamples, Prototype->NumSamples,
540  best_match->Mean[0], best_match->Mean[1],
541  Prototype->Mean[0], Prototype->Mean[1]);
542  best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
543  Clusterer->ParamDesc,
544  best_match->NumSamples,
545  Prototype->NumSamples,
546  best_match->Mean,
547  best_match->Mean, Prototype->Mean);
548  Prototype->NumSamples = 0;
549  Prototype->Merged = 1;
550  } else if (best_match != nullptr) {
551  if (debug)
552  tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
553  Prototype->Mean[0], Prototype->Mean[1],
554  best_match->Mean[0], best_match->Mean[1]);
555  Prototype->Merged = 1;
556  }
557  }
558  // Mark significant those that now have enough samples.
559  int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
560  pProtoList = ProtoList;
561  iterate(pProtoList) {
562  Prototype = (PROTOTYPE *) first_node (pProtoList);
563  // Process insignificant protos that do not match a green one
564  if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
565  !Prototype->Merged) {
566  if (debug)
567  tprintf("Red proto at %g,%g becoming green\n",
568  Prototype->Mean[0], Prototype->Mean[1]);
569  Prototype->Significant = true;
570  }
571  }
572 } /* MergeInsignificantProtos */
inT32 NumChar
Definition: cluster.h:93
FLOAT32 * Mean
Definition: cluster.h:78
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])
unsigned NumSamples
Definition: cluster.h:75
#define tprintf(...)
Definition: tprintf.h:31
inT16 SampleSize
Definition: cluster.h:87
int32_t inT32
Definition: host.h:38
#define first_node(l)
Definition: oldlist.h:139
FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[])
Definition: kdtree.cpp:467
#define iterate(l)
Definition: oldlist.h:159
float FLOAT32
Definition: host.h:42
PARAM_DESC * ParamDesc
Definition: cluster.h:88
FLOAT32 MinSamples
Definition: cluster.h:50
unsigned Merged
Definition: cluster.h:69
unsigned Significant
Definition: cluster.h:68

◆ NewLabeledClass()

MERGE_CLASS NewLabeledClass ( const char *  Label)

Definition at line 672 of file commontraining.cpp.

672  {
673  MERGE_CLASS MergeClass;
674 
675  MergeClass = new MERGE_CLASS_NODE;
676  MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
677  strcpy (MergeClass->Label, Label);
678  MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
679  return (MergeClass);
680 
681 } /* NewLabeledClass */
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
CLASS_TYPE Class
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void * Emalloc(int Size)
Definition: emalloc.cpp:47
CLASS_TYPE NewClass(int NumProtos, int NumConfigs)
Definition: protos.cpp:246

◆ NewLabeledList()

LABELEDLIST NewLabeledList ( const char *  Label)

This routine allocates a new, empty labeled list and gives it the specified label.

Parameters
Label: label for new list
Returns
New, empty labeled list.
Note
Globals: none
Exceptions: none
History: Fri Aug 18 16:08:46 1989, DSJ, Created.

Definition at line 328 of file commontraining.cpp.

328  {
329  LABELEDLIST LabeledList;
330 
331  LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
332  LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
333  strcpy (LabeledList->Label, Label);
334  LabeledList->List = NIL_LIST;
335  LabeledList->SampleCount = 0;
336  LabeledList->font_sample_count = 0;
337  return (LabeledList);
338 
339 } /* NewLabeledList */
struct LABELEDLISTNODE * LABELEDLIST
#define NIL_LIST
Definition: oldlist.h:126
void * Emalloc(int Size)
Definition: emalloc.cpp:47

◆ Normalize()

void Normalize ( float *  Values)

Definition at line 772 of file commontraining.cpp.

774 {
775  float Slope;
776  float Intercept;
777  float Normalizer;
778 
779  Slope = tan (Values [2] * 2 * PI);
780  Intercept = Values [1] - Slope * Values [0];
781  Normalizer = 1 / sqrt (Slope * Slope + 1.0);
782 
783  Values [0] = Slope * Normalizer;
784  Values [1] = - Normalizer;
785  Values [2] = Intercept * Normalizer;
786 } // Normalize
#define PI
Definition: const.h:19

◆ NumberOfProtos()

int NumberOfProtos ( LIST  ProtoList,
BOOL8  CountSigProtos,
BOOL8  CountInsigProtos 
)

Definition at line 823 of file commontraining.cpp.

824  {
825  int N = 0;
826  PROTOTYPE* Proto;
827 
828  iterate(ProtoList)
829  {
830  Proto = (PROTOTYPE *) first_node ( ProtoList );
831  if ((Proto->Significant && CountSigProtos) ||
832  (!Proto->Significant && CountInsigProtos))
833  N++;
834  }
835  return(N);
836 }
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
unsigned Significant
Definition: cluster.h:68

◆ ParseArguments()

void ParseArguments ( int *  argc,
char ***  argv 
)

This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.

Globals:

  • Config current clustering parameters

Parameters
argc: number of command line arguments to parse
argv: command line arguments
Returns
none
Note
Exceptions: Illegal options terminate the program.

Definition at line 86 of file commontraining.cpp.

86  {
87  STRING usage;
88  if (*argc) {
89  usage += (*argv)[0];
90  }
91  usage += " [.tr files ...]";
92  tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);
93  // Record the index of the first non-flag argument to 1, since we set
94  // remove_flags to true when parsing the flags.
95  tessoptind = 1;
96  // Set some global values based on the flags.
97  Config.MinSamples =
98  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_min_samples_fraction)));
99  Config.MaxIllegal =
100  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_max_illegal)));
101  Config.Independence =
102  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_independence)));
103  Config.Confidence =
104  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_confidence)));
105  // Set additional parameters from config file if specified.
106  if (!FLAGS_configfile.empty()) {
107  tesseract::ParamsFile::ReadParamsFile(
108  FLAGS_configfile.c_str(),
109  tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
110  ccutil.params());
111  }
112 }
int tessoptind
Definition: tessopt.cpp:24
#define MIN(x, y)
Definition: ndminx.h:28
FLOAT32 MaxIllegal
Definition: cluster.h:51
#define MAX(x, y)
Definition: ndminx.h:24
FLOAT64 Confidence
Definition: cluster.h:54
CLUSTERCONFIG Config
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:40
FLOAT32 Independence
Definition: cluster.h:53
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
Definition: strngs.h:45
CCUtil ccutil
ParamsVectors * params()
Definition: ccutil.h:62
FLOAT32 MinSamples
Definition: cluster.h:50
const char * c_str() const
Definition: strngs.cpp:209

◆ ReadTrainingSamples()

void ReadTrainingSamples ( const FEATURE_DEFS_STRUCT &  feature_defs,
const char *  feature_name,
int  max_samples,
UNICHARSET *  unicharset,
FILE *  file,
LIST *  training_samples 
)

This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. It then returns this data structure.

Parameters
file: open text file to read samples from
feature_defs
feature_name
max_samples
unicharset
training_samples
Returns
none
Note
Globals: none
Exceptions: none
History:
  • Fri Aug 18 13:11:39 1989, DSJ, Created.
  • Tue May 17 1998 simplifications to structure; eliminated the font and feature specification levels of the structure.

Definition at line 363 of file commontraining.cpp.

366  {
367  char buffer[2048];
368  char unichar[UNICHAR_LEN + 1];
369  LABELEDLIST char_sample;
370  FEATURE_SET feature_samples;
371  CHAR_DESC char_desc;
372  uint32_t feature_type = ShortNameToFeatureType(feature_defs, feature_name);
373 
374  // Zero out the font_sample_count for all the classes.
375  LIST it = *training_samples;
376  iterate(it) {
377  char_sample = reinterpret_cast<LABELEDLIST>(first_node(it));
378  char_sample->font_sample_count = 0;
379  }
380 
381  while (fgets(buffer, 2048, file) != nullptr) {
382  if (buffer[0] == '\n')
383  continue;
384 
385  sscanf(buffer, "%*s %s", unichar);
386  if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) {
387  unicharset->unichar_insert(unichar);
388  if (unicharset->size() > MAX_NUM_CLASSES) {
389  tprintf("Error: Size of unicharset in training is "
390  "greater than MAX_NUM_CLASSES\n");
391  exit(1);
392  }
393  }
394  char_sample = FindList(*training_samples, unichar);
395  if (char_sample == nullptr) {
396  char_sample = NewLabeledList(unichar);
397  *training_samples = push(*training_samples, char_sample);
398  }
399  char_desc = ReadCharDescription(feature_defs, file);
400  feature_samples = char_desc->FeatureSets[feature_type];
401  if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
402  char_sample->List = push(char_sample->List, feature_samples);
403  char_sample->SampleCount++;
404  char_sample->font_sample_count++;
405  } else {
406  FreeFeatureSet(feature_samples);
407  }
408  for (size_t i = 0; i < char_desc->NumFeatureSets; i++) {
409  if (feature_type != i)
410  FreeFeatureSet(char_desc->FeatureSets[i]);
411  }
412  free(char_desc);
413  }
414 } // ReadTrainingSamples
LABELEDLIST NewLabeledList(const char *Label)
uinT32 NumFeatureSets
Definition: featdefs.h:43
LIST push(LIST list, void *element)
Definition: oldlist.cpp:288
#define tprintf(...)
Definition: tprintf.h:31
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:668
int size() const
Definition: unicharset.h:338
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:623
#define UNICHAR_LEN
Definition: unichar.h:31
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
Definition: featdefs.cpp:254
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:69
#define first_node(l)
Definition: oldlist.h:139
LABELEDLIST FindList(LIST List, char *Label)
#define iterate(l)
Definition: oldlist.h:159
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:293

◆ RemoveInsignificantProtos()

LIST RemoveInsignificantProtos ( LIST  ProtoList,
BOOL8  KeepSigProtos,
BOOL8  KeepInsigProtos,
int  N 
)

Definition at line 593 of file commontraining.cpp.

599 {
600  LIST NewProtoList = NIL_LIST;
601  LIST pProtoList;
602  PROTOTYPE* Proto;
603  PROTOTYPE* NewProto;
604  int i;
605 
606  pProtoList = ProtoList;
607  iterate(pProtoList)
608  {
609  Proto = (PROTOTYPE *) first_node (pProtoList);
610  if ((Proto->Significant && KeepSigProtos) ||
611  (!Proto->Significant && KeepInsigProtos))
612  {
613  NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
614 
615  NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
616  NewProto->Significant = Proto->Significant;
617  NewProto->Style = Proto->Style;
618  NewProto->NumSamples = Proto->NumSamples;
619  NewProto->Cluster = nullptr;
620  NewProto->Distrib = nullptr;
621 
622  for (i=0; i < N; i++)
623  NewProto->Mean[i] = Proto->Mean[i];
624  if (Proto->Variance.Elliptical != nullptr) {
625  NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
626  for (i=0; i < N; i++)
627  NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
628  }
629  else
630  NewProto->Variance.Elliptical = nullptr;
631  //---------------------------------------------
632  if (Proto->Magnitude.Elliptical != nullptr) {
633  NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
634  for (i=0; i < N; i++)
635  NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
636  }
637  else
638  NewProto->Magnitude.Elliptical = nullptr;
639  //------------------------------------------------
640  if (Proto->Weight.Elliptical != nullptr) {
641  NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
642  for (i=0; i < N; i++)
643  NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
644  }
645  else
646  NewProto->Weight.Elliptical = nullptr;
647 
648  NewProto->TotalMagnitude = Proto->TotalMagnitude;
649  NewProto->LogMagnitude = Proto->LogMagnitude;
650  NewProtoList = push_last(NewProtoList, NewProto);
651  }
652  }
653  FreeProtoList(&ProtoList);
654  return (NewProtoList);
655 } /* RemoveInsignificantProtos */
FLOAT32 * Mean
Definition: cluster.h:78
FLOAT32 LogMagnitude
Definition: cluster.h:80
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:573
DISTRIBUTION * Distrib
Definition: cluster.h:77
unsigned NumSamples
Definition: cluster.h:75
FLOAT32 * Elliptical
Definition: cluster.h:64
FLOATUNION Magnitude
Definition: cluster.h:82
CLUSTER * Cluster
Definition: cluster.h:76
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:302
#define NIL_LIST
Definition: oldlist.h:126
unsigned Style
Definition: cluster.h:74
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
void * Emalloc(int Size)
Definition: emalloc.cpp:47
FLOATUNION Variance
Definition: cluster.h:81
float FLOAT32
Definition: host.h:42
FLOAT32 TotalMagnitude
Definition: cluster.h:79
FLOATUNION Weight
Definition: cluster.h:83
unsigned Significant
Definition: cluster.h:68

◆ SetUpForClustering()

CLUSTERER* SetUpForClustering ( const FEATURE_DEFS_STRUCT &  FeatureDefs,
LABELEDLIST  char_sample,
const char *  program_feature_type 
)

This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.

Parameters
char_sample: LABELEDLIST that holds all the feature information for a given character.
FeatureDefs
program_feature_type
Returns
Pointer to new clusterer data structure.
Note
Globals: None
Exceptions: None
History: 8/16/89, DSJ, Created.

Definition at line 476 of file commontraining.cpp.

478  {
479  uinT16 N;
480  int i, j;
481  FLOAT32* Sample = nullptr;
482  CLUSTERER *Clusterer;
483  inT32 CharID;
484  LIST FeatureList = nullptr;
485  FEATURE_SET FeatureSet = nullptr;
486 
487  int32_t desc_index =
488  ShortNameToFeatureType(FeatureDefs, program_feature_type);
489  N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
490  Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
491 
492  FeatureList = char_sample->List;
493  CharID = 0;
494  iterate(FeatureList) {
495  FeatureSet = (FEATURE_SET) first_node(FeatureList);
496  for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
497  if (Sample == nullptr) Sample = (FLOAT32*)Emalloc(N * sizeof(FLOAT32));
498  for (j = 0; j < N; j++)
499  Sample[j] = FeatureSet->Features[i]->Params[j];
500  MakeSample (Clusterer, Sample, CharID);
501  }
502  CharID++;
503  }
504  free(Sample);
505  return Clusterer;
506 
507 } /* SetUpForClustering */
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:399
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:74
int32_t inT32
Definition: host.h:38
const PARAM_DESC * ParamDesc
Definition: ocrfeatures.h:59
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
Definition: cluster.cpp:455
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
Definition: featdefs.h:50
FEATURE Features[1]
Definition: ocrfeatures.h:72
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
void * Emalloc(int Size)
Definition: emalloc.cpp:47
float FLOAT32
Definition: host.h:42
uint16_t uinT16
Definition: host.h:37
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:293

◆ SetUpForFloat2Int()

CLASS_STRUCT* SetUpForFloat2Int ( const UNICHARSET &  unicharset,
LIST  LabeledClassList 
)

Definition at line 709 of file commontraining.cpp.

710  {
711  MERGE_CLASS MergeClass;
712  CLASS_TYPE Class;
713  int NumProtos;
714  int NumConfigs;
715  int NumWords;
716  int i, j;
717  float Values[3];
718  PROTO NewProto;
719  PROTO OldProto;
720  BIT_VECTOR NewConfig;
721  BIT_VECTOR OldConfig;
722 
723  // printf("Float2Int ...\n");
724 
725  CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
726  iterate(LabeledClassList)
727  {
728  UnicityTableEqEq<int> font_set;
729  MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
730  Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)];
731  NumProtos = MergeClass->Class->NumProtos;
732  NumConfigs = MergeClass->Class->NumConfigs;
733  font_set.move(&MergeClass->Class->font_set);
734  Class->NumProtos = NumProtos;
735  Class->MaxNumProtos = NumProtos;
736  Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
737  for(i=0; i < NumProtos; i++)
738  {
739  NewProto = ProtoIn(Class, i);
740  OldProto = ProtoIn(MergeClass->Class, i);
741  Values[0] = OldProto->X;
742  Values[1] = OldProto->Y;
743  Values[2] = OldProto->Angle;
744  Normalize(Values);
745  NewProto->X = OldProto->X;
746  NewProto->Y = OldProto->Y;
747  NewProto->Length = OldProto->Length;
748  NewProto->Angle = OldProto->Angle;
749  NewProto->A = Values[0];
750  NewProto->B = Values[1];
751  NewProto->C = Values[2];
752  }
753 
754  Class->NumConfigs = NumConfigs;
755  Class->MaxNumConfigs = NumConfigs;
756  Class->font_set.move(&font_set);
757  Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
758  NumWords = WordsInVectorOfSize(NumProtos);
759  for(i=0; i < NumConfigs; i++)
760  {
761  NewConfig = NewBitVector(NumProtos);
762  OldConfig = MergeClass->Class->Configurations[i];
763  for(j=0; j < NumWords; j++)
764  NewConfig[j] = OldConfig[j];
765  Class->Configurations[i] = NewConfig;
766  }
767  }
768  return float_classes;
769 } // SetUpForFloat2Int
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
void Normalize(float *Values)
FLOAT32 B
Definition: protos.h:45
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
inT16 MaxNumConfigs
Definition: protos.h:63
#define ProtoIn(Class, Pid)
Definition: protos.h:123
PROTO Prototypes
Definition: protos.h:61
FLOAT32 C
Definition: protos.h:46
CONFIGS Configurations
Definition: protos.h:64
FLOAT32 Angle
Definition: protos.h:49
CLASS_TYPE Class
void move(UnicityTable< T > *from)
FLOAT32 A
Definition: protos.h:44
FLOAT32 Y
Definition: protos.h:48
int size() const
Definition: unicharset.h:338
inT16 NumConfigs
Definition: protos.h:62
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
FLOAT32 X
Definition: protos.h:47
FLOAT32 Length
Definition: protos.h:50
inT16 NumProtos
Definition: protos.h:59
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:207
PROTO_STRUCT * PROTO
Definition: protos.h:52
MERGE_CLASS_NODE * MERGE_CLASS
inT16 MaxNumProtos
Definition: protos.h:60
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
void * Emalloc(int Size)
Definition: emalloc.cpp:47
UnicityTableEqEq< int > font_set
Definition: protos.h:65

◆ WriteTrainingSamples()

void WriteTrainingSamples ( const FEATURE_DEFS_STRUCT &  FeatureDefs,
char *  Directory,
LIST  CharList,
const char *  program_feature_type 
)

Variable Documentation

◆ Config

CLUSTERCONFIG Config

Definition at line 50 of file commontraining.cpp.

◆ feature_defs

FEATURE_DEFS_STRUCT feature_defs

Definition at line 51 of file commontraining.cpp.