All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
clusttool.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: clustertool.c
3  ** Purpose: Misc. tools for use with the clustering routines
4  ** Author: Dan Johnson
5  ** History: 6/6/89, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 //--------------------------Include Files----------------------------------
20 #include "clusttool.h"
21 #include "const.h"
22 #include "danerror.h"
23 #include "emalloc.h"
24 #include "scanutils.h"
25 #include <stdio.h>
26 #include <math.h>
27 
28 //---------------Global Data Definitions and Declarations--------------------
29 #define TOKENSIZE 80 //< max size of tokens read from an input file
30 #define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space
31 //#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size)
32 
43 uinT16 ReadSampleSize(FILE *File) {
44  int SampleSize;
45 
46  if ((tfscanf(File, "%d", &SampleSize) != 1) ||
47  (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
48  DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
49  return (SampleSize);
50 }
51 
66 PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
67  int i;
68  PARAM_DESC *ParamDesc;
69  char Token[TOKENSIZE];
70 
71  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
72  for (i = 0; i < N; i++) {
73  if (tfscanf(File, "%s", Token) != 1)
75  "Illegal circular/linear specification");
76  if (Token[0] == 'c')
77  ParamDesc[i].Circular = TRUE;
78  else
79  ParamDesc[i].Circular = FALSE;
80 
81  if (tfscanf(File, "%s", Token) != 1)
83  "Illegal essential/non-essential spec");
84  if (Token[0] == 'e')
85  ParamDesc[i].NonEssential = FALSE;
86  else
87  ParamDesc[i].NonEssential = TRUE;
88  if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2)
89  DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
90  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
91  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
92  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
93  }
94  return (ParamDesc);
95 }
96 
113 PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
114  char Token[TOKENSIZE];
115  int Status;
116  PROTOTYPE *Proto;
117  int SampleCount;
118  int i;
119 
120  if ((Status = tfscanf(File, "%s", Token)) == 1) {
121  Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
122  Proto->Cluster = NULL;
123  if (Token[0] == 's')
124  Proto->Significant = TRUE;
125  else
126  Proto->Significant = FALSE;
127 
128  Proto->Style = ReadProtoStyle (File);
129 
130  if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
131  DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
132  Proto->NumSamples = SampleCount;
133 
134  Proto->Mean = ReadNFloats (File, N, NULL);
135  if (Proto->Mean == NULL)
136  DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
137 
138  switch (Proto->Style) {
139  case spherical:
140  if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
141  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
142  Proto->Magnitude.Spherical =
143  1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
144  Proto->TotalMagnitude =
145  pow (Proto->Magnitude.Spherical, (float) N);
146  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
147  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
148  Proto->Distrib = NULL;
149  break;
150  case elliptical:
151  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
152  if (Proto->Variance.Elliptical == NULL)
153  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
154  Proto->Magnitude.Elliptical =
155  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
156  Proto->Weight.Elliptical =
157  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
158  Proto->TotalMagnitude = 1.0;
159  for (i = 0; i < N; i++) {
160  Proto->Magnitude.Elliptical[i] =
161  1.0 /
162  sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
163  Proto->Weight.Elliptical[i] =
164  1.0 / Proto->Variance.Elliptical[i];
165  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
166  }
167  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
168  Proto->Distrib = NULL;
169  break;
170  case mixed:
171  Proto->Distrib =
172  (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
173  for (i = 0; i < N; i++) {
174  if (tfscanf(File, "%s", Token) != 1)
176  "Illegal prototype distribution");
177  switch (Token[0]) {
178  case 'n':
179  Proto->Distrib[i] = normal;
180  break;
181  case 'u':
182  Proto->Distrib[i] = uniform;
183  break;
184  case 'r':
185  Proto->Distrib[i] = D_random;
186  break;
187  default:
189  "Illegal prototype distribution");
190  }
191  }
192  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
193  if (Proto->Variance.Elliptical == NULL)
194  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
195  Proto->Magnitude.Elliptical =
196  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
197  Proto->Weight.Elliptical =
198  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
199  Proto->TotalMagnitude = 1.0;
200  for (i = 0; i < N; i++) {
201  switch (Proto->Distrib[i]) {
202  case normal:
203  Proto->Magnitude.Elliptical[i] = 1.0 /
204  sqrt ((double)
205  (2.0 * PI * Proto->Variance.Elliptical[i]));
206  Proto->Weight.Elliptical[i] =
207  1.0 / Proto->Variance.Elliptical[i];
208  break;
209  case uniform:
210  case D_random:
211  Proto->Magnitude.Elliptical[i] = 1.0 /
212  (2.0 * Proto->Variance.Elliptical[i]);
213  break;
214  case DISTRIBUTION_COUNT:
215  ASSERT_HOST(!"Distribution count not allowed!");
216  }
217  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
218  }
219  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
220  break;
221  }
222  return (Proto);
223  }
224  else if (Status == EOF)
225  return (NULL);
226  else {
227  DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
228  return (NULL);
229  }
230 }
231 
242  char Token[TOKENSIZE];
243  PROTOSTYLE Style;
244 
245  if (tfscanf(File, "%s", Token) != 1)
246  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
247  switch (Token[0]) {
248  case 's':
249  Style = spherical;
250  break;
251  case 'e':
252  Style = elliptical;
253  break;
254  case 'm':
255  Style = mixed;
256  break;
257  case 'a':
258  Style = automatic;
259  break;
260  default:
261  Style = elliptical;
262  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
263  }
264  return (Style);
265 }
266 
281 FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
282  int i;
283  int NumFloatsRead;
284 
285  if (Buffer == NULL)
286  Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
287 
288  for (i = 0; i < N; i++) {
289  NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
290  if (NumFloatsRead != 1) {
291  if ((NumFloatsRead == EOF) && (i == 0)) {
292  Efree(Buffer);
293  return NULL;
294  } else {
295  DoError(ILLEGALFLOAT, "Illegal float specification");
296  }
297  }
298  }
299  return Buffer;
300 }
301 
313 void
314 WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
315  int i;
316 
317  for (i = 0; i < N; i++) {
318  if (ParamDesc[i].Circular)
319  fprintf (File, "circular ");
320  else
321  fprintf (File, "linear ");
322 
323  if (ParamDesc[i].NonEssential)
324  fprintf (File, "non-essential ");
325  else
326  fprintf (File, "essential ");
327 
328  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
329  }
330 }
331 
343 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
344  int i;
345 
346  if (Proto->Significant)
347  fprintf (File, "significant ");
348  else
349  fprintf (File, "insignificant ");
350  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
351  fprintf (File, "%6d\n\t", Proto->NumSamples);
352  WriteNFloats (File, N, Proto->Mean);
353  fprintf (File, "\t");
354 
355  switch (Proto->Style) {
356  case spherical:
357  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
358  break;
359  case elliptical:
360  WriteNFloats (File, N, Proto->Variance.Elliptical);
361  break;
362  case mixed:
363  for (i = 0; i < N; i++)
364  switch (Proto->Distrib[i]) {
365  case normal:
366  fprintf (File, " %9s", "normal");
367  break;
368  case uniform:
369  fprintf (File, " %9s", "uniform");
370  break;
371  case D_random:
372  fprintf (File, " %9s", "random");
373  break;
374  case DISTRIBUTION_COUNT:
375  ASSERT_HOST(!"Distribution count not allowed!");
376  }
377  fprintf (File, "\n\t");
378  WriteNFloats (File, N, Proto->Variance.Elliptical);
379  }
380 }
381 
393 void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) {
394  for (int i = 0; i < N; i++)
395  fprintf(File, " %9.6f", Array[i]);
396  fprintf(File, "\n");
397 }
398 
410 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
411  switch (ProtoStyle) {
412  case spherical:
413  fprintf (File, "spherical");
414  break;
415  case elliptical:
416  fprintf (File, "elliptical");
417  break;
418  case mixed:
419  fprintf (File, "mixed");
420  break;
421  case automatic:
422  fprintf (File, "automatic");
423  break;
424  }
425 }
426 
445  FILE *File,
446  uinT16 N,
447  PARAM_DESC ParamDesc[],
448  LIST ProtoList,
449  BOOL8 WriteSigProtos,
450  BOOL8 WriteInsigProtos)
451 {
452  PROTOTYPE *Proto;
453 
454  /* write file header */
455  fprintf(File,"%0d\n",N);
456  WriteParamDesc(File,N,ParamDesc);
457 
458  /* write prototypes */
459  iterate(ProtoList)
460  {
461  Proto = (PROTOTYPE *) first_node ( ProtoList );
462  if (( Proto->Significant && WriteSigProtos ) ||
463  ( ! Proto->Significant && WriteInsigProtos ) )
464  WritePrototype( File, N, Proto );
465  }
466 }
void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto)
Definition: clusttool.cpp:343
FLOAT32 Min
Definition: ocrfeatures.h:49
#define ILLEGALSTYLESPEC
Definition: clusttool.h:61
float FLOAT32
Definition: host.h:111
DISTRIBUTION * Distrib
Definition: cluster.h:77
#define ILLEGALMEANSPEC
Definition: clusttool.h:63
#define ILLEGALSIGNIFICANCESPEC
Definition: clusttool.h:60
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:229
unsigned char BOOL8
Definition: host.h:113
FLOAT32 * ReadNFloats(FILE *File, uinT16 N, FLOAT32 Buffer[])
Definition: clusttool.cpp:281
FLOAT32 Spherical
Definition: cluster.h:63
uinT16 ReadSampleSize(FILE *File)
Definition: clusttool.cpp:43
DISTRIBUTION
Definition: cluster.h:58
FLOAT32 LogMagnitude
Definition: cluster.h:80
FLOATUNION Variance
Definition: cluster.h:81
FLOAT32 * Mean
Definition: cluster.h:78
Definition: cluster.h:59
FLOAT32 HalfRange
Definition: ocrfeatures.h:52
#define TOKENSIZE
Definition: clusttool.cpp:29
#define ASSERT_HOST(x)
Definition: errcode.h:84
unsigned Significant
Definition: cluster.h:68
FLOATUNION Weight
Definition: cluster.h:83
FLOAT32 TotalMagnitude
Definition: cluster.h:79
#define MAXSAMPLESIZE
Definition: clusttool.cpp:30
#define ILLEGALMINMAXSPEC
Definition: clusttool.h:59
FLOAT32 MidRange
Definition: ocrfeatures.h:53
unsigned NumSamples
Definition: cluster.h:75
#define ILLEGALESSENTIALSPEC
Definition: clusttool.h:67
void * Emalloc(int Size)
Definition: emalloc.cpp:47
FLOATUNION Magnitude
Definition: cluster.h:82
FLOAT32 * Elliptical
Definition: cluster.h:64
CLUSTER * Cluster
Definition: cluster.h:76
void WriteNFloats(FILE *File, uinT16 N, FLOAT32 Array[])
Definition: clusttool.cpp:393
#define ILLEGALVARIANCESPEC
Definition: clusttool.h:64
inT8 NonEssential
Definition: ocrfeatures.h:48
inT8 Circular
Definition: ocrfeatures.h:47
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
#define ILLEGALDISTRIBUTION
Definition: clusttool.h:65
#define ILLEGALSAMPLESIZE
Definition: clusttool.h:57
#define ILLEGALSAMPLECOUNT
Definition: clusttool.h:62
Definition: cluster.h:45
#define ILLEGALFLOAT
Definition: clusttool.h:66
#define FALSE
Definition: capi.h:29
FLOAT32 Range
Definition: ocrfeatures.h:51
void Efree(void *ptr)
Definition: emalloc.cpp:79
#define PI
Definition: const.h:19
#define TRUE
Definition: capi.h:28
unsigned Style
Definition: cluster.h:74
PROTOSTYLE ReadProtoStyle(FILE *File)
Definition: clusttool.cpp:241
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
#define NULL
Definition: host.h:144
PARAM_DESC * ReadParamDesc(FILE *File, uinT16 N)
Definition: clusttool.cpp:66
PROTOSTYLE
Definition: cluster.h:44
void WriteParamDesc(FILE *File, uinT16 N, PARAM_DESC ParamDesc[])
Definition: clusttool.cpp:314
void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], LIST ProtoList, BOOL8 WriteSigProtos, BOOL8 WriteInsigProtos)
Definition: clusttool.cpp:444
FLOAT32 Max
Definition: ocrfeatures.h:50
#define ILLEGALCIRCULARSPEC
Definition: clusttool.h:58
PROTOTYPE * ReadPrototype(FILE *File, uinT16 N)
Definition: clusttool.cpp:113
unsigned short uinT16
Definition: host.h:101
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle)
Definition: clusttool.cpp:410