tesseract-ocr.github.io/3.x/a00901_source.html

 /******************************************************************************

  **     Filename:       cluster.c

  **     Purpose:        Routines for clustering points in N-D space

  **     Author:         Dan Johnson

  **     History:        5/29/89, DSJ, Created.

  **

  **     (c) Copyright Hewlett-Packard Company, 1988.

  ** Licensed under the Apache License, Version 2.0 (the "License");

  ** you may not use this file except in compliance with the License.

  ** You may obtain a copy of the License at

  ** http://www.apache.org/licenses/LICENSE-2.0

  ** Unless required by applicable law or agreed to in writing, software

  ** distributed under the License is distributed on an "AS IS" BASIS,

  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  ** See the License for the specific language governing permissions and

  ** limitations under the License.

  ******************************************************************************/

 #include "const.h"

 #include "cluster.h"

 #include "emalloc.h"

 #include "genericheap.h"

 #include "helpers.h"

 #include "kdpair.h"

 #include "matrix.h"

 #include "tprintf.h"

 #include "danerror.h"

 #include "freelist.h"

 #include <math.h>


 #define HOTELLING 1  // If true use Hotelling's test to decide where to split.

 #define FTABLE_X 10  // Size of FTable.

 #define FTABLE_Y 100  // Size of FTable.


 // Table of values approximating the cumulative F-distribution for a confidence of 1%.

 const double FTable[FTABLE_Y][FTABLE_X] = {

  {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,},

   {98.502,  99.000,  99.166,  99.249,  99.300,  99.333,  99.356,  99.374,  99.388,  99.399,},

   {34.116,  30.816,  29.457,  28.710,  28.237,  27.911,  27.672,  27.489,  27.345,  27.229,},

   {21.198,  18.000,  16.694,  15.977,  15.522,  15.207,  14.976,  14.799,  14.659,  14.546,},

   {16.258,  13.274,  12.060,  11.392,  10.967,  10.672,  10.456,  10.289,  10.158,  10.051,},

   {13.745,  10.925,   9.780,   9.148,   8.746,   8.466,   8.260,   8.102,   7.976,   7.874,},

   {12.246,   9.547,   8.451,   7.847,   7.460,   7.191,   6.993,   6.840,   6.719,   6.620,},

   {11.259,   8.649,   7.591,   7.006,   6.632,   6.371,   6.178,   6.029,   5.911,   5.814,},

   {10.561,   8.022,   6.992,   6.422,   6.057,   5.802,   5.613,   5.467,   5.351,   5.257,},

   {10.044,   7.559,   6.552,   5.994,   5.636,   5.386,   5.200,   5.057,   4.942,   4.849,},

   { 9.646,   7.206,   6.217,   5.668,   5.316,   5.069,   4.886,   4.744,   4.632,   4.539,},

   { 9.330,   6.927,   5.953,   5.412,   5.064,   4.821,   4.640,   4.499,   4.388,   4.296,},

   { 9.074,   6.701,   5.739,   5.205,   4.862,   4.620,   4.441,   4.302,   4.191,   4.100,},

   { 8.862,   6.515,   5.564,   5.035,   4.695,   4.456,   4.278,   4.140,   4.030,   3.939,},

   { 8.683,   6.359,   5.417,   4.893,   4.556,   4.318,   4.142,   4.004,   3.895,   3.805,},

   { 8.531,   6.226,   5.292,   4.773,   4.437,   4.202,   4.026,   3.890,   3.780,   3.691,},

   { 8.400,   6.112,   5.185,   4.669,   4.336,   4.102,   3.927,   3.791,   3.682,   3.593,},

   { 8.285,   6.013,   5.092,   4.579,   4.248,   4.015,   3.841,   3.705,   3.597,   3.508,},

   { 8.185,   5.926,   5.010,   4.500,   4.171,   3.939,   3.765,   3.631,   3.523,   3.434,},

   { 8.096,   5.849,   4.938,   4.431,   4.103,   3.871,   3.699,   3.564,   3.457,   3.368,},

   { 8.017,   5.780,   4.874,   4.369,   4.042,   3.812,   3.640,   3.506,   3.398,   3.310,},

   { 7.945,   5.719,   4.817,   4.313,   3.988,   3.758,   3.587,   3.453,   3.346,   3.258,},

   { 7.881,   5.664,   4.765,   4.264,   3.939,   3.710,   3.539,   3.406,   3.299,   3.211,},

   { 7.823,   5.614,   4.718,   4.218,   3.895,   3.667,   3.496,   3.363,   3.256,   3.168,},

   { 7.770,   5.568,   4.675,   4.177,   3.855,   3.627,   3.457,   3.324,   3.217,   3.129,},

   { 7.721,   5.526,   4.637,   4.140,   3.818,   3.591,   3.421,   3.288,   3.182,   3.094,},

   { 7.677,   5.488,   4.601,   4.106,   3.785,   3.558,   3.388,   3.256,   3.149,   3.062,},

   { 7.636,   5.453,   4.568,   4.074,   3.754,   3.528,   3.358,   3.226,   3.120,   3.032,},

   { 7.598,   5.420,   4.538,   4.045,   3.725,   3.499,   3.330,   3.198,   3.092,   3.005,},

   { 7.562,   5.390,   4.510,   4.018,   3.699,   3.473,   3.305,   3.173,   3.067,   2.979,},

   { 7.530,   5.362,   4.484,   3.993,   3.675,   3.449,   3.281,   3.149,   3.043,   2.955,},

   { 7.499,   5.336,   4.459,   3.969,   3.652,   3.427,   3.258,   3.127,   3.021,   2.934,},

   { 7.471,   5.312,   4.437,   3.948,   3.630,   3.406,   3.238,   3.106,   3.000,   2.913,},

   { 7.444,   5.289,   4.416,   3.927,   3.611,   3.386,   3.218,   3.087,   2.981,   2.894,},

   { 7.419,   5.268,   4.396,   3.908,   3.592,   3.368,   3.200,   3.069,   2.963,   2.876,},

   { 7.396,   5.248,   4.377,   3.890,   3.574,   3.351,   3.183,   3.052,   2.946,   2.859,},

   { 7.373,   5.229,   4.360,   3.873,   3.558,   3.334,   3.167,   3.036,   2.930,   2.843,},

   { 7.353,   5.211,   4.343,   3.858,   3.542,   3.319,   3.152,   3.021,   2.915,   2.828,},

   { 7.333,   5.194,   4.327,   3.843,   3.528,   3.305,   3.137,   3.006,   2.901,   2.814,},

   { 7.314,   5.179,   4.313,   3.828,   3.514,   3.291,   3.124,   2.993,   2.888,   2.801,},

   { 7.296,   5.163,   4.299,   3.815,   3.501,   3.278,   3.111,   2.980,   2.875,   2.788,},

   { 7.280,   5.149,   4.285,   3.802,   3.488,   3.266,   3.099,   2.968,   2.863,   2.776,},

   { 7.264,   5.136,   4.273,   3.790,   3.476,   3.254,   3.087,   2.957,   2.851,   2.764,},

   { 7.248,   5.123,   4.261,   3.778,   3.465,   3.243,   3.076,   2.946,   2.840,   2.754,},

   { 7.234,   5.110,   4.249,   3.767,   3.454,   3.232,   3.066,   2.935,   2.830,   2.743,},

   { 7.220,   5.099,   4.238,   3.757,   3.444,   3.222,   3.056,   2.925,   2.820,   2.733,},

   { 7.207,   5.087,   4.228,   3.747,   3.434,   3.213,   3.046,   2.916,   2.811,   2.724,},

   { 7.194,   5.077,   4.218,   3.737,   3.425,   3.204,   3.037,   2.907,   2.802,   2.715,},

   { 7.182,   5.066,   4.208,   3.728,   3.416,   3.195,   3.028,   2.898,   2.793,   2.706,},

   { 7.171,   5.057,   4.199,   3.720,   3.408,   3.186,   3.020,   2.890,   2.785,   2.698,},

   { 7.159,   5.047,   4.191,   3.711,   3.400,   3.178,   3.012,   2.882,   2.777,   2.690,},

   { 7.149,   5.038,   4.182,   3.703,   3.392,   3.171,   3.005,   2.874,   2.769,   2.683,},

   { 7.139,   5.030,   4.174,   3.695,   3.384,   3.163,   2.997,   2.867,   2.762,   2.675,},

   { 7.129,   5.021,   4.167,   3.688,   3.377,   3.156,   2.990,   2.860,   2.755,   2.668,},

   { 7.119,   5.013,   4.159,   3.681,   3.370,   3.149,   2.983,   2.853,   2.748,   2.662,},

   { 7.110,   5.006,   4.152,   3.674,   3.363,   3.143,   2.977,   2.847,   2.742,   2.655,},

   { 7.102,   4.998,   4.145,   3.667,   3.357,   3.136,   2.971,   2.841,   2.736,   2.649,},

   { 7.093,   4.991,   4.138,   3.661,   3.351,   3.130,   2.965,   2.835,   2.730,   2.643,},

   { 7.085,   4.984,   4.132,   3.655,   3.345,   3.124,   2.959,   2.829,   2.724,   2.637,},

   { 7.077,   4.977,   4.126,   3.649,   3.339,   3.119,   2.953,   2.823,   2.718,   2.632,},

   { 7.070,   4.971,   4.120,   3.643,   3.333,   3.113,   2.948,   2.818,   2.713,   2.626,},

   { 7.062,   4.965,   4.114,   3.638,   3.328,   3.108,   2.942,   2.813,   2.708,   2.621,},

   { 7.055,   4.959,   4.109,   3.632,   3.323,   3.103,   2.937,   2.808,   2.703,   2.616,},

   { 7.048,   4.953,   4.103,   3.627,   3.318,   3.098,   2.932,   2.803,   2.698,   2.611,},

   { 7.042,   4.947,   4.098,   3.622,   3.313,   3.093,   2.928,   2.798,   2.693,   2.607,},

   { 7.035,   4.942,   4.093,   3.618,   3.308,   3.088,   2.923,   2.793,   2.689,   2.602,},

   { 7.029,   4.937,   4.088,   3.613,   3.304,   3.084,   2.919,   2.789,   2.684,   2.598,},

   { 7.023,   4.932,   4.083,   3.608,   3.299,   3.080,   2.914,   2.785,   2.680,   2.593,},

   { 7.017,   4.927,   4.079,   3.604,   3.295,   3.075,   2.910,   2.781,   2.676,   2.589,},

   { 7.011,   4.922,   4.074,   3.600,   3.291,   3.071,   2.906,   2.777,   2.672,   2.585,},

   { 7.006,   4.917,   4.070,   3.596,   3.287,   3.067,   2.902,   2.773,   2.668,   2.581,},

   { 7.001,   4.913,   4.066,   3.591,   3.283,   3.063,   2.898,   2.769,   2.664,   2.578,},

   { 6.995,   4.908,   4.062,   3.588,   3.279,   3.060,   2.895,   2.765,   2.660,   2.574,},

   { 6.990,   4.904,   4.058,   3.584,   3.275,   3.056,   2.891,   2.762,   2.657,   2.570,},

   { 6.985,   4.900,   4.054,   3.580,   3.272,   3.052,   2.887,   2.758,   2.653,   2.567,},

   { 6.981,   4.896,   4.050,   3.577,   3.268,   3.049,   2.884,   2.755,   2.650,   2.563,},

   { 6.976,   4.892,   4.047,   3.573,   3.265,   3.046,   2.881,   2.751,   2.647,   2.560,},

   { 6.971,   4.888,   4.043,   3.570,   3.261,   3.042,   2.877,   2.748,   2.644,   2.557,},

   { 6.967,   4.884,   4.040,   3.566,   3.258,   3.039,   2.874,   2.745,   2.640,   2.554,},

   { 6.963,   4.881,   4.036,   3.563,   3.255,   3.036,   2.871,   2.742,   2.637,   2.551,},

   { 6.958,   4.877,   4.033,   3.560,   3.252,   3.033,   2.868,   2.739,   2.634,   2.548,},

   { 6.954,   4.874,   4.030,   3.557,   3.249,   3.030,   2.865,   2.736,   2.632,   2.545,},

   { 6.950,   4.870,   4.027,   3.554,   3.246,   3.027,   2.863,   2.733,   2.629,   2.542,},

   { 6.947,   4.867,   4.024,   3.551,   3.243,   3.025,   2.860,   2.731,   2.626,   2.539,},

   { 6.943,   4.864,   4.021,   3.548,   3.240,   3.022,   2.857,   2.728,   2.623,   2.537,},

   { 6.939,   4.861,   4.018,   3.545,   3.238,   3.019,   2.854,   2.725,   2.621,   2.534,},

   { 6.935,   4.858,   4.015,   3.543,   3.235,   3.017,   2.852,   2.723,   2.618,   2.532,},

   { 6.932,   4.855,   4.012,   3.540,   3.233,   3.014,   2.849,   2.720,   2.616,   2.529,},

   { 6.928,   4.852,   4.010,   3.538,   3.230,   3.012,   2.847,   2.718,   2.613,   2.527,},

   { 6.925,   4.849,   4.007,   3.535,   3.228,   3.009,   2.845,   2.715,   2.611,   2.524,},

   { 6.922,   4.846,   4.004,   3.533,   3.225,   3.007,   2.842,   2.713,   2.609,   2.522,},

   { 6.919,   4.844,   4.002,   3.530,   3.223,   3.004,   2.840,   2.711,   2.606,   2.520,},

   { 6.915,   4.841,   3.999,   3.528,   3.221,   3.002,   2.838,   2.709,   2.604,   2.518,},

   { 6.912,   4.838,   3.997,   3.525,   3.218,   3.000,   2.835,   2.706,   2.602,   2.515,},

   { 6.909,   4.836,   3.995,   3.523,   3.216,   2.998,   2.833,   2.704,   2.600,   2.513,},

   { 6.906,   4.833,   3.992,   3.521,   3.214,   2.996,   2.831,   2.702,   2.598,   2.511,},

   { 6.904,   4.831,   3.990,   3.519,   3.212,   2.994,   2.829,   2.700,   2.596,   2.509,},

   { 6.901,   4.829,   3.988,   3.517,   3.210,   2.992,   2.827,   2.698,   2.594,   2.507,},

   { 6.898,   4.826,   3.986,   3.515,   3.208,   2.990,   2.825,   2.696,   2.592,   2.505,},

   { 6.895,   4.824,   3.984,   3.513,   3.206,   2.988,   2.823,   2.694,   2.590,   2.503}

 };


 #define MINVARIANCE     0.0004


 #define MINSAMPLESPERBUCKET 5

 #define MINSAMPLES    (MINBUCKETS * MINSAMPLESPERBUCKET)

 #define MINSAMPLESNEEDED  1


 #define BUCKETTABLESIZE   1024

 #define NORMALEXTENT    3.0


 struct TEMPCLUSTER {

   CLUSTER *Cluster;

   CLUSTER *Neighbor;

 };


 typedef tesseract::KDPairInc<float, TEMPCLUSTER*> ClusterPair;

 typedef tesseract::GenericHeap<ClusterPair> ClusterHeap;


 struct STATISTICS {

   FLOAT32 AvgVariance;

   FLOAT32 *CoVariance;

   FLOAT32 *Min;                  // largest negative distance from the mean

   FLOAT32 *Max;                  // largest positive distance from the mean

 };


 struct BUCKETS {

   DISTRIBUTION Distribution;     // distribution being tested for

   uinT32 SampleCount;            // # of samples in histogram

   FLOAT64 Confidence;            // confidence level of test

   FLOAT64 ChiSquared;            // test threshold

   uinT16 NumberOfBuckets;        // number of cells in histogram

   uinT16 Bucket[BUCKETTABLESIZE];// mapping to histogram buckets

   uinT32 *Count;                 // frequency of occurence histogram

   FLOAT32 *ExpectedCount;        // expected histogram

 };


 struct CHISTRUCT{

   uinT16 DegreesOfFreedom;

   FLOAT64 Alpha;

   FLOAT64 ChiSquared;

 };


 // For use with KDWalk / MakePotentialClusters

 struct ClusteringContext {

   ClusterHeap *heap;  // heap used to hold temp clusters, "best" on top

   TEMPCLUSTER *candidates;  // array of potential clusters

   KDTREE *tree;  // kd-tree to be searched for neighbors

   inT32 next;  // next candidate to be used

 };


 typedef FLOAT64 (*DENSITYFUNC) (inT32);

 typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double);


 #define Odd(N) ((N)%2)

 #define Mirror(N,R) ((R) - (N) - 1)

 #define Abs(N) ( ( (N) < 0 ) ? ( -(N) ) : (N) )


 //--------------Global Data Definitions and Declarations----------------------

 #define SqrtOf2Pi     2.506628275

 static const FLOAT64 kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT);

 static const FLOAT64 kNormalVariance =

     (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT);

 static const FLOAT64 kNormalMagnitude =

     (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE);

 static const FLOAT64 kNormalMean = BUCKETTABLESIZE / 2;


 #define LOOKUPTABLESIZE   8

 #define MAXDEGREESOFFREEDOM MAXBUCKETS


 static const uinT32 kCountTable[LOOKUPTABLESIZE] = {

   MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000

 };  // number of samples


 static const uinT16 kBucketsTable[LOOKUPTABLESIZE] = {

   MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS

 };  // number of buckets


 /*-------------------------------------------------------------------------

           Private Function Prototypes

 --------------------------------------------------------------------------*/

 void CreateClusterTree(CLUSTERER *Clusterer);


 void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster,

                            inT32 Level);


 CLUSTER *FindNearestNeighbor(KDTREE *Tree,

                              CLUSTER *Cluster,

                              FLOAT32 *Distance);


 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster);


 inT32 MergeClusters (inT16 N,

 register PARAM_DESC ParamDesc[],

 register inT32 n1,

 register inT32 n2,

 register FLOAT32 m[],

 register FLOAT32 m1[], register FLOAT32 m2[]);


 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);


 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,

                          CLUSTERCONFIG *Config,

                          CLUSTER *Cluster);


 PROTOTYPE *MakeDegenerateProto(uinT16 N,

                                CLUSTER *Cluster,

                                STATISTICS *Statistics,

                                PROTOSTYLE Style,

                                inT32 MinSamples);


 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,

                                CLUSTERCONFIG *Config,

                                CLUSTER *Cluster,

                                STATISTICS *Statistics);


 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,

                               CLUSTER *Cluster,

                               STATISTICS *Statistics,

                               BUCKETS *Buckets);


 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,

                                CLUSTER *Cluster,

                                STATISTICS *Statistics,

                                BUCKETS *Buckets);


 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,

                           CLUSTER *Cluster,

                           STATISTICS *Statistics,

                           BUCKETS *NormalBuckets,

                           FLOAT64 Confidence);


 void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc);


 void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics);


 STATISTICS *ComputeStatistics (inT16 N,

 PARAM_DESC ParamDesc[], CLUSTER * Cluster);


 PROTOTYPE *NewSphericalProto(uinT16 N,

                              CLUSTER *Cluster,

                              STATISTICS *Statistics);


 PROTOTYPE *NewEllipticalProto(inT16 N,

                               CLUSTER *Cluster,

                               STATISTICS *Statistics);


 PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics);


 PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster);


 BOOL8 Independent (PARAM_DESC ParamDesc[],

 inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence);


 BUCKETS *GetBuckets(CLUSTERER* clusterer,

                     DISTRIBUTION Distribution,

                     uinT32 SampleCount,

                     FLOAT64 Confidence);


 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,

                      uinT32 SampleCount,

                      FLOAT64 Confidence);


 uinT16 OptimumNumberOfBuckets(uinT32 SampleCount);


 FLOAT64 ComputeChiSquared(uinT16 DegreesOfFreedom, FLOAT64 Alpha);


 FLOAT64 NormalDensity(inT32 x);


 FLOAT64 UniformDensity(inT32 x);


 FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx);


 void FillBuckets(BUCKETS *Buckets,

                  CLUSTER *Cluster,

                  uinT16 Dim,

                  PARAM_DESC *ParamDesc,

                  FLOAT32 Mean,

                  FLOAT32 StdDev);


 uinT16 NormalBucket(PARAM_DESC *ParamDesc,

                     FLOAT32 x,

                     FLOAT32 Mean,

                     FLOAT32 StdDev);


 uinT16 UniformBucket(PARAM_DESC *ParamDesc,

                      FLOAT32 x,

                      FLOAT32 Mean,

                      FLOAT32 StdDev);


 BOOL8 DistributionOK(BUCKETS *Buckets);


 void FreeStatistics(STATISTICS *Statistics);


 void FreeBuckets(BUCKETS *Buckets);


 void FreeCluster(CLUSTER *Cluster);


 uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets);


 int NumBucketsMatch(void *arg1,   // BUCKETS *Histogram,

                     void *arg2);  // uinT16 *DesiredNumberOfBuckets);


 int ListEntryMatch(void *arg1, void *arg2);


 void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount);


 void InitBuckets(BUCKETS *Buckets);


 int AlphaMatch(void *arg1,   // CHISTRUCT *ChiStruct,

                void *arg2);  // CHISTRUCT *SearchKey);


 CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha);


 FLOAT64 Solve(SOLVEFUNC Function,

               void *FunctionParams,

               FLOAT64 InitialGuess,

               FLOAT64 Accuracy);


 FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x);


 BOOL8 MultipleCharSamples(CLUSTERER *Clusterer,

                           CLUSTER *Cluster,

                           FLOAT32 MaxIllegal);


 double InvertMatrix(const float* input, int size, float* inv);


 //--------------------------Public Code--------------------------------------

 CLUSTERER *

 MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) {

   CLUSTERER *Clusterer;

   int i;


   // allocate main clusterer data structure and init simple fields

   Clusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER));

   Clusterer->SampleSize = SampleSize;

   Clusterer->NumberOfSamples = 0;

   Clusterer->NumChar = 0;


   // init fields which will not be used initially

   Clusterer->Root = NULL;

   Clusterer->ProtoList = NIL_LIST;


   // maintain a copy of param descriptors in the clusterer data structure

   Clusterer->ParamDesc =

     (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC));

   for (i = 0; i < SampleSize; i++) {

     Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular;

     Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential;

     Clusterer->ParamDesc[i].Min = ParamDesc[i].Min;

     Clusterer->ParamDesc[i].Max = ParamDesc[i].Max;

     Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;

     Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2;

     Clusterer->ParamDesc[i].MidRange =

       (ParamDesc[i].Max + ParamDesc[i].Min) / 2;

   }


   // allocate a kd tree to hold the samples

   Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc);


   // Initialize cache of histogram buckets to minimize recomputing them.

   for (int d = 0; d < DISTRIBUTION_COUNT; ++d) {

     for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c)

       Clusterer->bucket_cache[d][c] = NULL;

   }


   return Clusterer;

 }                                // MakeClusterer


 SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature,

                    inT32 CharID) {

   SAMPLE *Sample;

   int i;


   // see if the samples have already been clustered - if so trap an error

   if (Clusterer->Root != NULL)

     DoError (ALREADYCLUSTERED,

       "Can't add samples after they have been clustered");


   // allocate the new sample and initialize it

   Sample = (SAMPLE *) Emalloc (sizeof (SAMPLE) +

     (Clusterer->SampleSize -

     1) * sizeof (FLOAT32));

   Sample->Clustered = FALSE;

   Sample->Prototype = FALSE;

   Sample->SampleCount = 1;

   Sample->Left = NULL;

   Sample->Right = NULL;

   Sample->CharID = CharID;


   for (i = 0; i < Clusterer->SampleSize; i++)

     Sample->Mean[i] = Feature[i];


   // add the sample to the KD tree - keep track of the total # of samples

   Clusterer->NumberOfSamples++;

   KDStore (Clusterer->KDTree, Sample->Mean, (char *) Sample);

   if (CharID >= Clusterer->NumChar)

     Clusterer->NumChar = CharID + 1;


   // execute hook for monitoring clustering operation

   // (*SampleCreationHook)( Sample );


   return (Sample);

 }                                // MakeSample


 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {

   //only create cluster tree if samples have never been clustered before

   if (Clusterer->Root == NULL)

     CreateClusterTree(Clusterer);


   //deallocate the old prototype list if one exists

   FreeProtoList (&Clusterer->ProtoList);

   Clusterer->ProtoList = NIL_LIST;


   //compute prototypes starting at the root node in the tree

   ComputePrototypes(Clusterer, Config);

   return (Clusterer->ProtoList);

 }                                // ClusterSamples


 void FreeClusterer(CLUSTERER *Clusterer) {

   if (Clusterer != NULL) {

     memfree (Clusterer->ParamDesc);

     if (Clusterer->KDTree != NULL)

       FreeKDTree (Clusterer->KDTree);

     if (Clusterer->Root != NULL)

       FreeCluster (Clusterer->Root);

     // Free up all used buckets structures.

     for (int d = 0; d < DISTRIBUTION_COUNT; ++d) {

       for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c)

         if (Clusterer->bucket_cache[d][c] != NULL)

           FreeBuckets(Clusterer->bucket_cache[d][c]);

     }


     memfree(Clusterer);

   }

 }                                // FreeClusterer


 void FreeProtoList(LIST *ProtoList) {

   destroy_nodes(*ProtoList, FreePrototype);

 }                                // FreeProtoList


 void FreePrototype(void *arg) {  //PROTOTYPE     *Prototype)

   PROTOTYPE *Prototype = (PROTOTYPE *) arg;


   // unmark the corresponding cluster (if there is one

   if (Prototype->Cluster != NULL)

     Prototype->Cluster->Prototype = FALSE;


   // deallocate the prototype statistics and then the prototype itself

   if (Prototype->Distrib != NULL)

     memfree (Prototype->Distrib);

   if (Prototype->Mean != NULL)

     memfree (Prototype->Mean);

   if (Prototype->Style != spherical) {

     if (Prototype->Variance.Elliptical != NULL)

       memfree (Prototype->Variance.Elliptical);

     if (Prototype->Magnitude.Elliptical != NULL)

       memfree (Prototype->Magnitude.Elliptical);

     if (Prototype->Weight.Elliptical != NULL)

       memfree (Prototype->Weight.Elliptical);

   }

   memfree(Prototype);

 }                                // FreePrototype


 CLUSTER *NextSample(LIST *SearchState) {

   CLUSTER *Cluster;


   if (*SearchState == NIL_LIST)

     return (NULL);

   Cluster = (CLUSTER *) first_node (*SearchState);

   *SearchState = pop (*SearchState);

   while (TRUE) {

     if (Cluster->Left == NULL)

       return (Cluster);

     *SearchState = push (*SearchState, Cluster->Right);

     Cluster = Cluster->Left;

   }

 }                                // NextSample


 FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension) {

   return (Proto->Mean[Dimension]);

 }                                // Mean


 FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) {

   switch (Proto->Style) {

     case spherical:

       return ((FLOAT32) sqrt ((double) Proto->Variance.Spherical));

     case elliptical:

       return ((FLOAT32)

         sqrt ((double) Proto->Variance.Elliptical[Dimension]));

     case mixed:

       switch (Proto->Distrib[Dimension]) {

         case normal:

           return ((FLOAT32)

             sqrt ((double) Proto->Variance.Elliptical[Dimension]));

         case uniform:

         case D_random:

           return (Proto->Variance.Elliptical[Dimension]);

         case DISTRIBUTION_COUNT:

           ASSERT_HOST(!"Distribution count not allowed!");

       }

   }

   return 0.0f;

 }                                // StandardDeviation


 /*---------------------------------------------------------------------------

             Private Code

 ----------------------------------------------------------------------------*/

 void CreateClusterTree(CLUSTERER *Clusterer) {

   ClusteringContext context;

   ClusterPair HeapEntry;

   TEMPCLUSTER *PotentialCluster;


   // each sample and its nearest neighbor form a "potential" cluster

   // save these in a heap with the "best" potential clusters on top

   context.tree = Clusterer->KDTree;

   context.candidates = (TEMPCLUSTER *)

     Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER));

   context.next = 0;

   context.heap = new ClusterHeap(Clusterer->NumberOfSamples);

   KDWalk(context.tree, (void_proc)MakePotentialClusters, &context);


   // form potential clusters into actual clusters - always do "best" first

   while (context.heap->Pop(&HeapEntry)) {

     PotentialCluster = HeapEntry.data;


     // if main cluster of potential cluster is already in another cluster

     // then we don't need to worry about it

     if (PotentialCluster->Cluster->Clustered) {

       continue;

     }


     // if main cluster is not yet clustered, but its nearest neighbor is

     // then we must find a new nearest neighbor

     else if (PotentialCluster->Neighbor->Clustered) {

       PotentialCluster->Neighbor =

         FindNearestNeighbor(context.tree, PotentialCluster->Cluster,

                             &HeapEntry.key);

       if (PotentialCluster->Neighbor != NULL) {

         context.heap->Push(&HeapEntry);

       }

     }


     // if neither cluster is already clustered, form permanent cluster

     else {

       PotentialCluster->Cluster =

           MakeNewCluster(Clusterer, PotentialCluster);

       PotentialCluster->Neighbor =

           FindNearestNeighbor(context.tree, PotentialCluster->Cluster,

                               &HeapEntry.key);

       if (PotentialCluster->Neighbor != NULL) {

         context.heap->Push(&HeapEntry);

       }

     }

   }


   // the root node in the cluster tree is now the only node in the kd-tree

   Clusterer->Root = (CLUSTER *) RootOf(Clusterer->KDTree);


   // free up the memory used by the K-D tree, heap, and temp clusters

   FreeKDTree(context.tree);

   Clusterer->KDTree = NULL;

   delete context.heap;

   memfree(context.candidates);

 }                                // CreateClusterTree


 void MakePotentialClusters(ClusteringContext *context,

                            CLUSTER *Cluster, inT32 Level) {

   ClusterPair HeapEntry;

   int next = context->next;

   context->candidates[next].Cluster = Cluster;

   HeapEntry.data = &(context->candidates[next]);

   context->candidates[next].Neighbor =

       FindNearestNeighbor(context->tree,

                           context->candidates[next].Cluster,

                           &HeapEntry.key);

   if (context->candidates[next].Neighbor != NULL) {

     context->heap->Push(&HeapEntry);

     context->next++;

   }

 }                                // MakePotentialClusters


 CLUSTER *

 FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance)

 #define MAXNEIGHBORS  2

 #define MAXDISTANCE   MAX_FLOAT32

 {

   CLUSTER *Neighbor[MAXNEIGHBORS];

   FLOAT32 Dist[MAXNEIGHBORS];

   int NumberOfNeighbors;

   inT32 i;

   CLUSTER *BestNeighbor;


   // find the 2 nearest neighbors of the cluster

   KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE,

                           &NumberOfNeighbors, (void **)Neighbor, Dist);


   // search for the nearest neighbor that is not the cluster itself

   *Distance = MAXDISTANCE;

   BestNeighbor = NULL;

   for (i = 0; i < NumberOfNeighbors; i++) {

     if ((Dist[i] < *Distance) && (Neighbor[i] != Cluster)) {

       *Distance = Dist[i];

       BestNeighbor = Neighbor[i];

     }

   }

   return BestNeighbor;

 }                                // FindNearestNeighbor


 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) {

   CLUSTER *Cluster;


   // allocate the new cluster and initialize it

   Cluster = (CLUSTER *) Emalloc(

       sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(FLOAT32));

   Cluster->Clustered = FALSE;

   Cluster->Prototype = FALSE;

   Cluster->Left = TempCluster->Cluster;

   Cluster->Right = TempCluster->Neighbor;

   Cluster->CharID = -1;


   // mark the old clusters as "clustered" and delete them from the kd-tree

   Cluster->Left->Clustered = TRUE;

   Cluster->Right->Clustered = TRUE;

   KDDelete(Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left);

   KDDelete(Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right);


   // compute the mean and sample count for the new cluster

   Cluster->SampleCount =

       MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc,

                     Cluster->Left->SampleCount, Cluster->Right->SampleCount,

                     Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean);


   // add the new cluster to the KD tree

   KDStore(Clusterer->KDTree, Cluster->Mean, Cluster);

   return Cluster;

 }                                // MakeNewCluster


 inT32 MergeClusters(inT16 N,

                     PARAM_DESC ParamDesc[],

                     inT32 n1,

                     inT32 n2,

                     FLOAT32 m[],

                     FLOAT32 m1[], FLOAT32 m2[]) {

   inT32 i, n;


   n = n1 + n2;

   for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) {

     if (ParamDesc->Circular) {

       // if distance between means is greater than allowed

       // reduce upper point by one "rotation" to compute mean

       // then normalize the mean back into the accepted range

       if ((*m2 - *m1) > ParamDesc->HalfRange) {

         *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n;

         if (*m < ParamDesc->Min)

           *m += ParamDesc->Range;

       }

       else if ((*m1 - *m2) > ParamDesc->HalfRange) {

         *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n;

         if (*m < ParamDesc->Min)

           *m += ParamDesc->Range;

       }

       else

         *m = (n1 * *m1 + n2 * *m2) / n;

     }

     else

       *m = (n1 * *m1 + n2 * *m2) / n;

   }

   return n;

 }                                // MergeClusters


 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {

   LIST ClusterStack = NIL_LIST;

   CLUSTER *Cluster;

   PROTOTYPE *Prototype;


   // use a stack to keep track of clusters waiting to be processed

   // initially the only cluster on the stack is the root cluster

   if (Clusterer->Root != NULL)

     ClusterStack = push (NIL_LIST, Clusterer->Root);


   // loop until we have analyzed all clusters which are potential prototypes

   while (ClusterStack != NIL_LIST) {

     // remove the next cluster to be analyzed from the stack

     // try to make a prototype from the cluster

     // if successful, put it on the proto list, else split the cluster

     Cluster = (CLUSTER *) first_node (ClusterStack);

     ClusterStack = pop (ClusterStack);

     Prototype = MakePrototype(Clusterer, Config, Cluster);

     if (Prototype != NULL) {

       Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype);

     }

     else {

       ClusterStack = push (ClusterStack, Cluster->Right);

       ClusterStack = push (ClusterStack, Cluster->Left);

     }

   }

 }                                // ComputePrototypes


 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,

                          CLUSTERCONFIG *Config,

                          CLUSTER *Cluster) {

   STATISTICS *Statistics;

   PROTOTYPE *Proto;

   BUCKETS *Buckets;


   // filter out clusters which contain samples from the same character

   if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal))

     return NULL;


   // compute the covariance matrix and ranges for the cluster

   Statistics =

       ComputeStatistics(Clusterer->SampleSize, Clusterer->ParamDesc, Cluster);


   // check for degenerate clusters which need not be analyzed further

   // note that the MinSamples test assumes that all clusters with multiple

   // character samples have been removed (as above)

   Proto = MakeDegenerateProto(

       Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle,

       (inT32) (Config->MinSamples * Clusterer->NumChar));

   if (Proto != NULL) {

     FreeStatistics(Statistics);

     return Proto;

   }

   // check to ensure that all dimensions are independent

   if (!Independent(Clusterer->ParamDesc, Clusterer->SampleSize,

                    Statistics->CoVariance, Config->Independence)) {

     FreeStatistics(Statistics);

     return NULL;

   }


   if (HOTELLING && Config->ProtoStyle == elliptical) {

     Proto = TestEllipticalProto(Clusterer, Config, Cluster, Statistics);

     if (Proto != NULL) {

       FreeStatistics(Statistics);

       return Proto;

     }

   }


   // create a histogram data structure used to evaluate distributions

   Buckets = GetBuckets(Clusterer, normal, Cluster->SampleCount,

                        Config->Confidence);


   // create a prototype based on the statistics and test it

   switch (Config->ProtoStyle) {

     case spherical:

       Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets);

       break;

     case elliptical:

       Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets);

       break;

     case mixed:

       Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets,

                              Config->Confidence);

       break;

     case automatic:

       Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets);

       if (Proto != NULL)

         break;

       Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets);

       if (Proto != NULL)

         break;

       Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets,

                              Config->Confidence);

       break;

   }

   FreeStatistics(Statistics);

   return Proto;

 }                                // MakePrototype


 PROTOTYPE *MakeDegenerateProto(  //this was MinSample

                                uinT16 N,

                                CLUSTER *Cluster,

                                STATISTICS *Statistics,

                                PROTOSTYLE Style,

                                inT32 MinSamples) {

   PROTOTYPE *Proto = NULL;


   if (MinSamples < MINSAMPLESNEEDED)

     MinSamples = MINSAMPLESNEEDED;


   if (Cluster->SampleCount < MinSamples) {

     switch (Style) {

       case spherical:

         Proto = NewSphericalProto (N, Cluster, Statistics);

         break;

       case elliptical:

       case automatic:

         Proto = NewEllipticalProto (N, Cluster, Statistics);

         break;

       case mixed:

         Proto = NewMixedProto (N, Cluster, Statistics);

         break;

     }

     Proto->Significant = FALSE;

   }

   return (Proto);

 }                                // MakeDegenerateProto


 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,

                                CLUSTERCONFIG *Config,

                                CLUSTER *Cluster,

                                STATISTICS *Statistics) {

   // Fraction of the number of samples used as a range around 1 within

   // which a cluster has the magic size that allows a boost to the

   // FTable by kFTableBoostMargin, thus allowing clusters near the

   // magic size (equal to the number of sample characters) to be more

   // likely to stay together.

   const double kMagicSampleMargin = 0.0625;

   const double kFTableBoostMargin = 2.0;


   int N = Clusterer->SampleSize;

   CLUSTER* Left = Cluster->Left;

   CLUSTER* Right = Cluster->Right;

   if (Left == NULL || Right == NULL)

     return NULL;

   int TotalDims = Left->SampleCount + Right->SampleCount;

   if (TotalDims < N + 1 || TotalDims < 2)

     return NULL;

   const int kMatrixSize = N * N * sizeof(FLOAT32);

   FLOAT32* Covariance = reinterpret_cast<FLOAT32 *>(Emalloc(kMatrixSize));

   FLOAT32* Inverse = reinterpret_cast<FLOAT32 *>(Emalloc(kMatrixSize));

   FLOAT32* Delta = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));

   // Compute a new covariance matrix that only uses essential features.

   for (int i = 0; i < N; ++i) {

     int row_offset = i * N;

     if (!Clusterer->ParamDesc[i].NonEssential) {

       for (int j = 0; j < N; ++j) {

         if (!Clusterer->ParamDesc[j].NonEssential)

           Covariance[j + row_offset] = Statistics->CoVariance[j + row_offset];

         else

           Covariance[j + row_offset] = 0.0f;

       }

     } else {

       for (int j = 0; j < N; ++j) {

         if (i == j)

           Covariance[j + row_offset] = 1.0f;

         else

           Covariance[j + row_offset] = 0.0f;

       }

     }

   }

   double err = InvertMatrix(Covariance, N, Inverse);

   if (err > 1) {

     tprintf("Clustering error: Matrix inverse failed with error %g\n", err);

   }

   int EssentialN = 0;

   for (int dim = 0; dim < N; ++dim) {

     if (!Clusterer->ParamDesc[dim].NonEssential) {

       Delta[dim] = Left->Mean[dim] - Right->Mean[dim];

       ++EssentialN;

     } else {

       Delta[dim] = 0.0f;

     }

   }

   // Compute Hotelling's T-squared.

   double Tsq = 0.0;

   for (int x = 0; x < N; ++x) {

     double temp = 0.0;

     for (int y = 0; y < N; ++y) {

       temp += Inverse[y + N*x] * Delta[y];

     }

     Tsq += Delta[x] * temp;

   }

   memfree(Covariance);

   memfree(Inverse);

   memfree(Delta);

   // Changed this function to match the formula in

   // Statistical Methods in Medical Research p 473

   // By Peter Armitage, Geoffrey Berry, J. N. S. Matthews.

   // Tsq *= Left->SampleCount * Right->SampleCount / TotalDims;

   double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN);

   int Fx = EssentialN;

   if (Fx > FTABLE_X)

     Fx = FTABLE_X;

   --Fx;

   int Fy = TotalDims - EssentialN - 1;

   if (Fy > FTABLE_Y)

     Fy = FTABLE_Y;

   --Fy;

   double FTarget = FTable[Fy][Fx];

   if (Config->MagicSamples > 0 &&

       TotalDims >= Config->MagicSamples * (1.0 - kMagicSampleMargin) &&

       TotalDims <= Config->MagicSamples * (1.0 + kMagicSampleMargin)) {

     // Give magic-sized clusters a magic FTable boost.

     FTarget += kFTableBoostMargin;

   }

   if (F < FTarget) {

     return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);

   }

   return NULL;

 }


 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,

                               CLUSTER *Cluster,

                               STATISTICS *Statistics,

                               BUCKETS *Buckets) {

   PROTOTYPE *Proto = NULL;

   int i;


   // check that each dimension is a normal distribution

   for (i = 0; i < Clusterer->SampleSize; i++) {

     if (Clusterer->ParamDesc[i].NonEssential)

       continue;


     FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),

       Cluster->Mean[i],

       sqrt ((FLOAT64) (Statistics->AvgVariance)));

     if (!DistributionOK (Buckets))

       break;

   }

   // if all dimensions matched a normal distribution, make a proto

   if (i >= Clusterer->SampleSize)

     Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics);

   return (Proto);

 }                                // MakeSphericalProto


 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,

                                CLUSTER *Cluster,

                                STATISTICS *Statistics,

                                BUCKETS *Buckets) {

   PROTOTYPE *Proto = NULL;

   int i;


   // check that each dimension is a normal distribution

   for (i = 0; i < Clusterer->SampleSize; i++) {

     if (Clusterer->ParamDesc[i].NonEssential)

       continue;


     FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),

       Cluster->Mean[i],

       sqrt ((FLOAT64) Statistics->

       CoVariance[i * (Clusterer->SampleSize + 1)]));

     if (!DistributionOK (Buckets))

       break;

   }

   // if all dimensions matched a normal distribution, make a proto

   if (i >= Clusterer->SampleSize)

     Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);

   return (Proto);

 }                                // MakeEllipticalProto


 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,

                           CLUSTER *Cluster,

                           STATISTICS *Statistics,

                           BUCKETS *NormalBuckets,

                           FLOAT64 Confidence) {

   PROTOTYPE *Proto;

   int i;

   BUCKETS *UniformBuckets = NULL;

   BUCKETS *RandomBuckets = NULL;


   // create a mixed proto to work on - initially assume all dimensions normal*/

   Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics);


   // find the proper distribution for each dimension

   for (i = 0; i < Clusterer->SampleSize; i++) {

     if (Clusterer->ParamDesc[i].NonEssential)

       continue;


     FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),

       Proto->Mean[i],

       sqrt ((FLOAT64) Proto->Variance.Elliptical[i]));

     if (DistributionOK (NormalBuckets))

       continue;


     if (RandomBuckets == NULL)

       RandomBuckets =

         GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence);

     MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i]));

     FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),

       Proto->Mean[i], Proto->Variance.Elliptical[i]);

     if (DistributionOK (RandomBuckets))

       continue;


     if (UniformBuckets == NULL)

       UniformBuckets =

         GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence);

     MakeDimUniform(i, Proto, Statistics);

     FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),

       Proto->Mean[i], Proto->Variance.Elliptical[i]);

     if (DistributionOK (UniformBuckets))

       continue;

     break;

   }

   // if any dimension failed to match a distribution, discard the proto

   if (i < Clusterer->SampleSize) {

     FreePrototype(Proto);

     Proto = NULL;

   }

   return (Proto);

 }                                // MakeMixedProto


 void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {

   Proto->Distrib[i] = D_random;

   Proto->Mean[i] = ParamDesc->MidRange;

   Proto->Variance.Elliptical[i] = ParamDesc->HalfRange;


   // subtract out the previous magnitude of this dimension from the total

   Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];

   Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range;

   Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];

   Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);


   // note that the proto Weight is irrelevant for D_random protos

 }                                // MakeDimRandom


 void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {

   Proto->Distrib[i] = uniform;

   Proto->Mean[i] = Proto->Cluster->Mean[i] +

     (Statistics->Min[i] + Statistics->Max[i]) / 2;

   Proto->Variance.Elliptical[i] =

     (Statistics->Max[i] - Statistics->Min[i]) / 2;

   if (Proto->Variance.Elliptical[i] < MINVARIANCE)

     Proto->Variance.Elliptical[i] = MINVARIANCE;


   // subtract out the previous magnitude of this dimension from the total

   Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];

   Proto->Magnitude.Elliptical[i] =

     1.0 / (2.0 * Proto->Variance.Elliptical[i]);

   Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];

   Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);


   // note that the proto Weight is irrelevant for uniform protos

 }                                // MakeDimUniform


 STATISTICS *

 ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {

   STATISTICS *Statistics;

   int i, j;

   FLOAT32 *CoVariance;

   FLOAT32 *Distance;

   LIST SearchState;

   SAMPLE *Sample;

   uinT32 SampleCountAdjustedForBias;


   // allocate memory to hold the statistics results

   Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS));

   Statistics->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32));

   Statistics->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

   Statistics->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));


   // allocate temporary memory to hold the sample to mean distances

   Distance = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));


   // initialize the statistics

   Statistics->AvgVariance = 1.0;

   CoVariance = Statistics->CoVariance;

   for (i = 0; i < N; i++) {

     Statistics->Min[i] = 0.0;

     Statistics->Max[i] = 0.0;

     for (j = 0; j < N; j++, CoVariance++)

       *CoVariance = 0;

   }

   // find each sample in the cluster and merge it into the statistics

   InitSampleSearch(SearchState, Cluster);

   while ((Sample = NextSample (&SearchState)) != NULL) {

     for (i = 0; i < N; i++) {

       Distance[i] = Sample->Mean[i] - Cluster->Mean[i];

       if (ParamDesc[i].Circular) {

         if (Distance[i] > ParamDesc[i].HalfRange)

           Distance[i] -= ParamDesc[i].Range;

         if (Distance[i] < -ParamDesc[i].HalfRange)

           Distance[i] += ParamDesc[i].Range;

       }

       if (Distance[i] < Statistics->Min[i])

         Statistics->Min[i] = Distance[i];

       if (Distance[i] > Statistics->Max[i])

         Statistics->Max[i] = Distance[i];

     }

     CoVariance = Statistics->CoVariance;

     for (i = 0; i < N; i++)

       for (j = 0; j < N; j++, CoVariance++)

         *CoVariance += Distance[i] * Distance[j];

   }

   // normalize the variances by the total number of samples

   // use SampleCount-1 instead of SampleCount to get an unbiased estimate

   // also compute the geometic mean of the diagonal variances

   // ensure that clusters with only 1 sample are handled correctly

   if (Cluster->SampleCount > 1)

     SampleCountAdjustedForBias = Cluster->SampleCount - 1;

   else

     SampleCountAdjustedForBias = 1;

   CoVariance = Statistics->CoVariance;

   for (i = 0; i < N; i++)

   for (j = 0; j < N; j++, CoVariance++) {

     *CoVariance /= SampleCountAdjustedForBias;

     if (j == i) {

       if (*CoVariance < MINVARIANCE)

         *CoVariance = MINVARIANCE;

       Statistics->AvgVariance *= *CoVariance;

     }

   }

   Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance,

                                        1.0 / N);


   // release temporary memory and return

   memfree(Distance);

   return (Statistics);

 }                                // ComputeStatistics


 PROTOTYPE *NewSphericalProto(uinT16 N,

                              CLUSTER *Cluster,

                              STATISTICS *Statistics) {

   PROTOTYPE *Proto;


   Proto = NewSimpleProto (N, Cluster);


   Proto->Variance.Spherical = Statistics->AvgVariance;

   if (Proto->Variance.Spherical < MINVARIANCE)

     Proto->Variance.Spherical = MINVARIANCE;


   Proto->Magnitude.Spherical =

     1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));

   Proto->TotalMagnitude = (float)pow((double)Proto->Magnitude.Spherical,

                                      (double) N);

   Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;

   Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);


   return (Proto);

 }                                // NewSphericalProto


 PROTOTYPE *NewEllipticalProto(inT16 N,

                               CLUSTER *Cluster,

                               STATISTICS *Statistics) {

   PROTOTYPE *Proto;

   FLOAT32 *CoVariance;

   int i;


   Proto = NewSimpleProto (N, Cluster);

   Proto->Variance.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

   Proto->Magnitude.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

   Proto->Weight.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));


   CoVariance = Statistics->CoVariance;

   Proto->TotalMagnitude = 1.0;

   for (i = 0; i < N; i++, CoVariance += N + 1) {

     Proto->Variance.Elliptical[i] = *CoVariance;

     if (Proto->Variance.Elliptical[i] < MINVARIANCE)

       Proto->Variance.Elliptical[i] = MINVARIANCE;


     Proto->Magnitude.Elliptical[i] =

       1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));

     Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];

     Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];

   }

   Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);

   Proto->Style = elliptical;

   return (Proto);

 }                                // NewEllipticalProto


 PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) {

   PROTOTYPE *Proto;

   int i;


   Proto = NewEllipticalProto (N, Cluster, Statistics);

   Proto->Distrib = (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));


   for (i = 0; i < N; i++) {

     Proto->Distrib[i] = normal;

   }

   Proto->Style = mixed;

   return (Proto);

 }                                // NewMixedProto


 PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) {

   PROTOTYPE *Proto;

   int i;


   Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));

   Proto->Mean = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));


   for (i = 0; i < N; i++)

     Proto->Mean[i] = Cluster->Mean[i];

   Proto->Distrib = NULL;


   Proto->Significant = TRUE;

   Proto->Merged = FALSE;

   Proto->Style = spherical;

   Proto->NumSamples = Cluster->SampleCount;

   Proto->Cluster = Cluster;

   Proto->Cluster->Prototype = TRUE;

   return (Proto);

 }                                // NewSimpleProto


 BOOL8

 Independent (PARAM_DESC ParamDesc[],

 inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) {

   int i, j;

   FLOAT32 *VARii;                // points to ith on-diagonal element

   FLOAT32 *VARjj;                // points to jth on-diagonal element

   FLOAT32 CorrelationCoeff;


   VARii = CoVariance;

   for (i = 0; i < N; i++, VARii += N + 1) {

     if (ParamDesc[i].NonEssential)

       continue;


     VARjj = VARii + N + 1;

     CoVariance = VARii + 1;

     for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) {

       if (ParamDesc[j].NonEssential)

         continue;


       if ((*VARii == 0.0) || (*VARjj == 0.0))

         CorrelationCoeff = 0.0;

       else

         CorrelationCoeff =

           sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj)));

       if (CorrelationCoeff > Independence)

         return (FALSE);

     }

   }

   return (TRUE);

 }                                // Independent


 BUCKETS *GetBuckets(CLUSTERER* clusterer,

                     DISTRIBUTION Distribution,

                     uinT32 SampleCount,

                     FLOAT64 Confidence) {

   // Get an old bucket structure with the same number of buckets.

   uinT16 NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);

   BUCKETS *Buckets =

       clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS];


   // If a matching bucket structure is not found, make one and save it.

   if (Buckets == NULL) {

     Buckets = MakeBuckets(Distribution, SampleCount, Confidence);

     clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS] =

         Buckets;

   } else {

     // Just adjust the existing buckets.

     if (SampleCount != Buckets->SampleCount)

       AdjustBuckets(Buckets, SampleCount);

     if (Confidence != Buckets->Confidence) {

       Buckets->Confidence = Confidence;

       Buckets->ChiSquared = ComputeChiSquared(

           DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets),

           Confidence);

     }

     InitBuckets(Buckets);

   }

   return Buckets;

 }                                // GetBuckets


 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,

                      uinT32 SampleCount,

                      FLOAT64 Confidence) {

   const DENSITYFUNC DensityFunction[] =

     { NormalDensity, UniformDensity, UniformDensity };

   int i, j;

   BUCKETS *Buckets;

   FLOAT64 BucketProbability;

   FLOAT64 NextBucketBoundary;

   FLOAT64 Probability;

   FLOAT64 ProbabilityDelta;

   FLOAT64 LastProbDensity;

   FLOAT64 ProbDensity;

   uinT16 CurrentBucket;

   BOOL8 Symmetrical;


   // allocate memory needed for data structure

   Buckets = reinterpret_cast<BUCKETS*>(Emalloc(sizeof(BUCKETS)));

   Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);

   Buckets->SampleCount = SampleCount;

   Buckets->Confidence = Confidence;

   Buckets->Count = reinterpret_cast<uinT32*>(

       Emalloc(Buckets->NumberOfBuckets * sizeof(uinT32)));

   Buckets->ExpectedCount = reinterpret_cast<FLOAT32*>(

       Emalloc(Buckets->NumberOfBuckets * sizeof(FLOAT32)));


   // initialize simple fields

   Buckets->Distribution = Distribution;

   for (i = 0; i < Buckets->NumberOfBuckets; i++) {

     Buckets->Count[i] = 0;

     Buckets->ExpectedCount[i] = 0.0;

   }


   // all currently defined distributions are symmetrical

   Symmetrical = TRUE;

   Buckets->ChiSquared = ComputeChiSquared(

       DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence);


   if (Symmetrical) {

     // allocate buckets so that all have approx. equal probability

     BucketProbability = 1.0 / (FLOAT64) (Buckets->NumberOfBuckets);


     // distribution is symmetric so fill in upper half then copy

     CurrentBucket = Buckets->NumberOfBuckets / 2;

     if (Odd (Buckets->NumberOfBuckets))

       NextBucketBoundary = BucketProbability / 2;

     else

       NextBucketBoundary = BucketProbability;


     Probability = 0.0;

     LastProbDensity =

       (*DensityFunction[(int) Distribution]) (BUCKETTABLESIZE / 2);

     for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) {

       ProbDensity = (*DensityFunction[(int) Distribution]) (i + 1);

       ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0);

       Probability += ProbabilityDelta;

       if (Probability > NextBucketBoundary) {

         if (CurrentBucket < Buckets->NumberOfBuckets - 1)

           CurrentBucket++;

         NextBucketBoundary += BucketProbability;

       }

       Buckets->Bucket[i] = CurrentBucket;

       Buckets->ExpectedCount[CurrentBucket] +=

         (FLOAT32) (ProbabilityDelta * SampleCount);

       LastProbDensity = ProbDensity;

     }

     // place any leftover probability into the last bucket

     Buckets->ExpectedCount[CurrentBucket] +=

       (FLOAT32) ((0.5 - Probability) * SampleCount);


     // copy upper half of distribution to lower half

     for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--)

       Buckets->Bucket[i] =

         Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets);


     // copy upper half of expected counts to lower half

     for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--)

       Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j];

   }

   return Buckets;

 }                                // MakeBuckets


 uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) {

   uinT8 Last, Next;

   FLOAT32 Slope;


   if (SampleCount < kCountTable[0])

     return kBucketsTable[0];


   for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) {

     if (SampleCount <= kCountTable[Next]) {

       Slope = (FLOAT32) (kBucketsTable[Next] - kBucketsTable[Last]) /

           (FLOAT32) (kCountTable[Next] - kCountTable[Last]);

       return ((uinT16) (kBucketsTable[Last] +

           Slope * (SampleCount - kCountTable[Last])));

     }

   }

   return kBucketsTable[Last];

 }                                // OptimumNumberOfBuckets


 FLOAT64

 ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha)

 #define CHIACCURACY     0.01

 #define MINALPHA  (1e-200)

 {

   static LIST ChiWith[MAXDEGREESOFFREEDOM + 1];


   CHISTRUCT *OldChiSquared;

   CHISTRUCT SearchKey;


   // limit the minimum alpha that can be used - if alpha is too small

   //      it may not be possible to compute chi-squared.

   Alpha = ClipToRange(Alpha, MINALPHA, 1.0);

   if (Odd (DegreesOfFreedom))

     DegreesOfFreedom++;


   /* find the list of chi-squared values which have already been computed

      for the specified number of degrees of freedom.  Search the list for

      the desired chi-squared. */

   SearchKey.Alpha = Alpha;

   OldChiSquared = (CHISTRUCT *) first_node (search (ChiWith[DegreesOfFreedom],

     &SearchKey, AlphaMatch));


   if (OldChiSquared == NULL) {

     OldChiSquared = NewChiStruct (DegreesOfFreedom, Alpha);

     OldChiSquared->ChiSquared = Solve (ChiArea, OldChiSquared,

       (FLOAT64) DegreesOfFreedom,

       (FLOAT64) CHIACCURACY);

     ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom],

       OldChiSquared);

   }

   else {

     // further optimization might move OldChiSquared to front of list

   }


   return (OldChiSquared->ChiSquared);


 }                                // ComputeChiSquared


 FLOAT64 NormalDensity(inT32 x) {

   FLOAT64 Distance;


   Distance = x - kNormalMean;

   return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance);

 }                                // NormalDensity


 FLOAT64 UniformDensity(inT32 x) {

   static FLOAT64 UniformDistributionDensity = (FLOAT64) 1.0 / BUCKETTABLESIZE;


   if ((x >= 0.0) && (x <= BUCKETTABLESIZE))

     return UniformDistributionDensity;

   else

     return (FLOAT64) 0.0;

 }                                // UniformDensity


 FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) {

   return (f1 + f2) * Dx / 2.0;

 }                                // Integral


 void FillBuckets(BUCKETS *Buckets,

                  CLUSTER *Cluster,

                  uinT16 Dim,

                  PARAM_DESC *ParamDesc,

                  FLOAT32 Mean,

                  FLOAT32 StdDev) {

   uinT16 BucketID;

   int i;

   LIST SearchState;

   SAMPLE *Sample;


   // initialize the histogram bucket counts to 0

   for (i = 0; i < Buckets->NumberOfBuckets; i++)

     Buckets->Count[i] = 0;


   if (StdDev == 0.0) {

     /* if the standard deviation is zero, then we can't statistically

        analyze the cluster.  Use a pseudo-analysis: samples exactly on

        the mean are distributed evenly across all buckets.  Samples greater

        than the mean are placed in the last bucket; samples less than the

        mean are placed in the first bucket. */


     InitSampleSearch(SearchState, Cluster);

     i = 0;

     while ((Sample = NextSample (&SearchState)) != NULL) {

       if (Sample->Mean[Dim] > Mean)

         BucketID = Buckets->NumberOfBuckets - 1;

       else if (Sample->Mean[Dim] < Mean)

         BucketID = 0;

       else

         BucketID = i;

       Buckets->Count[BucketID] += 1;

       i++;

       if (i >= Buckets->NumberOfBuckets)

         i = 0;

     }

   }

   else {

     // search for all samples in the cluster and add to histogram buckets

     InitSampleSearch(SearchState, Cluster);

     while ((Sample = NextSample (&SearchState)) != NULL) {

       switch (Buckets->Distribution) {

         case normal:

           BucketID = NormalBucket (ParamDesc, Sample->Mean[Dim],

             Mean, StdDev);

           break;

         case D_random:

         case uniform:

           BucketID = UniformBucket (ParamDesc, Sample->Mean[Dim],

             Mean, StdDev);

           break;

         default:

           BucketID = 0;

       }

       Buckets->Count[Buckets->Bucket[BucketID]] += 1;

     }

   }

 }                                // FillBuckets


 uinT16 NormalBucket(PARAM_DESC *ParamDesc,

                     FLOAT32 x,

                     FLOAT32 Mean,

                     FLOAT32 StdDev) {

   FLOAT32 X;


   // wraparound circular parameters if necessary

   if (ParamDesc->Circular) {

     if (x - Mean > ParamDesc->HalfRange)

       x -= ParamDesc->Range;

     else if (x - Mean < -ParamDesc->HalfRange)

       x += ParamDesc->Range;

   }


   X = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean;

   if (X < 0)

     return 0;

   if (X > BUCKETTABLESIZE - 1)

     return ((uinT16) (BUCKETTABLESIZE - 1));

   return (uinT16) floor((FLOAT64) X);

 }                                // NormalBucket


 uinT16 UniformBucket(PARAM_DESC *ParamDesc,

                      FLOAT32 x,

                      FLOAT32 Mean,

                      FLOAT32 StdDev) {

   FLOAT32 X;


   // wraparound circular parameters if necessary

   if (ParamDesc->Circular) {

     if (x - Mean > ParamDesc->HalfRange)

       x -= ParamDesc->Range;

     else if (x - Mean < -ParamDesc->HalfRange)

       x += ParamDesc->Range;

   }


   X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0);

   if (X < 0)

     return 0;

   if (X > BUCKETTABLESIZE - 1)

     return (uinT16) (BUCKETTABLESIZE - 1);

   return (uinT16) floor((FLOAT64) X);

 }                                // UniformBucket


 BOOL8 DistributionOK(BUCKETS *Buckets) {

   FLOAT32 FrequencyDifference;

   FLOAT32 TotalDifference;

   int i;


   // compute how well the histogram matches the expected histogram

   TotalDifference = 0.0;

   for (i = 0; i < Buckets->NumberOfBuckets; i++) {

     FrequencyDifference = Buckets->Count[i] - Buckets->ExpectedCount[i];

     TotalDifference += (FrequencyDifference * FrequencyDifference) /

       Buckets->ExpectedCount[i];

   }


   // test to see if the difference is more than expected

   if (TotalDifference > Buckets->ChiSquared)

     return FALSE;

   else

     return TRUE;

 }                                // DistributionOK


 void FreeStatistics(STATISTICS *Statistics) {

   memfree (Statistics->CoVariance);

   memfree (Statistics->Min);

   memfree (Statistics->Max);

   memfree(Statistics);

 }                                // FreeStatistics


 void FreeBuckets(BUCKETS *buckets) {

   Efree(buckets->Count);

   Efree(buckets->ExpectedCount);

   Efree(buckets);

 }                                // FreeBuckets


 void FreeCluster(CLUSTER *Cluster) {

   if (Cluster != NULL) {

     FreeCluster (Cluster->Left);

     FreeCluster (Cluster->Right);

     memfree(Cluster);

   }

 }                                // FreeCluster


 uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) {

   static uinT8 DegreeOffsets[] = { 3, 3, 1 };


   uinT16 AdjustedNumBuckets;


   AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[(int) Distribution];

   if (Odd (AdjustedNumBuckets))

     AdjustedNumBuckets++;

   return (AdjustedNumBuckets);


 }                                // DegreesOfFreedom


 int NumBucketsMatch(void *arg1,    // BUCKETS *Histogram,

                     void *arg2) {  // uinT16 *DesiredNumberOfBuckets)

   BUCKETS *Histogram = (BUCKETS *) arg1;

   uinT16 *DesiredNumberOfBuckets = (uinT16 *) arg2;


   return (*DesiredNumberOfBuckets == Histogram->NumberOfBuckets);


 }                                // NumBucketsMatch


 int ListEntryMatch(void *arg1,    //ListNode

                    void *arg2) {  //Key

   return (arg1 == arg2);


 }                                // ListEntryMatch


 void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount) {

   int i;

   FLOAT64 AdjustFactor;


   AdjustFactor = (((FLOAT64) NewSampleCount) /

     ((FLOAT64) Buckets->SampleCount));


   for (i = 0; i < Buckets->NumberOfBuckets; i++) {

     Buckets->ExpectedCount[i] *= AdjustFactor;

   }


   Buckets->SampleCount = NewSampleCount;


 }                                // AdjustBuckets


 void InitBuckets(BUCKETS *Buckets) {

   int i;


   for (i = 0; i < Buckets->NumberOfBuckets; i++) {

     Buckets->Count[i] = 0;

   }


 }                                // InitBuckets


 int AlphaMatch(void *arg1,    //CHISTRUCT                             *ChiStruct,

                void *arg2) {  //CHISTRUCT                             *SearchKey)

   CHISTRUCT *ChiStruct = (CHISTRUCT *) arg1;

   CHISTRUCT *SearchKey = (CHISTRUCT *) arg2;


   return (ChiStruct->Alpha == SearchKey->Alpha);


 }                                // AlphaMatch


 CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) {

   CHISTRUCT *NewChiStruct;


   NewChiStruct = (CHISTRUCT *) Emalloc (sizeof (CHISTRUCT));

   NewChiStruct->DegreesOfFreedom = DegreesOfFreedom;

   NewChiStruct->Alpha = Alpha;

   return (NewChiStruct);


 }                                // NewChiStruct


 FLOAT64

 Solve (SOLVEFUNC Function,

 void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)

 #define INITIALDELTA    0.1

 #define  DELTARATIO     0.1

 {

   FLOAT64 x;

   FLOAT64 f;

   FLOAT64 Slope;

   FLOAT64 Delta;

   FLOAT64 NewDelta;

   FLOAT64 xDelta;

   FLOAT64 LastPosX, LastNegX;


   x = InitialGuess;

   Delta = INITIALDELTA;

   LastPosX = MAX_FLOAT32;

   LastNegX = -MAX_FLOAT32;

   f = (*Function) ((CHISTRUCT *) FunctionParams, x);

   while (Abs (LastPosX - LastNegX) > Accuracy) {

     // keep track of outer bounds of current estimate

     if (f < 0)

       LastNegX = x;

     else

       LastPosX = x;


     // compute the approx. slope of f(x) at the current point

     Slope =

       ((*Function) ((CHISTRUCT *) FunctionParams, x + Delta) - f) / Delta;


     // compute the next solution guess */

     xDelta = f / Slope;

     x -= xDelta;


     // reduce the delta used for computing slope to be a fraction of

     //the amount moved to get to the new guess

     NewDelta = Abs (xDelta) * DELTARATIO;

     if (NewDelta < Delta)

       Delta = NewDelta;


     // compute the value of the function at the new guess

     f = (*Function) ((CHISTRUCT *) FunctionParams, x);

   }

   return (x);


 }                                // Solve


 FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) {

   int i, N;

   FLOAT64 SeriesTotal;

   FLOAT64 Denominator;

   FLOAT64 PowerOfx;


   N = ChiParams->DegreesOfFreedom / 2 - 1;

   SeriesTotal = 1;

   Denominator = 1;

   PowerOfx = 1;

   for (i = 1; i <= N; i++) {

     Denominator *= 2 * i;

     PowerOfx *= x;

     SeriesTotal += PowerOfx / Denominator;

   }

   return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->Alpha);


 }                                // ChiArea


 BOOL8

 MultipleCharSamples (CLUSTERER * Clusterer,

 CLUSTER * Cluster, FLOAT32 MaxIllegal)

 #define ILLEGAL_CHAR    2

 {

   static BOOL8 *CharFlags = NULL;

   static inT32 NumFlags = 0;

   int i;

   LIST SearchState;

   SAMPLE *Sample;

   inT32 CharID;

   inT32 NumCharInCluster;

   inT32 NumIllegalInCluster;

   FLOAT32 PercentIllegal;


   // initial estimate assumes that no illegal chars exist in the cluster

   NumCharInCluster = Cluster->SampleCount;

   NumIllegalInCluster = 0;


   if (Clusterer->NumChar > NumFlags) {

     if (CharFlags != NULL)

       memfree(CharFlags);

     NumFlags = Clusterer->NumChar;

     CharFlags = (BOOL8 *) Emalloc (NumFlags * sizeof (BOOL8));

   }


   for (i = 0; i < NumFlags; i++)

     CharFlags[i] = FALSE;


   // find each sample in the cluster and check if we have seen it before

   InitSampleSearch(SearchState, Cluster);

   while ((Sample = NextSample (&SearchState)) != NULL) {

     CharID = Sample->CharID;

     if (CharFlags[CharID] == FALSE) {

       CharFlags[CharID] = TRUE;

     }

     else {

       if (CharFlags[CharID] == TRUE) {

         NumIllegalInCluster++;

         CharFlags[CharID] = ILLEGAL_CHAR;

       }

       NumCharInCluster--;

       PercentIllegal = (FLOAT32) NumIllegalInCluster / NumCharInCluster;

       if (PercentIllegal > MaxIllegal) {

         destroy(SearchState);

         return (TRUE);

       }

     }

   }

   return (FALSE);


 }                                // MultipleCharSamples


 double InvertMatrix(const float* input, int size, float* inv) {

   // Allocate memory for the 2D arrays.

   GENERIC_2D_ARRAY<double> U(size, size, 0.0);

   GENERIC_2D_ARRAY<double> U_inv(size, size, 0.0);

   GENERIC_2D_ARRAY<double> L(size, size, 0.0);


   // Initialize the working matrices. U starts as input, L as I and U_inv as O.

   int row;

   int col;

   for (row = 0; row < size; row++) {

     for (col = 0; col < size; col++) {

       U[row][col] = input[row*size + col];

       L[row][col] = row == col ? 1.0 : 0.0;

       U_inv[row][col] = 0.0;

     }

   }


   // Compute forward matrix by inversion by LU decomposition of input.

   for (col = 0; col < size; ++col) {

     // Find best pivot

     int best_row = 0;

     double best_pivot = -1.0;

     for (row = col; row < size; ++row) {

       if (Abs(U[row][col]) > best_pivot) {

         best_pivot = Abs(U[row][col]);

         best_row = row;

       }

     }

     // Exchange pivot rows.

     if (best_row != col) {

       for (int k = 0; k < size; ++k) {

         double tmp = U[best_row][k];

         U[best_row][k] = U[col][k];

         U[col][k] = tmp;

         tmp = L[best_row][k];

         L[best_row][k] = L[col][k];

         L[col][k] = tmp;

       }

     }

     // Now do the pivot itself.

     for (row = col + 1; row < size; ++row) {

       double ratio = -U[row][col] / U[col][col];

       for (int j = col; j < size; ++j) {

         U[row][j] += U[col][j] * ratio;

       }

       for (int k = 0; k < size; ++k) {

         L[row][k] += L[col][k] * ratio;

       }

     }

   }

   // Next invert U.

   for (col = 0; col < size; ++col) {

     U_inv[col][col] = 1.0 / U[col][col];

     for (row = col - 1; row >= 0; --row) {

       double total = 0.0;

       for (int k = col; k > row; --k) {

         total += U[row][k] * U_inv[k][col];

       }

       U_inv[row][col] = -total / U[row][row];

     }

   }

   // Now the answer is U_inv.L.

   for (row = 0; row < size; row++) {

     for (col = 0; col < size; col++) {

       double sum = 0.0;

       for (int k = row; k < size; ++k) {

         sum += U_inv[row][k] * L[k][col];

       }

       inv[row*size + col] = sum;

     }

   }

   // Check matrix product.

   double error_sum = 0.0;

   for (row = 0; row < size; row++) {

     for (col = 0; col < size; col++) {

       double sum = 0.0;

       for (int k = 0; k < size; ++k) {

         sum += input[row*size + k] * inv[k *size + col];

       }

       if (row != col) {

         error_sum += Abs(sum);

       }

     }

   }

   return error_sum;

 }

MINVARIANCE
#define MINVARIANCE
Definition: cluster.cpp:142

Odd
#define Odd(N)
Definition: cluster.cpp:206

STATISTICS::Max
FLOAT32 * Max
Definition: cluster.cpp:175

tesseract::GenericHeap::Pop
bool Pop(Pair *entry)
Definition: genericheap.h:116

memfree
void memfree(void *element)
Definition: freelist.cpp:30

STATISTICS::Min
FLOAT32 * Min
Definition: cluster.cpp:174

PARAM_DESC::Min
FLOAT32 Min
Definition: ocrfeatures.h:49

UniformBucket
uinT16 UniformBucket(PARAM_DESC *ParamDesc, FLOAT32 x, FLOAT32 Mean, FLOAT32 StdDev)
Definition: cluster.cpp:2124

MakeEllipticalProto
PROTOTYPE * MakeEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets)
Definition: cluster.cpp:1264

INITIALDELTA
#define INITIALDELTA

FTABLE_Y
#define FTABLE_Y
Definition: cluster.cpp:32

PARAM_DESC
Definition: ocrfeatures.h:46

BUCKETS
Definition: cluster.cpp:178

sample::Left
struct sample * Left
Definition: cluster.h:36

STATISTICS::AvgVariance
FLOAT32 AvgVariance
Definition: cluster.cpp:172

NewChiStruct
CHISTRUCT * NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha)
Definition: cluster.cpp:2370

ClusteringContext::tree
KDTREE * tree
Definition: cluster.cpp:199

FLOAT32
float FLOAT32
Definition: host.h:111

ILLEGAL_CHAR
#define ILLEGAL_CHAR

automatic
Definition: cluster.h:45

KDTREE
Definition: kdtree.h:49

ClusteringContext
Definition: cluster.cpp:196

emalloc.h

kdpair.h

NewMixedProto
PROTOTYPE * NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1598

FreeBuckets
void FreeBuckets(BUCKETS *Buckets)
Definition: cluster.cpp:2201

Independent
BOOL8 Independent(PARAM_DESC ParamDesc[], inT16 N, FLOAT32 *CoVariance, FLOAT32 Independence)
Definition: cluster.cpp:1665

tesseract::GenericHeap
Definition: genericheap.h:58

CHISTRUCT
Definition: cluster.cpp:189

sample::Clustered
unsigned Clustered
Definition: cluster.h:33

MINBUCKETS
#define MINBUCKETS
Definition: cluster.h:26

NIL_LIST
#define NIL_LIST
Definition: oldlist.h:126

FreeCluster
void FreeCluster(CLUSTER *Cluster)
Definition: cluster.cpp:2220

MakeDimRandom
void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc)
Definition: cluster.cpp:1369

PROTOTYPE::Distrib
DISTRIBUTION * Distrib
Definition: cluster.h:77

TestEllipticalProto
PROTOTYPE * TestEllipticalProto(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1119

MultipleCharSamples
BOOL8 MultipleCharSamples(CLUSTERER *Clusterer, CLUSTER *Cluster, FLOAT32 MaxIllegal)
Definition: cluster.cpp:2512

FTABLE_X
#define FTABLE_X
Definition: cluster.cpp:31

AdjustBuckets
void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount)
Definition: cluster.cpp:2301

sample::SampleCount
unsigned SampleCount
Definition: cluster.h:35

tprintf
#define tprintf(...)
Definition: tprintf.h:31

tesseract::KDPair::key
Key key
Definition: kdpair.h:46

BUCKETTABLESIZE
#define BUCKETTABLESIZE
Definition: cluster.cpp:160

LOOKUPTABLESIZE
#define LOOKUPTABLESIZE
Definition: cluster.cpp:228

genericheap.h

BUCKETS::Bucket
uinT16 Bucket[BUCKETTABLESIZE]
Definition: cluster.cpp:184

CLUSTERER::bucket_cache
BUCKETS * bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS+1-MINBUCKETS]
Definition: cluster.h:95

InvertMatrix
double InvertMatrix(const float *input, int size, float *inv)
Definition: cluster.cpp:2569

danerror.h

sample::Right
struct sample * Right
Definition: cluster.h:37

KDNearestNeighborSearch
void KDNearestNeighborSearch(KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[])
Definition: kdtree.cpp:322

MakeClusterer
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:400

BOOL8
unsigned char BOOL8
Definition: host.h:113

ComputeChiSquared
FLOAT64 ComputeChiSquared(uinT16 DegreesOfFreedom, FLOAT64 Alpha)
Definition: cluster.cpp:1897

SqrtOf2Pi
#define SqrtOf2Pi
Definition: cluster.cpp:218

MakeSample
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
Definition: cluster.cpp:457

Abs
#define Abs(N)
Definition: cluster.cpp:208

FLOATUNION::Spherical
FLOAT32 Spherical
Definition: cluster.h:63

DISTRIBUTION_COUNT
Definition: cluster.h:59

MakeDegenerateProto
PROTOTYPE * MakeDegenerateProto(uinT16 N, CLUSTER *Cluster, STATISTICS *Statistics, PROTOSTYLE Style, inT32 MinSamples)
Definition: cluster.cpp:1077

OptimumNumberOfBuckets
uinT16 OptimumNumberOfBuckets(uinT32 SampleCount)
Definition: cluster.cpp:1859

DISTRIBUTION
DISTRIBUTION
Definition: cluster.h:58

PROTOTYPE::LogMagnitude
FLOAT32 LogMagnitude
Definition: cluster.h:80

PROTOTYPE::Variance
FLOATUNION Variance
Definition: cluster.h:81

PROTOTYPE::Mean
FLOAT32 * Mean
Definition: cluster.h:78

MAXNEIGHBORS
#define MAXNEIGHBORS

normal
Definition: cluster.h:59

CLUSTERER::KDTree
KDTREE * KDTree
Definition: cluster.h:90

spherical
Definition: cluster.h:45

tprintf.h

PARAM_DESC::HalfRange
FLOAT32 HalfRange
Definition: ocrfeatures.h:52

NORMALEXTENT
#define NORMALEXTENT
Definition: cluster.cpp:161

BUCKETS::Confidence
FLOAT64 Confidence
Definition: cluster.cpp:181

ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115

ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:84

FreeKDTree
void FreeKDTree(KDTREE *Tree)
Definition: kdtree.cpp:351

NewEllipticalProto
PROTOTYPE * NewEllipticalProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1553

freelist.h

CLUSTERER::NumberOfSamples
inT32 NumberOfSamples
Definition: cluster.h:89

PROTOTYPE::Significant
unsigned Significant
Definition: cluster.h:68

PROTOTYPE::Weight
FLOATUNION Weight
Definition: cluster.h:83

MINSAMPLESNEEDED
#define MINSAMPLESNEEDED
Definition: cluster.cpp:152

CLUSTERCONFIG::Independence
FLOAT32 Independence
Definition: cluster.h:53

MAXDISTANCE
#define MAXDISTANCE

STATISTICS
Definition: cluster.cpp:171

CLUSTERCONFIG::MaxIllegal
FLOAT32 MaxIllegal
Definition: cluster.h:51

CLUSTERER
Definition: cluster.h:86

ClusteringContext::next
inT32 next
Definition: cluster.cpp:200

PROTOTYPE::TotalMagnitude
FLOAT32 TotalMagnitude
Definition: cluster.h:79

GENERIC_2D_ARRAY
Definition: matrix.h:39

ListEntryMatch
int ListEntryMatch(void *arg1, void *arg2)
Definition: cluster.cpp:2284

CLUSTERCONFIG::MagicSamples
int MagicSamples
Definition: cluster.h:55

search
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:413

uniform
Definition: cluster.h:59

MINSAMPLES
#define MINSAMPLES
Definition: cluster.cpp:151

CHISTRUCT::ChiSquared
FLOAT64 ChiSquared
Definition: cluster.cpp:192

UniformDensity
FLOAT64 UniformDensity(inT32 x)
Definition: cluster.cpp:1968

uinT32
unsigned int uinT32
Definition: host.h:103

MakePotentialClusters
void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, inT32 Level)
Definition: cluster.cpp:773

MakeBuckets
BUCKETS * MakeBuckets(DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence)
Definition: cluster.cpp:1761

AlphaMatch
int AlphaMatch(void *arg1, void *arg2)
Definition: cluster.cpp:2349

TEMPCLUSTER::Neighbor
CLUSTER * Neighbor
Definition: cluster.cpp:165

HOTELLING
#define HOTELLING
Definition: cluster.cpp:30

ClusterSamples
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
Definition: cluster.cpp:515

PARAM_DESC::MidRange
FLOAT32 MidRange
Definition: ocrfeatures.h:53

PROTOTYPE::NumSamples
unsigned NumSamples
Definition: cluster.h:75

FreeProtoList
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:571

sample::Mean
FLOAT32 Mean[1]
Definition: cluster.h:39

ChiArea
FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x)
Definition: cluster.cpp:2464

CLUSTERER::Root
CLUSTER * Root
Definition: cluster.h:91

PROTOTYPE
Definition: cluster.h:67

void_proc
void(* void_proc)(...)
Definition: cutil.h:66

Solve
FLOAT64 Solve(SOLVEFUNC Function, void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
Definition: cluster.cpp:2397

const.h

CHIACCURACY
#define CHIACCURACY

BUCKETS::SampleCount
uinT32 SampleCount
Definition: cluster.cpp:180

RootOf
#define RootOf(T)
Definition: kdtree.h:58

Mirror
#define Mirror(N, R)
Definition: cluster.cpp:207

BUCKETS::NumberOfBuckets
uinT16 NumberOfBuckets
Definition: cluster.cpp:183

InitBuckets
void InitBuckets(BUCKETS *Buckets)
Definition: cluster.cpp:2325

CLUSTERCONFIG::Confidence
FLOAT64 Confidence
Definition: cluster.h:54

Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:47

destroy_nodes
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:204

ClusteringContext::heap
ClusterHeap * heap
Definition: cluster.cpp:197

tesseract::GenericHeap::Push
void Push(Pair *entry)
Definition: genericheap.h:95

ALREADYCLUSTERED
#define ALREADYCLUSTERED
Definition: cluster.h:133

PROTOTYPE::Magnitude
FLOATUNION Magnitude
Definition: cluster.h:82

FLOATUNION::Elliptical
FLOAT32 * Elliptical
Definition: cluster.h:64

PROTOTYPE::Cluster
CLUSTER * Cluster
Definition: cluster.h:76

FindNearestNeighbor
CLUSTER * FindNearestNeighbor(KDTREE *Tree, CLUSTER *Cluster, FLOAT32 *Distance)
Definition: cluster.cpp:807

CLUSTERCONFIG::MinSamples
FLOAT32 MinSamples
Definition: cluster.h:50

FTable
const double FTable[FTABLE_Y][FTABLE_X]
Definition: cluster.cpp:35

NumBucketsMatch
int NumBucketsMatch(void *arg1, void *arg2)
Definition: cluster.cpp:2266

CLUSTERCONFIG::ProtoStyle
PROTOSTYLE ProtoStyle
Definition: cluster.h:49

MakePrototype
PROTOTYPE * MakePrototype(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster)
Definition: cluster.cpp:982

PARAM_DESC::NonEssential
inT8 NonEssential
Definition: ocrfeatures.h:48

DistributionOK
BOOL8 DistributionOK(BUCKETS *Buckets)
Definition: cluster.cpp:2159

CLUSTERER::NumChar
inT32 NumChar
Definition: cluster.h:93

PARAM_DESC::Circular
inT8 Circular
Definition: ocrfeatures.h:47

CLUSTERCONFIG
Definition: cluster.h:48

first_node
#define first_node(l)
Definition: oldlist.h:139

destroy
LIST destroy(LIST list)
Definition: oldlist.cpp:187

tesseract::KDPairInc
Definition: kdpair.h:51

NormalBucket
uinT16 NormalBucket(PARAM_DESC *ParamDesc, FLOAT32 x, FLOAT32 Mean, FLOAT32 StdDev)
Definition: cluster.cpp:2088

FillBuckets
void FillBuckets(BUCKETS *Buckets, CLUSTER *Cluster, uinT16 Dim, PARAM_DESC *ParamDesc, FLOAT32 Mean, FLOAT32 StdDev)
Definition: cluster.cpp:2015

pop
LIST pop(LIST list)
Definition: oldlist.cpp:305

helpers.h

CLUSTERER::ProtoList
LIST ProtoList
Definition: cluster.h:92

KDStore
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data)
Definition: kdtree.cpp:218

ComputePrototypes
void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
Definition: cluster.cpp:936

Config
CLUSTERCONFIG Config
Definition: commontraining.cpp:51

D_random
Definition: cluster.h:59

mixed
Definition: cluster.h:45

TEMPCLUSTER
Definition: cluster.cpp:163

list_rec
Definition: oldlist.h:127

sample::Prototype
unsigned Prototype
Definition: cluster.h:34

MakeNewCluster
CLUSTER * MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster)
Definition: cluster.cpp:846

tesseract::KDPair::data
Data data
Definition: kdpair.h:45

Mean
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension)
Definition: cluster.cpp:650

FALSE
#define FALSE
Definition: capi.h:29

CLUSTERER::ParamDesc
PARAM_DESC * ParamDesc
Definition: cluster.h:88

PARAM_DESC::Range
FLOAT32 Range
Definition: ocrfeatures.h:51

Efree
void Efree(void *ptr)
Definition: emalloc.cpp:79

MergeClusters
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])

ComputeStatistics
STATISTICS * ComputeStatistics(inT16 N, PARAM_DESC ParamDesc[], CLUSTER *Cluster)
Definition: cluster.cpp:1431

sample
Definition: cluster.h:32

MakeSphericalProto
PROTOTYPE * MakeSphericalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets)
Definition: cluster.cpp:1226

NextSample
CLUSTER * NextSample(LIST *SearchState)
Definition: cluster.cpp:625

DegreesOfFreedom
uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets)
Definition: cluster.cpp:2243

PI
#define PI
Definition: const.h:19

InitSampleSearch
#define InitSampleSearch(S, C)
Definition: cluster.h:105

TRUE
#define TRUE
Definition: capi.h:28

NewSphericalProto
PROTOTYPE * NewSphericalProto(uinT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1519

PROTOTYPE::Style
unsigned Style
Definition: cluster.h:74

MAXBUCKETS
#define MAXBUCKETS
Definition: cluster.h:27

ClusterPair
tesseract::KDPairInc< float, TEMPCLUSTER * > ClusterPair
Definition: cluster.cpp:168

FreeClusterer
void FreeClusterer(CLUSTERER *Clusterer)
Definition: cluster.cpp:543

MAX_FLOAT32
#define MAX_FLOAT32
Definition: host.h:124

BUCKETS::Count
uinT32 * Count
Definition: cluster.cpp:185

KDWalk
void KDWalk(KDTREE *Tree, void_proc action, void *context)
Definition: kdtree.cpp:332

SOLVEFUNC
FLOAT64(* SOLVEFUNC)(CHISTRUCT *, double)
Definition: cluster.cpp:204

CreateClusterTree
void CreateClusterTree(CLUSTERER *Clusterer)
Definition: cluster.cpp:705

TEMPCLUSTER::Cluster
CLUSTER * Cluster
Definition: cluster.cpp:164

matrix.h

DELTARATIO
#define DELTARATIO

DoError
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42

sample::CharID
inT32 CharID
Definition: cluster.h:38

FreePrototype
void FreePrototype(void *arg)
Definition: cluster.cpp:586

GetBuckets
BUCKETS * GetBuckets(CLUSTERER *clusterer, DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence)
Definition: cluster.cpp:1713

NormalDensity
FLOAT64 NormalDensity(inT32 x)
Definition: cluster.cpp:1951

NULL
#define NULL
Definition: host.h:144

elliptical
Definition: cluster.h:45

DENSITYFUNC
FLOAT64(* DENSITYFUNC)(inT32)
Definition: cluster.cpp:203

KDDelete
void KDDelete(KDTREE *Tree, FLOAT32 Key[], void *Data)
Definition: kdtree.cpp:265

MakeMixedProto
PROTOTYPE * MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *NormalBuckets, FLOAT64 Confidence)
Definition: cluster.cpp:1307

BUCKETS::Distribution
DISTRIBUTION Distribution
Definition: cluster.cpp:179

MAXDEGREESOFFREEDOM
#define MAXDEGREESOFFREEDOM
Definition: cluster.cpp:229

PROTOTYPE::Merged
unsigned Merged
Definition: cluster.h:69

CHISTRUCT::DegreesOfFreedom
uinT16 DegreesOfFreedom
Definition: cluster.cpp:190

MakeDimUniform
void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics)
Definition: cluster.cpp:1394

MakeKDTree
KDTREE * MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[])
Definition: kdtree.cpp:182

ClusteringContext::candidates
TEMPCLUSTER * candidates
Definition: cluster.cpp:198

StandardDeviation
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension)
Definition: cluster.cpp:664

CLUSTERER::SampleSize
inT16 SampleSize
Definition: cluster.h:87

FreeStatistics
void FreeStatistics(STATISTICS *Statistics)
Definition: cluster.cpp:2188

PROTOSTYLE
PROTOSTYLE
Definition: cluster.h:44

CHISTRUCT::Alpha
FLOAT64 Alpha
Definition: cluster.cpp:191

STATISTICS::CoVariance
FLOAT32 * CoVariance
Definition: cluster.cpp:173

NewSimpleProto
PROTOTYPE * NewSimpleProto(inT16 N, CLUSTER *Cluster)
Definition: cluster.cpp:1623

Integral
FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx)
Definition: cluster.cpp:1988

cluster.h

PARAM_DESC::Max
FLOAT32 Max
Definition: ocrfeatures.h:50

BUCKETS::ExpectedCount
FLOAT32 * ExpectedCount
Definition: cluster.cpp:186

push
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323

uinT16
unsigned short uinT16
Definition: host.h:101

FLOAT64
double FLOAT64
Definition: host.h:112

ClusterHeap
tesseract::GenericHeap< ClusterPair > ClusterHeap
Definition: cluster.cpp:169

BUCKETS::ChiSquared
FLOAT64 ChiSquared
Definition: cluster.cpp:182

MINALPHA
#define MINALPHA

inT16
short inT16
Definition: host.h:100

inT32
int inT32
Definition: host.h:102

uinT8
unsigned char uinT8
Definition: host.h:99