tesseract v5.3.3.20231005
cluster.cpp File Reference
#include "cluster.h"
#include "genericheap.h"
#include "kdpair.h"
#include "matrix.h"
#include "tprintf.h"
#include "helpers.h"
#include <cfloat>
#include <cmath>
#include <vector>

Go to the source code of this file.

Classes

struct  tesseract::TEMPCLUSTER
 
struct  tesseract::STATISTICS
 
struct  tesseract::BUCKETS
 
struct  tesseract::CHISTRUCT
 
struct  tesseract::ClusteringContext
 

Namespaces

namespace  tesseract
 

Macros

#define _USE_MATH_DEFINES
 
#define HOTELLING   1
 
#define FTABLE_X   10
 
#define FTABLE_Y   100
 
#define MINVARIANCE   0.0004
 
#define MINSAMPLESPERBUCKET   5
 
#define MINSAMPLES   (MINBUCKETS * MINSAMPLESPERBUCKET)
 
#define MINSAMPLESNEEDED   1
 
#define BUCKETTABLESIZE   1024
 
#define NORMALEXTENT   3.0
 
#define Odd(N)   ((N) % 2)
 
#define Mirror(N, R)   ((R) - (N)-1)
 
#define Abs(N)   (((N) < 0) ? (-(N)) : (N))
 
#define SqrtOf2Pi   2.506628275
 
#define LOOKUPTABLESIZE   8
 
#define MAXDEGREESOFFREEDOM   MAXBUCKETS
 
#define MAXNEIGHBORS   2
 
#define MAXDISTANCE   FLT_MAX
 
#define CHIACCURACY   0.01
 
#define MINALPHA   (1e-200)
 
#define INITIALDELTA   0.1
 
#define DELTARATIO   0.1
 
#define ILLEGAL_CHAR   2
 

Typedefs

using tesseract::ClusterPair = tesseract::KDPairInc< float, TEMPCLUSTER * >
 
using tesseract::ClusterHeap = tesseract::GenericHeap< ClusterPair >
 
using tesseract::DENSITYFUNC = double(*)(int32_t)
 
using tesseract::SOLVEFUNC = double(*)(CHISTRUCT *, double)
 

Functions

CLUSTERER * tesseract::MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[])
 
SAMPLE * tesseract::MakeSample (CLUSTERER *Clusterer, const float *Feature, uint32_t CharID)
 
LIST tesseract::ClusterSamples (CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
 
void tesseract::FreeClusterer (CLUSTERER *Clusterer)
 
void tesseract::FreeProtoList (LIST *ProtoList)
 
void tesseract::FreePrototype (void *arg)
 
CLUSTER * tesseract::NextSample (LIST *SearchState)
 
float tesseract::Mean (PROTOTYPE *Proto, uint16_t Dimension)
 
float tesseract::StandardDeviation (PROTOTYPE *Proto, uint16_t Dimension)
 
int32_t tesseract::MergeClusters (int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])
 

Variables

const double tesseract::FTable [FTABLE_Y][FTABLE_X]
 

Macro Definition Documentation

◆ _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

Definition at line 18 of file cluster.cpp.

◆ Abs

#define Abs(N)   (((N) < 0) ? (-(N)) : (N))

Definition at line 1316 of file cluster.cpp.

◆ BUCKETTABLESIZE

#define BUCKETTABLESIZE   1024

Define the size of the table which maps normalized samples to histogram buckets (BUCKETTABLESIZE). Also define the number of standard deviations in a normal distribution which are considered to be significant (NORMALEXTENT). The mapping table will be defined in such a way that it covers the specified number of standard deviations on either side of the mean. BUCKETTABLESIZE should always be even.

Definition at line 1254 of file cluster.cpp.

◆ CHIACCURACY

#define CHIACCURACY   0.01

◆ DELTARATIO

#define DELTARATIO   0.1

◆ FTABLE_X

#define FTABLE_X   10

Definition at line 36 of file cluster.cpp.

◆ FTABLE_Y

#define FTABLE_Y   100

Definition at line 37 of file cluster.cpp.

◆ HOTELLING

#define HOTELLING   1

Definition at line 35 of file cluster.cpp.

◆ ILLEGAL_CHAR

#define ILLEGAL_CHAR   2

◆ INITIALDELTA

#define INITIALDELTA   0.1

◆ LOOKUPTABLESIZE

#define LOOKUPTABLESIZE   8

Define lookup tables used to compute the number of histogram buckets that should be used for a given number of samples.

Definition at line 1335 of file cluster.cpp.

◆ MAXDEGREESOFFREEDOM

#define MAXDEGREESOFFREEDOM   MAXBUCKETS

Definition at line 1336 of file cluster.cpp.

◆ MAXDISTANCE

#define MAXDISTANCE   FLT_MAX

◆ MAXNEIGHBORS

#define MAXNEIGHBORS   2

◆ MINALPHA

#define MINALPHA   (1e-200)

◆ MINSAMPLES

#define MINSAMPLES   (MINBUCKETS * MINSAMPLESPERBUCKET)

Definition at line 1245 of file cluster.cpp.

◆ MINSAMPLESNEEDED

#define MINSAMPLESNEEDED   1

Definition at line 1246 of file cluster.cpp.

◆ MINSAMPLESPERBUCKET

#define MINSAMPLESPERBUCKET   5

Define the absolute minimum number of samples (MINSAMPLES) which must be present in order to accurately test hypotheses about underlying probability distributions. Define separately the minimum number of samples (MINSAMPLESNEEDED) that are needed before a statistical analysis is attempted; this number should be equal to MINSAMPLES but can be set to a lower number for early testing when very few samples are available.

Definition at line 1244 of file cluster.cpp.

◆ MINVARIANCE

#define MINVARIANCE   0.0004

Define the variance which will be used as a minimum variance for any dimension of any feature. Since most features are calculated from numbers with a precision no better than 1 in 128, the variance should never be less than the square of that precision for parameters whose range is 1.

Definition at line 1236 of file cluster.cpp.

◆ Mirror

#define Mirror(N, R)   ((R) - (N)-1)

Definition at line 1315 of file cluster.cpp.

◆ NORMALEXTENT

#define NORMALEXTENT   3.0

Definition at line 1255 of file cluster.cpp.

◆ Odd

#define Odd(N)   ((N) % 2)

Definition at line 1314 of file cluster.cpp.

◆ SqrtOf2Pi

#define SqrtOf2Pi   2.506628275

The following variables describe a discrete normal distribution which is used by NormalDensity() and NormalBucket(). The constant NORMALEXTENT determines how many standard deviations of the distribution are mapped onto the fixed discrete range of x: x=0 is mapped to -NORMALEXTENT standard deviations and x=BUCKETTABLESIZE is mapped to +NORMALEXTENT standard deviations.

Definition at line 1326 of file cluster.cpp.