tesseract  4.00.00dev
statistc.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: statistc.cpp (Formerly stats.c)
3  * Description: Simple statistical package for integer values.
4  * Author: Ray Smith
5  * Created: Mon Feb 04 16:56:05 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "statistc.h"
26 #include <string.h>
27 #include <math.h>
28 #include <stdlib.h>
29 #include "helpers.h"
30 #include "scrollview.h"
31 #include "tprintf.h"
32 
34 
35 /**********************************************************************
36  * STATS::STATS
37  *
38  * Construct a new stats element by allocating and zeroing the memory.
39  **********************************************************************/
40 STATS::STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
41  if (max_bucket_value_plus_1 <= min_bucket_value) {
42  min_bucket_value = 0;
43  max_bucket_value_plus_1 = 1;
44  }
45  rangemin_ = min_bucket_value; // setup
46  rangemax_ = max_bucket_value_plus_1;
47  buckets_ = new inT32[rangemax_ - rangemin_];
48  clear();
49 }
50 
52  rangemax_ = 0;
53  rangemin_ = 0;
54  buckets_ = NULL;
55 }
56 
57 /**********************************************************************
58  * STATS::set_range
59  *
60  * Alter the range on an existing stats element.
61  **********************************************************************/
62 bool STATS::set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
63  if (max_bucket_value_plus_1 <= min_bucket_value) {
64  return false;
65  }
66  if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
67  delete [] buckets_;
68  buckets_ = new inT32[max_bucket_value_plus_1 - min_bucket_value];
69  }
70  rangemin_ = min_bucket_value; // setup
71  rangemax_ = max_bucket_value_plus_1;
72  clear(); // zero it
73  return true;
74 }
75 
76 /**********************************************************************
77  * STATS::clear
78  *
79  * Clear out the STATS class by zeroing all the buckets.
80  **********************************************************************/
81 void STATS::clear() { // clear out buckets
82  total_count_ = 0;
83  if (buckets_ != NULL)
84  memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
85 }
86 
87 /**********************************************************************
88  * STATS::~STATS
89  *
90  * Destructor for a stats class.
91  **********************************************************************/
92 STATS::~STATS() { delete[] buckets_; }
93 
94 /**********************************************************************
95  * STATS::add
96  *
97  * Add a set of samples to (or delete from) a pile.
98  **********************************************************************/
99 void STATS::add(inT32 value, inT32 count) {
100  if (buckets_ == NULL) {
101  return;
102  }
103  value = ClipToRange(value, rangemin_, rangemax_ - 1);
104  buckets_[value - rangemin_] += count;
105  total_count_ += count; // keep count of total
106 }
107 
108 /**********************************************************************
109  * STATS::mode
110  *
111  * Find the mode of a stats class.
112  **********************************************************************/
113 inT32 STATS::mode() const { // get mode of samples
114  if (buckets_ == NULL) {
115  return rangemin_;
116  }
117  inT32 max = buckets_[0]; // max cell count
118  inT32 maxindex = 0; // index of max
119  for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
120  if (buckets_[index] > max) {
121  max = buckets_[index]; // find biggest
122  maxindex = index;
123  }
124  }
125  return maxindex + rangemin_; // index of biggest
126 }
127 
128 /**********************************************************************
129  * STATS::mean
130  *
131  * Find the mean of a stats class.
132  **********************************************************************/
133 double STATS::mean() const { //get mean of samples
134  if (buckets_ == NULL || total_count_ <= 0) {
135  return static_cast<double>(rangemin_);
136  }
137  inT64 sum = 0;
138  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
139  sum += static_cast<inT64>(index) * buckets_[index];
140  }
141  return static_cast<double>(sum) / total_count_ + rangemin_;
142 }
143 
144 /**********************************************************************
145  * STATS::sd
146  *
147  * Find the standard deviation of a stats class.
148  **********************************************************************/
149 double STATS::sd() const { //standard deviation
150  if (buckets_ == NULL || total_count_ <= 0) {
151  return 0.0;
152  }
153  inT64 sum = 0;
154  double sqsum = 0.0;
155  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
156  sum += static_cast<inT64>(index) * buckets_[index];
157  sqsum += static_cast<double>(index) * index * buckets_[index];
158  }
159  double variance = static_cast<double>(sum) / total_count_;
160  variance = sqsum / total_count_ - variance * variance;
161  if (variance > 0.0)
162  return sqrt(variance);
163  return 0.0;
164 }
165 
166 /**********************************************************************
167  * STATS::ile
168  *
169  * Returns the fractile value such that frac fraction (in [0,1]) of samples
170  * has a value less than the return value.
171  **********************************************************************/
172 double STATS::ile(double frac) const {
173  if (buckets_ == NULL || total_count_ == 0) {
174  return static_cast<double>(rangemin_);
175  }
176 #if 0
177  // TODO(rays) The existing code doesn't seem to be doing the right thing
178  // with target a double but this substitute crashes the code that uses it.
179  // Investigate and fix properly.
180  int target = IntCastRounded(frac * total_count_);
181  target = ClipToRange(target, 1, total_count_);
182 #else
183  double target = frac * total_count_;
184  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
185 #endif
186  int sum = 0;
187  int index = 0;
188  for (index = 0; index < rangemax_ - rangemin_ && sum < target;
189  sum += buckets_[index++]);
190  if (index > 0) {
191  ASSERT_HOST(buckets_[index - 1] > 0);
192  return rangemin_ + index -
193  static_cast<double>(sum - target) / buckets_[index - 1];
194  } else {
195  return static_cast<double>(rangemin_);
196  }
197 }
198 
199 /**********************************************************************
200  * STATS::min_bucket
201  *
202  * Find REAL minimum bucket - ile(0.0) isn't necessarily correct
203  **********************************************************************/
204 inT32 STATS::min_bucket() const { // Find min
205  if (buckets_ == NULL || total_count_ == 0) {
206  return rangemin_;
207  }
208  inT32 min = 0;
209  for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
210  return rangemin_ + min;
211 }
212 
213 /**********************************************************************
214  * STATS::max_bucket
215  *
216  * Find REAL maximum bucket - ile(1.0) isn't necessarily correct
217  **********************************************************************/
218 
219 inT32 STATS::max_bucket() const { // Find max
220  if (buckets_ == NULL || total_count_ == 0) {
221  return rangemin_;
222  }
223  inT32 max;
224  for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
225  return rangemin_ + max;
226 }
227 
228 /**********************************************************************
229  * STATS::median
230  *
231  * Finds a more useful estimate of median than ile(0.5).
232  *
233  * Overcomes a problem with ile() - if the samples are, for example,
234  * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
235  * between 6 and 13 = 9.5
236  **********************************************************************/
237 double STATS::median() const { //get median
238  if (buckets_ == NULL) {
239  return static_cast<double>(rangemin_);
240  }
241  double median = ile(0.5);
242  int median_pile = static_cast<int>(floor(median));
243  if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
244  inT32 min_pile;
245  inT32 max_pile;
246  /* Find preceding non zero pile */
247  for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
248  /* Find following non zero pile */
249  for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
250  median = (min_pile + max_pile) / 2.0;
251  }
252  return median;
253 }
254 
255 /**********************************************************************
256  * STATS::local_min
257  *
258  * Return TRUE if this point is a local min.
259  **********************************************************************/
260 bool STATS::local_min(inT32 x) const {
261  if (buckets_ == NULL) {
262  return false;
263  }
264  x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
265  if (buckets_[x] == 0)
266  return true;
267  inT32 index; // table index
268  for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
269  if (index >= 0 && buckets_[index] < buckets_[x])
270  return false;
271  for (index = x + 1; index < rangemax_ - rangemin_ &&
272  buckets_[index] == buckets_[x]; ++index);
273  if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
274  return false;
275  else
276  return true;
277 }
278 
279 /**********************************************************************
280  * STATS::smooth
281  *
282  * Apply a triangular smoothing filter to the stats.
283  * This makes the modes a bit more useful.
284  * The factor gives the height of the triangle, i.e. the weight of the
285  * centre.
286  **********************************************************************/
287 void STATS::smooth(inT32 factor) {
288  if (buckets_ == NULL || factor < 2) {
289  return;
290  }
291  STATS result(rangemin_, rangemax_);
292  int entrycount = rangemax_ - rangemin_;
293  for (int entry = 0; entry < entrycount; entry++) {
294  //centre weight
295  int count = buckets_[entry] * factor;
296  for (int offset = 1; offset < factor; offset++) {
297  if (entry - offset >= 0)
298  count += buckets_[entry - offset] * (factor - offset);
299  if (entry + offset < entrycount)
300  count += buckets_[entry + offset] * (factor - offset);
301  }
302  result.add(entry + rangemin_, count);
303  }
304  total_count_ = result.total_count_;
305  memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
306 }
307 
308 /**********************************************************************
309  * STATS::cluster
310  *
311  * Cluster the samples into max_cluster clusters.
312  * Each call runs one iteration. The array of clusters must be
313  * max_clusters+1 in size as cluster 0 is used to indicate which samples
314  * have been used.
315  * The return value is the current number of clusters.
316  **********************************************************************/
317 
318 inT32 STATS::cluster(float lower, // thresholds
319  float upper,
320  float multiple, // distance threshold
321  inT32 max_clusters, // max no to make
322  STATS *clusters) { // array of clusters
323  BOOL8 new_cluster; // added one
324  float *centres; // cluster centres
325  inT32 entry; // bucket index
326  inT32 cluster; // cluster index
327  inT32 best_cluster; // one to assign to
328  inT32 new_centre = 0; // residual mode
329  inT32 new_mode; // pile count of new_centre
330  inT32 count; // pile to place
331  float dist; // from cluster
332  float min_dist; // from best_cluster
333  inT32 cluster_count; // no of clusters
334 
335  if (buckets_ == NULL || max_clusters < 1)
336  return 0;
337  centres = new float[max_clusters + 1];
338  for (cluster_count = 1; cluster_count <= max_clusters
339  && clusters[cluster_count].buckets_ != NULL
340  && clusters[cluster_count].total_count_ > 0;
341  cluster_count++) {
342  centres[cluster_count] =
343  static_cast<float>(clusters[cluster_count].ile(0.5));
344  new_centre = clusters[cluster_count].mode();
345  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
346  && entry >= rangemin_
347  && pile_count(entry) <= pile_count(entry + 1);
348  entry--) {
349  count = pile_count(entry) - clusters[0].pile_count(entry);
350  if (count > 0) {
351  clusters[cluster_count].add(entry, count);
352  clusters[0].add (entry, count);
353  }
354  }
355  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
356  && entry < rangemax_
357  && pile_count(entry) <= pile_count(entry - 1);
358  entry++) {
359  count = pile_count(entry) - clusters[0].pile_count(entry);
360  if (count > 0) {
361  clusters[cluster_count].add(entry, count);
362  clusters[0].add(entry, count);
363  }
364  }
365  }
366  cluster_count--;
367 
368  if (cluster_count == 0) {
369  clusters[0].set_range(rangemin_, rangemax_);
370  }
371  do {
372  new_cluster = FALSE;
373  new_mode = 0;
374  for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
375  count = buckets_[entry] - clusters[0].buckets_[entry];
376  //remaining pile
377  if (count > 0) { //any to handle
378  min_dist = static_cast<float>(MAX_INT32);
379  best_cluster = 0;
380  for (cluster = 1; cluster <= cluster_count; cluster++) {
381  dist = entry + rangemin_ - centres[cluster];
382  //find distance
383  if (dist < 0)
384  dist = -dist;
385  if (dist < min_dist) {
386  min_dist = dist; //find least
387  best_cluster = cluster;
388  }
389  }
390  if (min_dist > upper //far enough for new
391  && (best_cluster == 0
392  || entry + rangemin_ > centres[best_cluster] * multiple
393  || entry + rangemin_ < centres[best_cluster] / multiple)) {
394  if (count > new_mode) {
395  new_mode = count;
396  new_centre = entry + rangemin_;
397  }
398  }
399  }
400  }
401  // need new and room
402  if (new_mode > 0 && cluster_count < max_clusters) {
403  cluster_count++;
404  new_cluster = TRUE;
405  if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
406  delete [] centres;
407  return 0;
408  }
409  centres[cluster_count] = static_cast<float>(new_centre);
410  clusters[cluster_count].add(new_centre, new_mode);
411  clusters[0].add(new_centre, new_mode);
412  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
413  && entry >= rangemin_
414  && pile_count (entry) <= pile_count(entry + 1); entry--) {
415  count = pile_count(entry) - clusters[0].pile_count(entry);
416  if (count > 0) {
417  clusters[cluster_count].add(entry, count);
418  clusters[0].add(entry, count);
419  }
420  }
421  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
422  && entry < rangemax_
423  && pile_count (entry) <= pile_count(entry - 1); entry++) {
424  count = pile_count(entry) - clusters[0].pile_count(entry);
425  if (count > 0) {
426  clusters[cluster_count].add(entry, count);
427  clusters[0].add (entry, count);
428  }
429  }
430  centres[cluster_count] =
431  static_cast<float>(clusters[cluster_count].ile(0.5));
432  }
433  } while (new_cluster && cluster_count < max_clusters);
434  delete [] centres;
435  return cluster_count;
436 }
437 
438 // Helper tests that the current index is still part of the peak and gathers
439 // the data into the peak, returning false when the peak is ended.
440 // src_buckets[index] - used_buckets[index] is the unused part of the histogram.
441 // prev_count is the histogram count of the previous index on entry and is
442 // updated to the current index on return.
443 // total_count and total_value are accumulating the mean of the peak.
444 static bool GatherPeak(int index, const int* src_buckets, int* used_buckets,
445  int* prev_count, int* total_count, double* total_value) {
446  int pile_count = src_buckets[index] - used_buckets[index];
447  if (pile_count <= *prev_count && pile_count > 0) {
448  // Accumulate count and index.count product.
449  *total_count += pile_count;
450  *total_value += index * pile_count;
451  // Mark this index as used
452  used_buckets[index] = src_buckets[index];
453  *prev_count = pile_count;
454  return true;
455  } else {
456  return false;
457  }
458 }
459 
460 // Finds (at most) the top max_modes modes, well actually the whole peak around
461 // each mode, returning them in the given modes vector as a <mean of peak,
462 // total count of peak> pair in order of decreasing total count.
463 // Since the mean is the key and the count the data in the pair, a single call
464 // to sort on the output will re-sort by increasing mean of peak if that is
465 // more useful than decreasing total count.
466 // Returns the actual number of modes found.
467 int STATS::top_n_modes(int max_modes,
468  GenericVector<KDPairInc<float, int> >* modes) const {
469  if (max_modes <= 0) return 0;
470  int src_count = rangemax_ - rangemin_;
471  // Used copies the counts in buckets_ as they get used.
472  STATS used(rangemin_, rangemax_);
473  modes->truncate(0);
474  // Total count of the smallest peak found so far.
475  int least_count = 1;
476  // Mode that is used as a seed for each peak
477  int max_count = 0;
478  do {
479  // Find an unused mode.
480  max_count = 0;
481  int max_index = 0;
482  for (int src_index = 0; src_index < src_count; src_index++) {
483  int pile_count = buckets_[src_index] - used.buckets_[src_index];
484  if (pile_count > max_count) {
485  max_count = pile_count;
486  max_index = src_index;
487  }
488  }
489  if (max_count > 0) {
490  // Copy the bucket count to used so it doesn't get found again.
491  used.buckets_[max_index] = max_count;
492  // Get the entire peak.
493  double total_value = max_index * max_count;
494  int total_count = max_count;
495  int prev_pile = max_count;
496  for (int offset = 1; max_index + offset < src_count; ++offset) {
497  if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
498  &prev_pile, &total_count, &total_value))
499  break;
500  }
501  prev_pile = buckets_[max_index];
502  for (int offset = 1; max_index - offset >= 0; ++offset) {
503  if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
504  &prev_pile, &total_count, &total_value))
505  break;
506  }
507  if (total_count > least_count || modes->size() < max_modes) {
508  // We definitely want this mode, so if we have enough discard the least.
509  if (modes->size() == max_modes)
510  modes->truncate(max_modes - 1);
511  int target_index = 0;
512  // Linear search for the target insertion point.
513  while (target_index < modes->size() &&
514  (*modes)[target_index].data >= total_count)
515  ++target_index;
516  float peak_mean =
517  static_cast<float>(total_value / total_count + rangemin_);
518  modes->insert(KDPairInc<float, int>(peak_mean, total_count),
519  target_index);
520  least_count = modes->back().data;
521  }
522  }
523  } while (max_count > 0);
524  return modes->size();
525 }
526 
527 /**********************************************************************
528  * STATS::print
529  *
530  * Prints a summary and table of the histogram.
531  **********************************************************************/
532 void STATS::print() const {
533  if (buckets_ == NULL) {
534  return;
535  }
536  inT32 min = min_bucket() - rangemin_;
537  inT32 max = max_bucket() - rangemin_;
538 
539  int num_printed = 0;
540  for (int index = min; index <= max; index++) {
541  if (buckets_[index] != 0) {
542  tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
543  if (++num_printed % 8 == 0)
544  tprintf ("\n");
545  }
546  }
547  tprintf ("\n");
548  print_summary();
549 }
550 
551 
552 
553 /**********************************************************************
554  * STATS::print_summary
555  *
556  * Print a summary of the stats.
557  **********************************************************************/
558 void STATS::print_summary() const {
559  if (buckets_ == NULL) {
560  return;
561  }
562  inT32 min = min_bucket();
563  inT32 max = max_bucket();
564  tprintf("Total count=%d\n", total_count_);
565  tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
566  tprintf("Lower quartile=%.2f\n", ile(0.25));
567  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
568  tprintf("Upper quartile=%.2f\n", ile(0.75));
569  tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
570  tprintf("Range=%d\n", max + 1 - min);
571  tprintf("Mean= %.2f\n", mean());
572  tprintf("SD= %.2f\n", sd());
573 }
574 
575 
576 /**********************************************************************
577  * STATS::plot
578  *
579  * Draw a histogram of the stats table.
580  **********************************************************************/
581 
582 #ifndef GRAPHICS_DISABLED
583 void STATS::plot(ScrollView* window, // to draw in
584  float xorigin, // bottom left
585  float yorigin,
586  float xscale, // one x unit
587  float yscale, // one y unit
588  ScrollView::Color colour) const { // colour to draw in
589  if (buckets_ == NULL) {
590  return;
591  }
592  window->Pen(colour);
593 
594  for (int index = 0; index < rangemax_ - rangemin_; index++) {
595  window->Rectangle( xorigin + xscale * index, yorigin,
596  xorigin + xscale * (index + 1),
597  yorigin + yscale * buckets_[index]);
598  }
599 }
600 #endif
601 
602 
603 /**********************************************************************
604  * STATS::plotline
605  *
606  * Draw a histogram of the stats table. (Line only)
607  **********************************************************************/
608 
609 #ifndef GRAPHICS_DISABLED
610 void STATS::plotline(ScrollView* window, // to draw in
611  float xorigin, // bottom left
612  float yorigin,
613  float xscale, // one x unit
614  float yscale, // one y unit
615  ScrollView::Color colour) const { // colour to draw in
616  if (buckets_ == NULL) {
617  return;
618  }
619  window->Pen(colour);
620  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
621  for (int index = 0; index < rangemax_ - rangemin_; index++) {
622  window->DrawTo(xorigin + xscale * index,
623  yorigin + yscale * buckets_[index]);
624  }
625 }
626 #endif
627 
628 
629 /**********************************************************************
630  * choose_nth_item
631  *
632  * Returns the index of what would b the nth item in the array
633  * if the members were sorted, without actually sorting.
634  **********************************************************************/
635 
636 inT32 choose_nth_item(inT32 index, float *array, inT32 count) {
637  inT32 next_sample; // next one to do
638  inT32 next_lesser; // space for new
639  inT32 prev_greater; // last one saved
640  inT32 equal_count; // no of equal ones
641  float pivot; // proposed median
642  float sample; // current sample
643 
644  if (count <= 1)
645  return 0;
646  if (count == 2) {
647  if (array[0] < array[1]) {
648  return index >= 1 ? 1 : 0;
649  }
650  else {
651  return index >= 1 ? 0 : 1;
652  }
653  }
654  else {
655  if (index < 0)
656  index = 0; // ensure legal
657  else if (index >= count)
658  index = count - 1;
659  equal_count = (inT32) (rand() % count);
660  pivot = array[equal_count];
661  // fill gap
662  array[equal_count] = array[0];
663  next_lesser = 0;
664  prev_greater = count;
665  equal_count = 1;
666  for (next_sample = 1; next_sample < prev_greater;) {
667  sample = array[next_sample];
668  if (sample < pivot) {
669  // shuffle
670  array[next_lesser++] = sample;
671  next_sample++;
672  }
673  else if (sample > pivot) {
674  prev_greater--;
675  // juggle
676  array[next_sample] = array[prev_greater];
677  array[prev_greater] = sample;
678  }
679  else {
680  equal_count++;
681  next_sample++;
682  }
683  }
684  for (next_sample = next_lesser; next_sample < prev_greater;)
685  array[next_sample++] = pivot;
686  if (index < next_lesser)
687  return choose_nth_item (index, array, next_lesser);
688  else if (index < prev_greater)
689  return next_lesser; // in equal bracket
690  else
691  return choose_nth_item (index - prev_greater,
692  array + prev_greater,
693  count - prev_greater) + prev_greater;
694  }
695 }
696 
697 /**********************************************************************
698  * choose_nth_item
699  *
700  * Returns the index of what would be the nth item in the array
701  * if the members were sorted, without actually sorting.
702  **********************************************************************/
703 inT32 choose_nth_item(inT32 index, void *array, inT32 count, size_t size,
704  int (*compar)(const void*, const void*)) {
705  int result; // of compar
706  inT32 next_sample; // next one to do
707  inT32 next_lesser; // space for new
708  inT32 prev_greater; // last one saved
709  inT32 equal_count; // no of equal ones
710  inT32 pivot; // proposed median
711 
712  if (count <= 1)
713  return 0;
714  if (count == 2) {
715  if (compar (array, (char *) array + size) < 0) {
716  return index >= 1 ? 1 : 0;
717  }
718  else {
719  return index >= 1 ? 0 : 1;
720  }
721  }
722  if (index < 0)
723  index = 0; // ensure legal
724  else if (index >= count)
725  index = count - 1;
726  pivot = (inT32) (rand () % count);
727  swap_entries (array, size, pivot, 0);
728  next_lesser = 0;
729  prev_greater = count;
730  equal_count = 1;
731  for (next_sample = 1; next_sample < prev_greater;) {
732  result =
733  compar ((char *) array + size * next_sample,
734  (char *) array + size * next_lesser);
735  if (result < 0) {
736  swap_entries (array, size, next_lesser++, next_sample++);
737  // shuffle
738  }
739  else if (result > 0) {
740  prev_greater--;
741  swap_entries(array, size, prev_greater, next_sample);
742  }
743  else {
744  equal_count++;
745  next_sample++;
746  }
747  }
748  if (index < next_lesser)
749  return choose_nth_item (index, array, next_lesser, size, compar);
750  else if (index < prev_greater)
751  return next_lesser; // in equal bracket
752  else
753  return choose_nth_item (index - prev_greater,
754  (char *) array + size * prev_greater,
755  count - prev_greater, size,
756  compar) + prev_greater;
757 }
758 
759 /**********************************************************************
760  * swap_entries
761  *
762  * Swap 2 entries of arbitrary size in-place in a table.
763  **********************************************************************/
764 void swap_entries(void *array, // array of entries
765  size_t size, // size of entry
766  inT32 index1, // entries to swap
767  inT32 index2) {
768  char tmp;
769  char *ptr1; // to entries
770  char *ptr2;
771  size_t count; // of bytes
772 
773  ptr1 = static_cast<char *>(array) + index1 * size;
774  ptr2 = static_cast<char *>(array) + index2 * size;
775  for (count = 0; count < size; count++) {
776  tmp = *ptr1;
777  *ptr1++ = *ptr2;
778  *ptr2++ = tmp; // tedious!
779  }
780 }
inT32 max_bucket() const
Definition: statistc.cpp:219
void add(inT32 value, inT32 count)
Definition: statistc.cpp:99
#define TRUE
Definition: capi.h:45
void clear()
Definition: statistc.cpp:81
double median() const
Definition: statistc.cpp:237
void DrawTo(int x, int y)
Definition: scrollview.cpp:531
double sd() const
Definition: statistc.cpp:149
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
Definition: statistc.cpp:467
#define MAX_INT32
Definition: host.h:62
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:636
#define tprintf(...)
Definition: tprintf.h:31
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
void smooth(inT32 factor)
Definition: statistc.cpp:287
int count(LIST var_list)
Definition: oldlist.cpp:103
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
Definition: statistc.cpp:318
double mean() const
Definition: statistc.cpp:133
void Pen(Color color)
Definition: scrollview.cpp:726
~STATS()
Definition: statistc.cpp:92
STATS()
Definition: statistc.cpp:51
#define FALSE
Definition: capi.h:46
inT32 min_bucket() const
Definition: statistc.cpp:204
inT32 mode() const
Definition: statistc.cpp:113
int32_t inT32
Definition: host.h:38
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:583
#define ASSERT_HOST(x)
Definition: errcode.h:84
Definition: cluster.h:32
void SetCursor(int x, int y)
Definition: scrollview.cpp:525
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:610
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
Definition: statistc.cpp:62
Definition: statistc.h:33
unsigned char BOOL8
Definition: host.h:44
void print_summary() const
Definition: statistc.cpp:558
double ile(double frac) const
Definition: statistc.cpp:172
bool local_min(inT32 x) const
Definition: statistc.cpp:260
int IntCastRounded(double x)
Definition: helpers.h:179
int64_t inT64
Definition: host.h:40
void print() const
Definition: statistc.cpp:532
void swap_entries(void *array, size_t size, inT32 index1, inT32 index2)
Definition: statistc.cpp:764