22 #include "config_auto.h" 41 if (max_bucket_value_plus_1 <= min_bucket_value) {
43 max_bucket_value_plus_1 = 1;
45 rangemin_ = min_bucket_value;
46 rangemax_ = max_bucket_value_plus_1;
47 buckets_ =
new inT32[rangemax_ - rangemin_];
63 if (max_bucket_value_plus_1 <= min_bucket_value) {
66 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
68 buckets_ =
new inT32[max_bucket_value_plus_1 - min_bucket_value];
70 rangemin_ = min_bucket_value;
71 rangemax_ = max_bucket_value_plus_1;
84 memset(buckets_, 0, (rangemax_ - rangemin_) *
sizeof(buckets_[0]));
100 if (buckets_ == NULL) {
103 value =
ClipToRange(value, rangemin_, rangemax_ - 1);
104 buckets_[value - rangemin_] +=
count;
105 total_count_ +=
count;
114 if (buckets_ == NULL) {
117 inT32 max = buckets_[0];
119 for (
int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
120 if (buckets_[index] > max) {
121 max = buckets_[index];
125 return maxindex + rangemin_;
134 if (buckets_ == NULL || total_count_ <= 0) {
135 return static_cast<double>(rangemin_);
138 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
139 sum +=
static_cast<inT64>(index) * buckets_[index];
141 return static_cast<double>(sum) / total_count_ + rangemin_;
150 if (buckets_ == NULL || total_count_ <= 0) {
155 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
156 sum +=
static_cast<inT64>(index) * buckets_[index];
157 sqsum +=
static_cast<double>(index) * index * buckets_[index];
159 double variance =
static_cast<double>(sum) / total_count_;
160 variance = sqsum / total_count_ - variance * variance;
162 return sqrt(variance);
173 if (buckets_ == NULL || total_count_ == 0) {
174 return static_cast<double>(rangemin_);
183 double target = frac * total_count_;
184 target =
ClipToRange(target, 1.0, static_cast<double>(total_count_));
188 for (index = 0; index < rangemax_ - rangemin_ && sum < target;
189 sum += buckets_[index++]);
192 return rangemin_ + index -
193 static_cast<double>(sum - target) / buckets_[index - 1];
195 return static_cast<double>(rangemin_);
205 if (buckets_ == NULL || total_count_ == 0) {
209 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
210 return rangemin_ + min;
220 if (buckets_ == NULL || total_count_ == 0) {
224 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
225 return rangemin_ + max;
238 if (buckets_ == NULL) {
239 return static_cast<double>(rangemin_);
242 int median_pile =
static_cast<int>(floor(median));
243 if ((total_count_ > 1) && (
pile_count(median_pile) == 0)) {
247 for (min_pile = median_pile;
pile_count(min_pile) == 0; min_pile--);
249 for (max_pile = median_pile;
pile_count(max_pile) == 0; max_pile++);
250 median = (min_pile + max_pile) / 2.0;
261 if (buckets_ == NULL) {
264 x =
ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
265 if (buckets_[x] == 0)
268 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
269 if (index >= 0 && buckets_[index] < buckets_[x])
271 for (index = x + 1; index < rangemax_ - rangemin_ &&
272 buckets_[index] == buckets_[x]; ++index);
273 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
288 if (buckets_ == NULL || factor < 2) {
291 STATS result(rangemin_, rangemax_);
292 int entrycount = rangemax_ - rangemin_;
293 for (
int entry = 0; entry < entrycount; entry++) {
295 int count = buckets_[entry] * factor;
296 for (
int offset = 1; offset < factor; offset++) {
297 if (entry - offset >= 0)
298 count += buckets_[entry - offset] * (factor - offset);
299 if (entry + offset < entrycount)
300 count += buckets_[entry + offset] * (factor - offset);
302 result.
add(entry + rangemin_, count);
304 total_count_ = result.total_count_;
305 memcpy(buckets_, result.buckets_, entrycount *
sizeof(buckets_[0]));
328 inT32 new_centre = 0;
335 if (buckets_ == NULL || max_clusters < 1)
337 centres =
new float[max_clusters + 1];
338 for (cluster_count = 1; cluster_count <= max_clusters
339 && clusters[cluster_count].buckets_ != NULL
340 && clusters[cluster_count].total_count_ > 0;
342 centres[cluster_count] =
343 static_cast<float>(clusters[cluster_count].
ile(0.5));
344 new_centre = clusters[cluster_count].
mode();
345 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
346 && entry >= rangemin_
351 clusters[cluster_count].
add(entry, count);
352 clusters[0].
add (entry, count);
355 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
361 clusters[cluster_count].
add(entry, count);
362 clusters[0].
add(entry, count);
368 if (cluster_count == 0) {
369 clusters[0].
set_range(rangemin_, rangemax_);
374 for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
375 count = buckets_[entry] - clusters[0].buckets_[entry];
378 min_dist =
static_cast<float>(
MAX_INT32);
380 for (cluster = 1; cluster <= cluster_count; cluster++) {
381 dist = entry + rangemin_ - centres[
cluster];
385 if (dist < min_dist) {
391 && (best_cluster == 0
392 || entry + rangemin_ > centres[best_cluster] * multiple
393 || entry + rangemin_ < centres[best_cluster] / multiple)) {
394 if (count > new_mode) {
396 new_centre = entry + rangemin_;
402 if (new_mode > 0 && cluster_count < max_clusters) {
405 if (!clusters[cluster_count].
set_range(rangemin_, rangemax_)) {
409 centres[cluster_count] =
static_cast<float>(new_centre);
410 clusters[cluster_count].
add(new_centre, new_mode);
411 clusters[0].
add(new_centre, new_mode);
412 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
413 && entry >= rangemin_
417 clusters[cluster_count].
add(entry, count);
418 clusters[0].
add(entry, count);
421 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
426 clusters[cluster_count].
add(entry, count);
427 clusters[0].
add (entry, count);
430 centres[cluster_count] =
431 static_cast<float>(clusters[cluster_count].
ile(0.5));
433 }
while (new_cluster && cluster_count < max_clusters);
435 return cluster_count;
444 static bool GatherPeak(
int index,
const int* src_buckets,
int* used_buckets,
445 int* prev_count,
int* total_count,
double* total_value) {
446 int pile_count = src_buckets[index] - used_buckets[index];
447 if (pile_count <= *prev_count && pile_count > 0) {
452 used_buckets[index] = src_buckets[index];
469 if (max_modes <= 0)
return 0;
470 int src_count = rangemax_ - rangemin_;
472 STATS used(rangemin_, rangemax_);
482 for (
int src_index = 0; src_index < src_count; src_index++) {
483 int pile_count = buckets_[src_index] - used.buckets_[src_index];
484 if (pile_count > max_count) {
486 max_index = src_index;
491 used.buckets_[max_index] = max_count;
493 double total_value = max_index * max_count;
494 int total_count = max_count;
495 int prev_pile = max_count;
496 for (
int offset = 1; max_index + offset < src_count; ++offset) {
497 if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
498 &prev_pile, &total_count, &total_value))
501 prev_pile = buckets_[max_index];
502 for (
int offset = 1; max_index - offset >= 0; ++offset) {
503 if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
504 &prev_pile, &total_count, &total_value))
507 if (total_count > least_count || modes->size() < max_modes) {
509 if (modes->size() == max_modes)
510 modes->truncate(max_modes - 1);
511 int target_index = 0;
513 while (target_index < modes->size() &&
514 (*modes)[target_index].data >= total_count)
517 static_cast<float>(total_value / total_count + rangemin_);
520 least_count = modes->back().data;
523 }
while (max_count > 0);
524 return modes->size();
533 if (buckets_ == NULL) {
540 for (
int index = min; index <= max; index++) {
541 if (buckets_[index] != 0) {
542 tprintf(
"%4d:%-3d ", rangemin_ + index, buckets_[index]);
543 if (++num_printed % 8 == 0)
559 if (buckets_ == NULL) {
564 tprintf(
"Total count=%d\n", total_count_);
565 tprintf(
"Min=%.2f Really=%d\n",
ile(0.0), min);
569 tprintf(
"Max=%.2f Really=%d\n",
ile(1.0), max);
570 tprintf(
"Range=%d\n", max + 1 - min);
582 #ifndef GRAPHICS_DISABLED 589 if (buckets_ == NULL) {
594 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
595 window->
Rectangle( xorigin + xscale * index, yorigin,
596 xorigin + xscale * (index + 1),
597 yorigin + yscale * buckets_[index]);
609 #ifndef GRAPHICS_DISABLED 616 if (buckets_ == NULL) {
620 window->
SetCursor(xorigin, yorigin + yscale * buckets_[0]);
621 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
622 window->
DrawTo(xorigin + xscale * index,
623 yorigin + yscale * buckets_[index]);
647 if (array[0] < array[1]) {
648 return index >= 1 ? 1 : 0;
651 return index >= 1 ? 0 : 1;
657 else if (index >= count)
660 pivot = array[equal_count];
662 array[equal_count] = array[0];
664 prev_greater =
count;
666 for (next_sample = 1; next_sample < prev_greater;) {
667 sample = array[next_sample];
668 if (sample < pivot) {
670 array[next_lesser++] = sample;
673 else if (sample > pivot) {
676 array[next_sample] = array[prev_greater];
677 array[prev_greater] = sample;
684 for (next_sample = next_lesser; next_sample < prev_greater;)
685 array[next_sample++] = pivot;
686 if (index < next_lesser)
688 else if (index < prev_greater)
692 array + prev_greater,
693 count - prev_greater) + prev_greater;
704 int (*compar)(
const void*,
const void*)) {
715 if (compar (array, (
char *) array + size) < 0) {
716 return index >= 1 ? 1 : 0;
719 return index >= 1 ? 0 : 1;
724 else if (index >= count)
729 prev_greater =
count;
731 for (next_sample = 1; next_sample < prev_greater;) {
733 compar ((
char *) array + size * next_sample,
734 (
char *) array + size * next_lesser);
736 swap_entries (array, size, next_lesser++, next_sample++);
739 else if (result > 0) {
748 if (index < next_lesser)
750 else if (index < prev_greater)
754 (
char *) array + size * prev_greater,
755 count - prev_greater, size,
756 compar) + prev_greater;
773 ptr1 =
static_cast<char *
>(array) + index1 * size;
774 ptr2 =
static_cast<char *
>(array) + index2 * size;
775 for (count = 0; count < size; count++) {
void add(inT32 value, inT32 count)
void DrawTo(int x, int y)
void Rectangle(int x1, int y1, int x2, int y2)
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
inT32 pile_count(inT32 value) const
void smooth(inT32 factor)
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
void SetCursor(int x, int y)
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
void print_summary() const
double ile(double frac) const
bool local_min(inT32 x) const
int IntCastRounded(double x)
void swap_entries(void *array, size_t size, inT32 index1, inT32 index2)