338 {
339 bool new_cluster;
340 float *centres;
341 int32_t entry;
343 int32_t best_cluster;
344 int32_t new_centre = 0;
345 int32_t new_mode;
347 float dist;
348 float min_dist;
349 int32_t cluster_count;
350
351 if (buckets_ == nullptr || max_clusters < 1) {
352 return 0;
353 }
354 centres = new float[max_clusters + 1];
355 for (cluster_count = 1;
356 cluster_count <= max_clusters && clusters[cluster_count].buckets_ != nullptr &&
357 clusters[cluster_count].total_count_ > 0;
358 cluster_count++) {
359 centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
360 new_centre = clusters[cluster_count].mode();
361 for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
363 entry--) {
366 clusters[cluster_count].add(entry,
count);
367 clusters[0].add(entry,
count);
368 }
369 }
370 for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
372 entry++) {
375 clusters[cluster_count].add(entry,
count);
376 clusters[0].add(entry,
count);
377 }
378 }
379 }
380 cluster_count--;
381
382 if (cluster_count == 0) {
383 clusters[0].set_range(rangemin_, rangemax_);
384 }
385 do {
386 new_cluster = false;
387 new_mode = 0;
388 for (entry = 0; entry <= rangemax_ - rangemin_; entry++) {
389 count = buckets_[entry] - clusters[0].buckets_[entry];
390
392 min_dist = static_cast<float>(INT32_MAX);
393 best_cluster = 0;
395 dist = entry + rangemin_ - centres[
cluster];
396
397 if (dist < 0) {
398 dist = -dist;
399 }
400 if (dist < min_dist) {
401 min_dist = dist;
403 }
404 }
405 if (min_dist > upper
406 && (best_cluster == 0 || entry + rangemin_ > centres[best_cluster] * multiple ||
407 entry + rangemin_ < centres[best_cluster] / multiple)) {
408 if (
count > new_mode) {
410 new_centre = entry + rangemin_;
411 }
412 }
413 }
414 }
415
416 if (new_mode > 0 && cluster_count < max_clusters) {
417 cluster_count++;
418 new_cluster = true;
419 if (!clusters[cluster_count].
set_range(rangemin_, rangemax_)) {
420 delete[] centres;
421 return 0;
422 }
423 centres[cluster_count] = static_cast<float>(new_centre);
424 clusters[cluster_count].add(new_centre, new_mode);
425 clusters[0].add(new_centre, new_mode);
426 for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
428 entry--) {
431 clusters[cluster_count].add(entry,
count);
432 clusters[0].add(entry,
count);
433 }
434 }
435 for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
437 entry++) {
440 clusters[cluster_count].add(entry,
count);
441 clusters[0].add(entry,
count);
442 }
443 }
444 centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
445 }
446 } while (new_cluster && cluster_count < max_clusters);
447 delete[] centres;
448 return cluster_count;
449}
int32_t pile_count(int32_t value) const
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value)