tesseract v5.3.3.20231005
kdtree.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: kdtree.cpp
3 ** Purpose: Routines for managing K-D search trees
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18/*-----------------------------------------------------------------------------
19 Include Files and Type Defines
20-----------------------------------------------------------------------------*/
21#include "kdtree.h"
22
23#include <algorithm>
24#include <cfloat> // for FLT_MAX
25#include <cmath>
26#include <cstdio>
27
28namespace tesseract {
29
30#define Magnitude(X) ((X) < 0 ? -(X) : (X))
31#define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D)))
32
33/*-----------------------------------------------------------------------------
34 Global Data Definitions and Declarations
35-----------------------------------------------------------------------------*/
36#define MINSEARCH (-FLT_MAX)
37#define MAXSEARCH FLT_MAX
38
39// Helper function to find the next essential dimension in a cycle.
40static int NextLevel(KDTREE *tree, int level) {
41 do {
42 ++level;
43 if (level >= tree->KeySize) {
44 level = 0;
45 }
46 } while (tree->KeyDesc[level].NonEssential);
47 return level;
48}
49
50//-----------------------------------------------------------------------------
52template <typename Key, typename Value>
53class MinK {
54public:
55 MinK(Key max_key, int k);
57
58 struct Element {
59 Element() = default;
60 Element(const Key &k, const Value &v) : key(k), value(v) {}
61
62 Key key;
63 Value value;
64 };
65
66 bool insert(Key k, Value v);
67 const Key &max_insertable_key();
68
70 return elements_count_;
71 }
72 const Element *elements() {
73 return elements_;
74 }
75
76private:
77 const Key max_key_;
78 Element *elements_;
79 int elements_count_;
80 int k_;
81 int max_index_;
82};
83
84template <typename Key, typename Value>
85MinK<Key, Value>::MinK(Key max_key, int k)
86 : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
87 elements_ = new Element[k_];
88}
89
90template <typename Key, typename Value>
92 delete[] elements_;
93}
94
95template <typename Key, typename Value>
97 if (elements_count_ < k_) {
98 return max_key_;
99 }
100 return elements_[max_index_].key;
101}
102
103template <typename Key, typename Value>
104bool MinK<Key, Value>::insert(Key key, Value value) {
105 if (elements_count_ < k_) {
106 elements_[elements_count_++] = Element(key, value);
107 if (key > elements_[max_index_].key) {
108 max_index_ = elements_count_ - 1;
109 }
110 return true;
111 } else if (key < elements_[max_index_].key) {
112 // evict the largest element.
113 elements_[max_index_] = Element(key, value);
114 // recompute max_index_
115 for (int i = 0; i < elements_count_; i++) {
116 if (elements_[i].key > elements_[max_index_].key) {
117 max_index_ = i;
118 }
119 }
120 return true;
121 }
122 return false;
123}
124
125//-----------------------------------------------------------------------------
129public:
130 KDTreeSearch(KDTREE *tree, float *query_point, int k_closest);
132
134 void Search(int *result_count, float *distances, void **results);
135
136private:
137 void SearchRec(int Level, KDNODE *SubTree);
138 bool BoxIntersectsSearch(float *lower, float *upper);
139
140 KDTREE *tree_;
141 float *query_point_;
142 float *sb_min_;
143 float *sb_max_;
144 MinK<float, void *> results_;
145};
146
147KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
148 : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
149 sb_min_ = new float[tree->KeySize];
150 sb_max_ = new float[tree->KeySize];
151}
152
154 delete[] sb_min_;
155 delete[] sb_max_;
156}
157
160void KDTreeSearch::Search(int *result_count, float *distances, void **results) {
161 if (tree_->Root.Left == nullptr) {
162 *result_count = 0;
163 } else {
164 for (int i = 0; i < tree_->KeySize; i++) {
165 sb_min_[i] = tree_->KeyDesc[i].Min;
166 sb_max_[i] = tree_->KeyDesc[i].Max;
167 }
168 SearchRec(0, tree_->Root.Left);
169 int count = results_.elements_count();
170 *result_count = count;
171 for (int j = 0; j < count; j++) {
172 // Pre-cast to float64 as key is a template type and we have no control
173 // over its actual type.
174 distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
175 results[j] = results_.elements()[j].value;
176 }
177 }
178}
179
180/*-----------------------------------------------------------------------------
181 Public Code
182-----------------------------------------------------------------------------*/
186KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
187 auto *KDTree = new KDTREE(KeySize);
188 for (int i = 0; i < KeySize; i++) {
189 KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
190 KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
191 if (KeyDesc[i].Circular) {
192 KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
193 KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
194 KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
195 KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
196 KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
197 } else {
198 KDTree->KeyDesc[i].Min = MINSEARCH;
199 KDTree->KeyDesc[i].Max = MAXSEARCH;
200 }
201 }
202 KDTree->Root.Left = nullptr;
203 KDTree->Root.Right = nullptr;
204 return KDTree;
205}
206
215void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data) {
216 auto PtrToNode = &(Tree->Root.Left);
217 auto Node = *PtrToNode;
218 auto Level = NextLevel(Tree, -1);
219 while (Node != nullptr) {
220 if (Key[Level] < Node->BranchPoint) {
221 PtrToNode = &(Node->Left);
222 if (Key[Level] > Node->LeftBranch) {
223 Node->LeftBranch = Key[Level];
224 }
225 } else {
226 PtrToNode = &(Node->Right);
227 if (Key[Level] < Node->RightBranch) {
228 Node->RightBranch = Key[Level];
229 }
230 }
231 Level = NextLevel(Tree, Level);
232 Node = *PtrToNode;
233 }
234
235 *PtrToNode = new KDNODE(Tree, Key, Data, Level);
236} /* KDStore */
237
252void KDDelete(KDTREE *Tree, float Key[], void *Data) {
253 int Level;
254 KDNODE *Current;
255 KDNODE *Father;
256
257 /* initialize search at root of tree */
258 Father = &(Tree->Root);
259 Current = Father->Left;
260 Level = NextLevel(Tree, -1);
261
262 /* search tree for node to be deleted */
263 while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
264 Father = Current;
265 if (Key[Level] < Current->BranchPoint) {
266 Current = Current->Left;
267 } else {
268 Current = Current->Right;
269 }
270
271 Level = NextLevel(Tree, Level);
272 }
273
274 if (Current != nullptr) { /* if node to be deleted was found */
275 if (Current == Father->Left) {
276 Father->Left = nullptr;
277 Father->LeftBranch = Tree->KeyDesc[Level].Min;
278 } else {
279 Father->Right = nullptr;
280 Father->RightBranch = Tree->KeyDesc[Level].Max;
281 }
282
283 InsertNodes(Tree, Current->Left);
284 InsertNodes(Tree, Current->Right);
285 delete Current;
286 }
287} /* KDDelete */
288
305void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
306 int *NumberOfResults, void **NBuffer, float DBuffer[]) {
307 KDTreeSearch search(Tree, Query, QuerySize);
308 search.Search(NumberOfResults, DBuffer, NBuffer);
309}
310
311/*---------------------------------------------------------------------------*/
314 if (Tree->Root.Left != nullptr) {
315 Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
316 }
317}
318
319/*-----------------------------------------------------------------------------
320 Private Code
321-----------------------------------------------------------------------------*/
322
323/*---------------------------------------------------------------------------*/
329void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
330 if (level >= tree_->KeySize) {
331 level = 0;
332 }
333
334 if (!BoxIntersectsSearch(sb_min_, sb_max_)) {
335 return;
336 }
337
338 results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key),
339 sub_tree->Data);
340
341 if (query_point_[level] < sub_tree->BranchPoint) {
342 if (sub_tree->Left != nullptr) {
343 float tmp = sb_max_[level];
344 sb_max_[level] = sub_tree->LeftBranch;
345 SearchRec(NextLevel(tree_, level), sub_tree->Left);
346 sb_max_[level] = tmp;
347 }
348 if (sub_tree->Right != nullptr) {
349 float tmp = sb_min_[level];
350 sb_min_[level] = sub_tree->RightBranch;
351 SearchRec(NextLevel(tree_, level), sub_tree->Right);
352 sb_min_[level] = tmp;
353 }
354 } else {
355 if (sub_tree->Right != nullptr) {
356 float tmp = sb_min_[level];
357 sb_min_[level] = sub_tree->RightBranch;
358 SearchRec(NextLevel(tree_, level), sub_tree->Right);
359 sb_min_[level] = tmp;
360 }
361 if (sub_tree->Left != nullptr) {
362 float tmp = sb_max_[level];
363 sb_max_[level] = sub_tree->LeftBranch;
364 SearchRec(NextLevel(tree_, level), sub_tree->Left);
365 sb_max_[level] = tmp;
366 }
367 }
368}
369
370/*---------------------------------------------------------------------------*/
378float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
379 float total_distance = 0;
380
381 for (; k > 0; k--, p1++, p2++, dim++) {
382 if (dim->NonEssential) {
383 continue;
384 }
385
386 float dimension_distance = *p1 - *p2;
387
388 /* if this dimension is circular - check wraparound distance */
389 if (dim->Circular) {
390 dimension_distance = Magnitude(dimension_distance);
391 float wrap_distance = dim->Max - dim->Min - dimension_distance;
392 dimension_distance = std::min(dimension_distance, wrap_distance);
393 }
394
395 total_distance += dimension_distance * dimension_distance;
396 }
397 return total_distance;
398}
399
400float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
401 return std::sqrt(DistanceSquared(k, dim, p1, p2));
402}
403
404/*---------------------------------------------------------------------------*/
409bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
410 float *query = query_point_;
411 // Compute the sum in higher precision.
412 double total_distance = 0.0;
413 double radius_squared =
414 static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key();
415 PARAM_DESC *dim = &tree_->KeyDesc[0];
416
417 for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
418 if (dim->NonEssential) {
419 continue;
420 }
421
422 float dimension_distance;
423 if (*query < *lower) {
424 dimension_distance = *lower - *query;
425 } else if (*query > *upper) {
426 dimension_distance = *query - *upper;
427 } else {
428 dimension_distance = 0;
429 }
430
431 /* if this dimension is circular - check wraparound distance */
432 if (dim->Circular) {
433 float wrap_distance = FLT_MAX;
434 if (*query < *lower) {
435 wrap_distance = *query + dim->Max - dim->Min - *upper;
436 } else if (*query > *upper) {
437 wrap_distance = *lower - (*query - (dim->Max - dim->Min));
438 }
439 dimension_distance = std::min(dimension_distance, wrap_distance);
440 }
441
442 total_distance += static_cast<double>(dimension_distance) * dimension_distance;
443 if (total_distance >= radius_squared) {
444 return false;
445 }
446 }
447 return true;
448}
449
450/*---------------------------------------------------------------------------*/
466void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level) {
467 (*action)(context, sub_tree->Data, level);
468 if (sub_tree->Left != nullptr) {
469 Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
470 }
471 if (sub_tree->Right != nullptr) {
472 Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
473 }
474}
475
477void InsertNodes(KDTREE *tree, KDNODE *nodes) {
478 if (nodes == nullptr) {
479 return;
480 }
481
482 KDStore(tree, nodes->Key, nodes->Data);
483 InsertNodes(tree, nodes->Left);
484 InsertNodes(tree, nodes->Right);
485}
486
487} // namespace tesseract
#define MAXSEARCH
Definition: kdtree.cpp:37
#define NodeFound(N, K, D)
Definition: kdtree.cpp:31
#define MINSEARCH
Definition: kdtree.cpp:36
#define Magnitude(X)
Definition: kdtree.cpp:30
int value
int * count
void(*)(ClusteringContext *context, CLUSTER *Cluster, int32_t Level) kdwalk_proc
Definition: kdtree.h:39
void KDDelete(KDTREE *Tree, float Key[], void *Data)
Definition: kdtree.cpp:252
void KDWalk(KDTREE *Tree, kdwalk_proc action, ClusteringContext *context)
Definition: kdtree.cpp:313
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:400
void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level)
Definition: kdtree.cpp:466
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:378
void InsertNodes(KDTREE *tree, KDNODE *nodes)
Definition: kdtree.cpp:477
KDTREE * MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[])
Definition: kdtree.cpp:186
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:211
void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, int *NumberOfResults, void **NBuffer, float DBuffer[])
Definition: kdtree.cpp:305
void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data)
Definition: kdtree.cpp:215
action
Definition: upload.py:408
int elements_count()
Definition: kdtree.cpp:69
MinK(Key max_key, int k)
Definition: kdtree.cpp:85
bool insert(Key k, Value v)
Definition: kdtree.cpp:104
const Key & max_insertable_key()
Definition: kdtree.cpp:96
const Element * elements()
Definition: kdtree.cpp:72
Element(const Key &k, const Value &v)
Definition: kdtree.cpp:60
KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
Definition: kdtree.cpp:147
void Search(int *result_count, float *distances, void **results)
Definition: kdtree.cpp:160
float * Key
Definition: kdtree.h:57
KDNODE * Right
Definition: kdtree.h:63
float LeftBranch
Definition: kdtree.h:60
KDNODE * Left
Definition: kdtree.h:62
float RightBranch
Definition: kdtree.h:61
CLUSTER * Data
Definition: kdtree.h:58
float BranchPoint
Definition: kdtree.h:59
std::vector< PARAM_DESC > KeyDesc
Definition: kdtree.h:82
int16_t KeySize
Definition: kdtree.h:80
KDNODE Root
Definition: kdtree.h:81