tesseract v5.3.3.20231005
sorthelper.h
Go to the documentation of this file.
1
// File: sorthelper.h
// Description: Generic sort and maxfinding class.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
18
19#ifndef TESSERACT_CCUTIL_SORTHELPER_H_
20#define TESSERACT_CCUTIL_SORTHELPER_H_
21
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <vector>
24
25namespace tesseract {
26
// Generic class to provide functions based on a <value,count> pair.
// T is the value type.
// The class keeps a count of each value and can return the most frequent
// value or a sorted array of the values with counts.
// Note that this class uses linear search for adding. It is better
// to use the STATS class to get the mode of a large number of values
// in a small space. SortHelper is better to get the mode of a small number
// of values from a large space.
//
// Requirements on T, as used by the code below:
//  - copyable (values are stored and returned by copy);
//  - operator== (used by Add to find an existing entry);
//  - operator- whose result can be compared against 0 (used by
//    SortPairsByValue / SortByValue only).
template <typename T>
class SortHelper {
public:
  // Simple pair class to hold the values and counts.
  template <typename PairT>
  struct SortPair {
    PairT value;
    int count;
  };

  // qsort-style comparator that orders pairs by decreasing count.
  // v1 and v2 must point to SortPair<T> objects.
  // Returns a negative value if *v1 sorts before *v2 (has the larger count),
  // a positive value if it sorts after, and 0 if the counts are equal.
  static int SortPairsByCount(const void *v1, const void *v2) {
    const auto *p1 = static_cast<const SortPair<T> *>(v1);
    const auto *p2 = static_cast<const SortPair<T> *>(v2);
    // Compare instead of subtracting: count differences can overflow int.
    return (p2->count > p1->count) - (p2->count < p1->count);
  }

  // qsort-style comparator that orders pairs by decreasing value.
  // v1 and v2 must point to SortPair<T> objects.
  // Returns -1 if *v1 sorts before *v2, 1 if it sorts after, else 0.
  // The ordering is derived from the sign of (v2.value - v1.value).
  static int SortPairsByValue(const void *v1, const void *v2) {
    const auto *p1 = static_cast<const SortPair<T> *>(v1);
    const auto *p2 = static_cast<const SortPair<T> *>(v2);
    // Evaluate the difference once and test its sign.
    const auto diff = p2->value - p1->value;
    if (diff < 0) {
      return -1;
    }
    if (diff > 0) {
      return 1;
    }
    return 0;
  }

  // Constructor takes a hint of the array size, but it need not be accurate.
  // Non-positive hints are ignored: a negative int would otherwise convert
  // to a huge unsigned size and make reserve() throw.
  explicit SortHelper(int sizehint) {
    if (sizehint > 0) {
      counts_.reserve(static_cast<std::size_t>(sizehint));
    }
  }

  // Add a value that may be a duplicate of an existing value.
  // If value is already present its count is increased by count, otherwise
  // a new <value,count> entry is appended.
  // Uses a linear search.
  void Add(T value, int count) {
    for (auto &entry : counts_) {
      if (entry.value == value) {
        entry.count += count;
        return;
      }
    }
    counts_.push_back(SortPair<T>{value, count});
  }

  // Returns the frequency of the most frequent value.
  // If max_value is not nullptr, *max_value is set to the most frequent
  // value. On ties the entry that was added first wins (strict > compare).
  // If the array is empty, returns -INT32_MAX and max_value is unchanged.
  int MaxCount(T *max_value) const {
    int best_count = -INT32_MAX;
    for (const auto &entry : counts_) {
      if (entry.count > best_count) {
        best_count = entry.count;
        if (max_value != nullptr) {
          *max_value = entry.value;
        }
      }
    }
    return best_count;
  }

  // Sorts the data array in place by decreasing frequency and returns it.
  // The relative order of entries with equal counts is unspecified.
  const std::vector<SortPair<T>> &SortByCount() {
    // std::vector has no sort() member; adapt the qsort-style comparator
    // into the "comes before" predicate std::sort expects.
    std::sort(counts_.begin(), counts_.end(),
              [](const SortPair<T> &a, const SortPair<T> &b) {
                return SortPairsByCount(&a, &b) < 0;
              });
    return counts_;
  }

  // Sorts the data array in place by decreasing value and returns it.
  const std::vector<SortPair<T>> &SortByValue() {
    std::sort(counts_.begin(), counts_.end(),
              [](const SortPair<T> &a, const SortPair<T> &b) {
                return SortPairsByValue(&a, &b) < 0;
              });
    return counts_;
  }

private:
  // One entry per distinct value seen by Add, in insertion order until a
  // SortBy* call reorders it.
  std::vector<SortPair<T>> counts_;
};
113
114} // namespace tesseract
115
116#endif // TESSERACT_CCUTIL_SORTHELPER_H_.
int value
int * count
SortHelper(int sizehint)
Definition: sorthelper.h:65
const std::vector< SortPair< T > > & SortByValue()
Definition: sorthelper.h:105
int MaxCount(T *max_value) const
Definition: sorthelper.h:86
void Add(T value, int count)
Definition: sorthelper.h:71
static int SortPairsByCount(const void *v1, const void *v2)
Definition: sorthelper.h:46
static int SortPairsByValue(const void *v1, const void *v2)
Definition: sorthelper.h:52
const std::vector< SortPair< T > > & SortByCount()
Definition: sorthelper.h:100