tesseract  4.00.00dev
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 // Created: Mon Jun 23 11:26:43 PDT 2008
6 //
7 // (C) Copyright 2007, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 //
20 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
21 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include "tesscallback.h"
28 #include "errcode.h"
29 #include "helpers.h"
30 #include "ndminx.h"
31 #include "serialis.h"
32 #include "strngs.h"
33 
34 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
35 // provides automatic deletion of pointers, [De]Serialize that works, and
36 // sort that works.
37 template <typename T>
38 class GenericVector {
39  public:
42  }
43  GenericVector(int size, T init_val) {
44  init(size);
45  init_to_size(size, init_val);
46  }
47 
48  // Copy
49  GenericVector(const GenericVector& other) {
50  this->init(other.size());
51  this->operator+=(other);
52  }
55 
57 
58  // Reserve some memory.
59  void reserve(int size);
60  // Double the size of the internal array.
61  void double_the_size();
62 
63  // Resizes to size and sets all values to t.
64  void init_to_size(int size, T t);
65  // Resizes to size without any initialization.
66  void resize_no_init(int size) {
67  reserve(size);
68  size_used_ = size;
69  }
70 
71  // Return the size used.
72  int size() const {
73  return size_used_;
74  }
75  // Workaround to avoid g++ -Wsign-compare warnings.
76  size_t unsigned_size() const {
77  static_assert(sizeof(size_used_) <= sizeof(size_t),
78  "Wow! sizeof(size_t) < sizeof(int32_t)!!");
79  assert(0 <= size_used_);
80  return static_cast<size_t>(size_used_);
81  }
82  int size_reserved() const {
83  return size_reserved_;
84  }
85 
86  int length() const {
87  return size_used_;
88  }
89 
90  // Return true if empty.
91  bool empty() const {
92  return size_used_ == 0;
93  }
94 
95  // Return the object from an index.
96  T &get(int index) const;
97  T &back() const;
98  T &operator[](int index) const;
99  // Returns the last object and removes it.
100  T pop_back();
101 
102  // Return the index of the T object.
103  // This method NEEDS a compare_callback to be passed to
104  // set_compare_callback.
105  int get_index(T object) const;
106 
107  // Return true if T is in the array
108  bool contains(T object) const;
109 
110  // Return true if the index is valid
111  T contains_index(int index) const;
112 
113  // Push an element in the end of the array
114  int push_back(T object);
115  void operator+=(T t);
116 
117  // Push an element in the end of the array if the same
118  // element is not already contained in the array.
119  int push_back_new(T object);
120 
121  // Push an element in the front of the array
122  // Note: This function is O(n)
123  int push_front(T object);
124 
125  // Set the value at the given index
126  void set(T t, int index);
127 
128  // Insert t at the given index, push other elements to the right.
129  void insert(T t, int index);
130 
131  // Removes an element at the given index and
132  // shifts the remaining elements to the left.
133  void remove(int index);
134 
135  // Truncates the array to the given size by removing the end.
136  // If the current size is less, the array is not expanded.
137  void truncate(int size) {
138  if (size < size_used_)
139  size_used_ = size;
140  }
141 
142  // Add a callback to be called to delete the elements when the array took
143  // their ownership.
145 
146  // Add a callback to be called to compare the elements when needed (contains,
147  // get_id, ...)
149 
150  // Clear the array, calling the clear callback function if any.
151  // All the owned callbacks are also deleted.
152  // If you don't want the callbacks to be deleted, before calling clear, set
153  // the callback to NULL.
154  void clear();
155 
156  // Delete objects pointed to by data_[i]
157  void delete_data_pointers();
158 
159  // This method clears the current object, then, does a shallow copy of
160  // its argument, and finally invalidates its argument.
161  // Callbacks are moved to the current object;
162  void move(GenericVector<T>* from);
163 
164  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
165  // The callback given must be permanent since they will be called more than
166  // once. The given callback will be deleted at the end.
167  // If the callbacks are NULL, then the data is simply read/written using
168  // fread (and swapping)/fwrite.
169  // Returns false on error or if the callback returns false.
170  // DEPRECATED. Use [De]Serialize[Classes] instead.
171  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
172  bool read(tesseract::TFile* f,
174  // Writes a vector of simple types to the given file. Assumes that bitwise
175  // read/write of T will work. Returns false in case of error.
176  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
177  bool Serialize(FILE* fp) const;
178  bool Serialize(tesseract::TFile* fp) const;
179  // Reads a vector of simple types from the given file. Assumes that bitwise
180  // read/write will work with ReverseN according to sizeof(T).
181  // Returns false in case of error.
182  // If swap is true, assumes a big/little-endian swap is needed.
183  // TFile is assumed to know about swapping.
184  bool DeSerialize(bool swap, FILE* fp);
185  bool DeSerialize(tesseract::TFile* fp);
186  // Skips the deserialization of the vector.
187  static bool SkipDeSerialize(tesseract::TFile* fp);
188  // Writes a vector of classes to the given file. Assumes the existence of
189  // bool T::Serialize(FILE* fp) const that returns false in case of error.
190  // Returns false in case of error.
191  bool SerializeClasses(FILE* fp) const;
192  bool SerializeClasses(tesseract::TFile* fp) const;
193  // Reads a vector of classes from the given file. Assumes the existence of
194  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
195  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
196  // this function. Returns false in case of error.
197  // If swap is true, assumes a big/little-endian swap is needed.
198  bool DeSerializeClasses(bool swap, FILE* fp);
200  // Calls SkipDeSerialize on the elements of the vector.
201  static bool SkipDeSerializeClasses(tesseract::TFile* fp);
202 
203  // Allocates a new array of double the current_size, copies over the
204  // information from data to the new location, deletes data and returns
205  // the pointed to the new larger array.
206  // This function uses memcpy to copy the data, instead of invoking
207  // operator=() for each element like double_the_size() does.
208  static T *double_the_size_memcpy(int current_size, T *data) {
209  T *data_new = new T[current_size * 2];
210  memcpy(data_new, data, sizeof(T) * current_size);
211  delete[] data;
212  return data_new;
213  }
214 
215  // Reverses the elements of the vector.
216  void reverse() {
217  for (int i = 0; i < size_used_ / 2; ++i)
218  Swap(&data_[i], &data_[size_used_ - 1 - i]);
219  }
220 
221  // Sorts the members of this vector using the less than comparator (cmp_lt),
222  // which compares the values. Useful for GenericVectors to primitive types.
223  // Will not work so great for pointers (unless you just want to sort some
224  // pointers). You need to provide a specialization to sort_cmp to use
225  // your type.
226  void sort();
227 
228  // Sort the array into the order defined by the qsort function comparator.
229  // The comparator function is as defined by qsort, ie. it receives pointers
230  // to two Ts and returns negative if the first element is to appear earlier
231  // in the result and positive if it is to appear later, with 0 for equal.
232  void sort(int (*comparator)(const void*, const void*)) {
233  qsort(data_, size_used_, sizeof(*data_), comparator);
234  }
235 
236  // Searches the array (assuming sorted in ascending order, using sort()) for
237  // an element equal to target and returns true if it is present.
238  // Use binary_search to get the index of target, or its nearest candidate.
239  bool bool_binary_search(const T& target) const {
240  int index = binary_search(target);
241  if (index >= size_used_)
242  return false;
243  return data_[index] == target;
244  }
245  // Searches the array (assuming sorted in ascending order, using sort()) for
246  // an element equal to target and returns the index of the best candidate.
247  // The return value is conceptually the largest index i such that
248  // data_[i] <= target or 0 if target < the whole vector.
249  // NOTE that this function uses operator> so really the return value is
250  // the largest index i such that data_[i] > target is false.
251  int binary_search(const T& target) const {
252  int bottom = 0;
253  int top = size_used_;
254  while (top - bottom > 1) {
255  int middle = (bottom + top) / 2;
256  if (data_[middle] > target)
257  top = middle;
258  else
259  bottom = middle;
260  }
261  return bottom;
262  }
263 
264  // Compact the vector by deleting elements using operator!= on basic types.
265  // The vector must be sorted.
266  void compact_sorted() {
267  if (size_used_ == 0)
268  return;
269 
270  // First element is in no matter what, hence the i = 1.
271  int last_write = 0;
272  for (int i = 1; i < size_used_; ++i) {
273  // Finds next unique item and writes it.
274  if (data_[last_write] != data_[i])
275  data_[++last_write] = data_[i];
276  }
277  // last_write is the index of a valid data cell, so add 1.
278  size_used_ = last_write + 1;
279  }
280 
281  // Compact the vector by deleting elements for which delete_cb returns
282  // true. delete_cb is a permanent callback and will be deleted.
284  int new_size = 0;
285  int old_index = 0;
286  // Until the callback returns true, the elements stay the same.
287  while (old_index < size_used_ && !delete_cb->Run(old_index++))
288  ++new_size;
289  // Now just copy anything else that gets false from delete_cb.
290  for (; old_index < size_used_; ++old_index) {
291  if (!delete_cb->Run(old_index)) {
292  data_[new_size++] = data_[old_index];
293  }
294  }
295  size_used_ = new_size;
296  delete delete_cb;
297  }
298 
299  T dot_product(const GenericVector<T>& other) const {
300  T result = static_cast<T>(0);
301  for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i)
302  result += data_[i] * other.data_[i];
303  return result;
304  }
305 
306  // Returns the index of what would be the target_index_th item in the array
307  // if the members were sorted, without actually sorting. Members are
308  // shuffled around, but it takes O(n) time.
309  // NOTE: uses operator< and operator== on the members.
310  int choose_nth_item(int target_index) {
311  // Make sure target_index is legal.
312  if (target_index < 0)
313  target_index = 0; // ensure legal
314  else if (target_index >= size_used_)
315  target_index = size_used_ - 1;
316  unsigned int seed = 1;
317  return choose_nth_item(target_index, 0, size_used_, &seed);
318  }
319 
320  // Swaps the elements with the given indices.
321  void swap(int index1, int index2) {
322  if (index1 != index2) {
323  T tmp = data_[index1];
324  data_[index1] = data_[index2];
325  data_[index2] = tmp;
326  }
327  }
328  // Returns true if all elements of *this are within the given range.
329  // Only uses operator<
330  bool WithinBounds(const T& rangemin, const T& rangemax) const {
331  for (int i = 0; i < size_used_; ++i) {
332  if (data_[i] < rangemin || rangemax < data_[i])
333  return false;
334  }
335  return true;
336  }
337 
338  protected:
339  // Internal recursive version of choose_nth_item.
340  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
341 
342  // Init the object, allocating size memory.
343  void init(int size);
344 
345  // We are assuming that the object generally placed in thie
346  // vector are small enough that for efficiency it makes sense
347  // to start with a larger initial size.
348  static const int kDefaultVectorSize = 4;
351  T* data_;
353  // Mutable because Run method is not const
355 };
356 
357 namespace tesseract {
358 
359 // Function to read a GenericVector<char> from a whole file.
360 // Returns false on failure.
361 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
362 // Function to write a GenericVector<char> to a whole file.
363 // Returns false on failure.
364 typedef bool (*FileWriter)(const GenericVector<char>& data,
365  const STRING& filename);
366 // The default FileReader loads the whole file into the vector of char,
367 // returning false on error.
368 inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
369  bool result = false;
370  FILE* fp = fopen(filename, "rb");
371  if (fp != NULL) {
372  fseek(fp, 0, SEEK_END);
373  long size = ftell(fp);
374  fseek(fp, 0, SEEK_SET);
375  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
376  if (size > 0 && size < LONG_MAX) {
377  data->resize_no_init(size);
378  result = fread(&(*data)[0], 1, size, fp) == size;
379  }
380  fclose(fp);
381  }
382  return result;
383 }
384 
385 inline bool LoadDataFromFile(const STRING& filename,
386  GenericVector<char>* data) {
387  return LoadDataFromFile(filename.string(), data);
388 }
389 
390 // The default FileWriter writes the vector of char to the filename file,
391 // returning false on error.
392 inline bool SaveDataToFile(const GenericVector<char>& data,
393  const STRING& filename) {
394  FILE* fp = fopen(filename.string(), "wb");
395  if (fp == NULL) return false;
396  bool result =
397  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
398  fclose(fp);
399  return result;
400 }
401 // Reads a file as a vector of STRING.
402 inline bool LoadFileLinesToStrings(const STRING& filename,
403  GenericVector<STRING>* lines) {
404  GenericVector<char> data;
405  if (!LoadDataFromFile(filename.string(), &data)) {
406  return false;
407  }
408  STRING lines_str(&data[0], data.size());
409  lines_str.split('\n', lines);
410  return true;
411 }
412 
413 template <typename T>
414 bool cmp_eq(T const & t1, T const & t2) {
415  return t1 == t2;
416 }
417 
418 // Used by sort()
419 // return < 0 if t1 < t2
420 // return 0 if t1 == t2
421 // return > 0 if t1 > t2
422 template <typename T>
423 int sort_cmp(const void* t1, const void* t2) {
424  const T* a = static_cast<const T *> (t1);
425  const T* b = static_cast<const T *> (t2);
426  if (*a < *b) {
427  return -1;
428  } else if (*b < *a) {
429  return 1;
430  } else {
431  return 0;
432  }
433 }
434 
435 // Used by PointerVector::sort()
436 // return < 0 if t1 < t2
437 // return 0 if t1 == t2
438 // return > 0 if t1 > t2
439 template <typename T>
440 int sort_ptr_cmp(const void* t1, const void* t2) {
441  const T* a = *static_cast<T* const*>(t1);
442  const T* b = *static_cast<T* const*>(t2);
443  if (*a < *b) {
444  return -1;
445  } else if (*b < *a) {
446  return 1;
447  } else {
448  return 0;
449  }
450 }
451 
452 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
453 // as it provides automatic deletion and correct serialization, with the
454 // corollary that all copy operations are deep copies of the pointed-to objects.
455 template<typename T>
456 class PointerVector : public GenericVector<T*> {
457  public:
459  explicit PointerVector(int size) : GenericVector<T*>(size) { }
461  // Clear must be called here, even though it is called again by the base,
462  // as the base will call the wrong clear.
463  clear();
464  }
465  // Copy must be deep, as the pointers will be automatically deleted on
466  // destruction.
467  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
468  this->init(other.size());
469  this->operator+=(other);
470  }
472  this->reserve(this->size_used_ + other.size_used_);
473  for (int i = 0; i < other.size(); ++i) {
474  this->push_back(new T(*other.data_[i]));
475  }
476  return *this;
477  }
478 
480  if (&other != this) {
481  this->truncate(0);
482  this->operator+=(other);
483  }
484  return *this;
485  }
486 
487  // Removes an element at the given index and
488  // shifts the remaining elements to the left.
489  void remove(int index) {
490  delete GenericVector<T*>::data_[index];
492  }
493 
494  // Truncates the array to the given size by removing the end.
495  // If the current size is less, the array is not expanded.
496  void truncate(int size) {
497  for (int i = size; i < GenericVector<T*>::size_used_; ++i)
498  delete GenericVector<T*>::data_[i];
500  }
501 
502  // Compact the vector by deleting elements for which delete_cb returns
503  // true. delete_cb is a permanent callback and will be deleted.
505  int new_size = 0;
506  int old_index = 0;
507  // Until the callback returns true, the elements stay the same.
508  while (old_index < GenericVector<T*>::size_used_ &&
509  !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
510  ++new_size;
511  // Now just copy anything else that gets false from delete_cb.
512  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
513  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
514  GenericVector<T*>::data_[new_size++] =
515  GenericVector<T*>::data_[old_index];
516  } else {
517  delete GenericVector<T*>::data_[old_index];
518  }
519  }
521  delete delete_cb;
522  }
523 
524  // Clear the array, calling the clear callback function if any.
525  // All the owned callbacks are also deleted.
526  // If you don't want the callbacks to be deleted, before calling clear, set
527  // the callback to NULL.
528  void clear() {
531  }
532 
533  // Writes a vector of (pointers to) classes to the given file. Assumes the
534  // existence of bool T::Serialize(FILE*) const that returns false in case of
535  // error. There is no Serialize for simple types, as you would have a
536  // normal GenericVector of those.
537  // Returns false in case of error.
538  bool Serialize(FILE* fp) const {
540  if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
541  for (int i = 0; i < used; ++i) {
542  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
543  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
544  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
545  }
546  return true;
547  }
548  bool Serialize(TFile* fp) const {
550  if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
551  for (int i = 0; i < used; ++i) {
552  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
553  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
554  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
555  }
556  return true;
557  }
558  // Reads a vector of (pointers to) classes to the given file. Assumes the
559  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
560  // case of error. There is no Serialize for simple types, as you would have a
561  // normal GenericVector of those.
562  // If swap is true, assumes a big/little-endian swap is needed.
563  // Also needs T::T(), as new T is used in this function.
564  // Returns false in case of error.
565  bool DeSerialize(bool swap, FILE* fp) {
566  inT32 reserved;
567  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
568  if (swap) Reverse32(&reserved);
569  GenericVector<T*>::reserve(reserved);
570  truncate(0);
571  for (int i = 0; i < reserved; ++i) {
572  inT8 non_null;
573  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
574  T* item = NULL;
575  if (non_null) {
576  item = new T;
577  if (!item->DeSerialize(swap, fp)) {
578  delete item;
579  return false;
580  }
581  this->push_back(item);
582  } else {
583  // Null elements should keep their place in the vector.
584  this->push_back(NULL);
585  }
586  }
587  return true;
588  }
589  bool DeSerialize(TFile* fp) {
590  inT32 reserved;
591  if (!DeSerializeSize(fp, &reserved)) return false;
592  GenericVector<T*>::reserve(reserved);
593  truncate(0);
594  for (int i = 0; i < reserved; ++i) {
595  if (!DeSerializeElement(fp)) return false;
596  }
597  return true;
598  }
599  // Enables deserialization of a selection of elements. Note that in order to
600  // retain the integrity of the stream, the caller must call some combination
601  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
602  // *size, assuming a true return.
603  static bool DeSerializeSize(TFile* fp, inT32* size) {
604  return fp->FReadEndian(size, sizeof(*size), 1) == 1;
605  }
606  // Reads and appends to the vector the next element of the serialization.
608  inT8 non_null;
609  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
610  T* item = NULL;
611  if (non_null) {
612  item = new T;
613  if (!item->DeSerialize(fp)) {
614  delete item;
615  return false;
616  }
617  this->push_back(item);
618  } else {
619  // Null elements should keep their place in the vector.
620  this->push_back(NULL);
621  }
622  return true;
623  }
624  // Skips the next element of the serialization.
625  static bool DeSerializeSkip(TFile* fp) {
626  inT8 non_null;
627  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
628  if (non_null) {
629  if (!T::SkipDeSerialize(fp)) return false;
630  }
631  return true;
632  }
633 
634  // Sorts the items pointed to by the members of this vector using
635  // t::operator<().
636  void sort() { this->GenericVector<T*>::sort(&sort_ptr_cmp<T>); }
637 };
638 
639 } // namespace tesseract
640 
641 // A useful vector that uses operator== to do comparisons.
642 template <typename T>
643 class GenericVectorEqEq : public GenericVector<T> {
644  public:
647  NewPermanentTessCallback(tesseract::cmp_eq<T>));
648  }
649  GenericVectorEqEq(int size) : GenericVector<T>(size) {
651  NewPermanentTessCallback(tesseract::cmp_eq<T>));
652  }
653 };
654 
655 template <typename T>
656 void GenericVector<T>::init(int size) {
657  size_used_ = 0;
658  size_reserved_ = 0;
659  data_ = 0;
660  clear_cb_ = 0;
661  compare_cb_ = 0;
662  reserve(size);
663 }
664 
665 template <typename T>
667  clear();
668 }
669 
670 // Reserve some memory. If the internal array contains elements, they are
671 // copied.
672 template <typename T>
674  if (size_reserved_ >= size || size <= 0)
675  return;
676  if (size < kDefaultVectorSize) size = kDefaultVectorSize;
677  T* new_array = new T[size];
678  for (int i = 0; i < size_used_; ++i)
679  new_array[i] = data_[i];
680  delete[] data_;
681  data_ = new_array;
683 }
684 
685 template <typename T>
687  if (size_reserved_ == 0) {
689  }
690  else {
691  reserve(2 * size_reserved_);
692  }
693 }
694 
695 // Resizes to size and sets all values to t.
696 template <typename T>
697 void GenericVector<T>::init_to_size(int size, T t) {
698  reserve(size);
699  size_used_ = size;
700  for (int i = 0; i < size; ++i)
701  data_[i] = t;
702 }
703 
704 
705 // Return the object from an index.
706 template <typename T>
707 T &GenericVector<T>::get(int index) const {
708  ASSERT_HOST(index >= 0 && index < size_used_);
709  return data_[index];
710 }
711 
712 template <typename T>
713 T &GenericVector<T>::operator[](int index) const {
714  assert(index >= 0 && index < size_used_);
715  return data_[index];
716 }
717 
718 template <typename T>
720  ASSERT_HOST(size_used_ > 0);
721  return data_[size_used_ - 1];
722 }
723 // Returns the last object and removes it.
724 template <typename T>
726  ASSERT_HOST(size_used_ > 0);
727  return data_[--size_used_];
728 }
729 
730 // Return the object from an index.
731 template <typename T>
732 void GenericVector<T>::set(T t, int index) {
733  ASSERT_HOST(index >= 0 && index < size_used_);
734  data_[index] = t;
735 }
736 
737 // Shifts the rest of the elements to the right to make
738 // space for the new elements and inserts the given element
739 // at the specified index.
740 template <typename T>
741 void GenericVector<T>::insert(T t, int index) {
742  ASSERT_HOST(index >= 0 && index <= size_used_);
743  if (size_reserved_ == size_used_)
744  double_the_size();
745  for (int i = size_used_; i > index; --i) {
746  data_[i] = data_[i-1];
747  }
748  data_[index] = t;
749  size_used_++;
750 }
751 
752 // Removes an element at the given index and
753 // shifts the remaining elements to the left.
754 template <typename T>
755 void GenericVector<T>::remove(int index) {
756  ASSERT_HOST(index >= 0 && index < size_used_);
757  for (int i = index; i < size_used_ - 1; ++i) {
758  data_[i] = data_[i+1];
759  }
760  size_used_--;
761 }
762 
763 // Return true if the index is valindex
764 template <typename T>
766  return index >= 0 && index < size_used_;
767 }
768 
769 // Return the index of the T object.
770 template <typename T>
771 int GenericVector<T>::get_index(T object) const {
772  for (int i = 0; i < size_used_; ++i) {
773  ASSERT_HOST(compare_cb_ != NULL);
774  if (compare_cb_->Run(object, data_[i]))
775  return i;
776  }
777  return -1;
778 }
779 
780 // Return true if T is in the array
781 template <typename T>
782 bool GenericVector<T>::contains(T object) const {
783  return get_index(object) != -1;
784 }
785 
786 // Add an element in the array
787 template <typename T>
789  int index = 0;
790  if (size_used_ == size_reserved_)
791  double_the_size();
792  index = size_used_++;
793  data_[index] = object;
794  return index;
795 }
796 
797 template <typename T>
799  int index = get_index(object);
800  if (index >= 0)
801  return index;
802  return push_back(object);
803 }
804 
805 // Add an element in the array (front)
806 template <typename T>
808  if (size_used_ == size_reserved_)
809  double_the_size();
810  for (int i = size_used_; i > 0; --i)
811  data_[i] = data_[i-1];
812  data_[0] = object;
813  ++size_used_;
814  return 0;
815 }
816 
817 template <typename T>
819  push_back(t);
820 }
821 
822 template <typename T>
824  this->reserve(size_used_ + other.size_used_);
825  for (int i = 0; i < other.size(); ++i) {
826  this->operator+=(other.data_[i]);
827  }
828  return *this;
829 }
830 
831 template <typename T>
833  if (&other != this) {
834  this->truncate(0);
835  this->operator+=(other);
836  }
837  return *this;
838 }
839 
840 // Add a callback to be called to delete the elements when the array took
841 // their ownership.
842 template <typename T>
844  clear_cb_ = cb;
845 }
846 
847 // Add a callback to be called to delete the elements when the array took
848 // their ownership.
849 template <typename T>
852  compare_cb_ = cb;
853 }
854 
855 // Clear the array, calling the callback function if any.
856 template <typename T>
858  if (size_reserved_ > 0) {
859  if (clear_cb_ != NULL)
860  for (int i = 0; i < size_used_; ++i)
861  clear_cb_->Run(data_[i]);
862  delete[] data_;
863  data_ = NULL;
864  size_used_ = 0;
865  size_reserved_ = 0;
866  }
867  if (clear_cb_ != NULL) {
868  delete clear_cb_;
869  clear_cb_ = NULL;
870  }
871  if (compare_cb_ != NULL) {
872  delete compare_cb_;
873  compare_cb_ = NULL;
874  }
875 }
876 
877 template <typename T>
879  for (int i = 0; i < size_used_; ++i)
880  if (data_[i]) {
881  delete data_[i];
882  }
883 }
884 
885 
886 template <typename T>
888  FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
889  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
890  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
891  if (cb != NULL) {
892  for (int i = 0; i < size_used_; ++i) {
893  if (!cb->Run(f, data_[i])) {
894  delete cb;
895  return false;
896  }
897  }
898  delete cb;
899  } else {
900  if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size())
901  return false;
902  }
903  return true;
904 }
905 
906 template <typename T>
909  inT32 reserved;
910  if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
911  reserve(reserved);
912  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false;
913  if (cb != NULL) {
914  for (int i = 0; i < size_used_; ++i) {
915  if (!cb->Run(f, data_ + i)) {
916  delete cb;
917  return false;
918  }
919  }
920  delete cb;
921  } else {
922  if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_)
923  return false;
924  }
925  return true;
926 }
927 
928 // Writes a vector of simple types to the given file. Assumes that bitwise
929 // read/write of T will work. Returns false in case of error.
930 template <typename T>
931 bool GenericVector<T>::Serialize(FILE* fp) const {
932  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
933  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size())
934  return false;
935  return true;
936 }
937 template <typename T>
939  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
940  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
941  return true;
942 }
943 
944 // Reads a vector of simple types from the given file. Assumes that bitwise
945 // read/write will work with ReverseN according to sizeof(T).
946 // Returns false in case of error.
947 // If swap is true, assumes a big/little-endian swap is needed.
948 template <typename T>
949 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
950  inT32 reserved;
951  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
952  if (swap) Reverse32(&reserved);
953  reserve(reserved);
954  size_used_ = reserved;
955  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false;
956  if (swap) {
957  for (int i = 0; i < size_used_; ++i)
958  ReverseN(&data_[i], sizeof(data_[i]));
959  }
960  return true;
961 }
962 template <typename T>
964  inT32 reserved;
965  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
966  reserve(reserved);
967  size_used_ = reserved;
968  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
969 }
970 template <typename T>
972  inT32 reserved;
973  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
974  return fp->FRead(NULL, sizeof(T), reserved) == reserved;
975 }
976 
977 // Writes a vector of classes to the given file. Assumes the existence of
978 // bool T::Serialize(FILE* fp) const that returns false in case of error.
979 // Returns false in case of error.
980 template <typename T>
982  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
983  for (int i = 0; i < size_used_; ++i) {
984  if (!data_[i].Serialize(fp)) return false;
985  }
986  return true;
987 }
988 template <typename T>
990  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
991  for (int i = 0; i < size_used_; ++i) {
992  if (!data_[i].Serialize(fp)) return false;
993  }
994  return true;
995 }
996 
997 // Reads a vector of classes from the given file. Assumes the existence of
998 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
999 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
1000 // this function. Returns false in case of error.
1001 // If swap is true, assumes a big/little-endian swap is needed.
1002 template <typename T>
1003 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
1004  inT32 reserved;
1005  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
1006  if (swap) Reverse32(&reserved);
1007  T empty;
1008  init_to_size(reserved, empty);
1009  for (int i = 0; i < reserved; ++i) {
1010  if (!data_[i].DeSerialize(swap, fp)) return false;
1011  }
1012  return true;
1013 }
1014 template <typename T>
1016  inT32 reserved;
1017  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
1018  T empty;
1019  init_to_size(reserved, empty);
1020  for (int i = 0; i < reserved; ++i) {
1021  if (!data_[i].DeSerialize(fp)) return false;
1022  }
1023  return true;
1024 }
1025 template <typename T>
1027  inT32 reserved;
1028  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
1029  for (int i = 0; i < reserved; ++i) {
1030  if (!T::SkipDeSerialize(fp)) return false;
1031  }
1032  return true;
1033 }
1034 
1035 // This method clear the current object, then, does a shallow copy of
1036 // its argument, and finally invalidates its argument.
1037 template <typename T>
1039  this->clear();
1040  this->data_ = from->data_;
1041  this->size_reserved_ = from->size_reserved_;
1042  this->size_used_ = from->size_used_;
1043  this->compare_cb_ = from->compare_cb_;
1044  this->clear_cb_ = from->clear_cb_;
1045  from->data_ = NULL;
1046  from->clear_cb_ = NULL;
1047  from->compare_cb_ = NULL;
1048  from->size_used_ = 0;
1049  from->size_reserved_ = 0;
1050 }
1051 
1052 template <typename T>
1054  sort(&tesseract::sort_cmp<T>);
1055 }
1056 
1057 // Internal recursive version of choose_nth_item.
1058 // The algorithm used comes from "Algorithms" by Sedgewick:
1059 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1060 // The principle is to choose a random pivot, and move everything less than
1061 // the pivot to its left, and everything greater than the pivot to the end
1062 // of the array, then recurse on the part that contains the desired index, or
1063 // just return the answer if it is in the equal section in the middle.
1064 // The random pivot guarantees average linear time for the same reason that
1065 // n times vector::push_back takes linear time on average.
1066 // target_index, start and and end are all indices into the full array.
1067 // Seed is a seed for rand_r for thread safety purposes. Its value is
1068 // unimportant as the random numbers do not affect the result except
1069 // between equal answers.
1070 template <typename T>
1071 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1072  unsigned int* seed) {
1073  // Number of elements to process.
1074  int num_elements = end - start;
1075  // Trivial cases.
1076  if (num_elements <= 1)
1077  return start;
1078  if (num_elements == 2) {
1079  if (data_[start] < data_[start + 1]) {
1080  return target_index > start ? start + 1 : start;
1081  } else {
1082  return target_index > start ? start : start + 1;
1083  }
1084  }
1085  // Place the pivot at start.
1086  #ifndef rand_r // _MSC_VER, ANDROID
1087  srand(*seed);
1088  #define rand_r(seed) rand()
1089  #endif // _MSC_VER
1090  int pivot = rand_r(seed) % num_elements + start;
1091  swap(pivot, start);
1092  // The invariant condition here is that items [start, next_lesser) are less
1093  // than the pivot (which is at index next_lesser) and items
1094  // [prev_greater, end) are greater than the pivot, with items
1095  // [next_lesser, prev_greater) being equal to the pivot.
1096  int next_lesser = start;
1097  int prev_greater = end;
1098  for (int next_sample = start + 1; next_sample < prev_greater;) {
1099  if (data_[next_sample] < data_[next_lesser]) {
1100  swap(next_lesser++, next_sample++);
1101  } else if (data_[next_sample] == data_[next_lesser]) {
1102  ++next_sample;
1103  } else {
1104  swap(--prev_greater, next_sample);
1105  }
1106  }
1107  // Now the invariant is set up, we recurse on just the section that contains
1108  // the desired index.
1109  if (target_index < next_lesser)
1110  return choose_nth_item(target_index, start, next_lesser, seed);
1111  else if (target_index < prev_greater)
1112  return next_lesser; // In equal bracket.
1113  else
1114  return choose_nth_item(target_index, prev_greater, end, seed);
1115 }
1116 
1117 
1118 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
void reserve(int size)
void set_compare_callback(TessResultCallback2< bool, T const &, T const &> *cb)
int sort_ptr_cmp(const void *t1, const void *t2)
bool empty() const
Definition: genericvector.h:91
#define MIN(x, y)
Definition: ndminx.h:28
static bool DeSerializeSize(TFile *fp, inT32 *size)
PointerVector(const PointerVector &other)
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:148
inT32 size_reserved_
bool cmp_eq(T const &t1, T const &t2)
bool DeSerializeElement(TFile *fp)
void compact(TessResultCallback1< bool, int > *delete_cb)
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
virtual R Run(A1, A2)=0
int sort_cmp(const void *t1, const void *t2)
void remove(int index)
void Reverse32(void *ptr)
Definition: helpers.h:200
static bool SkipDeSerialize(tesseract::TFile *fp)
bool DeSerialize(bool swap, FILE *fp)
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
void set(T t, int index)
void resize_no_init(int size)
Definition: genericvector.h:66
T & back() const
TessCallback1< T > * clear_cb_
int size() const
Definition: genericvector.h:72
bool LoadDataFromFile(const STRING &filename, GenericVector< char > *data)
void double_the_size()
static const int kDefaultVectorSize
bool read(tesseract::TFile *f, TessResultCallback2< bool, tesseract::TFile *, T *> *cb)
GenericVector< T > & operator=(const GenericVector &other)
void delete_data_pointers()
#define rand_r(seed)
void truncate(int size)
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
TessResultCallback2< bool, T const &, T const & > * compare_cb_
int8_t inT8
Definition: host.h:34
int size_reserved() const
Definition: genericvector.h:82
int get_index(T object) const
bool Serialize(FILE *fp) const
int choose_nth_item(int target_index)
const char * string() const
Definition: strngs.cpp:198
int push_back(T object)
bool DeSerialize(TFile *fp)
bool WithinBounds(const T &rangemin, const T &rangemax) const
void move(GenericVector< T > *from)
PointerVector< T > & operator+=(const PointerVector &other)
virtual R Run(A1)=0
bool DeSerializeClasses(bool swap, FILE *fp)
bool Serialize(FILE *fp) const
static bool DeSerializeSkip(TFile *fp)
int length() const
Definition: genericvector.h:86
void compact(TessResultCallback1< bool, const T *> *delete_cb)
Definition: strngs.h:45
int binary_search(const T &target) const
bool Serialize(TFile *fp) const
size_t unsigned_size() const
Definition: genericvector.h:76
int32_t inT32
Definition: host.h:38
virtual void Run(A1)=0
bool bool_binary_search(const T &target) const
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
GenericVectorEqEq(int size)
bool DeSerialize(bool swap, FILE *fp)
T dot_product(const GenericVector< T > &other) const
T contains_index(int index) const
GenericVector(int size, T init_val)
Definition: genericvector.h:43
void sort(int(*comparator)(const void *, const void *))
int FReadEndian(void *buffer, int size, int count)
Definition: serialis.cpp:97
int push_back_new(T object)
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:184
void set_clear_callback(TessCallback1< T > *cb)
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const &> *cb) const
PointerVector< T > & operator=(const PointerVector &other)
void insert(T t, int index)
void compact_sorted()
T & operator[](int index) const
void swap(int index1, int index2)
T & get(int index) const
int push_front(T object)
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
GenericVector< T > & operator+=(const GenericVector &other)
bool SerializeClasses(FILE *fp) const
void init_to_size(int size, T t)
GenericVector(const GenericVector &other)
Definition: genericvector.h:49
static T * double_the_size_memcpy(int current_size, T *data)
bool LoadFileLinesToStrings(const STRING &filename, GenericVector< STRING > *lines)
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:286
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:108
void init(int size)
bool contains(T object) const
void Swap(T *p1, T *p2)
Definition: helpers.h:97