All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 // Created: Mon Jun 23 11:26:43 PDT 2008
6 //
7 // (C) Copyright 2007, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 //
20 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
21 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include "tesscallback.h"
28 #include "errcode.h"
29 #include "helpers.h"
30 #include "ndminx.h"
31 #include "serialis.h"
32 #include "strngs.h"
33 
34 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
35 // provides automatic deletion of pointers, [De]Serialize that works, and
36 // sort that works.
37 template <typename T>
38 class GenericVector {
39  public:
42  }
43  GenericVector(int size, T init_val) {
44  init(size);
45  init_to_size(size, init_val);
46  }
47 
48  // Copy
49  GenericVector(const GenericVector& other) {
50  this->init(other.size());
51  this->operator+=(other);
52  }
55 
57 
58  // Reserve some memory.
59  void reserve(int size);
60  // Double the size of the internal array.
61  void double_the_size();
62 
63  // Resizes to size and sets all values to t.
64  void init_to_size(int size, T t);
65  // Resizes to size without any initialization.
66  void resize_no_init(int size) {
67  reserve(size);
68  size_used_ = size;
69  }
70 
71  // Return the size used.
72  int size() const {
73  return size_used_;
74  }
75  int size_reserved() const {
76  return size_reserved_;
77  }
78 
79  int length() const {
80  return size_used_;
81  }
82 
83  // Return true if empty.
84  bool empty() const {
85  return size_used_ == 0;
86  }
87 
88  // Return the object from an index.
89  T &get(int index) const;
90  T &back() const;
91  T &operator[](int index) const;
92  // Returns the last object and removes it.
93  T pop_back();
94 
95  // Return the index of the T object.
96  // This method NEEDS a compare_callback to be passed to
97  // set_compare_callback.
98  int get_index(T object) const;
99 
100  // Return true if T is in the array
101  bool contains(T object) const;
102 
103  // Return true if the index is valid
104  T contains_index(int index) const;
105 
106  // Push an element in the end of the array
107  int push_back(T object);
108  void operator+=(T t);
109 
110  // Push an element in the end of the array if the same
111  // element is not already contained in the array.
112  int push_back_new(T object);
113 
114  // Push an element in the front of the array
115  // Note: This function is O(n)
116  int push_front(T object);
117 
118  // Set the value at the given index
119  void set(T t, int index);
120 
121  // Insert t at the given index, push other elements to the right.
122  void insert(T t, int index);
123 
124  // Removes an element at the given index and
125  // shifts the remaining elements to the left.
126  void remove(int index);
127 
128  // Truncates the array to the given size by removing the end.
129  // If the current size is less, the array is not expanded.
130  void truncate(int size) {
131  if (size < size_used_)
132  size_used_ = size;
133  }
134 
135  // Add a callback to be called to delete the elements when the array took
136  // their ownership.
138 
139  // Add a callback to be called to compare the elements when needed (contains,
140  // get_id, ...)
142 
143  // Clear the array, calling the clear callback function if any.
144  // All the owned callbacks are also deleted.
145  // If you don't want the callbacks to be deleted, before calling clear, set
146  // the callback to NULL.
147  void clear();
148 
149  // Delete objects pointed to by data_[i]
150  void delete_data_pointers();
151 
152  // This method clears the current object, then, does a shallow copy of
153  // its argument, and finally invalidates its argument.
154  // Callbacks are moved to the current object;
155  void move(GenericVector<T>* from);
156 
157  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
158  // The callback given must be permanent since they will be called more than
159  // once. The given callback will be deleted at the end.
160  // If the callbacks are NULL, then the data is simply read/written using
161  // fread (and swapping)/fwrite.
162  // Returns false on error or if the callback returns false.
163  // DEPRECATED. Use [De]Serialize[Classes] instead.
164  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
165  bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
166  // Writes a vector of simple types to the given file. Assumes that bitwise
167  // read/write of T will work. Returns false in case of error.
168  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
169  bool Serialize(FILE* fp) const;
170  bool Serialize(tesseract::TFile* fp) const;
171  // Reads a vector of simple types from the given file. Assumes that bitwise
172  // read/write will work with ReverseN according to sizeof(T).
173  // Returns false in case of error.
174  // If swap is true, assumes a big/little-endian swap is needed.
175  bool DeSerialize(bool swap, FILE* fp);
176  bool DeSerialize(bool swap, tesseract::TFile* fp);
177  // Writes a vector of classes to the given file. Assumes the existence of
178  // bool T::Serialize(FILE* fp) const that returns false in case of error.
179  // Returns false in case of error.
180  bool SerializeClasses(FILE* fp) const;
181  bool SerializeClasses(tesseract::TFile* fp) const;
182  // Reads a vector of classes from the given file. Assumes the existence of
183  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
184  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
185  // this function. Returns false in case of error.
186  // If swap is true, assumes a big/little-endian swap is needed.
187  bool DeSerializeClasses(bool swap, FILE* fp);
188  bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
189 
190  // Allocates a new array of double the current_size, copies over the
191  // information from data to the new location, deletes data and returns
192  // the pointed to the new larger array.
193  // This function uses memcpy to copy the data, instead of invoking
194  // operator=() for each element like double_the_size() does.
195  static T *double_the_size_memcpy(int current_size, T *data) {
196  T *data_new = new T[current_size * 2];
197  memcpy(data_new, data, sizeof(T) * current_size);
198  delete[] data;
199  return data_new;
200  }
201 
202  // Reverses the elements of the vector.
203  void reverse() {
204  for (int i = 0; i < size_used_ / 2; ++i)
205  Swap(&data_[i], &data_[size_used_ - 1 - i]);
206  }
207 
208  // Sorts the members of this vector using the less than comparator (cmp_lt),
209  // which compares the values. Useful for GenericVectors to primitive types.
210  // Will not work so great for pointers (unless you just want to sort some
211  // pointers). You need to provide a specialization to sort_cmp to use
212  // your type.
213  void sort();
214 
215  // Sort the array into the order defined by the qsort function comparator.
216  // The comparator function is as defined by qsort, ie. it receives pointers
217  // to two Ts and returns negative if the first element is to appear earlier
218  // in the result and positive if it is to appear later, with 0 for equal.
219  void sort(int (*comparator)(const void*, const void*)) {
220  qsort(data_, size_used_, sizeof(*data_), comparator);
221  }
222 
223  // Searches the array (assuming sorted in ascending order, using sort()) for
224  // an element equal to target and returns true if it is present.
225  // Use binary_search to get the index of target, or its nearest candidate.
226  bool bool_binary_search(const T& target) const {
227  int index = binary_search(target);
228  if (index >= size_used_)
229  return false;
230  return data_[index] == target;
231  }
232  // Searches the array (assuming sorted in ascending order, using sort()) for
233  // an element equal to target and returns the index of the best candidate.
234  // The return value is conceptually the largest index i such that
235  // data_[i] <= target or 0 if target < the whole vector.
236  // NOTE that this function uses operator> so really the return value is
237  // the largest index i such that data_[i] > target is false.
238  int binary_search(const T& target) const {
239  int bottom = 0;
240  int top = size_used_;
241  do {
242  int middle = (bottom + top) / 2;
243  if (data_[middle] > target)
244  top = middle;
245  else
246  bottom = middle;
247  }
248  while (top - bottom > 1);
249  return bottom;
250  }
251 
252  // Compact the vector by deleting elements using operator!= on basic types.
253  // The vector must be sorted.
254  void compact_sorted() {
255  if (size_used_ == 0)
256  return;
257 
258  // First element is in no matter what, hence the i = 1.
259  int last_write = 0;
260  for (int i = 1; i < size_used_; ++i) {
261  // Finds next unique item and writes it.
262  if (data_[last_write] != data_[i])
263  data_[++last_write] = data_[i];
264  }
265  // last_write is the index of a valid data cell, so add 1.
266  size_used_ = last_write + 1;
267  }
268 
269  // Compact the vector by deleting elements for which delete_cb returns
270  // true. delete_cb is a permanent callback and will be deleted.
272  int new_size = 0;
273  int old_index = 0;
274  // Until the callback returns true, the elements stay the same.
275  while (old_index < size_used_ && !delete_cb->Run(old_index++))
276  ++new_size;
277  // Now just copy anything else that gets false from delete_cb.
278  for (; old_index < size_used_; ++old_index) {
279  if (!delete_cb->Run(old_index)) {
280  data_[new_size++] = data_[old_index];
281  }
282  }
283  size_used_ = new_size;
284  delete delete_cb;
285  }
286 
287  T dot_product(const GenericVector<T>& other) const {
288  T result = static_cast<T>(0);
289  for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i)
290  result += data_[i] * other.data_[i];
291  return result;
292  }
293 
294  // Returns the index of what would be the target_index_th item in the array
295  // if the members were sorted, without actually sorting. Members are
296  // shuffled around, but it takes O(n) time.
297  // NOTE: uses operator< and operator== on the members.
298  int choose_nth_item(int target_index) {
299  // Make sure target_index is legal.
300  if (target_index < 0)
301  target_index = 0; // ensure legal
302  else if (target_index >= size_used_)
303  target_index = size_used_ - 1;
304  unsigned int seed = 1;
305  return choose_nth_item(target_index, 0, size_used_, &seed);
306  }
307 
308  // Swaps the elements with the given indices.
309  void swap(int index1, int index2) {
310  if (index1 != index2) {
311  T tmp = data_[index1];
312  data_[index1] = data_[index2];
313  data_[index2] = tmp;
314  }
315  }
316  // Returns true if all elements of *this are within the given range.
317  // Only uses operator<
318  bool WithinBounds(const T& rangemin, const T& rangemax) const {
319  for (int i = 0; i < size_used_; ++i) {
320  if (data_[i] < rangemin || rangemax < data_[i])
321  return false;
322  }
323  return true;
324  }
325 
326  protected:
327  // Internal recursive version of choose_nth_item.
328  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
329 
330  // Init the object, allocating size memory.
331  void init(int size);
332 
333  // We are assuming that the object generally placed in thie
334  // vector are small enough that for efficiency it makes sence
335  // to start with a larger initial size.
336  static const int kDefaultVectorSize = 4;
339  T* data_;
341  // Mutable because Run method is not const
343 };
344 
345 namespace tesseract {
346 
347 // Function to read a GenericVector<char> from a whole file.
348 // Returns false on failure.
349 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
350 // Function to write a GenericVector<char> to a whole file.
351 // Returns false on failure.
352 typedef bool (*FileWriter)(const GenericVector<char>& data,
353  const STRING& filename);
354 // The default FileReader loads the whole file into the vector of char,
355 // returning false on error.
356 inline bool LoadDataFromFile(const STRING& filename,
357  GenericVector<char>* data) {
358  FILE* fp = fopen(filename.string(), "rb");
359  if (fp == NULL) return false;
360  fseek(fp, 0, SEEK_END);
361  size_t size = ftell(fp);
362  fseek(fp, 0, SEEK_SET);
363  // Pad with a 0, just in case we treat the result as a string.
364  data->init_to_size(size + 1, 0);
365  bool result = fread(&(*data)[0], 1, size, fp) == size;
366  fclose(fp);
367  return result;
368 }
369 // The default FileWriter writes the vector of char to the filename file,
370 // returning false on error.
371 inline bool SaveDataToFile(const GenericVector<char>& data,
372  const STRING& filename) {
373  FILE* fp = fopen(filename.string(), "wb");
374  if (fp == NULL) return false;
375  bool result =
376  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
377  fclose(fp);
378  return result;
379 }
380 
381 template <typename T>
382 bool cmp_eq(T const & t1, T const & t2) {
383  return t1 == t2;
384 }
385 
386 // Used by sort()
387 // return < 0 if t1 < t2
388 // return 0 if t1 == t2
389 // return > 0 if t1 > t2
390 template <typename T>
391 int sort_cmp(const void* t1, const void* t2) {
392  const T* a = static_cast<const T *> (t1);
393  const T* b = static_cast<const T *> (t2);
394  if (*a < *b) {
395  return -1;
396  } else if (*b < *a) {
397  return 1;
398  } else {
399  return 0;
400  }
401 }
402 
403 // Used by PointerVector::sort()
404 // return < 0 if t1 < t2
405 // return 0 if t1 == t2
406 // return > 0 if t1 > t2
407 template <typename T>
408 int sort_ptr_cmp(const void* t1, const void* t2) {
409  const T* a = *reinterpret_cast<T * const *>(t1);
410  const T* b = *reinterpret_cast<T * const *>(t2);
411  if (*a < *b) {
412  return -1;
413  } else if (*b < *a) {
414  return 1;
415  } else {
416  return 0;
417  }
418 }
419 
420 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
421 // as it provides automatic deletion and correct serialization, with the
422 // corollary that all copy operations are deep copies of the pointed-to objects.
423 template<typename T>
424 class PointerVector : public GenericVector<T*> {
425  public:
427  explicit PointerVector(int size) : GenericVector<T*>(size) { }
429  // Clear must be called here, even though it is called again by the base,
430  // as the base will call the wrong clear.
431  clear();
432  }
433  // Copy must be deep, as the pointers will be automatically deleted on
434  // destruction.
435  PointerVector(const PointerVector& other) {
436  this->init(other.size());
437  this->operator+=(other);
438  }
440  this->reserve(this->size_used_ + other.size_used_);
441  for (int i = 0; i < other.size(); ++i) {
442  this->push_back(new T(*other.data_[i]));
443  }
444  return *this;
445  }
446 
448  if (&other != this) {
449  this->truncate(0);
450  this->operator+=(other);
451  }
452  return *this;
453  }
454 
455  // Removes an element at the given index and
456  // shifts the remaining elements to the left.
457  void remove(int index) {
458  delete GenericVector<T*>::data_[index];
460  }
461 
462  // Truncates the array to the given size by removing the end.
463  // If the current size is less, the array is not expanded.
464  void truncate(int size) {
465  for (int i = size; i < GenericVector<T*>::size_used_; ++i)
466  delete GenericVector<T*>::data_[i];
468  }
469 
470  // Compact the vector by deleting elements for which delete_cb returns
471  // true. delete_cb is a permanent callback and will be deleted.
473  int new_size = 0;
474  int old_index = 0;
475  // Until the callback returns true, the elements stay the same.
476  while (old_index < GenericVector<T*>::size_used_ &&
477  !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
478  ++new_size;
479  // Now just copy anything else that gets false from delete_cb.
480  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
481  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
482  GenericVector<T*>::data_[new_size++] =
483  GenericVector<T*>::data_[old_index];
484  } else {
485  delete GenericVector<T*>::data_[old_index];
486  }
487  }
489  delete delete_cb;
490  }
491 
492  // Clear the array, calling the clear callback function if any.
493  // All the owned callbacks are also deleted.
494  // If you don't want the callbacks to be deleted, before calling clear, set
495  // the callback to NULL.
496  void clear() {
499  }
500 
501  // Writes a vector of (pointers to) classes to the given file. Assumes the
502  // existence of bool T::Serialize(FILE*) const that returns false in case of
503  // error. There is no Serialize for simple types, as you would have a
504  // normal GenericVector of those.
505  // Returns false in case of error.
506  bool Serialize(FILE* fp) const {
508  if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
509  for (int i = 0; i < used; ++i) {
510  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
511  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
512  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
513  }
514  return true;
515  }
516  bool Serialize(TFile* fp) const {
518  if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
519  for (int i = 0; i < used; ++i) {
520  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
521  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
522  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
523  }
524  return true;
525  }
526  // Reads a vector of (pointers to) classes to the given file. Assumes the
527  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
528  // case of error. There is no Serialize for simple types, as you would have a
529  // normal GenericVector of those.
530  // If swap is true, assumes a big/little-endian swap is needed.
531  // Also needs T::T(), as new T is used in this function.
532  // Returns false in case of error.
533  bool DeSerialize(bool swap, FILE* fp) {
534  inT32 reserved;
535  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
536  if (swap) Reverse32(&reserved);
537  GenericVector<T*>::reserve(reserved);
538  truncate(0);
539  for (int i = 0; i < reserved; ++i) {
540  inT8 non_null;
541  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
542  T* item = NULL;
543  if (non_null) {
544  item = new T;
545  if (!item->DeSerialize(swap, fp)) {
546  delete item;
547  return false;
548  }
549  this->push_back(item);
550  } else {
551  // Null elements should keep their place in the vector.
552  this->push_back(NULL);
553  }
554  }
555  return true;
556  }
557  bool DeSerialize(bool swap, TFile* fp) {
558  inT32 reserved;
559  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
560  if (swap) Reverse32(&reserved);
561  GenericVector<T*>::reserve(reserved);
562  truncate(0);
563  for (int i = 0; i < reserved; ++i) {
564  inT8 non_null;
565  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
566  T* item = NULL;
567  if (non_null) {
568  item = new T;
569  if (!item->DeSerialize(swap, fp)) {
570  delete item;
571  return false;
572  }
573  this->push_back(item);
574  } else {
575  // Null elements should keep their place in the vector.
576  this->push_back(NULL);
577  }
578  }
579  return true;
580  }
581 
582  // Sorts the items pointed to by the members of this vector using
583  // t::operator<().
584  void sort() {
585  sort(&sort_ptr_cmp<T>);
586  }
587 };
588 
589 } // namespace tesseract
590 
591 // A useful vector that uses operator== to do comparisons.
592 template <typename T>
593 class GenericVectorEqEq : public GenericVector<T> {
594  public:
597  NewPermanentTessCallback(tesseract::cmp_eq<T>));
598  }
601  NewPermanentTessCallback(tesseract::cmp_eq<T>));
602  }
603 };
604 
605 template <typename T>
606 void GenericVector<T>::init(int size) {
607  size_used_ = 0;
608  size_reserved_ = 0;
609  data_ = 0;
610  clear_cb_ = 0;
611  compare_cb_ = 0;
612  reserve(size);
613 }
614 
615 template <typename T>
617  clear();
618 }
619 
620 // Reserve some memory. If the internal array contains elements, they are
621 // copied.
622 template <typename T>
624  if (size_reserved_ >= size || size <= 0)
625  return;
626  T* new_array = new T[size];
627  for (int i = 0; i < size_used_; ++i)
628  new_array[i] = data_[i];
629  if (data_ != NULL) delete[] data_;
630  data_ = new_array;
631  size_reserved_ = size;
632 }
633 
634 template <typename T>
636  if (size_reserved_ == 0) {
637  reserve(kDefaultVectorSize);
638  }
639  else {
640  reserve(2 * size_reserved_);
641  }
642 }
643 
644 // Resizes to size and sets all values to t.
645 template <typename T>
646 void GenericVector<T>::init_to_size(int size, T t) {
647  reserve(size);
648  size_used_ = size;
649  for (int i = 0; i < size; ++i)
650  data_[i] = t;
651 }
652 
653 
654 // Return the object from an index.
655 template <typename T>
656 T &GenericVector<T>::get(int index) const {
657  ASSERT_HOST(index >= 0 && index < size_used_);
658  return data_[index];
659 }
660 
661 template <typename T>
662 T &GenericVector<T>::operator[](int index) const {
663  assert(index >= 0 && index < size_used_);
664  return data_[index];
665 }
666 
667 template <typename T>
669  ASSERT_HOST(size_used_ > 0);
670  return data_[size_used_ - 1];
671 }
672 // Returns the last object and removes it.
673 template <typename T>
675  ASSERT_HOST(size_used_ > 0);
676  return data_[--size_used_];
677 }
678 
679 // Return the object from an index.
680 template <typename T>
681 void GenericVector<T>::set(T t, int index) {
682  ASSERT_HOST(index >= 0 && index < size_used_);
683  data_[index] = t;
684 }
685 
686 // Shifts the rest of the elements to the right to make
687 // space for the new elements and inserts the given element
688 // at the specified index.
689 template <typename T>
690 void GenericVector<T>::insert(T t, int index) {
691  ASSERT_HOST(index >= 0 && index <= size_used_);
692  if (size_reserved_ == size_used_)
693  double_the_size();
694  for (int i = size_used_; i > index; --i) {
695  data_[i] = data_[i-1];
696  }
697  data_[index] = t;
698  size_used_++;
699 }
700 
701 // Removes an element at the given index and
702 // shifts the remaining elements to the left.
703 template <typename T>
704 void GenericVector<T>::remove(int index) {
705  ASSERT_HOST(index >= 0 && index < size_used_);
706  for (int i = index; i < size_used_ - 1; ++i) {
707  data_[i] = data_[i+1];
708  }
709  size_used_--;
710 }
711 
712 // Return true if the index is valindex
713 template <typename T>
715  return index >= 0 && index < size_used_;
716 }
717 
718 // Return the index of the T object.
719 template <typename T>
720 int GenericVector<T>::get_index(T object) const {
721  for (int i = 0; i < size_used_; ++i) {
722  ASSERT_HOST(compare_cb_ != NULL);
723  if (compare_cb_->Run(object, data_[i]))
724  return i;
725  }
726  return -1;
727 }
728 
729 // Return true if T is in the array
730 template <typename T>
731 bool GenericVector<T>::contains(T object) const {
732  return get_index(object) != -1;
733 }
734 
735 // Add an element in the array
736 template <typename T>
738  int index = 0;
739  if (size_used_ == size_reserved_)
740  double_the_size();
741  index = size_used_++;
742  data_[index] = object;
743  return index;
744 }
745 
746 template <typename T>
748  int index = get_index(object);
749  if (index >= 0)
750  return index;
751  return push_back(object);
752 }
753 
754 // Add an element in the array (front)
755 template <typename T>
757  if (size_used_ == size_reserved_)
758  double_the_size();
759  for (int i = size_used_; i > 0; --i)
760  data_[i] = data_[i-1];
761  data_[0] = object;
762  ++size_used_;
763  return 0;
764 }
765 
766 template <typename T>
768  push_back(t);
769 }
770 
771 template <typename T>
773  this->reserve(size_used_ + other.size_used_);
774  for (int i = 0; i < other.size(); ++i) {
775  this->operator+=(other.data_[i]);
776  }
777  return *this;
778 }
779 
780 template <typename T>
782  if (&other != this) {
783  this->truncate(0);
784  this->operator+=(other);
785  }
786  return *this;
787 }
788 
789 // Add a callback to be called to delete the elements when the array took
790 // their ownership.
791 template <typename T>
793  clear_cb_ = cb;
794 }
795 
796 // Add a callback to be called to delete the elements when the array took
797 // their ownership.
798 template <typename T>
801  compare_cb_ = cb;
802 }
803 
804 // Clear the array, calling the callback function if any.
805 template <typename T>
807  if (size_reserved_ > 0) {
808  if (clear_cb_ != NULL)
809  for (int i = 0; i < size_used_; ++i)
810  clear_cb_->Run(data_[i]);
811  delete[] data_;
812  data_ = NULL;
813  size_used_ = 0;
814  size_reserved_ = 0;
815  }
816  if (clear_cb_ != NULL) {
817  delete clear_cb_;
818  clear_cb_ = NULL;
819  }
820  if (compare_cb_ != NULL) {
821  delete compare_cb_;
822  compare_cb_ = NULL;
823  }
824 }
825 
826 template <typename T>
828  for (int i = 0; i < size_used_; ++i)
829  if (data_[i]) {
830  delete data_[i];
831  }
832 }
833 
834 
835 template <typename T>
837  FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
838  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
839  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
840  if (cb != NULL) {
841  for (int i = 0; i < size_used_; ++i) {
842  if (!cb->Run(f, data_[i])) {
843  delete cb;
844  return false;
845  }
846  }
847  delete cb;
848  } else {
849  if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false;
850  }
851  return true;
852 }
853 
854 template <typename T>
857  bool swap) {
858  inT32 reserved;
859  if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false;
860  if (swap) Reverse32(&reserved);
861  reserve(reserved);
862  if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
863  if (swap) Reverse32(&size_used_);
864  if (cb != NULL) {
865  for (int i = 0; i < size_used_; ++i) {
866  if (!cb->Run(f, data_ + i, swap)) {
867  delete cb;
868  return false;
869  }
870  }
871  delete cb;
872  } else {
873  if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false;
874  if (swap) {
875  for (int i = 0; i < size_used_; ++i)
876  ReverseN(&data_[i], sizeof(T));
877  }
878  }
879  return true;
880 }
881 
882 // Writes a vector of simple types to the given file. Assumes that bitwise
883 // read/write of T will work. Returns false in case of error.
884 template <typename T>
885 bool GenericVector<T>::Serialize(FILE* fp) const {
886  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
887  if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false;
888  return true;
889 }
890 template <typename T>
892  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
893  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
894  return true;
895 }
896 
897 // Reads a vector of simple types from the given file. Assumes that bitwise
898 // read/write will work with ReverseN according to sizeof(T).
899 // Returns false in case of error.
900 // If swap is true, assumes a big/little-endian swap is needed.
901 template <typename T>
902 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
903  inT32 reserved;
904  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
905  if (swap) Reverse32(&reserved);
906  reserve(reserved);
907  size_used_ = reserved;
908  if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false;
909  if (swap) {
910  for (int i = 0; i < size_used_; ++i)
911  ReverseN(&data_[i], sizeof(data_[i]));
912  }
913  return true;
914 }
915 template <typename T>
917  inT32 reserved;
918  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
919  if (swap) Reverse32(&reserved);
920  reserve(reserved);
921  size_used_ = reserved;
922  if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
923  if (swap) {
924  for (int i = 0; i < size_used_; ++i)
925  ReverseN(&data_[i], sizeof(data_[i]));
926  }
927  return true;
928 }
929 
930 // Writes a vector of classes to the given file. Assumes the existence of
931 // bool T::Serialize(FILE* fp) const that returns false in case of error.
932 // Returns false in case of error.
933 template <typename T>
935  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
936  for (int i = 0; i < size_used_; ++i) {
937  if (!data_[i].Serialize(fp)) return false;
938  }
939  return true;
940 }
941 template <typename T>
943  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
944  for (int i = 0; i < size_used_; ++i) {
945  if (!data_[i].Serialize(fp)) return false;
946  }
947  return true;
948 }
949 
950 // Reads a vector of classes from the given file. Assumes the existence of
951 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
952 // error. Alse needs T::T() and T::T(constT&), as init_to_size is used in
953 // this function. Returns false in case of error.
954 // If swap is true, assumes a big/little-endian swap is needed.
955 template <typename T>
956 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
957  uinT32 reserved;
958  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
959  if (swap) Reverse32(&reserved);
960  T empty;
961  init_to_size(reserved, empty);
962  for (int i = 0; i < reserved; ++i) {
963  if (!data_[i].DeSerialize(swap, fp)) return false;
964  }
965  return true;
966 }
967 template <typename T>
969  uinT32 reserved;
970  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
971  if (swap) Reverse32(&reserved);
972  T empty;
973  init_to_size(reserved, empty);
974  for (int i = 0; i < reserved; ++i) {
975  if (!data_[i].DeSerialize(swap, fp)) return false;
976  }
977  return true;
978 }
979 
980 // This method clear the current object, then, does a shallow copy of
981 // its argument, and finally invalidates its argument.
982 template <typename T>
984  this->clear();
985  this->data_ = from->data_;
986  this->size_reserved_ = from->size_reserved_;
987  this->size_used_ = from->size_used_;
988  this->compare_cb_ = from->compare_cb_;
989  this->clear_cb_ = from->clear_cb_;
990  from->data_ = NULL;
991  from->clear_cb_ = NULL;
992  from->compare_cb_ = NULL;
993  from->size_used_ = 0;
994  from->size_reserved_ = 0;
995 }
996 
997 template <typename T>
999  sort(&tesseract::sort_cmp<T>);
1000 }
1001 
1002 // Internal recursive version of choose_nth_item.
1003 // The algorithm used comes from "Algorithms" by Sedgewick:
1004 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1005 // The principle is to choose a random pivot, and move everything less than
1006 // the pivot to its left, and everything greater than the pivot to the end
1007 // of the array, then recurse on the part that contains the desired index, or
1008 // just return the answer if it is in the equal section in the middle.
1009 // The random pivot guarantees average linear time for the same reason that
1010 // n times vector::push_back takes linear time on average.
1011 // target_index, start and and end are all indices into the full array.
1012 // Seed is a seed for rand_r for thread safety purposes. Its value is
1013 // unimportant as the random numbers do not affect the result except
1014 // between equal answers.
1015 template <typename T>
1016 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1017  unsigned int* seed) {
1018  // Number of elements to process.
1019  int num_elements = end - start;
1020  // Trivial cases.
1021  if (num_elements <= 1)
1022  return start;
1023  if (num_elements == 2) {
1024  if (data_[start] < data_[start + 1]) {
1025  return target_index > start ? start + 1 : start;
1026  } else {
1027  return target_index > start ? start : start + 1;
1028  }
1029  }
1030  // Place the pivot at start.
1031  #ifndef rand_r // _MSC_VER, ANDROID
1032  srand(*seed);
1033  #define rand_r(seed) rand()
1034  #endif // _MSC_VER
1035  int pivot = rand_r(seed) % num_elements + start;
1036  swap(pivot, start);
1037  // The invariant condition here is that items [start, next_lesser) are less
1038  // than the pivot (which is at index next_lesser) and items
1039  // [prev_greater, end) are greater than the pivot, with items
1040  // [next_lesser, prev_greater) being equal to the pivot.
1041  int next_lesser = start;
1042  int prev_greater = end;
1043  for (int next_sample = start + 1; next_sample < prev_greater;) {
1044  if (data_[next_sample] < data_[next_lesser]) {
1045  swap(next_lesser++, next_sample++);
1046  } else if (data_[next_sample] == data_[next_lesser]) {
1047  ++next_sample;
1048  } else {
1049  swap(--prev_greater, next_sample);
1050  }
1051  }
1052  // Now the invariant is set up, we recurse on just the section that contains
1053  // the desired index.
1054  if (target_index < next_lesser)
1055  return choose_nth_item(target_index, start, next_lesser, seed);
1056  else if (target_index < prev_greater)
1057  return next_lesser; // In equal bracket.
1058  else
1059  return choose_nth_item(target_index, prev_greater, end, seed);
1060 }
1061 
1062 
1063 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
bool WithinBounds(const T &rangemin, const T &rangemax) const
int sort_cmp(const void *t1, const void *t2)
GenericVector< T > & operator+=(const GenericVector &other)
virtual R Run(A1, A2)=0
T & operator[](int index) const
int size() const
Definition: genericvector.h:72
void truncate(int size)
bool DeSerializeClasses(bool swap, FILE *fp)
int length() const
Definition: genericvector.h:79
static T * double_the_size_memcpy(int current_size, T *data)
int push_back(T object)
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
GenericVectorEqEq(int size)
int sort_ptr_cmp(const void *t1, const void *t2)
#define MIN(x, y)
Definition: ndminx.h:28
PointerVector< T > & operator=(const PointerVector &other)
T & back() const
T dot_product(const GenericVector< T > &other) const
void Reverse32(void *ptr)
Definition: helpers.h:193
void set_compare_callback(TessResultCallback2< bool, T const &, T const & > *cb)
PointerVector< T > & operator+=(const PointerVector &other)
void resize_no_init(int size)
Definition: genericvector.h:66
bool read(FILE *f, TessResultCallback3< bool, FILE *, T *, bool > *cb, bool swap)
int size_reserved() const
Definition: genericvector.h:75
bool bool_binary_search(const T &target) const
bool Serialize(FILE *fp) const
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
bool cmp_eq(T const &t1, T const &t2)
void init(int size)
#define ASSERT_HOST(x)
Definition: errcode.h:84
void set(T t, int index)
GenericVector< T > & operator=(const GenericVector &other)
void sort(int(*comparator)(const void *, const void *))
void compact_sorted()
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void compact(TessResultCallback1< bool, int > *delete_cb)
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:642
inT32 size_reserved_
bool SerializeClasses(FILE *fp) const
void insert(T t, int index)
unsigned int uinT32
Definition: host.h:103
bool LoadDataFromFile(const STRING &filename, GenericVector< char > *data)
static const int kDefaultVectorSize
void delete_data_pointers()
GenericVector(int size, T init_val)
Definition: genericvector.h:43
int choose_nth_item(int target_index)
void init_to_size(int size, T t)
bool DeSerialize(bool swap, FILE *fp)
int binary_search(const T &target) const
T contains_index(int index) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool Serialize(TFile *fp) const
TessCallback1< T > * clear_cb_
#define rand_r(seed)
virtual R Run(A1)=0
bool contains(T object) const
int push_back_new(T object)
bool empty() const
Definition: genericvector.h:84
void swap(int index1, int index2)
void compact(TessResultCallback1< bool, const T * > *delete_cb)
void remove(int index)
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:177
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:131
bool DeSerialize(bool swap, TFile *fp)
void reserve(int size)
void double_the_size()
bool Serialize(FILE *fp) const
PointerVector(const PointerVector &other)
GenericVector(const GenericVector &other)
Definition: genericvector.h:49
Definition: strngs.h:44
void move(GenericVector< T > *from)
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const & > *cb) const
TessResultCallback2< bool, T const &, T const & > * compare_cb_
#define NULL
Definition: host.h:144
bool DeSerialize(bool swap, FILE *fp)
SIGNED char inT8
Definition: host.h:98
void Swap(T *p1, T *p2)
Definition: helpers.h:90
const char * string() const
Definition: strngs.cpp:193
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:86
int get_index(T object) const
T & get(int index) const
void set_clear_callback(TessCallback1< T > *cb)
int push_front(T object)
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:91
int inT32
Definition: host.h:102
virtual R Run(A1, A2, A3)=0