tesseract  4.00.00dev
resultiterator.h
Go to the documentation of this file.
1 // File: resultiterator.h
3 // Description: Iterator for tesseract results that is capable of
4 // iterating in proper reading order over Bi Directional
5 // (e.g. mixed Hebrew and English) text.
6 // Author: David Eger
7 // Created: Fri May 27 13:58:06 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
23 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
24 
25 #include "platform.h"
26 #include "ltrresultiterator.h"
27 
28 template <typename T> class GenericVector;
29 template <typename T> class GenericVectorEqEq;
30 class BLOB_CHOICE_IT;
31 class WERD_RES;
32 class STRING;
33 
34 namespace tesseract {
35 
36 class Tesseract;
37 
39  public:
40  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
41 
46  virtual ~ResultIterator() {}
47 
48  // ============= Moving around within the page ============.
53  virtual void Begin();
54 
67  virtual bool Next(PageIteratorLevel level);
68 
75  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
76 
82  virtual bool IsAtFinalElement(PageIteratorLevel level,
83  PageIteratorLevel element) const;
84 
85  // ============= Functions that refer to words only ============.
86  // Returns the number of blanks before the current word.
87  int BlanksBeforeWord() const;
88 
89  // ============= Accessing data ==============.
90 
95  virtual char* GetUTF8Text(PageIteratorLevel level) const;
96 
101  bool ParagraphIsLtr() const;
102 
103  // ============= Exposed only for testing =============.
104 
127  static void CalculateTextlineOrder(
128  bool paragraph_is_ltr,
129  const GenericVector<StrongScriptDirection> &word_dirs,
130  GenericVectorEqEq<int> *reading_order);
131 
132  static const int kMinorRunStart;
133  static const int kMinorRunEnd;
134  static const int kComplexWord;
135 
136  protected:
143  TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);
144 
145  private:
150  bool CurrentParagraphIsLtr() const;
151 
163  void CalculateTextlineOrder(bool paragraph_is_ltr,
164  const LTRResultIterator &resit,
165  GenericVectorEqEq<int> *indices) const;
167  void CalculateTextlineOrder(bool paragraph_is_ltr,
168  const LTRResultIterator &resit,
170  GenericVectorEqEq<int> *indices) const;
171 
176  int LTRWordIndex() const;
177 
182  void CalculateBlobOrder(GenericVector<int> *blob_indices) const;
183 
185  void MoveToLogicalStartOfTextline();
186 
191  void MoveToLogicalStartOfWord();
192 
194  bool IsAtFinalSymbolOfWord() const;
195 
197  bool IsAtFirstSymbolOfWord() const;
198 
203  void AppendSuffixMarks(STRING *text) const;
204 
206  void AppendUTF8WordText(STRING *text) const;
207 
215  void IterateAndAppendUTF8TextlineText(STRING *text);
216 
223  void AppendUTF8ParagraphText(STRING *text) const;
224 
226  bool BidiDebug(int min_level) const;
227 
228  bool current_paragraph_is_ltr_;
229 
234  bool at_beginning_of_minor_run_;
235 
237  bool in_minor_direction_;
238 
243  bool preserve_interword_spaces_;
244 };
245 
246 } // namespace tesseract.
247 
248 #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESS_LOCAL
Definition: platform.h:88
Definition: strngs.h:45
static const int kMinorRunEnd
#define TESS_API
Definition: platform.h:87
static const int kMinorRunStart
static const int kComplexWord