tesseract v5.3.3.20231005
resultiterator.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// File: resultiterator.h
3// Description: Iterator for tesseract results that is capable of
4// iterating in proper reading order over Bi Directional
5// (e.g. mixed Hebrew and English) text.
6// Author: David Eger
7//
8// (C) Copyright 2011, Google Inc.
9// Licensed under the Apache License, Version 2.0 (the "License");
10// you may not use this file except in compliance with the License.
11// You may obtain a copy of the License at
12// http://www.apache.org/licenses/LICENSE-2.0
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18
19#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
20#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
21
22#include "export.h" // for TESS_API, TESS_LOCAL
23#include "ltrresultiterator.h" // for LTRResultIterator
24#include "publictypes.h" // for PageIteratorLevel
25#include "unichar.h" // for StrongScriptDirection
26
27#include <set> // for std::pair (canonically declared in <utility>; only available transitively via <set>)
28#include <vector> // for std::vector
29
30namespace tesseract {
31
33public:
// Static factory: takes an existing LTRResultIterator by const reference and
// returns a pointer to a ResultIterator.
// NOTE(review): the name suggests the returned iterator is positioned at the
// start of the paragraph that `resit` points into, and the raw-pointer return
// suggests the caller owns the result -- confirm both against the
// implementation.
34 static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
35
// Compiler-generated destructor. The `override` specifier means the base
// class destructor is virtual, so deleting through a base pointer is safe.
40 ~ResultIterator() override = default;
41
42 // ============= Moving around within the page ============.
// Overrides the inherited virtual Begin(). Non-const, so it may change the
// iterator's state.
// NOTE(review): presumably rewinds the iterator to the first element in
// reading order -- confirm against the implementation.
47 void Begin() override;
48
// Overrides the inherited virtual Next(). `level` selects the granularity of
// page element to step by (PageIteratorLevel comes from publictypes.h).
// NOTE(review): presumably returns false once there is no further element at
// `level` -- confirm the exact end-of-page semantics in the implementation.
61 bool Next(PageIteratorLevel level) override;
62
// Overrides the inherited virtual IsAtBeginningOf(). Const query taking the
// page element level to test against.
// NOTE(review): presumably "beginning" is in reading order rather than
// left-to-right layout order, given this class's bidi purpose -- confirm.
69 bool IsAtBeginningOf(PageIteratorLevel level) const override;
70
// Overrides the inherited virtual IsAtFinalElement(). Const query taking two
// page element levels.
// NOTE(review): presumably true when the current `element` is the last one
// inside the enclosing `level` (e.g. last word of a paragraph) -- confirm the
// argument roles against the implementation.
76 bool IsAtFinalElement(PageIteratorLevel level,
77 PageIteratorLevel element) const override;
78
79 // ============= Functions that refer to words only ============.
80 // Returns the number of blanks before the current word.
// Const query; the count is returned as a plain int.
// NOTE(review): the data member preserve_interword_spaces_ declared further
// down may influence this value -- confirm in the implementation.
81 int BlanksBeforeWord() const;
82
83 // ============= Accessing data ==============.
84
// Returns the text of the current element at the given `level` as a raw
// `char *` (UTF-8 per the function name). Declared virtual here.
// NOTE(review): ownership of the returned buffer is not visible in this
// header -- confirm who frees it and how (delete[] vs. free) before use.
89 virtual char *GetUTF8Text(PageIteratorLevel level) const;
90
// Returns a pointer to a three-level nested vector whose innermost elements
// are (const char *, float) pairs.
// NOTE(review): presumably each pair is (symbol text, score/confidence) and
// the nesting groups them by symbol and timestep -- confirm the meaning of
// each nesting level, the lifetime of the pointed-to data, and whether the
// result can be null.
94 virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
95 *GetRawLSTMTimesteps() const;
// Returns a pointer to a two-level nested vector of (const char *, float)
// pairs, i.e. one nesting level less than GetRawLSTMTimesteps().
// NOTE(review): presumably the best candidate choices per symbol -- confirm
// semantics, lifetime, and nullability in the implementation.
96 virtual std::vector<std::vector<std::pair<const char *, float>>>
97 *GetBestLSTMSymbolChoices() const;
98
// Public const query for paragraph direction (left-to-right when true, per
// the name).
// NOTE(review): presumably reports the cached current_paragraph_is_ltr_
// member rather than recomputing -- confirm.
103 bool ParagraphIsLtr() const;
104
105 // ============= Exposed only for testing =============.
106
// Static overload, listed under the "Exposed only for testing" section.
// Inputs: the paragraph direction flag and one StrongScriptDirection per
// word (`word_dirs`). Output: written through the `reading_order` pointer.
// NOTE(review): presumably `reading_order` receives indices into `word_dirs`
// interleaved with the negative-looking marker constants kMinorRunStart /
// kMinorRunEnd / kComplexWord declared below -- confirm, and confirm whether
// the output vector is cleared first and whether null is tolerated.
129 static void CalculateTextlineOrder(
130 bool paragraph_is_ltr,
131 const std::vector<StrongScriptDirection> &word_dirs,
132 std::vector<int> *reading_order);
133
// Class-scope integer constants. Only declared here; their values are
// defined out of line and are not visible in this header.
// NOTE(review): presumably sentinel values that mark the start/end of a
// minor-direction run and a complex word in a computed reading order --
// confirm values and usage in the implementation file.
134 static const int kMinorRunStart;
135 static const int kMinorRunEnd;
136 static const int kComplexWord;
137
138protected:
// Constructs a ResultIterator from an existing LTRResultIterator. `explicit`
// prevents implicit conversion from LTRResultIterator. Declared in the
// protected section, so outside code cannot call it directly; the public
// static StartOfParagraph() above returns a ResultIterator pointer.
145 explicit ResultIterator(const LTRResultIterator &resit);
146
147private:
// Private const helper returning a paragraph-direction flag.
// NOTE(review): presumably this computes the direction of the paragraph at
// the current position (as opposed to the public ParagraphIsLtr(), which may
// just report a cached value) -- confirm.
152 bool CurrentParagraphIsLtr() const;
153
// Private const overload: takes the paragraph direction flag and an
// LTRResultIterator, and writes its result through `indices`.
// NOTE(review): presumably `resit` identifies the text line whose words are
// to be ordered -- confirm.
165 void CalculateTextlineOrder(bool paragraph_is_ltr,
166 const LTRResultIterator &resit,
167 std::vector<int> *indices) const;
// Same parameters as the overload above plus an extra output vector `ssd`
// of StrongScriptDirection values.
// NOTE(review): presumably `ssd` receives the per-word directions that were
// used to compute `indices`; confirm whether `ssd` may be null.
169 void CalculateTextlineOrder(bool paragraph_is_ltr,
170 const LTRResultIterator &resit,
171 std::vector<StrongScriptDirection> *ssd,
172 std::vector<int> *indices) const;
173
// Private const helper returning an int index.
// NOTE(review): presumably the left-to-right position of the current word
// within its text line -- confirm the base (0 vs. 1) and the scope.
178 int LTRWordIndex() const;
179
// Private const helper that writes its result through `blob_indices`.
// NOTE(review): presumably fills the vector with the indices of the current
// word's blobs in reading order -- confirm, and confirm whether the vector
// is cleared first.
184 void CalculateBlobOrder(std::vector<int> *blob_indices) const;
185
// Private, non-const repositioning helper (it can change iterator state).
// NOTE(review): "logical start" presumably means the first word in reading
// order, which for right-to-left text is not the leftmost word -- confirm.
187 void MoveToLogicalStartOfTextline();
188
// Private, non-const repositioning helper, the word-level counterpart of
// MoveToLogicalStartOfTextline() by name.
// NOTE(review): presumably moves to the first symbol of the current word in
// reading order -- confirm.
193 void MoveToLogicalStartOfWord();
194
// Private const query about the current symbol's position within its word.
// NOTE(review): presumably "final" is in reading order -- confirm.
196 bool IsAtFinalSymbolOfWord() const;
197
// Private const query, the counterpart of IsAtFinalSymbolOfWord().
// NOTE(review): presumably "first" is in reading order -- confirm.
199 bool IsAtFirstSymbolOfWord() const;
200
// Private const helper that appends to the caller-provided string `text`.
// NOTE(review): the "suffix marks" are presumably Unicode directional marks
// needed after the current word -- confirm what is appended and when.
// NOTE(review): std::string is used here but <string> is not among the
// includes visible in this header; it is presumably pulled in transitively.
205 void AppendSuffixMarks(std::string *text) const;
206
// Private const helper that appends to the caller-provided string `text`.
// NOTE(review): presumably appends the current word's UTF-8 text in reading
// order -- confirm whether directional marks are included.
208 void AppendUTF8WordText(std::string *text) const;
209
// Private helper that appends to the caller-provided string `text`. Unlike
// the other Append* helpers declared next to it, this one is non-const, so
// it may advance or otherwise modify the iterator ("Iterate" in the name).
// NOTE(review): presumably leaves the iterator positioned after the text
// line it appended -- confirm the post-condition.
217 void IterateAndAppendUTF8TextlineText(std::string *text);
218
// Private const helper that appends to the caller-provided string `text`.
// Being const, it cannot move this iterator itself.
// NOTE(review): presumably appends the text of the whole current paragraph,
// walking its lines via a copy of the iterator -- confirm.
225 void AppendUTF8ParagraphText(std::string *text) const;
226
// Private const predicate taking a minimum debug level.
// NOTE(review): presumably returns true when bidi debug output is enabled at
// `min_level` or above; where the configured level comes from is not visible
// in this header -- confirm.
228 bool BidiDebug(int min_level) const;
229
// Private state flags. None has an in-class initializer, so each must be
// set elsewhere (presumably by the constructor -- confirm).

// NOTE(review): presumably the cached direction of the paragraph the
// iterator is currently in (true = left-to-right) -- confirm when it is
// refreshed.
230 bool current_paragraph_is_ltr_;
231
// NOTE(review): presumably true when positioned at the first element of a
// run of text in the paragraph's minor (non-dominant) direction -- confirm.
236 bool at_beginning_of_minor_run_;
237
// NOTE(review): presumably true while iterating through text whose direction
// is opposite to the paragraph direction -- confirm.
239 bool in_minor_direction_;
240
// NOTE(review): presumably mirrors a configuration option that controls
// whether multiple spaces between words are kept in the output -- confirm
// where it is read from.
245 bool preserve_interword_spaces_;
246};
247
248} // namespace tesseract.
249
250#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
static const int kMinorRunEnd
static const int kMinorRunStart
~ResultIterator() override=default
static const int kComplexWord
#define TESS_API
Definition: export.h:32