tesseract v5.3.3.20231005
rejctmap.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: rejctmap.cpp (Formerly rejmap.c)
3 * Description: REJ and REJMAP class functions.
4 * Author: Phil Cheatle
5 *
6 * (C) Copyright 1994, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#include "rejctmap.h"
20
21#include <memory>
22
23#include "params.h"
24
25namespace tesseract {
26
27void REJ::full_print(FILE *fp) const {
28 fprintf(fp, "R_TESS_FAILURE: %s\n", flag(R_TESS_FAILURE) ? "T" : "F");
29 fprintf(fp, "R_SMALL_XHT: %s\n", flag(R_SMALL_XHT) ? "T" : "F");
30 fprintf(fp, "R_EDGE_CHAR: %s\n", flag(R_EDGE_CHAR) ? "T" : "F");
31 fprintf(fp, "R_1IL_CONFLICT: %s\n", flag(R_1IL_CONFLICT) ? "T" : "F");
32 fprintf(fp, "R_POSTNN_1IL: %s\n", flag(R_POSTNN_1IL) ? "T" : "F");
33 fprintf(fp, "R_REJ_CBLOB: %s\n", flag(R_REJ_CBLOB) ? "T" : "F");
34 fprintf(fp, "R_MM_REJECT: %s\n", flag(R_MM_REJECT) ? "T" : "F");
35 fprintf(fp, "R_BAD_REPETITION: %s\n", flag(R_BAD_REPETITION) ? "T" : "F");
36 fprintf(fp, "R_POOR_MATCH: %s\n", flag(R_POOR_MATCH) ? "T" : "F");
37 fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n",
38 flag(R_NOT_TESS_ACCEPTED) ? "T" : "F");
39 fprintf(fp, "R_CONTAINS_BLANKS: %s\n", flag(R_CONTAINS_BLANKS) ? "T" : "F");
40 fprintf(fp, "R_BAD_PERMUTER: %s\n", flag(R_BAD_PERMUTER) ? "T" : "F");
41 fprintf(fp, "R_HYPHEN: %s\n", flag(R_HYPHEN) ? "T" : "F");
42 fprintf(fp, "R_DUBIOUS: %s\n", flag(R_DUBIOUS) ? "T" : "F");
43 fprintf(fp, "R_NO_ALPHANUMS: %s\n", flag(R_NO_ALPHANUMS) ? "T" : "F");
44 fprintf(fp, "R_MOSTLY_REJ: %s\n", flag(R_MOSTLY_REJ) ? "T" : "F");
45 fprintf(fp, "R_XHT_FIXUP: %s\n", flag(R_XHT_FIXUP) ? "T" : "F");
46 fprintf(fp, "R_BAD_QUALITY: %s\n", flag(R_BAD_QUALITY) ? "T" : "F");
47 fprintf(fp, "R_DOC_REJ: %s\n", flag(R_DOC_REJ) ? "T" : "F");
48 fprintf(fp, "R_BLOCK_REJ: %s\n", flag(R_BLOCK_REJ) ? "T" : "F");
49 fprintf(fp, "R_ROW_REJ: %s\n", flag(R_ROW_REJ) ? "T" : "F");
50 fprintf(fp, "R_UNLV_REJ: %s\n", flag(R_UNLV_REJ) ? "T" : "F");
51 fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", flag(R_HYPHEN_ACCEPT) ? "T" : "F");
52 fprintf(fp, "R_NN_ACCEPT: %s\n", flag(R_NN_ACCEPT) ? "T" : "F");
53 fprintf(fp, "R_MM_ACCEPT: %s\n", flag(R_MM_ACCEPT) ? "T" : "F");
54 fprintf(fp, "R_QUALITY_ACCEPT: %s\n", flag(R_QUALITY_ACCEPT) ? "T" : "F");
55 fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
56 flag(R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
57}
58
60 initialise(source.len);
61 for (unsigned i = 0; i < len; i++) {
62 ptr[i] = source.ptr[i];
63 }
64 return *this;
65}
66
67void REJMAP::initialise(uint16_t length) {
68 ptr = std::make_unique<REJ[]>(length);
69 len = length;
70}
71
72int16_t REJMAP::accept_count() const { // How many accepted?
73 int16_t count = 0;
74 for (unsigned i = 0; i < len; i++) {
75 if (ptr[i].accepted()) {
76 count++;
77 }
78 }
79 return count;
80}
81
82bool REJMAP::recoverable_rejects() const { // Any non perm rejs?
83 for (unsigned i = 0; i < len; i++) {
84 if (ptr[i].recoverable()) {
85 return true;
86 }
87 }
88 return false;
89}
90
91bool REJMAP::quality_recoverable_rejects() const { // Any potential rejs?
92 for (unsigned i = 0; i < len; i++) {
93 if (ptr[i].accept_if_good_quality()) {
94 return true;
95 }
96 }
97 return false;
98}
99
100void REJMAP::remove_pos( // Cut out an element
101 uint16_t pos // element to remove
102) {
103 ASSERT_HOST(pos < len);
104 ASSERT_HOST(len > 0);
105
106 len--;
107 for (; pos < len; pos++) {
108 ptr[pos] = ptr[pos + 1];
109 }
110}
111
112void REJMAP::print(FILE *fp) const {
113 fputc('"', fp);
114 for (unsigned i = 0; i < len; i++) {
115 fputc( ptr[i].display_char(), fp);
116 }
117 fputc('"', fp);
118}
119
120void REJMAP::full_print(FILE *fp) const {
121 for (unsigned i = 0; i < len; i++) {
122 ptr[i].full_print(fp);
123 fprintf(fp, "\n");
124 }
125}
126
127void REJMAP::rej_word_small_xht() { // Reject whole word
128 for (unsigned i = 0; i < len; i++) {
129 ptr[i].setrej_small_xht();
130 }
131}
132
133void REJMAP::rej_word_tess_failure() { // Reject whole word
134 for (unsigned i = 0; i < len; i++) {
135 ptr[i].setrej_tess_failure();
136 }
137}
138
139void REJMAP::rej_word_not_tess_accepted() { // Reject whole word
140 for (unsigned i = 0; i < len; i++) {
141 if (ptr[i].accepted()) {
142 ptr[i].setrej_not_tess_accepted();
143 }
144 }
145}
146
147void REJMAP::rej_word_contains_blanks() { // Reject whole word
148 for (unsigned i = 0; i < len; i++) {
149 if (ptr[i].accepted()) {
150 ptr[i].setrej_contains_blanks();
151 }
152 }
153}
154
155void REJMAP::rej_word_bad_permuter() { // Reject whole word
156 for (unsigned i = 0; i < len; i++) {
157 if (ptr[i].accepted()) {
158 ptr[i].setrej_bad_permuter();
159 }
160 }
161}
162
163void REJMAP::rej_word_xht_fixup() { // Reject whole word
164 for (unsigned i = 0; i < len; i++) {
165 if (ptr[i].accepted()) {
166 ptr[i].setrej_xht_fixup();
167 }
168 }
169}
170
171void REJMAP::rej_word_no_alphanums() { // Reject whole word
172 for (unsigned i = 0; i < len; i++) {
173 if (ptr[i].accepted()) {
174 ptr[i].setrej_no_alphanums();
175 }
176 }
177}
178
179void REJMAP::rej_word_mostly_rej() { // Reject whole word
180 for (unsigned i = 0; i < len; i++) {
181 if (ptr[i].accepted()) {
182 ptr[i].setrej_mostly_rej();
183 }
184 }
185}
186
187void REJMAP::rej_word_bad_quality() { // Reject whole word
188 for (unsigned i = 0; i < len; i++) {
189 if (ptr[i].accepted()) {
190 ptr[i].setrej_bad_quality();
191 }
192 }
193}
194
195void REJMAP::rej_word_doc_rej() { // Reject whole word
196 for (unsigned i = 0; i < len; i++) {
197 if (ptr[i].accepted()) {
198 ptr[i].setrej_doc_rej();
199 }
200 }
201}
202
203void REJMAP::rej_word_block_rej() { // Reject whole word
204 for (unsigned i = 0; i < len; i++) {
205 if (ptr[i].accepted()) {
206 ptr[i].setrej_block_rej();
207 }
208 }
209}
210
211void REJMAP::rej_word_row_rej() { // Reject whole word
212 for (unsigned i = 0; i < len; i++) {
213 if (ptr[i].accepted()) {
214 ptr[i].setrej_row_rej();
215 }
216 }
217}
218
219} // namespace tesseract
#define ASSERT_HOST(x)
Definition: errcode.h:54
int * count
@ R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:89
@ R_ROW_REJ
Definition: rejctmap.h:81
@ R_NO_ALPHANUMS
Definition: rejctmap.h:71
@ R_TESS_FAILURE
Definition: rejctmap.h:53
@ R_QUALITY_ACCEPT
Definition: rejctmap.h:88
@ R_DOC_REJ
Definition: rejctmap.h:79
@ R_MM_ACCEPT
Definition: rejctmap.h:87
@ R_MOSTLY_REJ
Definition: rejctmap.h:72
@ R_XHT_FIXUP
Definition: rejctmap.h:73
@ R_POOR_MATCH
Definition: rejctmap.h:63
@ R_SMALL_XHT
Definition: rejctmap.h:54
@ R_BAD_PERMUTER
Definition: rejctmap.h:66
@ R_BAD_REPETITION
Definition: rejctmap.h:60
@ R_BLOCK_REJ
Definition: rejctmap.h:80
@ R_HYPHEN_ACCEPT
Definition: rejctmap.h:86
@ R_HYPHEN
Definition: rejctmap.h:69
@ R_CONTAINS_BLANKS
Definition: rejctmap.h:65
@ R_POSTNN_1IL
Definition: rejctmap.h:57
@ R_REJ_CBLOB
Definition: rejctmap.h:58
@ R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:64
@ R_BAD_QUALITY
Definition: rejctmap.h:76
@ R_UNLV_REJ
Definition: rejctmap.h:82
@ R_NN_ACCEPT
Definition: rejctmap.h:85
@ R_DUBIOUS
Definition: rejctmap.h:70
@ R_MM_REJECT
Definition: rejctmap.h:59
@ R_1IL_CONFLICT
Definition: rejctmap.h:56
@ R_EDGE_CHAR
Definition: rejctmap.h:55
bool flag(REJ_FLAGS rej_flag) const
Definition: rejctmap.h:117
void full_print(FILE *fp) const
Definition: rejctmap.cpp:27
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:139
void print(FILE *fp) const
Definition: rejctmap.cpp:112
void rej_word_tess_failure()
Definition: rejctmap.cpp:133
void rej_word_contains_blanks()
Definition: rejctmap.cpp:147
void rej_word_small_xht()
Definition: rejctmap.cpp:127
void rej_word_bad_quality()
Definition: rejctmap.cpp:187
void rej_word_xht_fixup()
Definition: rejctmap.cpp:163
void rej_word_row_rej()
Definition: rejctmap.cpp:211
void remove_pos(uint16_t pos)
Definition: rejctmap.cpp:100
int16_t accept_count() const
Definition: rejctmap.cpp:72
uint16_t length() const
Definition: rejctmap.h:333
void rej_word_block_rej()
Definition: rejctmap.cpp:203
bool quality_recoverable_rejects() const
Definition: rejctmap.cpp:91
void initialise(uint16_t length)
Definition: rejctmap.cpp:67
void rej_word_bad_permuter()
Definition: rejctmap.cpp:155
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:59
void rej_word_no_alphanums()
Definition: rejctmap.cpp:171
void rej_word_doc_rej()
Definition: rejctmap.cpp:195
bool recoverable_rejects() const
Definition: rejctmap.cpp:82
void full_print(FILE *fp) const
Definition: rejctmap.cpp:120
void rej_word_mostly_rej()
Definition: rejctmap.cpp:179