tesseract v5.3.3.20231005
adaptions.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: adaptions.cpp (Formerly adaptions.c)
3 * Description: Functions used to adapt to blobs already confidently
4 * identified
5 * Author: Chris Newton
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20#include <cctype>
21#include <cstring>
22#include "control.h"
23#include "reject.h"
24#include "stopper.h"
25#include "tesseractclass.h"
26#include "tessvars.h"
27
28// Include automatically generated configuration file if running autoconf.
29#ifdef HAVE_CONFIG_H
30# include "config_auto.h"
31#endif
32
33namespace tesseract {
34bool Tesseract::word_adaptable( // should we adapt?
35 WERD_RES *word, uint16_t mode) {
36 if (tessedit_adaption_debug) {
37 tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
38 word->best_choice->unichar_string().c_str(), word->best_choice->rating(),
39 word->best_choice->certainty());
40 }
41
42 bool status = false;
43 std::bitset<16> flags(mode);
44
45 enum MODES {
46 ADAPTABLE_WERD,
47 ACCEPTABLE_WERD,
48 CHECK_DAWGS,
49 CHECK_SPACES,
50 CHECK_ONE_ELL_CONFLICT,
51 CHECK_AMBIG_WERD
52 };
53
54 /*
550: NO adaption
56*/
57 if (mode == 0) {
58 if (tessedit_adaption_debug) {
59 tprintf("adaption disabled\n");
60 }
61 return false;
62 }
63
64 if (flags[ADAPTABLE_WERD]) {
65 status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
66 if (tessedit_adaption_debug && !status) {
67 tprintf("tess_would_adapt bit is false\n");
68 }
69 }
70
71 if (flags[ACCEPTABLE_WERD]) {
72 status |= word->tess_accepted;
73 if (tessedit_adaption_debug && !status) {
74 tprintf("tess_accepted bit is false\n");
75 }
76 }
77
78 if (!status) { // If not set then
79 return false; // ignore other checks
80 }
81
82 if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&
83 (word->best_choice->permuter() != FREQ_DAWG_PERM) &&
84 (word->best_choice->permuter() != USER_DAWG_PERM) &&
85 (word->best_choice->permuter() != NUMBER_PERM)) {
86 if (tessedit_adaption_debug) {
87 tprintf("word not in dawgs\n");
88 }
89 return false;
90 }
91
92 if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) {
93 if (tessedit_adaption_debug) {
94 tprintf("word has ell conflict\n");
95 }
96 return false;
97 }
98
99 if (flags[CHECK_SPACES] &&
100 (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
101 if (tessedit_adaption_debug) {
102 tprintf("word contains spaces\n");
103 }
104 return false;
105 }
106
107 if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) {
108 if (tessedit_adaption_debug) {
109 tprintf("word is ambiguous\n");
110 }
111 return false;
112 }
113
114 if (tessedit_adaption_debug) {
115 tprintf("returning status %d\n", status);
116 }
117 return status;
118}
119
120} // namespace tesseract
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:244
@ NUMBER_PERM
Definition: ratngs.h:242
@ USER_DAWG_PERM
Definition: ratngs.h:246
@ FREQ_DAWG_PERM
Definition: ratngs.h:247
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
Definition: reject.cpp:287
bool word_adaptable(WERD_RES *word, uint16_t mode)
Definition: adaptions.cpp:34
WERD_CHOICE * best_choice
Definition: pageres.h:239
float certainty() const
Definition: ratngs.h:315
uint8_t permuter() const
Definition: ratngs.h:331
bool dangerous_ambig_found() const
Definition: ratngs.h:348
std::string & unichar_string()
Definition: ratngs.h:519
float rating() const
Definition: ratngs.h:312