All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
rejctmap.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.h (Formerly rejmap.h)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  * Created: Thu Jun 9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18 
19 This module may look unnecessarily verbose, but here's the philosophy...
20 
21 ALL processing of the reject map is done in this module. There are lots of
22 separate calls to set reject/accept flags. These have DELIBERATELY been kept
23 distinct so that this module can decide what to do.
24 
25 Basically, there is a flag for each sort of rejection or acceptance. This
26 provides a history of what has happened to EACH character.
27 
28 Determining whether a character is CURRENTLY rejected depends on implicit
29 understanding of the SEQUENCE of possible calls. The flags are defined and
30 grouped in the REJ_FLAGS enum. These groupings are used in determining a
31 character's CURRENT rejection status. Basically, a character is ACCEPTED if
32 
33  none of the permanent rej flags are set
34  AND ( the character has never been rejected
35  OR an accept flag is set which is LATER than the latest reject flag )
36 
37 IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38 OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39 **********************************************************************/
40 
41 #ifndef REJCTMAP_H
42 #define REJCTMAP_H
43 
44 #ifdef __UNIX__
45 #include <assert.h>
46 #endif
47 #include "memry.h"
48 #include "bits16.h"
49 #include "params.h"
50 
/*
 * One enumerator per kind of rejection or acceptance event that can be
 * recorded against a character.  The enumerators are grouped in the implied
 * temporal order described in the file header.
 *
 * The numeric values matter: REJ::set_flag() / REJ::flag() store values
 * below 16 in the first 16-bit flag word and the remaining values (minus 16)
 * in the second, so there must never be more than 32 enumerators.
 */
enum REJ_FLAGS
{
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,       // PERM Tess didn't classify
  R_SMALL_XHT,          // PERM Xht too small
  R_EDGE_CHAR,          // PERM Too close to edge of image
  R_1IL_CONFLICT,       // PERM 1Il confusion
  R_POSTNN_1IL,         // PERM 1Il unrejected by NN
  R_REJ_CBLOB,          // PERM Odd blob
  R_MM_REJECT,          // PERM Matrix match rejection (m's)
  // NOTE(review): tagged TEMP although it sits in the never-overridden
  // group - confirm against REJ::perm_rejected().
  R_BAD_REPETITION,     // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,         // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED,  // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS,    // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,       // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,             // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,            // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS,       // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,         // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,          // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY,        // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
  R_DOC_REJ,            // TEMP Document rejection
  R_BLOCK_REJ,          // TEMP Block rejection
  R_ROW_REJ,            // TEMP Row rejection
  R_UNLV_REJ,           // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur in between the above rejection groups */
  R_NN_ACCEPT,          // NN acceptance
  R_HYPHEN_ACCEPT,      // Hyphen acceptance
  R_MM_ACCEPT,          // Matrix match acceptance
  R_QUALITY_ACCEPT,     // Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT  // Accept EVERYTHING except tess failures
};
92 
/* REJECT MAP VALUES */
/* Single-character codes returned by REJ::display_char(), one per status. */

#define MAP_ACCEPT '1'            /* character is accepted */
#define MAP_REJECT_PERM '0'       /* permanently rejected */
#define MAP_REJECT_TEMP '2'       /* rejected, but not permanently */
#define MAP_REJECT_POTENTIAL '3'  /* would be accepted if quality is good */
99 
100 class REJ
101 {
102  BITS16 flags1;
103  BITS16 flags2;
104 
105  void set_flag(REJ_FLAGS rej_flag) {
106  if (rej_flag < 16)
107  flags1.turn_on_bit (rej_flag);
108  else
109  flags2.turn_on_bit (rej_flag - 16);
110  }
111 
112  BOOL8 rej_before_nn_accept();
113  BOOL8 rej_between_nn_and_mm();
114  BOOL8 rej_between_mm_and_quality_accept();
115  BOOL8 rej_between_quality_and_minimal_rej_accept();
116  BOOL8 rej_before_mm_accept();
117  BOOL8 rej_before_quality_accept();
118 
119  public:
120  REJ() { //constructor
121  }
122 
123  REJ( //classwise copy
124  const REJ &source) {
125  flags1 = source.flags1;
126  flags2 = source.flags2;
127  }
128 
129  REJ & operator= ( //assign REJ
130  const REJ & source) { //from this
131  flags1 = source.flags1;
132  flags2 = source.flags2;
133  return *this;
134  }
135 
136  BOOL8 flag(REJ_FLAGS rej_flag) {
137  if (rej_flag < 16)
138  return flags1.bit (rej_flag);
139  else
140  return flags2.bit (rej_flag - 16);
141  }
142 
143  char display_char() {
144  if (perm_rejected ())
145  return MAP_REJECT_PERM;
146  else if (accept_if_good_quality ())
147  return MAP_REJECT_POTENTIAL;
148  else if (rejected ())
149  return MAP_REJECT_TEMP;
150  else
151  return MAP_ACCEPT;
152  }
153 
154  BOOL8 perm_rejected(); //Is char perm reject?
155 
156  BOOL8 rejected(); //Is char rejected?
157 
158  BOOL8 accepted() { //Is char accepted?
159  return !rejected ();
160  }
161 
162  //potential rej?
164 
166  return (rejected () && !perm_rejected ());
167  }
168 
169  void setrej_tess_failure(); //Tess generated blank
170  void setrej_small_xht(); //Small xht char/wd
171  void setrej_edge_char(); //Close to image edge
172  void setrej_1Il_conflict(); //Initial reject map
173  void setrej_postNN_1Il(); //1Il after NN
174  void setrej_rej_cblob(); //Insert duff blob
175  void setrej_mm_reject(); //Matrix matcher
176  //Odd repeated char
177  void setrej_bad_repetition();
178  void setrej_poor_match(); //Failed Rays heuristic
179  //TEMP reject_word
181  //TEMP reject_word
182  void setrej_contains_blanks();
183  void setrej_bad_permuter(); //POTENTIAL reject_word
184  void setrej_hyphen(); //PostNN dubious hyph or .
185  void setrej_dubious(); //PostNN dubious limit
186  void setrej_no_alphanums(); //TEMP reject_word
187  void setrej_mostly_rej(); //TEMP reject_word
188  void setrej_xht_fixup(); //xht fixup
189  void setrej_bad_quality(); //TEMP reject_word
190  void setrej_doc_rej(); //TEMP reject_word
191  void setrej_block_rej(); //TEMP reject_word
192  void setrej_row_rej(); //TEMP reject_word
193  void setrej_unlv_rej(); //TEMP reject_word
194  void setrej_nn_accept(); //NN Flipped a char
195  void setrej_hyphen_accept(); //Good aspect ratio
196  void setrej_mm_accept(); //Matrix matcher
197  //Quality flip a char
198  void setrej_quality_accept();
199  //Accept all except blank
201 
202  void full_print(FILE *fp);
203 };
204 
205 class REJMAP
206 {
207  REJ *ptr; //ptr to the chars
208  inT16 len; //Number of chars
209 
210  public:
211  REJMAP() { //constructor
212  ptr = NULL;
213  len = 0;
214  }
215 
216  REJMAP( //classwise copy
217  const REJMAP &rejmap);
218 
219  REJMAP & operator= ( //assign REJMAP
220  const REJMAP & source); //from this
221 
222  ~REJMAP () { //destructor
223  if (ptr != NULL)
224  free_struct (ptr, len * sizeof (REJ), "REJ");
225  }
226 
227  void initialise( //Redefine map
228  inT16 length);
229 
230  REJ & operator[]( //access function
231  inT16 index) const //map index
232  {
233  ASSERT_HOST (index < len);
234  return ptr[index]; //no bounds checks
235  }
236 
237  inT32 length() const { //map length
238  return len;
239  }
240 
241  inT16 accept_count(); //How many accepted?
242 
243  inT16 reject_count() { //How many rejects?
244  return len - accept_count ();
245  }
246 
247  void remove_pos( //Cut out an element
248  inT16 pos); //element to remove
249 
250  void print(FILE *fp);
251 
252  void full_print(FILE *fp);
253 
254  BOOL8 recoverable_rejects(); //Any non perm rejs?
255 
257  //Any potential rejs?
258 
259  void rej_word_small_xht(); //Reject whole word
260  //Reject whole word
261  void rej_word_tess_failure();
263  //Reject whole word
264  //Reject whole word
266  //Reject whole word
267  void rej_word_bad_permuter();
268  void rej_word_xht_fixup(); //Reject whole word
269  //Reject whole word
270  void rej_word_no_alphanums();
271  void rej_word_mostly_rej(); //Reject whole word
272  void rej_word_bad_quality(); //Reject whole word
273  void rej_word_doc_rej(); //Reject whole word
274  void rej_word_block_rej(); //Reject whole word
275  void rej_word_row_rej(); //Reject whole word
276 };
277 #endif
Definition: rejctmap.h:100
#define MAP_REJECT_POTENTIAL
Definition: rejctmap.h:98
void setrej_tess_failure()
Definition: rejctmap.cpp:96
void setrej_edge_char()
Definition: rejctmap.cpp:106
void setrej_bad_quality()
Definition: rejctmap.cpp:183
void setrej_hyphen()
Definition: rejctmap.cpp:158
void setrej_row_rej()
Definition: rejctmap.cpp:198
inT32 length() const
Definition: rejctmap.h:237
void setrej_contains_blanks()
Definition: rejctmap.cpp:147
void turn_on_bit(uinT8 bit_num)
Definition: bits16.h:37
REJ_FLAGS
Definition: rejctmap.h:51
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
BOOL8 rejected()
Definition: rejctmap.cpp:73
void setrej_1Il_conflict()
Definition: rejctmap.cpp:111
void rej_word_xht_fixup()
Definition: rejctmap.cpp:461
unsigned char BOOL8
Definition: host.h:113
void setrej_doc_rej()
Definition: rejctmap.cpp:188
void setrej_rej_cblob()
Definition: rejctmap.cpp:121
#define ASSERT_HOST(x)
Definition: errcode.h:84
BOOL8 quality_recoverable_rejects()
Definition: rejctmap.cpp:354
REJ & operator=(const REJ &source)
Definition: rejctmap.h:129
BOOL8 recoverable_rejects()
Definition: rejctmap.cpp:343
Definition: bits16.h:25
void setrej_bad_repetition()
Definition: rejctmap.cpp:131
BOOL8 perm_rejected()
Definition: rejctmap.cpp:24
void full_print(FILE *fp)
Definition: rejctmap.cpp:234
void rej_word_small_xht()
Definition: rejctmap.cpp:416
void setrej_bad_permuter()
Definition: rejctmap.cpp:153
void full_print(FILE *fp)
Definition: rejctmap.cpp:406
REJ(const REJ &source)
Definition: rejctmap.h:123
REJ()
Definition: rejctmap.h:120
void setrej_unlv_rej()
Definition: rejctmap.cpp:203
void rej_word_block_rej()
Definition: rejctmap.cpp:506
void setrej_block_rej()
Definition: rejctmap.cpp:193
#define MAP_ACCEPT
Definition: rejctmap.h:95
void setrej_mm_accept()
Definition: rejctmap.cpp:218
BOOL8 flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:136
#define MAP_REJECT_TEMP
Definition: rejctmap.h:97
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:297
void rej_word_tess_failure()
Definition: rejctmap.cpp:425
void setrej_mostly_rej()
Definition: rejctmap.cpp:173
void rej_word_row_rej()
Definition: rejctmap.cpp:515
inT16 accept_count()
Definition: rejctmap.cpp:331
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:365
void setrej_xht_fixup()
Definition: rejctmap.cpp:178
void rej_word_no_alphanums()
Definition: rejctmap.cpp:470
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:228
void rej_word_bad_quality()
Definition: rejctmap.cpp:488
void setrej_hyphen_accept()
Definition: rejctmap.cpp:208
void rej_word_mostly_rej()
Definition: rejctmap.cpp:479
void setrej_quality_accept()
Definition: rejctmap.cpp:223
void setrej_no_alphanums()
Definition: rejctmap.cpp:168
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:141
REJ & operator[](inT16 index) const
Definition: rejctmap.h:230
BOOL8 accept_if_good_quality()
Definition: rejctmap.cpp:83
void setrej_poor_match()
Definition: rejctmap.cpp:136
void setrej_dubious()
Definition: rejctmap.cpp:163
inT16 reject_count()
Definition: rejctmap.h:243
void setrej_small_xht()
Definition: rejctmap.cpp:101
#define MAP_REJECT_PERM
Definition: rejctmap.h:96
char display_char()
Definition: rejctmap.h:143
void initialise(inT16 length)
Definition: rejctmap.cpp:318
#define NULL
Definition: host.h:144
REJMAP()
Definition: rejctmap.h:211
void print(FILE *fp)
Definition: rejctmap.cpp:394
void free_struct(void *deadstruct, inT32, const char *)
Definition: memry.cpp:43
BOOL8 accepted()
Definition: rejctmap.h:158
void rej_word_contains_blanks()
Definition: rejctmap.cpp:443
void setrej_nn_accept()
Definition: rejctmap.cpp:213
void rej_word_bad_permuter()
Definition: rejctmap.cpp:452
void rej_word_doc_rej()
Definition: rejctmap.cpp:497
void setrej_mm_reject()
Definition: rejctmap.cpp:126
void setrej_postNN_1Il()
Definition: rejctmap.cpp:116
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:434
BOOL8 recoverable()
Definition: rejctmap.h:165
short inT16
Definition: host.h:100
~REJMAP()
Definition: rejctmap.h:222
int inT32
Definition: host.h:102