45 bool word_ending,
WERD_CHOICE *word,
float certainties[],
46 float *limit,
WERD_CHOICE *best_choice,
int *attempts_left,
47 void *void_more_args) {
48 auto *more_args =
static_cast<DawgArgs *
>(void_more_args);
49 word_ending = (
static_cast<unsigned>(char_choice_index) == char_choices.size() - 1);
50 int word_index = word->
length() - 1;
51 if (best_choice->
rating() < *limit) {
59 bool checked_unigrams =
false;
61 if (dawg_debug_level) {
66 std::vector<UNICHAR_ID> encoding;
70 bool unigrams_ok =
true;
74 DawgArgs unigram_dawg_args(&unigram_active_dawgs, &unigram_updated_dawgs, more_args->permuter);
76 for (
size_t i = 0; unigrams_ok &&
i < encoding.size(); ++
i) {
82 word->
unichar_id(word_index + num_unigrams - 1),
83 word_ending &&
i == encoding.size() - 1);
85 if (dawg_debug_level) {
87 unigrams_ok ?
"OK" :
"not OK");
91 while (num_unigrams-- > 0) {
96 checked_unigrams =
true;
97 more_args->permuter = unigram_dawg_args.
permuter;
98 *(more_args->updated_dawgs) = *(unigram_dawg_args.
updated_dawgs);
108 if (dawg_debug_level) {
111 if (strcmp(output_ambig_words_file.c_str(),
"") != 0) {
112 if (output_ambig_words_file_ ==
nullptr) {
113 output_ambig_words_file_ = fopen(output_ambig_words_file.c_str(),
"wb+");
114 if (output_ambig_words_file_ ==
nullptr) {
115 tprintf(
"Failed to open output_ambig_words_file %s\n", output_ambig_words_file.c_str());
118 std::string word_str;
121 fprintf(output_ambig_words_file_,
"%s", word_str.c_str());
123 std::string word_str;
126 fprintf(output_ambig_words_file_,
"%s", word_str.c_str());
134 ++(more_args->updated_dawgs);
136 ++(more_args->active_dawgs);
137 permute_choices(debug, char_choices, char_choice_index + 1, prev_char_frag_info, word,
138 certainties, limit, best_choice, attempts_left, more_args);
140 --(more_args->updated_dawgs);
141 --(more_args->active_dawgs);
144 if (dawg_debug_level) {
145 tprintf(
"last unichar not OK at index %d in %s\n", word_index, word->
debug_string().c_str());
160 float rating_limit) {
162 best_choice->make_bad();
163 best_choice->set_rating(rating_limit);
174 int attempts_left = max_permuter_attempts;
175 permute_choices((dawg_debug_level) ?
"permute_dawg_debug" :
nullptr, char_choices, 0,
nullptr,
176 &word, certainties, &rating_limit, best_choice, &attempts_left, &dawg_args);
177 delete[] active_dawgs;
189 WERD_CHOICE *word,
float certainties[],
float *limit,
190 WERD_CHOICE *best_choice,
int *attempts_left,
void *more_args) {
193 "%s permute_choices: char_choice_index=%d"
194 " limit=%g rating=%g, certainty=%g word=%s\n",
198 if (
static_cast<unsigned>(char_choice_index) < char_choices.size()) {
199 BLOB_CHOICE_IT blob_choice_it;
200 blob_choice_it.set_to_list(char_choices.at(char_choice_index));
201 for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); blob_choice_it.forward()) {
203 append_choices(debug, char_choices, *(blob_choice_it.data()), char_choice_index,
204 prev_char_frag_info, word, certainties, limit, best_choice, attempts_left,
206 if (*attempts_left <= 0) {
208 tprintf(
"permute_choices(): attempts_left is 0\n");
225 const BLOB_CHOICE &blob_choice,
int char_choice_index,
227 float certainties[],
float *limit,
WERD_CHOICE *best_choice,
228 int *attempts_left,
void *more_args) {
229 auto word_ending = (
static_cast<unsigned>(char_choice_index) == char_choices.size() - 1);
234 prev_char_frag_info, debug, word_ending, &char_frag_info)) {
238 if (char_frag_info.
unichar_id == INVALID_UNICHAR_ID) {
239 permute_choices(debug, char_choices, char_choice_index + 1, &char_frag_info, word, certainties,
240 limit, best_choice, attempts_left, more_args);
245 float old_rating = word->
rating();
247 uint8_t old_permuter = word->
permuter();
253 (this->*
go_deeper_fxn_)(debug, char_choices, char_choice_index, &char_frag_info, word_ending,
254 word, certainties, limit, best_choice, attempts_left, more_args);
293 prev_char_frag_info !=
nullptr ? prev_char_frag_info->
fragment :
nullptr;
296 if (debug && (prev_fragment || this_fragment)) {
297 tprintf(
"%s check fragments: choice=%s word_ending=%d\n", debug,
298 getUnicharset().debug_str(curr_unichar_id).c_str(), word_ending);
308 char_frag_info->
fragment = this_fragment;
309 char_frag_info->
rating = curr_rating;
310 char_frag_info->
certainty = curr_certainty;
312 if (prev_fragment && !this_fragment) {
314 tprintf(
"Skip choice with incomplete fragment\n");
320 char_frag_info->
unichar_id = INVALID_UNICHAR_ID;
324 tprintf(
"Non-matching fragment piece\n");
332 tprintf(
"Built character %s from fragments\n",
337 tprintf(
"Record fragment continuation\n");
339 char_frag_info->
fragment = this_fragment;
342 char_frag_info->
rating = prev_char_frag_info->
rating + curr_rating;
348 tprintf(
"Record fragment beginning\n");
352 tprintf(
"Non-starting fragment piece with no prev_fragment\n");
358 if (word_ending && char_frag_info->
fragment) {
360 tprintf(
"Word cannot end with a fragment\n");
void tprintf(const char *format,...)
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
UNICHAR_ID unichar_id() const
std::string debug_string() const
void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const
UNICHAR_ID unichar_id(unsigned index) const
void set_certainty(float new_val)
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
void set_permuter(uint8_t perm)
const UNICHARSET * unicharset() const
void remove_last_unichar_id()
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
void set_rating(float new_val)
static std::string to_string(const char *unichar, int pos, int total, bool natural)
bool is_continuation_of(const CHAR_FRAGMENT *fragment) const
const char * get_unichar() const
bool is_beginning() const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
bool get_isngram(UNICHAR_ID unichar_id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
const CHAR_FRAGMENT * fragment
DawgPositionVector * updated_dawgs
DawgPositionVector * active_dawgs
void permute_choices(const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *more_args)
void append_choices(const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, const BLOB_CHOICE &blob_choice, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *more_args)
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
void(Dict::* go_deeper_fxn_)(const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args)
Pointer to go_deeper function.
void update_best_choice(const WERD_CHOICE &word, WERD_CHOICE *best_choice)
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
void go_deeper_dawg_fxn(const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args)
void init_active_dawgs(DawgPositionVector *active_dawgs, bool ambigs_mode) const
const UNICHARSET & getUnicharset() const
bool fragment_state_okay(UNICHAR_ID curr_unichar_id, float curr_rating, float curr_certainty, const CHAR_FRAGMENT_INFO *prev_char_frag_info, const char *debug, int word_ending, CHAR_FRAGMENT_INFO *char_frag_info)