846 {
847 bool bol;
848
849
850 bool prev_fuzzy_sp;
851 bool prev_fuzzy_non;
852 uint8_t prev_blanks;
853 bool fuzzy_sp = false;
854 bool fuzzy_non = false;
855 uint8_t blanks = 0;
856 bool prev_gap_was_a_space = false;
857 bool break_at_next_gap = false;
858 ROW *real_row;
859 C_OUTLINE_IT cout_it;
860 C_BLOB_LIST cblobs;
861 C_BLOB_IT cblob_it = &cblobs;
862 WERD_LIST words;
863 WERD *word;
864 int32_t next_rep_char_word_right = INT32_MAX;
865 float repetition_spacing;
866 int32_t xstarts[2];
867 int32_t prev_x;
868 BLOBNBOX_IT box_it;
871 int16_t prev_gap = INT16_MAX;
872 int16_t current_gap = INT16_MAX;
873 int16_t next_gap = INT16_MAX;
874 int16_t prev_within_xht_gap = INT16_MAX;
875 int16_t current_within_xht_gap = INT16_MAX;
876 int16_t next_within_xht_gap = INT16_MAX;
877 int16_t word_count = 0;
878
879
880 WERD_IT rep_char_it(&(row->rep_words));
881 if (!rep_char_it.empty()) {
882 next_rep_char_word_right = rep_char_it.data()->bounding_box().right();
883 }
884
885 prev_x = -INT16_MAX;
886 cblob_it.set_to_list(&cblobs);
887 box_it.set_to_list(row->blob_list());
888
889 WERD_IT word_it(&words);
890 bol = true;
891 prev_blanks = 0;
892 prev_fuzzy_sp = false;
893 prev_fuzzy_non = false;
894 if (!box_it.empty()) {
895 xstarts[0] = box_it.data()->bounding_box().left();
896 if (xstarts[0] > next_rep_char_word_right) {
897
898 word = rep_char_it.extract();
899 word_it.add_after_then_move(word);
900
901 word->set_flag(
W_BOL,
true);
902 bol = false;
903 word->set_blanks(0);
904
907 xstarts[0] = word->bounding_box().left();
908
909 repetition_spacing = find_mean_blob_spacing(word);
910 current_gap = box_it.data()->bounding_box().left() - next_rep_char_word_right;
911 current_within_xht_gap = current_gap;
912 if (current_gap > tosp_rep_space * repetition_spacing) {
913 prev_blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
914 if (prev_blanks < 1) {
915 prev_blanks = 1;
916 }
917 } else {
918 prev_blanks = 0;
919 }
920 if (tosp_debug_level > 5) {
921 tprintf(
"Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
922 box_it.data()->bounding_box().left(), box_it.data()->bounding_box().bottom(),
923 repetition_spacing, current_gap);
924 }
925 prev_fuzzy_sp = false;
926 prev_fuzzy_non = false;
927 if (rep_char_it.empty()) {
928 next_rep_char_word_right = INT32_MAX;
929 } else {
930 rep_char_it.forward();
931 next_rep_char_word_right = rep_char_it.data()->bounding_box().right();
932 }
933 }
934
935 peek_at_next_gap(row, box_it, next_blob_box, next_gap, next_within_xht_gap);
936 do {
937 auto bblob = box_it.data();
938 auto blob_box = bblob->bounding_box();
939 if (bblob->joined_to_prev()) {
940 auto cblob = bblob->remove_cblob();
941 if (cblob != nullptr) {
942 cout_it.set_to_list(cblob_it.data()->out_list());
943 cout_it.move_to_last();
944 cout_it.add_list_after(cblob->out_list());
945 delete cblob;
946 }
947 } else {
948 auto cblob = bblob->cblob();
949 if (cblob != nullptr) {
950 bblob->set_owns_cblob(false);
951 cblob_it.add_after_then_move(cblob);
952 }
953 prev_x = blob_box.right();
954 }
955 box_it.forward();
956 bblob = box_it.data();
957 blob_box = bblob->bounding_box();
958
959 if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) {
960
961 prev_gap = current_gap;
962 prev_within_xht_gap = current_within_xht_gap;
963 prev_blob_box = next_blob_box;
964 current_gap = next_gap;
965 current_within_xht_gap = next_within_xht_gap;
966 peek_at_next_gap(row, box_it, next_blob_box, next_gap, next_within_xht_gap);
967
968 int16_t prev_gap_arg = prev_gap;
969 int16_t next_gap_arg = next_gap;
970 if (tosp_only_use_xht_gaps) {
971 prev_gap_arg = prev_within_xht_gap;
972 next_gap_arg = next_within_xht_gap;
973 }
974
975 if (blob_box.left() > next_rep_char_word_right ||
976 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box, current_gap,
977 current_within_xht_gap, next_blob_box, next_gap_arg, blanks, fuzzy_sp,
978 fuzzy_non, prev_gap_was_a_space, break_at_next_gap) ||
979 box_it.at_first()) {
980
981 word = new WERD(&cblobs, prev_blanks, nullptr);
982 word_count++;
983 word_it.add_after_then_move(word);
984 if (bol) {
985 word->set_flag(
W_BOL,
true);
986 bol = false;
987 }
988 if (prev_fuzzy_sp) {
989
991 } else if (prev_fuzzy_non) {
993 }
994
995
996 if (blob_box.left() > next_rep_char_word_right) {
997
998 word = rep_char_it.extract();
999 word_it.add_after_then_move(word);
1000
1001
1002 repetition_spacing = find_mean_blob_spacing(word);
1003 current_gap = word->bounding_box().left() - prev_x;
1004 current_within_xht_gap = current_gap;
1005 if (current_gap > tosp_rep_space * repetition_spacing) {
1006 blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
1007 if (blanks < 1) {
1008 blanks = 1;
1009 }
1010 } else {
1011 blanks = 0;
1012 }
1013 if (tosp_debug_level > 5) {
1014 tprintf(
"Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1015 word->bounding_box().left(), word->bounding_box().bottom(),
1016 repetition_spacing, current_gap, blanks);
1017 }
1018 word->set_blanks(blanks);
1019
1022
1023
1024
1025 current_gap = blob_box.left() - next_rep_char_word_right;
1026 if (current_gap > tosp_rep_space * repetition_spacing) {
1027 blanks = static_cast<uint8_t>(current_gap / row->space_size);
1028 if (blanks < 1) {
1029 blanks = 1;
1030 }
1031 } else {
1032 blanks = 0;
1033 }
1034 if (tosp_debug_level > 5) {
1035 tprintf(
" Rgap:%d (%d blanks)\n", current_gap, blanks);
1036 }
1037 fuzzy_sp = false;
1038 fuzzy_non = false;
1039
1040 if (rep_char_it.empty()) {
1041 next_rep_char_word_right = INT32_MAX;
1042 } else {
1043 rep_char_it.forward();
1044 next_rep_char_word_right = rep_char_it.data()->bounding_box().right();
1045 }
1046 }
1047
1048 if (box_it.at_first() && rep_char_it.empty()) {
1049
1050 word->set_flag(
W_EOL,
true);
1051 xstarts[1] = prev_x;
1052 } else {
1053 prev_blanks = blanks;
1054 prev_fuzzy_sp = fuzzy_sp;
1055 prev_fuzzy_non = fuzzy_non;
1056 }
1057 }
1058 }
1059 } while (!box_it.at_first());
1060
1061
1062 while (!rep_char_it.empty()) {
1063 word = rep_char_it.extract();
1064 word_it.add_after_then_move(word);
1065
1066
1067 repetition_spacing = find_mean_blob_spacing(word);
1068 current_gap = word->bounding_box().left() - prev_x;
1069 if (current_gap > tosp_rep_space * repetition_spacing) {
1070 blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
1071 if (blanks < 1) {
1072 blanks = 1;
1073 }
1074 } else {
1075 blanks = 0;
1076 }
1077 if (tosp_debug_level > 5) {
1078 tprintf(
"Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1079 word->bounding_box().left(), word->bounding_box().bottom(), repetition_spacing,
1080 current_gap, blanks);
1081 }
1082 word->set_blanks(blanks);
1083
1086 prev_x = word->bounding_box().right();
1087 if (rep_char_it.empty()) {
1088
1089 word->set_flag(
W_EOL,
true);
1090 xstarts[1] = prev_x;
1091 } else {
1092 rep_char_it.forward();
1093 }
1094 }
1095 real_row =
1096 new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
1097 word_it.set_to_list(real_row->word_list());
1098
1099 word_it.add_list_after(&words);
1100 real_row->recalc_bounding_box();
1101
1102 if (tosp_debug_level > 4) {
1103 tprintf(
"Row: Made %d words in row ((%d,%d)(%d,%d))\n", word_count,
1104 real_row->bounding_box().left(), real_row->bounding_box().bottom(),
1105 real_row->bounding_box().right(), real_row->bounding_box().top());
1106 }
1107 return real_row;
1108 }
1109 return nullptr;
1110}
@ W_FUZZY_NON
fuzzy nonspace