/**
 * Search for the word permutation with the best spacing score.
 *
 * best_perm is scored with eval_word_spacing(). Unless that score is already
 * PERFECT_WERDS, a working list is set up by initialise_search() and then
 * repeatedly matched (match_current_words), scored, and advanced
 * (transform_to_next_perm) until a PERFECT_WERDS score has been recorded or
 * the working list is empty. Whenever a working permutation scores strictly
 * higher than the best seen so far, best_perm is cleared and refilled with a
 * deep copy of it.
 *
 * dump_words() is called with stage 1 for the initial list, stage 2 for every
 * candidate that is scored, and stage 3 for the final result.
 *
 * @param best_perm  In/out: the list to improve; holds the best-scoring
 *                   permutation found when the function returns.
 * @param row        Forwarded unchanged to match_current_words().
 * @param block      Forwarded unchanged to match_current_words().
 */
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
                                     ROW *row,
                                     BLOCK* block) {
  WERD_RES_LIST current_perm;
  BOOL8 improved = FALSE;

  // Score of the list as supplied by the caller: the baseline to beat.
  inT16 best_score = eval_word_spacing(best_perm);
  dump_words(best_perm, best_score, 1, improved);

  // Only start a search when there is something left to improve.
  if (best_score != PERFECT_WERDS)
    initialise_search(best_perm, current_perm);

  while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
    match_current_words(current_perm, row, block);
    const inT16 current_score = eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);

    if (current_score > best_score) {
      // Keep an independent copy: current_perm keeps being transformed below.
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }

    // A perfect candidate ends the loop via best_score; otherwise move on.
    if (current_score < PERFECT_WERDS)
      transform_to_next_perm(current_perm);
  }

  dump_words(best_perm, best_score, 3, improved);
}
/**
 * Score the spacing of a list of word results.
 *
 * When fixsp_fp_eval is off, the work is delegated to eval_word_spacing().
 * Otherwise only "trusted" words contribute: words that are done or
 * tess_accepted, whose best choice came from SYSTEM_DAWG_PERM, FREQ_DAWG_PERM
 * or USER_DAWG_PERM, or for which safe_dict_word() is positive.
 *
 * For each such word the characters of the best choice and the blobs of the
 * output word are walked in step:
 *   - a character that is a space, or a blob whose blob_noise_score() is below
 *     bln_x_height * fixsp_small_outlines_size, costs one point;
 *   - otherwise a character accepted in the reject map earns one point.
 *
 * NOTE(review): "fp" presumably stands for fixed pitch - confirm.
 *
 * @param word_res_list  Words to evaluate.
 * @return The accumulated score, never less than zero.
 */
inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
  // Alternative evaluation disabled: use the general scorer instead.
  if (!fixsp_fp_eval) {
    return eval_word_spacing(word_res_list);
  }

  const float noise_threshold = bln_x_height * fixsp_small_outlines_size;
  inT16 total = 0;
  PBLOB_IT blobs;
  WERD_RES_IT res_it(&word_res_list);

  for (res_it.mark_cycle_pt(); !res_it.cycled_list(); res_it.forward()) {
    WERD_RES *res = res_it.data();
    const inT16 char_count = res->reject_map.length();
    const char *text = res->best_choice->string().string();

    const bool trusted =
        res->done ||
        res->tess_accepted ||
        res->best_choice->permuter() == SYSTEM_DAWG_PERM ||
        res->best_choice->permuter() == FREQ_DAWG_PERM ||
        res->best_choice->permuter() == USER_DAWG_PERM ||
        safe_dict_word(text) > 0;
    if (!trusted) {
      continue;
    }

    blobs.set_to_list(res->outword->blob_list());

    // char_idx counts characters; byte_pos is the matching position in text,
    // advanced by the per-character entry of lengths().
    inT16 char_idx = 0;
    inT16 byte_pos = 0;
    while (char_idx < char_count) {
      const bool looks_like_gap =
          text[byte_pos] == ' ' ||
          blob_noise_score(blobs.data()) < noise_threshold;

      if (looks_like_gap) {
        --total;  // penalise possibly erroneous non-space
      } else if (res->reject_map[char_idx].accepted()) {
        ++total;
      }

      byte_pos += res->best_choice->lengths()[char_idx];
      ++char_idx;
      blobs.forward();
    }
  }

  // Negative totals are reported as zero.
  return total < 0 ? 0 : total;
}