/**
 * @name fix_fuzzy_space_list()
 * Try alternative segmentations of the words in best_perm, as generated by
 * transform_to_next_perm(), and keep whichever permutation scores best under
 * eval_word_spacing(). Stops as soon as a perfect score is reached or no
 * further permutations remain.
 */
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
                                     ROW *row,
                                     BLOCK* block) {
  inT16 best_score;
  WERD_RES_LIST current_perm;
  inT16 current_score;
  BOOL8 improved = FALSE;

  best_score = eval_word_spacing(best_perm);  // default score
  dump_words(best_perm, best_score, 1, improved);
  if (best_score != PERFECT_WERDS)
    initialise_search(best_perm, current_perm);
  while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
    match_current_words(current_perm, row, block);
    current_score = eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }
    if (current_score < PERFECT_WERDS)
      transform_to_next_perm(current_perm);
  }
  dump_words(best_perm, best_score, 3, improved);
}
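// The search above (and fix_noisy_space_list() below) follows one pattern:
// score the current hypothesis, keep a copy of the best-scoring hypothesis
// seen so far, and ask a generator for the next permutation until the score
// is perfect or the generator signals "no more" by emptying the list.
// Below is a minimal, illustrative sketch of that pattern under simplified
// assumptions: search_best, Hypothesis, score and next_perm are hypothetical
// names, not part of the Tesseract API, and the block is kept inside #if 0 so
// it is never compiled. fix_fuzzy_space_list() instantiates the pattern with
// eval_word_spacing() / transform_to_next_perm(); fix_noisy_space_list() with
// fp_eval_word_spacing() / break_noisiest_blob_word().
#if 0
template <typename Hypothesis, typename ScoreFn, typename NextPermFn>
Hypothesis search_best(Hypothesis current, int perfect_score,
                       ScoreFn score, NextPermFn next_perm) {
  Hypothesis best = current;             // mirrors initialise_search()
  int best_score = score(best);
  bool have_more = (best_score != perfect_score);
  while (best_score != perfect_score && have_more) {
    int current_score = score(current);
    if (current_score > best_score) {    // keep a copy of the best so far
      best = current;
      best_score = current_score;
    }
    if (current_score < perfect_score)
      have_more = next_perm(&current);   // false == no further permutations
  }
  return best;
}
#endif  // illustrative sketch only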
/**
 * @name fix_noisy_space_list()
 * As fix_fuzzy_space_list(), but the search starts from a deep copy of the
 * first word in best_perm, permutations are generated by
 * break_noisiest_blob_word(), and scoring uses fp_eval_word_spacing().
 */
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row) {
  inT16 best_score;
  WERD_RES_IT best_perm_it(&best_perm);
  WERD_RES_LIST current_perm;
  WERD_RES_IT current_perm_it(&current_perm);
  WERD_RES *old_word_res;
  WERD_RES *new_word_res;
  inT16 current_score;
  BOOL8 improved = FALSE;

  best_score = fp_eval_word_spacing(best_perm);  // default score
  dump_words(best_perm, best_score, 1, improved);

  new_word_res = new WERD_RES;
  old_word_res = best_perm_it.data();
  // Kludge to force deep copy
  old_word_res->combination = TRUE;
  *new_word_res = *old_word_res;  // deep copy
  // Undo kludge
  old_word_res->combination = FALSE;
  // Undo kludge
  new_word_res->combination = FALSE;
  current_perm_it.add_to_end(new_word_res);

  break_noisiest_blob_word(current_perm);

  while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
    match_current_words(current_perm, row);
    current_score = fp_eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }
    if (current_score < PERFECT_WERDS)
      break_noisiest_blob_word(current_perm);
  }
  dump_words(best_perm, best_score, 3, improved);
}
/**
 * break_noisiest_blob_word()
 * Find the word with the blob which looks like the worst noise.
 * Break the word into two, deleting the noise blob.
 */
void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT worst_word_it;
  float worst_noise_score = 9999;
  int worst_blob_index = -1;     // Noisiest blob of noisiest wd
  int blob_index;                // of wds noisiest blob
  float noise_score;             // of wds noisiest blob
  WERD_RES *word_res;
  C_BLOB_IT blob_it;
  C_BLOB_IT rej_cblob_it;
  C_BLOB_LIST new_blob_list;
  C_BLOB_IT new_blob_it;
  C_BLOB_IT new_rej_cblob_it;
  WERD *new_word;
  inT16 start_of_noise_blob;
  inT16 i;

  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    blob_index = worst_noise_blob(word_it.data(), &noise_score);
    if (blob_index > -1 && worst_noise_score > noise_score) {
      worst_noise_score = noise_score;
      worst_blob_index = blob_index;
      worst_word_it = word_it;
    }
  }
  if (worst_blob_index < 0) {
    words.clear();          // signal termination
    return;
  }

  /* Now split the worst_word_it */

  word_res = worst_word_it.data();

  /* Move blobs before noise blob to a new bloblist */
  new_blob_it.set_to_list(&new_blob_list);
  blob_it.set_to_list(word_res->word->cblob_list());
  for (i = 0; i < worst_blob_index; i++, blob_it.forward()) {
    new_blob_it.add_after_then_move(blob_it.extract());
  }
  start_of_noise_blob = blob_it.data()->bounding_box().left();
  delete blob_it.extract();     // throw out noise blob

  new_word = new WERD(&new_blob_list, word_res->word);
  new_word->set_flag(W_EOL, FALSE);
  word_res->word->set_flag(W_BOL, FALSE);
  word_res->word->set_blanks(1);  // After break

  new_rej_cblob_it.set_to_list(new_word->rej_cblob_list());
  rej_cblob_it.set_to_list(word_res->word->rej_cblob_list());
  for (;
       (!rej_cblob_it.empty() &&
        (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
       rej_cblob_it.forward()) {
    new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
  }

  WERD_RES* new_word_res = new WERD_RES(new_word);
  new_word_res->combination = TRUE;
  worst_word_it.add_before_then_move(new_word_res);

  word_res->ClearResults();
}
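// Illustrative sketch (not part of the build) of the split step above, using
// plain std::vector in place of WERD_RES_LIST: each inner vector is one word
// and each float is the per-blob noise score that worst_noise_blob() would
// report, with lower meaning "more noise-like". Blobs the real code never
// considers (worst_noise_blob() returning -1) can be given scores of 9999 or
// more here so they are never picked. The names are hypothetical; only the
// control flow mirrors break_noisiest_blob_word().
#if 0
#include <cstddef>
#include <vector>

using Word = std::vector<float>;   // per-blob noise scores for one word

// Split the word holding the globally noisiest blob into the blobs before the
// noise blob and the blobs after it, discarding the noise blob itself.
// Clears the list (the termination signal used above) if no blob qualifies.
static void break_noisiest_blob(std::vector<Word> *words) {
  std::size_t worst_word = 0, worst_blob = 0;
  float worst_score = 9999.0f;
  bool found = false;
  for (std::size_t w = 0; w < words->size(); ++w) {
    const Word &word = (*words)[w];
    for (std::size_t b = 0; b < word.size(); ++b) {
      if (word[b] < worst_score) {
        worst_score = word[b];
        worst_word = w;
        worst_blob = b;
        found = true;
      }
    }
  }
  if (!found) {
    words->clear();                // signal termination
    return;
  }
  Word &victim = (*words)[worst_word];
  Word head(victim.begin(), victim.begin() + worst_blob);          // blobs before the noise blob
  victim.erase(victim.begin(), victim.begin() + worst_blob + 1);   // drop head + noise blob
  words->insert(words->begin() + worst_word, head);                // new word precedes the remainder
}
#endif  // illustrative sketch only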
/**
 * @name transform_to_next_perm()
 * Examines the current word list to find the smallest word gap size. Then
 * walks the word list, closing any gaps of this size by either inserting new
 * combination words or extending existing ones.
 *
 * The routine COULD be limited to stop it building words longer than N blobs.
 *
 * If there are no more gaps then it DELETES the entire list and returns the
 * empty list to cause termination.
 */
void transform_to_next_perm(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT prev_word_it(&words);
  WERD_RES *word;
  WERD_RES *prev_word;
  WERD_RES *combo;
  WERD *copy_word;
  inT16 prev_right = -MAX_INT16;
  TBOX box;
  inT16 gap;
  inT16 min_gap = MAX_INT16;

  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data();
    if (!word->part_of_combo) {
      box = word->word->bounding_box();
      if (prev_right > -MAX_INT16) {
        gap = box.left() - prev_right;
        if (gap < min_gap)
          min_gap = gap;
      }
      prev_right = box.right();
    }
  }
  if (min_gap < MAX_INT16) {
    prev_right = -MAX_INT16;        // back to start
    word_it.set_to_list(&words);
    // Note: we can't use cycle_pt due to inserted combos at start of list.
    for (;
         (prev_right == -MAX_INT16) || !word_it.at_first();
         word_it.forward()) {
      word = word_it.data();
      if (!word->part_of_combo) {
        box = word->word->bounding_box();
        if (prev_right > -MAX_INT16) {
          gap = box.left() - prev_right;
          if (gap <= min_gap) {
            prev_word = prev_word_it.data();
            if (prev_word->combination) {
              combo = prev_word;
            } else {
              /* Make a new combination and insert before
               * the first word being joined. */
              copy_word = new WERD;
              *copy_word = *(prev_word->word);  // deep copy
              combo = new WERD_RES(copy_word);
              combo->combination = TRUE;
              combo->x_height = prev_word->x_height;
              prev_word->part_of_combo = TRUE;
              prev_word_it.add_before_then_move(combo);
            }
            combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
            if (word->combination) {
              combo->word->join_on(word->word);  // Move blobs to combo
              // old combo no longer needed
              delete word_it.extract();
            } else {
              // Copy current wd to combo
              combo->copy_on(word);
              word->part_of_combo = TRUE;
            }
            combo->done = FALSE;
            combo->ClearResults();
          } else {
            prev_word_it = word_it;  // catch up
          }
        }
        prev_right = box.right();
      }
    }
  } else {
    words.clear();  // signal termination
  }
}
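// Illustrative sketch (not part of the build) of the gap-closing step above:
// find the smallest horizontal gap between consecutive words, then merge
// every pair of neighbours separated by a gap no larger than that minimum,
// or clear the list when no gap remains. It uses a hypothetical Span type
// instead of WERD/WERD_RES and ignores the combination / part_of_combo
// bookkeeping that the real routine needs so that blobs are shared, not
// duplicated. The names are assumptions, not Tesseract API.
#if 0
#include <climits>
#include <cstddef>
#include <vector>

struct Span { int left, right; };   // bounding-box x-extent of one word

static void close_smallest_gaps(std::vector<Span> *words) {
  int min_gap = INT_MAX;
  for (std::size_t i = 1; i < words->size(); ++i) {
    int gap = (*words)[i].left - (*words)[i - 1].right;
    if (gap < min_gap)
      min_gap = gap;
  }
  if (min_gap == INT_MAX) {       // zero or one word left: no gaps to close
    words->clear();               // signal termination
    return;
  }
  std::vector<Span> merged;
  merged.push_back((*words)[0]);
  for (std::size_t i = 1; i < words->size(); ++i) {
    const Span &word = (*words)[i];
    int gap = word.left - merged.back().right;
    if (gap <= min_gap)
      merged.back().right = word.right;    // extend the previous (combination) word
    else
      merged.push_back(word);              // keep the gap; start a new word
  }
  *words = merged;
}
#endif  // illustrative sketch only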