// Debug dump of a single word result.
// Always prints the bounding box of word->word. Unless SECURE_NAMES is
// defined it additionally prints the best-choice string, the blob counts of
// word->word and word->outword, the reject map, and the tess_accepted and
// done flags.
void fixspace_dbg(WERD_RES *word) {
  word->word->bounding_box().print();

#ifndef SECURE_NAMES
  // Hard-wired switch: change to TRUE to also dump the reject map entry of
  // every character in the best-choice string.
  const BOOL8 kShowMapDetail = FALSE;

  tprintf(" \"%s\" ", word->best_choice->string().string());
  tprintf("Blob count: %d (word); %d/%d (outword)\n",
          word->word->gblob_list()->length(),
          word->outword->gblob_list()->length(),
          word->outword->rej_blob_list()->length());
  word->reject_map.print(debug_fp);
  tprintf("\n");

  if (kShowMapDetail) {
    tprintf("\"%s\"\n", word->best_choice->string().string());
    // Walk the best-choice string up to its terminating NUL, printing each
    // character followed by the reject map entry at the same index.
    for (inT16 ch_index = 0;
         word->best_choice->string()[ch_index] != '\0';
         ch_index++) {
      tprintf("**** \"%c\" ****\n", word->best_choice->string()[ch_index]);
      word->reject_map[ch_index].full_print(debug_fp);
    }
  }

  const char *accepted_text = word->tess_accepted ? "TRUE" : "FALSE";
  tprintf("Tess Accepted: %s\n", accepted_text);
  const char *done_text = word->done ? "TRUE" : "FALSE";
  tprintf("Done flag: %s\n\n", done_text);
#endif
}
// Internal version of EvaluateBox returns the unclipped gradients as well // as the result of EvaluateBox. // hgrad1 and hgrad2 are the gradients for the horizontal textline. int TextlineProjection::EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, int* hgrad1, int* hgrad2, int* vgrad1, int* vgrad2) const { int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true); int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false); int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true); int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), box.bottom(), box.top(), false); int top_clipped = MAX(top_gradient, 0); int bottom_clipped = MAX(bottom_gradient, 0); int left_clipped = MAX(left_gradient, 0); int right_clipped = MAX(right_gradient, 0); if (debug) { tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", top_gradient, bottom_gradient, left_gradient, right_gradient); box.print(); } int result = MAX(top_clipped, bottom_clipped) - MAX(left_clipped, right_clipped); if (hgrad1 != NULL && hgrad2 != NULL) { *hgrad1 = top_gradient; *hgrad2 = bottom_gradient; } if (vgrad1 != NULL && vgrad2 != NULL) { *vgrad1 = left_gradient; *vgrad2 = right_gradient; } return result; }
// Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, int* end_y) { int ptcount = 0; BLOBNBOX_C_IT it(good_points); TBOX box = bbox->bounding_box(); bool debug = WithinTestRegion(2, box.left(), box.bottom()); if (debug) { tprintf("Starting alignment run at blob:"); box.print(); } int x_start = params.right_tab ? box.right() : box.left(); while (bbox != nullptr) { // Add the blob to the list if the appropriate side is a tab candidate, // or if we are working on a ragged tab. TabType type = params.right_tab ? bbox->right_tab_type() : bbox->left_tab_type(); if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && (it.empty() || it.data() != bbox)) { if (top_to_bottom) it.add_before_then_move(bbox); else it.add_after_then_move(bbox); ++ptcount; } // Find the next blob that is aligned with the current one. // FindAlignedBlob guarantees that forward progress will be made in the // top_to_bottom direction, and therefore eventually it will return nullptr, // making this while (bbox != nullptr) loop safe. bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); if (bbox != nullptr) { box = bbox->bounding_box(); if (!params.ragged) x_start = params.right_tab ? box.right() : box.left(); } } if (debug) { tprintf("Alignment run ended with %d pts at blob:", ptcount); box.print(); } return ptcount; }
/// Consume all source blobs that strongly overlap the given box, /// putting them into a new word, with the correct_text label. /// Fights over which box owns which blobs are settled by /// applying the blobs to box or next_box with the least non-overlap. /// @return false if the box was in error, which can only be caused by /// failing to find an overlapping blob for a box. bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX& box, const TBOX& next_box, const char* correct_text) { if (applybox_debug > 1) { tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text); } WERD* new_word = NULL; BLOCK_IT b_it(block_list); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); if (!box.major_overlap(block->bounding_box())) continue; ROW_IT r_it(block->row_list()); for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { ROW* row = r_it.data(); if (!box.major_overlap(row->bounding_box())) continue; WERD_IT w_it(row->word_list()); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); if (applybox_debug > 2) { tprintf("Checking word:"); word->bounding_box().print(); } if (word->text() != NULL && word->text()[0] != '\0') continue; // Ignore words that are already done. if (!box.major_overlap(word->bounding_box())) continue; C_BLOB_IT blob_it(word->cblob_list()); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); TBOX blob_box = blob->bounding_box(); if (!blob_box.major_overlap(box)) continue; double current_box_miss_metric = BoxMissMetric(blob_box, box); double next_box_miss_metric = BoxMissMetric(blob_box, next_box); if (applybox_debug > 2) { tprintf("Checking blob:"); blob_box.print(); tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric, next_box_miss_metric); } if (current_box_miss_metric > next_box_miss_metric) continue; // Blob is a better match for next box. 
if (applybox_debug > 2) { tprintf("Blob match: blob:"); blob_box.print(); tprintf("Matches box:"); box.print(); tprintf("With next box:"); next_box.print(); } if (new_word == NULL) { // Make a new word with a single blob. new_word = word->shallow_copy(); new_word->set_text(correct_text); w_it.add_to_end(new_word); } C_BLOB_IT new_blob_it(new_word->cblob_list()); new_blob_it.add_to_end(blob_it.extract()); } } } } if (new_word == NULL && applybox_debug > 0) tprintf("FAIL!\n"); return new_word != NULL; }
/// Gather consecutive blobs that match the given box into the best_state /// and corresponding correct_text. /// /// Fights over which box owns which blobs are settled by pre-chopping and /// applying the blobs to box or next_box with the least non-overlap. /// @return false if the box was in error, which can only be caused by /// failing to find an appropriate blob for a box. /// /// This means that occasionally, blobs may be incorrectly segmented if the /// chopper fails to find a suitable chop point. bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, const TBOX& box, const TBOX& next_box, const char* correct_text) { if (applybox_debug > 1) { tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text); } PAGE_RES_IT page_res_it(page_res); WERD_RES* word_res; for (word_res = page_res_it.word(); word_res != NULL; word_res = page_res_it.forward()) { if (!word_res->box_word->bounding_box().major_overlap(box)) continue; if (applybox_debug > 1) { tprintf("Checking word box:"); word_res->box_word->bounding_box().print(); } int word_len = word_res->box_word->length(); for (int i = 0; i < word_len; ++i) { TBOX char_box = TBOX(); int blob_count = 0; for (blob_count = 0; i + blob_count < word_len; ++blob_count) { TBOX blob_box = word_res->box_word->BlobBox(i + blob_count); if (!blob_box.major_overlap(box)) break; if (word_res->correct_text[i + blob_count].length() > 0) break; // Blob is claimed already. double current_box_miss_metric = BoxMissMetric(blob_box, box); double next_box_miss_metric = BoxMissMetric(blob_box, next_box); if (applybox_debug > 2) { tprintf("Checking blob:"); blob_box.print(); tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric, next_box_miss_metric); } if (current_box_miss_metric > next_box_miss_metric) break; // Blob is a better match for next box. 
char_box += blob_box; } if (blob_count > 0) { if (applybox_debug > 1) { tprintf("Index [%d, %d) seem good.\n", i, i + blob_count); } if (!char_box.almost_equal(box, 3) && (box.x_gap(next_box) < -3 || (prev_box != NULL && prev_box->x_gap(box) < -3))) { return false; } // We refine just the box_word, best_state and correct_text here. // The rebuild_word is made in TidyUp. // blob_count blobs are put together to match the box. Merge the // box_word boxes, save the blob_count in the state and the text. word_res->box_word->MergeBoxes(i, i + blob_count); word_res->best_state[i] = blob_count; word_res->correct_text[i] = correct_text; if (applybox_debug > 2) { tprintf("%d Blobs match: blob box:", blob_count); word_res->box_word->BlobBox(i).print(); tprintf("Matches box:"); box.print(); tprintf("With next box:"); next_box.print(); } // Eliminated best_state and correct_text entries for the consumed // blobs. for (int j = 1; j < blob_count; ++j) { word_res->best_state.remove(i + 1); word_res->correct_text.remove(i + 1); } // Assume that no box spans multiple source words, so we are done with // this box. if (applybox_debug > 1) { tprintf("Best state = "); for (int j = 0; j < word_res->best_state.size(); ++j) { tprintf("%d ", word_res->best_state[j]); } tprintf("\n"); tprintf("Correct text = [[ "); for (int j = 0; j < word_res->correct_text.size(); ++j) { tprintf("%s ", word_res->correct_text[j].string()); } tprintf("]]\n"); } return true; } } } if (applybox_debug > 0) { tprintf("FAIL!\n"); } return false; // Failure. }