// Compute the distance from the from_box to the to_box using curved // projection space. Separation that involves a decrease in projection // density (moving from the from_box to the to_box) is weighted more heavily // than constant density, and an increase is weighted less. // If horizontal_textline is true, then curved space is used vertically, // as for a diacritic on the edge of a textline. // The projection uses original image coords, so denorm is used to get // back to the image coords from box/part space. // How the calculation works: Think of a diacritic near a textline. // Distance is measured from the far side of the from_box to the near side of // the to_box. Shown is the horizontal textline case. // |------^-----| // | from | box | // |------|-----| // perpendicular | // <------v-------->|--------------------| // parallel | to box | // |--------------------| // Perpendicular distance uses "curved space" See VerticalDistance below. // Parallel distance is linear. // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio. int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, bool horizontal_textline, const DENORM* denorm, bool debug) const { // The parallel_gap is the horizontal gap between a horizontal textline and // the box. Analogous for vertical. int parallel_gap = 0; // start_pt is the box end of the line to be modified for curved space. TPOINT start_pt; // end_pt is the partition end of the line to be modified for curved space. TPOINT end_pt; if (horizontal_textline) { parallel_gap = from_box.x_gap(to_box) + from_box.width(); start_pt.x = (from_box.left() + from_box.right()) / 2; end_pt.x = start_pt.x; if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) { start_pt.y = from_box.top(); end_pt.y = MIN(to_box.top(), start_pt.y); } else { start_pt.y = from_box.bottom(); end_pt.y = MAX(to_box.bottom(), start_pt.y); } } else { parallel_gap = from_box.y_gap(to_box) + from_box.height(); if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) { start_pt.x = from_box.right(); end_pt.x = MIN(to_box.right(), start_pt.x); } else { start_pt.x = from_box.left(); end_pt.x = MAX(to_box.left(), start_pt.x); } start_pt.y = (from_box.bottom() + from_box.top()) / 2; end_pt.y = start_pt.y; } // The perpendicular gap is the max vertical distance gap out of: // top of from_box to to_box top and bottom of from_box to to_box bottom. // This value is then modified for curved projection space. // Analogous for vertical. int perpendicular_gap = 0; // If start_pt == end_pt, then the from_box lies entirely within the to_box // (in the perpendicular direction), so we don't need to calculate the // perpendicular_gap. if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { if (denorm != NULL) { // Denormalize the start and end. denorm->DenormTransform(NULL, start_pt, &start_pt); denorm->DenormTransform(NULL, end_pt, &end_pt); } if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y); } else { perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y); } } // The parallel_gap weighs less than the perpendicular_gap. return perpendicular_gap + parallel_gap / kParaPerpDistRatio; }
/// Gather consecutive blobs that match the given box into the best_state /// and corresponding correct_text. /// /// Fights over which box owns which blobs are settled by pre-chopping and /// applying the blobs to box or next_box with the least non-overlap. /// @return false if the box was in error, which can only be caused by /// failing to find an appropriate blob for a box. /// /// This means that occasionally, blobs may be incorrectly segmented if the /// chopper fails to find a suitable chop point. bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, const TBOX& box, const TBOX& next_box, const char* correct_text) { if (applybox_debug > 1) { tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text); } PAGE_RES_IT page_res_it(page_res); WERD_RES* word_res; for (word_res = page_res_it.word(); word_res != NULL; word_res = page_res_it.forward()) { if (!word_res->box_word->bounding_box().major_overlap(box)) continue; if (applybox_debug > 1) { tprintf("Checking word box:"); word_res->box_word->bounding_box().print(); } int word_len = word_res->box_word->length(); for (int i = 0; i < word_len; ++i) { TBOX char_box = TBOX(); int blob_count = 0; for (blob_count = 0; i + blob_count < word_len; ++blob_count) { TBOX blob_box = word_res->box_word->BlobBox(i + blob_count); if (!blob_box.major_overlap(box)) break; if (word_res->correct_text[i + blob_count].length() > 0) break; // Blob is claimed already. double current_box_miss_metric = BoxMissMetric(blob_box, box); double next_box_miss_metric = BoxMissMetric(blob_box, next_box); if (applybox_debug > 2) { tprintf("Checking blob:"); blob_box.print(); tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric, next_box_miss_metric); } if (current_box_miss_metric > next_box_miss_metric) break; // Blob is a better match for next box. char_box += blob_box; } if (blob_count > 0) { if (applybox_debug > 1) { tprintf("Index [%d, %d) seem good.\n", i, i + blob_count); } if (!char_box.almost_equal(box, 3) && (box.x_gap(next_box) < -3 || (prev_box != NULL && prev_box->x_gap(box) < -3))) { return false; } // We refine just the box_word, best_state and correct_text here. // The rebuild_word is made in TidyUp. // blob_count blobs are put together to match the box. Merge the // box_word boxes, save the blob_count in the state and the text. word_res->box_word->MergeBoxes(i, i + blob_count); word_res->best_state[i] = blob_count; word_res->correct_text[i] = correct_text; if (applybox_debug > 2) { tprintf("%d Blobs match: blob box:", blob_count); word_res->box_word->BlobBox(i).print(); tprintf("Matches box:"); box.print(); tprintf("With next box:"); next_box.print(); } // Eliminated best_state and correct_text entries for the consumed // blobs. for (int j = 1; j < blob_count; ++j) { word_res->best_state.remove(i + 1); word_res->correct_text.remove(i + 1); } // Assume that no box spans multiple source words, so we are done with // this box. if (applybox_debug > 1) { tprintf("Best state = "); for (int j = 0; j < word_res->best_state.size(); ++j) { tprintf("%d ", word_res->best_state[j]); } tprintf("\n"); tprintf("Correct text = [[ "); for (int j = 0; j < word_res->correct_text.size(); ++j) { tprintf("%s ", word_res->correct_text[j].string()); } tprintf("]]\n"); } return true; } } } if (applybox_debug > 0) { tprintf("FAIL!\n"); } return false; // Failure. }