// Finds a vector corresponding to a set of vertically aligned blob edges // running through the given box. The type of vector returned and the // search parameters are determined by the AlignedBlobParams. // vertical_x and y are updated with an estimate of the real // vertical direction. (skew finding.) // Returns NULL if no decent vector can be found. TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX* bbox, int* vertical_x, int* vertical_y) { int ext_start_y, ext_end_y; BLOBNBOX_CLIST good_points; // Search up and then down from the starting bbox. int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); BLOBNBOX_C_IT it(&good_points); it.move_to_last(); int end_y = it.data()->bounding_box().top(); it.move_to_first(); int start_y = it.data()->bounding_box().bottom(); if (pt_count >= align_params.min_points && end_y - start_y >= align_params.min_length) { int confirmed_points = 0; // Count existing confirmed points to see if vector is acceptable. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { if (bbox->right_tab_type() == align_params.confirmed_type) ++confirmed_points; } else { if (bbox->left_tab_type() == align_params.confirmed_type) ++confirmed_points; } } // Ragged vectors are not allowed to use too many already used points. if (!align_params.ragged || confirmed_points + confirmed_points < pt_count) { const TBOX& box = bbox->bounding_box(); if (WithinTestRegion(2, box.left(), box.bottom())) { tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, box.left(), box.bottom()); } // Flag all the aligned neighbours as confirmed . for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { bbox->set_right_tab_type(align_params.confirmed_type); } else { bbox->set_left_tab_type(align_params.confirmed_type); } } // Now make the vector and return it. TabVector* result = TabVector::FitVector(align_params.alignment, align_params.vertical, ext_start_y, ext_end_y, &good_points, vertical_x, vertical_y); if (WithinTestRegion(2, box.left(), box.bottom())) { result->Print("After fitting"); } return result; } } return NULL; }
// Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* TabVector::VerticalTextlinePartner() { if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } if (num_unmatched + num_matched == 0) return NULL; double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
// Setup the constraints between the partners of this TabVector. void TabVector::SetupPartnerConstraints() { // With the first and last partner, we want a common bottom and top, // respectively, and for each change of partner, we want a common // top of first with bottom of next. TabVector_C_IT it(&partners_); TabVector* prev_partner = NULL; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabVector* partner = it.data(); if (partner->top_constraints_ == NULL || partner->bottom_constraints_ == NULL) { partner->Print("Impossible: has no constraints"); Print("This vector has it as a partner"); continue; } if (prev_partner == NULL) { // This is the first partner, so common bottom. if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_); } else { // We need prev top to be common with partner bottom. if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_, partner->bottom_constraints_)) TabConstraint::MergeConstraints(prev_partner->top_constraints_, partner->bottom_constraints_); } prev_partner = partner; if (it.at_last()) { // This is the last partner, so common top. if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_); } } }
// Finds a vector corresponding to a set of vertically aligned blob edges // running through the given box. The type of vector returned and the // search parameters are determined by the AlignedBlobParams. // vertical_x and y are updated with an estimate of the real // vertical direction. (skew finding.) // Returns nullptr if no decent vector can be found. TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX* bbox, int* vertical_x, int* vertical_y) { int ext_start_y, ext_end_y; BLOBNBOX_CLIST good_points; // Search up and then down from the starting bbox. TBOX box = bbox->bounding_box(); bool debug = WithinTestRegion(2, box.left(), box.bottom()); int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); BLOBNBOX_C_IT it(&good_points); it.move_to_last(); box = it.data()->bounding_box(); int end_y = box.top(); int end_x = align_params.right_tab ? box.right() : box.left(); it.move_to_first(); box = it.data()->bounding_box(); int start_x = align_params.right_tab ? box.right() : box.left(); int start_y = box.bottom(); // Acceptable tab vectors must have a minimum number of points, // have a minimum acceptable length, and have a minimum gradient. // The gradient corresponds to the skew angle. // Ragged tabs don't need to satisfy the gradient condition, as they // will always end up parallel to the vertical direction. bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); if ((pt_count >= align_params.min_points && end_y - start_y >= align_params.min_length && (align_params.ragged || end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || at_least_2_crossings) { int confirmed_points = 0; // Count existing confirmed points to see if vector is acceptable. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { if (bbox->right_tab_type() == align_params.confirmed_type) ++confirmed_points; } else { if (bbox->left_tab_type() == align_params.confirmed_type) ++confirmed_points; } } // Ragged vectors are not allowed to use too many already used points. if (!align_params.ragged || confirmed_points + confirmed_points < pt_count) { const TBOX& box = bbox->bounding_box(); if (debug) { tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, box.left(), box.bottom()); } // Flag all the aligned neighbours as confirmed . for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { bbox->set_right_tab_type(align_params.confirmed_type); } else { bbox->set_left_tab_type(align_params.confirmed_type); } if (debug) { bbox->bounding_box().print(); } } // Now make the vector and return it. TabVector* result = TabVector::FitVector(align_params.alignment, align_params.vertical, ext_start_y, ext_end_y, &good_points, vertical_x, vertical_y); result->set_intersects_other_lines(at_least_2_crossings); if (debug) { tprintf("Box was %d, %d\n", box.left(), box.bottom()); result->Print("After fitting"); } return result; } else if (debug) { tprintf("Ragged tab used too many used points: %d out of %d\n", confirmed_points, pt_count); } } else if (debug) { tprintf("Tab vector failed basic tests: pt count %d vs min %d, " "length %d vs min %d, min grad %g\n", pt_count, align_params.min_points, end_y - start_y, align_params.min_length, abs(end_x - start_x) * kMinTabGradient); } return nullptr; }