// Finds a vector corresponding to a set of vertically aligned blob edges // running through the given box. The type of vector returned and the // search parameters are determined by the AlignedBlobParams. // vertical_x and y are updated with an estimate of the real // vertical direction. (skew finding.) // Returns NULL if no decent vector can be found. TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX* bbox, int* vertical_x, int* vertical_y) { int ext_start_y, ext_end_y; BLOBNBOX_CLIST good_points; // Search up and then down from the starting bbox. int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); BLOBNBOX_C_IT it(&good_points); it.move_to_last(); int end_y = it.data()->bounding_box().top(); it.move_to_first(); int start_y = it.data()->bounding_box().bottom(); if (pt_count >= align_params.min_points && end_y - start_y >= align_params.min_length) { int confirmed_points = 0; // Count existing confirmed points to see if vector is acceptable. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { if (bbox->right_tab_type() == align_params.confirmed_type) ++confirmed_points; } else { if (bbox->left_tab_type() == align_params.confirmed_type) ++confirmed_points; } } // Ragged vectors are not allowed to use too many already used points. if (!align_params.ragged || confirmed_points + confirmed_points < pt_count) { const TBOX& box = bbox->bounding_box(); if (WithinTestRegion(2, box.left(), box.bottom())) { tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, box.left(), box.bottom()); } // Flag all the aligned neighbours as confirmed . for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { bbox->set_right_tab_type(align_params.confirmed_type); } else { bbox->set_left_tab_type(align_params.confirmed_type); } } // Now make the vector and return it. TabVector* result = TabVector::FitVector(align_params.alignment, align_params.vertical, ext_start_y, ext_end_y, &good_points, vertical_x, vertical_y); if (WithinTestRegion(2, box.left(), box.bottom())) { result->Print("After fitting"); } return result; } } return NULL; }
// Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* TabVector::VerticalTextlinePartner() { if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } if (num_unmatched + num_matched == 0) return NULL; double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright // are the bounds of the image on which the input line_bblobs were found. // The input line_bblobs list is const really. // The output vertical_x and vertical_y are the total of all the vectors. // The output list of TabVector makes no reference to the input BLOBNBOXes. void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, BLOBNBOX_LIST* line_bblobs, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { BLOBNBOX_IT bbox_it(line_bblobs); int b_count = 0; // Put all the blobs into the grid to find the lines, and move the blobs // to the output lists. AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* bblob = bbox_it.data(); bblob->set_left_tab_type(TT_UNCONFIRMED); bblob->set_left_rule(bleft.x()); bblob->set_right_rule(tright.x()); bblob->set_left_crossing_rule(bleft.x()); bblob->set_right_crossing_rule(tright.x()); blob_grid.InsertBBox(false, true, bblob); ++b_count; } if (textord_debug_tabfind) tprintf("Inserted %d line blobs into grid\n", b_count); if (b_count == 0) return; // Search the entire grid, looking for vertical line vectors. GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid); BLOBNBOX* bbox; TabVector_IT vector_it(vectors); *vertical_x = 0; *vertical_y = 1; lsearch.StartFullSearch(); while ((bbox = lsearch.NextFullSearch()) != NULL) { if (bbox->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = bbox->bounding_box(); if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), box.bottom()); AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y); if (vector != NULL) { vector->Freeze(); vector_it.add_to_end(vector); } } } ScrollView* line_win = NULL; if (textord_tabfind_show_vlines) { line_win = blob_grid.MakeWindow(0, 50, "Vlines"); blob_grid.DisplayBoxes(line_win); line_win = blob_grid.DisplayTabs("Vlines", line_win); } }
// Public factory to build a TabVector from a list of boxes. // The TabVector will be of the given alignment type. // The input vertical vector is used in fitting, and the output // vertical_x, vertical_y have the resulting line vector added to them // if the alignment is not ragged. // The extended_start_y and extended_end_y are the maximum possible // extension to the line segment that can be used to align with others. // The input CLIST of BLOBNBOX good_points is consumed and taken over. TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST* good_points, int* vertical_x, int* vertical_y) { TabVector* vector = new TabVector(extended_start_y, extended_end_y, alignment, good_points); if (!vector->Fit(vertical, false)) { delete vector; return NULL; } if (!vector->IsRagged()) { vertical = vector->endpt_ - vector->startpt_; int weight = vector->BoxCount(); *vertical_x += vertical.x() * weight; *vertical_y += vertical.y() * weight; } return vector; }
// Set all the tops and bottoms as appropriate to a mean of the // constrained range. Delete all the constraints and list. void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) { int y_min = -MAX_INT32; int y_max = MAX_INT32; GetConstraints(constraints, &y_min, &y_max); int y = (y_min + y_max) / 2; TabConstraint_IT it(constraints); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabConstraint* constraint = it.data(); TabVector* v = constraint->vector_; if (constraint->is_top_) { v->SetYEnd(y); v->set_top_constraints(NULL); } else { v->SetYStart(y); v->set_bottom_constraints(NULL); } } delete constraints; }
// Setup the constraints between the partners of this TabVector. void TabVector::SetupPartnerConstraints() { // With the first and last partner, we want a common bottom and top, // respectively, and for each change of partner, we want a common // top of first with bottom of next. TabVector_C_IT it(&partners_); TabVector* prev_partner = NULL; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabVector* partner = it.data(); if (partner->top_constraints_ == NULL || partner->bottom_constraints_ == NULL) { partner->Print("Impossible: has no constraints"); Print("This vector has it as a partner"); continue; } if (prev_partner == NULL) { // This is the first partner, so common bottom. if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_); } else { // We need prev top to be common with partner bottom. if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_, partner->bottom_constraints_)) TabConstraint::MergeConstraints(prev_partner->top_constraints_, partner->bottom_constraints_); } prev_partner = partner; if (it.at_last()) { // This is the last partner, so common top. if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_); } } }
// Return true if this vector is the same side, overlaps, and close // enough to the other to be merged. bool TabVector::SimilarTo(const ICOORD& vertical, const TabVector& other, BlobGrid* grid) const { if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) { // If they don't overlap, at least in extensions, then there is no chance. if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) return false; // A fast approximation to the scale factor of the sort_key_. int v_scale = abs(vertical.y()); if (v_scale == 0) v_scale = 1; // If they are close enough, then OK. if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ && sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) return true; // Ragged tabs get a bigger threshold. if (!IsRagged() || !other.IsRagged() || sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ || sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) return false; if (grid == NULL) { // There is nothing else to test! return true; } // If there is nothing in the rectangle between the vector that is going to // move, and the place it is moving to, then they can be merged. // Setup a vertical search for any blob. const TabVector* mover = (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other; int top_y = mover->endpt_.y(); int bottom_y = mover->startpt_.y(); int left = MIN(mover->XAtY(top_y), mover->XAtY(bottom_y)); int right = MAX(mover->XAtY(top_y), mover->XAtY(bottom_y)); int shift = abs(sort_key_ - other.sort_key_) / v_scale; if (IsRightTab()) { right += shift; } else { left -= shift; } GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid); vsearch.StartVerticalSearch(left, right, top_y); BLOBNBOX* blob; while ((blob = vsearch.NextVerticalSearch(true)) != NULL) { TBOX box = blob->bounding_box(); if (box.top() > bottom_y) return true; // Nothing found. if (box.bottom() < top_y) continue; // Doesn't overlap. int left_at_box = XAtY(box.bottom()); int right_at_box = left_at_box; if (IsRightTab()) right_at_box += shift; else left_at_box -= shift; if (MIN(right_at_box, box.right()) > MAX(left_at_box, box.left())) return false; } return true; // Nothing found. } return false; }
// Finds a vector corresponding to a set of vertically aligned blob edges // running through the given box. The type of vector returned and the // search parameters are determined by the AlignedBlobParams. // vertical_x and y are updated with an estimate of the real // vertical direction. (skew finding.) // Returns nullptr if no decent vector can be found. TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX* bbox, int* vertical_x, int* vertical_y) { int ext_start_y, ext_end_y; BLOBNBOX_CLIST good_points; // Search up and then down from the starting bbox. TBOX box = bbox->bounding_box(); bool debug = WithinTestRegion(2, box.left(), box.bottom()); int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); BLOBNBOX_C_IT it(&good_points); it.move_to_last(); box = it.data()->bounding_box(); int end_y = box.top(); int end_x = align_params.right_tab ? box.right() : box.left(); it.move_to_first(); box = it.data()->bounding_box(); int start_x = align_params.right_tab ? box.right() : box.left(); int start_y = box.bottom(); // Acceptable tab vectors must have a minimum number of points, // have a minimum acceptable length, and have a minimum gradient. // The gradient corresponds to the skew angle. // Ragged tabs don't need to satisfy the gradient condition, as they // will always end up parallel to the vertical direction. bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); if ((pt_count >= align_params.min_points && end_y - start_y >= align_params.min_length && (align_params.ragged || end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || at_least_2_crossings) { int confirmed_points = 0; // Count existing confirmed points to see if vector is acceptable. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { if (bbox->right_tab_type() == align_params.confirmed_type) ++confirmed_points; } else { if (bbox->left_tab_type() == align_params.confirmed_type) ++confirmed_points; } } // Ragged vectors are not allowed to use too many already used points. if (!align_params.ragged || confirmed_points + confirmed_points < pt_count) { const TBOX& box = bbox->bounding_box(); if (debug) { tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, box.left(), box.bottom()); } // Flag all the aligned neighbours as confirmed . for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (align_params.right_tab) { bbox->set_right_tab_type(align_params.confirmed_type); } else { bbox->set_left_tab_type(align_params.confirmed_type); } if (debug) { bbox->bounding_box().print(); } } // Now make the vector and return it. TabVector* result = TabVector::FitVector(align_params.alignment, align_params.vertical, ext_start_y, ext_end_y, &good_points, vertical_x, vertical_y); result->set_intersects_other_lines(at_least_2_crossings); if (debug) { tprintf("Box was %d, %d\n", box.left(), box.bottom()); result->Print("After fitting"); } return result; } else if (debug) { tprintf("Ragged tab used too many used points: %d out of %d\n", confirmed_points, pt_count); } } else if (debug) { tprintf("Tab vector failed basic tests: pt count %d vs min %d, " "length %d vs min %d, min grad %g\n", pt_count, align_params.min_points, end_y - start_y, align_params.min_length, abs(end_x - start_x) * kMinTabGradient); } return nullptr; }