// Finds a vector corresponding to a set of vertically aligned blob edges
// running through the given box. The type of vector returned and the
// search parameters are determined by the AlignedBlobParams.
// vertical_x and y are updated with an estimate of the real
// vertical direction. (skew finding.)
// Returns NULL if no decent vector can be found.
TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
                                              BLOBNBOX* bbox,
                                              int* vertical_x,
                                              int* vertical_y) {
  int ext_start_y, ext_end_y;
  BLOBNBOX_CLIST good_points;
  // Search up and then down from the starting bbox.
  int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
  pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
  BLOBNBOX_C_IT it(&good_points);
  it.move_to_last();
  int end_y = it.data()->bounding_box().top();
  it.move_to_first();
  int start_y = it.data()->bounding_box().bottom();
  if (pt_count >= align_params.min_points &&
      end_y - start_y >= align_params.min_length) {
    int confirmed_points = 0;
    // Count existing confirmed points to see if vector is acceptable.
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      if (align_params.right_tab) {
        if (bbox->right_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      } else {
        if (bbox->left_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      }
    }
    // Ragged vectors are not allowed to use too many already used points.
    if (!align_params.ragged ||
        confirmed_points + confirmed_points < pt_count) {
      const TBOX& box = bbox->bounding_box();
      if (WithinTestRegion(2, box.left(), box.bottom())) {
        tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
                pt_count, box.left(), box.bottom());
      }
      // Flag all the aligned neighbours as confirmed .
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        bbox = it.data();
        if (align_params.right_tab) {
          bbox->set_right_tab_type(align_params.confirmed_type);
        } else {
          bbox->set_left_tab_type(align_params.confirmed_type);
        }
      }
      // Now make the vector and return it.
      TabVector* result = TabVector::FitVector(align_params.alignment,
                                               align_params.vertical,
                                               ext_start_y, ext_end_y,
                                               &good_points,
                                               vertical_x, vertical_y);
      if (WithinTestRegion(2, box.left(), box.bottom())) {
        result->Print("After fitting");
      }
      return result;
    }
  }
  return NULL;
}
// Return the partner of this TabVector if the vector qualifies as
// being a vertical text line, otherwise NULL.
TabVector* TabVector::VerticalTextlinePartner() {
  if (!partners_.singleton())
    return NULL;
  TabVector_C_IT partner_it(&partners_);
  TabVector* partner = partner_it.data();
  BLOBNBOX_C_IT box_it1(&boxes_);
  BLOBNBOX_C_IT box_it2(&partner->boxes_);
  // Count how many boxes are also in the other list.
  // At the same time, gather the mean width and median vertical gap.
  if (textord_debug_tabfind > 1) {
    Print("Testing for vertical text");
    partner->Print("           partner");
  }
  int num_matched = 0;
  int num_unmatched = 0;
  int total_widths = 0;
  int width = startpt().x() - partner->startpt().x();
  if (width < 0)
    width = -width;
  STATS gaps(0, width * 2);
  BLOBNBOX* prev_bbox = NULL;
  box_it2.mark_cycle_pt();
  for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
    BLOBNBOX* bbox = box_it1.data();
    TBOX box = bbox->bounding_box();
    if (prev_bbox != NULL) {
      gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
    }
    while (!box_it2.cycled_list() && box_it2.data() != bbox &&
           box_it2.data()->bounding_box().bottom() < box.bottom()) {
      box_it2.forward();
    }
    if (!box_it2.cycled_list() && box_it2.data() == bbox &&
        bbox->region_type() >= BRT_UNKNOWN &&
        (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN))
      ++num_matched;
    else
      ++num_unmatched;
    total_widths += box.width();
    prev_bbox = bbox;
  }
  if (num_unmatched + num_matched == 0) return NULL;
  double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
  int min_box_match = static_cast<int>((num_matched + num_unmatched) *
                                       textord_tabvector_vertical_box_ratio);
  bool is_vertical = (gaps.get_total() > 0 &&
                      num_matched >= min_box_match &&
                      gaps.median() <= max_gap);
  if (textord_debug_tabfind > 1) {
    tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d "
            "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
            gaps.get_total(), num_matched, num_unmatched, min_box_match,
            gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No");
  }
  return (is_vertical) ? partner : NULL;
}
Exemple #3
0
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
// are the bounds of the image on which the input line_bblobs were found.
// The input line_bblobs list is const really.
// The output vertical_x and vertical_y are the total of all the vectors.
// The output list of TabVector makes no reference to the input BLOBNBOXes.
void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
                                 BLOBNBOX_LIST* line_bblobs,
                                 int* vertical_x, int* vertical_y,
                                 TabVector_LIST* vectors) {
  BLOBNBOX_IT bbox_it(line_bblobs);
  int b_count = 0;
  // Put all the blobs into the grid to find the lines, and move the blobs
  // to the output lists.
  AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
  for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
    BLOBNBOX* bblob = bbox_it.data();
    bblob->set_left_tab_type(TT_UNCONFIRMED);
    bblob->set_left_rule(bleft.x());
    bblob->set_right_rule(tright.x());
    bblob->set_left_crossing_rule(bleft.x());
    bblob->set_right_crossing_rule(tright.x());
    blob_grid.InsertBBox(false, true, bblob);
    ++b_count;
  }
  if (textord_debug_tabfind)
    tprintf("Inserted %d line blobs into grid\n", b_count);
  if (b_count == 0)
    return;

  // Search the entire grid, looking for vertical line vectors.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid);
  BLOBNBOX* bbox;
  TabVector_IT vector_it(vectors);
  *vertical_x = 0;
  *vertical_y = 1;
  lsearch.StartFullSearch();
  while ((bbox = lsearch.NextFullSearch()) != NULL) {
    if (bbox->left_tab_type() == TT_UNCONFIRMED) {
      const TBOX& box = bbox->bounding_box();
      if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
        tprintf("Finding line vector starting at bbox (%d,%d)\n",
                box.left(), box.bottom());
      AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
      TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
                                                          vertical_x,
                                                          vertical_y);
      if (vector != NULL) {
        vector->Freeze();
        vector_it.add_to_end(vector);
      }
    }
  }
  ScrollView* line_win = NULL;
  if (textord_tabfind_show_vlines) {
    line_win = blob_grid.MakeWindow(0, 50, "Vlines");
    blob_grid.DisplayBoxes(line_win);
    line_win = blob_grid.DisplayTabs("Vlines", line_win);
  }
}
// Public factory to build a TabVector from a list of boxes.
// The TabVector will be of the given alignment type.
// The input vertical vector is used in fitting, and the output
// vertical_x, vertical_y have the resulting line vector added to them
// if the alignment is not ragged.
// The extended_start_y and extended_end_y are the maximum possible
// extension to the line segment that can be used to align with others.
// The input CLIST of BLOBNBOX good_points is consumed and taken over.
TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical,
                                int  extended_start_y, int extended_end_y,
                                BLOBNBOX_CLIST* good_points,
                                int* vertical_x, int* vertical_y) {
  TabVector* vector = new TabVector(extended_start_y, extended_end_y,
                                    alignment, good_points);
  if (!vector->Fit(vertical, false)) {
    delete vector;
    return NULL;
  }
  if (!vector->IsRagged()) {
    vertical = vector->endpt_ - vector->startpt_;
    int weight = vector->BoxCount();
    *vertical_x += vertical.x() * weight;
    *vertical_y += vertical.y() * weight;
  }
  return vector;
}
// Set all the tops and bottoms as appropriate to a mean of the
// constrained range. Delete all the constraints and list.
void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) {
  int y_min = -MAX_INT32;
  int y_max = MAX_INT32;
  GetConstraints(constraints, &y_min, &y_max);
  int y = (y_min + y_max) / 2;
  TabConstraint_IT it(constraints);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    TabConstraint* constraint = it.data();
    TabVector* v = constraint->vector_;
    if (constraint->is_top_) {
      v->SetYEnd(y);
      v->set_top_constraints(NULL);
    } else {
      v->SetYStart(y);
      v->set_bottom_constraints(NULL);
    }
  }
  delete constraints;
}
// Setup the constraints between the partners of this TabVector.
void TabVector::SetupPartnerConstraints() {
  // With the first and last partner, we want a common bottom and top,
  // respectively, and for each change of partner, we want a common
  // top of first with bottom of next.
  TabVector_C_IT it(&partners_);
  TabVector* prev_partner = NULL;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    TabVector* partner = it.data();
    if (partner->top_constraints_ == NULL ||
        partner->bottom_constraints_ == NULL) {
      partner->Print("Impossible: has no constraints");
      Print("This vector has it as a partner");
      continue;
    }
    if (prev_partner == NULL) {
      // This is the first partner, so common bottom.
      if (TabConstraint::CompatibleConstraints(bottom_constraints_,
                                               partner->bottom_constraints_))
        TabConstraint::MergeConstraints(bottom_constraints_,
                                        partner->bottom_constraints_);
    } else {
      // We need prev top to be common with partner bottom.
      if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
                                               partner->bottom_constraints_))
        TabConstraint::MergeConstraints(prev_partner->top_constraints_,
                                        partner->bottom_constraints_);
    }
    prev_partner = partner;
    if (it.at_last()) {
      // This is the last partner, so common top.
      if (TabConstraint::CompatibleConstraints(top_constraints_,
                                               partner->top_constraints_))
        TabConstraint::MergeConstraints(top_constraints_,
                                        partner->top_constraints_);
    }
  }
}
// Return true if this vector is the same side, overlaps, and close
// enough to the other to be merged.
bool TabVector::SimilarTo(const ICOORD& vertical,
                          const TabVector& other, BlobGrid* grid) const {
  if ((IsRightTab() && other.IsRightTab()) ||
      (IsLeftTab() && other.IsLeftTab())) {
    // If they don't overlap, at least in extensions, then there is no chance.
    if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
      return false;
    // A fast approximation to the scale factor of the sort_key_.
    int v_scale = abs(vertical.y());
    if (v_scale == 0)
      v_scale = 1;
    // If they are close enough, then OK.
    if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
        sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
      return true;
    // Ragged tabs get a bigger threshold.
    if (!IsRagged() || !other.IsRagged() ||
        sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
        sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
      return false;
    if (grid == NULL) {
      // There is nothing else to test!
      return true;
    }
    // If there is nothing in the rectangle between the vector that is going to
    // move, and the place it is moving to, then they can be merged.
    // Setup a vertical search for any blob.
    const TabVector* mover = (IsRightTab() &&
       sort_key_ < other.sort_key_) ? this : &other;
    int top_y = mover->endpt_.y();
    int bottom_y = mover->startpt_.y();
    int left = MIN(mover->XAtY(top_y), mover->XAtY(bottom_y));
    int right = MAX(mover->XAtY(top_y), mover->XAtY(bottom_y));
    int shift = abs(sort_key_ - other.sort_key_) / v_scale;
    if (IsRightTab()) {
      right += shift;
    } else {
      left -= shift;
    }

    GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
    vsearch.StartVerticalSearch(left, right, top_y);
    BLOBNBOX* blob;
    while ((blob = vsearch.NextVerticalSearch(true)) != NULL) {
      TBOX box = blob->bounding_box();
      if (box.top() > bottom_y)
        return true;  // Nothing found.
      if (box.bottom() < top_y)
        continue;  // Doesn't overlap.
      int left_at_box = XAtY(box.bottom());
      int right_at_box = left_at_box;
      if (IsRightTab())
        right_at_box += shift;
      else
        left_at_box -= shift;
      if (MIN(right_at_box, box.right()) > MAX(left_at_box, box.left()))
        return false;
    }
    return true;  // Nothing found.
  }
  return false;
}
// Finds a vector corresponding to a set of vertically aligned blob edges
// running through the given box. The type of vector returned and the
// search parameters are determined by the AlignedBlobParams.
// vertical_x and y are updated with an estimate of the real
// vertical direction. (skew finding.)
// Returns nullptr if no decent vector can be found.
TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
                                              BLOBNBOX* bbox,
                                              int* vertical_x,
                                              int* vertical_y) {
  int ext_start_y, ext_end_y;
  BLOBNBOX_CLIST good_points;
  // Search up and then down from the starting bbox.
  TBOX box = bbox->bounding_box();
  bool debug = WithinTestRegion(2, box.left(), box.bottom());
  int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
  pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
  BLOBNBOX_C_IT it(&good_points);
  it.move_to_last();
  box = it.data()->bounding_box();
  int end_y = box.top();
  int end_x = align_params.right_tab ? box.right() : box.left();
  it.move_to_first();
  box = it.data()->bounding_box();
  int start_x = align_params.right_tab ? box.right() : box.left();
  int start_y = box.bottom();
  // Acceptable tab vectors must have a minimum number of points,
  // have a minimum acceptable length, and have a minimum gradient.
  // The gradient corresponds to the skew angle.
  // Ragged tabs don't need to satisfy the gradient condition, as they
  // will always end up parallel to the vertical direction.
  bool at_least_2_crossings = AtLeast2LineCrossings(&good_points);
  if ((pt_count >= align_params.min_points &&
      end_y - start_y >= align_params.min_length &&
      (align_params.ragged ||
          end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) ||
      at_least_2_crossings) {
    int confirmed_points = 0;
    // Count existing confirmed points to see if vector is acceptable.
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      if (align_params.right_tab) {
        if (bbox->right_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      } else {
        if (bbox->left_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      }
    }
    // Ragged vectors are not allowed to use too many already used points.
    if (!align_params.ragged ||
        confirmed_points + confirmed_points < pt_count) {
      const TBOX& box = bbox->bounding_box();
      if (debug) {
        tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
                pt_count, box.left(), box.bottom());
      }
      // Flag all the aligned neighbours as confirmed .
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        bbox = it.data();
        if (align_params.right_tab) {
          bbox->set_right_tab_type(align_params.confirmed_type);
        } else {
          bbox->set_left_tab_type(align_params.confirmed_type);
        }
        if (debug) {
          bbox->bounding_box().print();
        }
      }
      // Now make the vector and return it.
      TabVector* result = TabVector::FitVector(align_params.alignment,
                                               align_params.vertical,
                                               ext_start_y, ext_end_y,
                                               &good_points,
                                               vertical_x, vertical_y);
      result->set_intersects_other_lines(at_least_2_crossings);
      if (debug) {
        tprintf("Box was %d, %d\n", box.left(), box.bottom());
        result->Print("After fitting");
      }
      return result;
    } else if (debug) {
      tprintf("Ragged tab used too many used points: %d out of %d\n",
              confirmed_points, pt_count);
    }
  } else if (debug) {
    tprintf("Tab vector failed basic tests: pt count %d vs min %d, "
            "length %d vs min %d, min grad %g\n",
            pt_count, align_params.min_points, end_y - start_y,
            align_params.min_length, abs(end_x - start_x) * kMinTabGradient);
  }
  return nullptr;
}