示例#1
0
// Finds a vector corresponding to a set of vertically aligned blob edges
// running through the given box. The type of vector returned and the
// search parameters are determined by the AlignedBlobParams.
// vertical_x and y are updated with an estimate of the real
// vertical direction. (skew finding.)
// Returns NULL if no decent vector can be found.
TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
                                              BLOBNBOX* bbox,
                                              int* vertical_x,
                                              int* vertical_y) {
  int ext_start_y, ext_end_y;
  BLOBNBOX_CLIST good_points;
  // Search up and then down from the starting bbox.
  int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
  pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
  BLOBNBOX_C_IT it(&good_points);
  it.move_to_last();
  int end_y = it.data()->bounding_box().top();
  it.move_to_first();
  int start_y = it.data()->bounding_box().bottom();
  if (pt_count >= align_params.min_points &&
      end_y - start_y >= align_params.min_length) {
    int confirmed_points = 0;
    // Count existing confirmed points to see if vector is acceptable.
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      if (align_params.right_tab) {
        if (bbox->right_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      } else {
        if (bbox->left_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      }
    }
    // Ragged vectors are not allowed to use too many already used points.
    if (!align_params.ragged ||
        confirmed_points + confirmed_points < pt_count) {
      const TBOX& box = bbox->bounding_box();
      if (WithinTestRegion(2, box.left(), box.bottom())) {
        tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
                pt_count, box.left(), box.bottom());
      }
      // Flag all the aligned neighbours as confirmed .
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        bbox = it.data();
        if (align_params.right_tab) {
          bbox->set_right_tab_type(align_params.confirmed_type);
        } else {
          bbox->set_left_tab_type(align_params.confirmed_type);
        }
      }
      // Now make the vector and return it.
      TabVector* result = TabVector::FitVector(align_params.alignment,
                                               align_params.vertical,
                                               ext_start_y, ext_end_y,
                                               &good_points,
                                               vertical_x, vertical_y);
      if (WithinTestRegion(2, box.left(), box.bottom())) {
        result->Print("After fitting");
      }
      return result;
    }
  }
  return NULL;
}
示例#2
0
// Find a set of blobs that are aligned in the given vertical
// direction with the given blob. Returns a list of aligned
// blobs and the number in the list.
// For other parameters see FindAlignedBlob below.
int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
                           bool top_to_bottom, BLOBNBOX* bbox,
                           BLOBNBOX_CLIST* good_points, int* end_y) {
  int ptcount = 0;
  BLOBNBOX_C_IT it(good_points);

  TBOX box = bbox->bounding_box();
  bool debug = WithinTestRegion(2, box.left(), box.bottom());
  if (debug) {
    tprintf("Starting alignment run at blob:");
    box.print();
  }
  int x_start = params.right_tab ? box.right() : box.left();
  while (bbox != nullptr) {
    // Add the blob to the list if the appropriate side is a tab candidate,
    // or if we are working on a ragged tab.
    TabType type = params.right_tab ? bbox->right_tab_type()
                                    : bbox->left_tab_type();
    if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) &&
        (it.empty() || it.data() != bbox)) {
      if (top_to_bottom)
        it.add_before_then_move(bbox);
      else
        it.add_after_then_move(bbox);
      ++ptcount;
    }
    // Find the next blob that is aligned with the current one.
    // FindAlignedBlob guarantees that forward progress will be made in the
    // top_to_bottom direction, and therefore eventually it will return nullptr,
    // making this while (bbox != nullptr) loop safe.
    bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y);
    if (bbox != nullptr) {
      box = bbox->bounding_box();
      if (!params.ragged)
        x_start = params.right_tab ? box.right() : box.left();
    }
  }
  if (debug) {
    tprintf("Alignment run ended with %d pts at blob:", ptcount);
    box.print();
  }
  return ptcount;
}
示例#3
0
// Search vertically for a blob that is aligned with the input bbox.
// The search parameters are determined by AlignedBlobParams.
// top_to_bottom tells whether to search down or up.
// The return value is nullptr if nothing was found in the search box
// or if a blob was found in the gutter. On a nullptr return, end_y
// is set to the edge of the search box or the leading edge of the
// gutter blob if one was found.
BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
                                       bool top_to_bottom, BLOBNBOX* bbox,
                                       int x_start, int* end_y) {
  TBOX box = bbox->bounding_box();
  // If there are separator lines, get the column edges.
  int left_column_edge = bbox->left_rule();
  int right_column_edge = bbox->right_rule();
  // start_y is used to guarantee that forward progress is made and the
  // search does not go into an infinite loop. New blobs must extend the
  // line beyond start_y.
  int start_y = top_to_bottom ? box.bottom() : box.top();
  if (WithinTestRegion(2, x_start, start_y)) {
    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
            box.left(), box.top(), box.right(), box.bottom(),
            left_column_edge, right_column_edge);
  }
  // Compute skew tolerance.
  int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
  // Calculate xmin and xmax of the search box so that it contains
  // all possibly relevant boxes up to p.max_v_gap above or below accoording
  // to top_to_bottom.
  // Start with a notion of vertical with the current estimate.
  int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y();
  if (top_to_bottom) {
    x2 = x_start - x2;
    *end_y = start_y - p.max_v_gap;
  } else {
    x2 = x_start + x2;
    *end_y = start_y + p.max_v_gap;
  }
  // Expand the box by an additional skew tolerance
  int xmin = std::min(x_start, x2) - skew_tolerance;
  int xmax = std::max(x_start, x2) + skew_tolerance;
  // Now add direction-specific tolerances.
  if (p.right_tab) {
    xmax += p.min_gutter;
    xmin -= p.l_align_tolerance;
  } else {
    xmax += p.r_align_tolerance;
    xmin -= p.min_gutter;
  }
  // Setup a vertical search for an aligned blob.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this);
  if (WithinTestRegion(2, x_start, start_y))
    tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n",
            p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left",
            xmin, xmax, start_y, p.max_v_gap, p.min_gutter);
  vsearch.StartVerticalSearch(xmin, xmax, start_y);
  // result stores the best real return value.
  BLOBNBOX* result = nullptr;
  // The backup_result is not a tab candidate and can be used if no
  // real tab candidate result is found.
  BLOBNBOX* backup_result = nullptr;
  // neighbour is the blob that is currently being investigated.
  BLOBNBOX* neighbour = nullptr;
  while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) {
    if (neighbour == bbox)
      continue;
    TBOX nbox = neighbour->bounding_box();
    int n_y = (nbox.top() + nbox.bottom()) / 2;
    if ((!top_to_bottom && n_y > start_y + p.max_v_gap) ||
        (top_to_bottom && n_y < start_y - p.max_v_gap)) {
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n",
                nbox.left(), nbox.bottom(), nbox.right(), nbox.top());
      break;  // Gone far enough.
    }
    // It is CRITICAL to ensure that forward progress is made, (strictly
    // in/decreasing n_y) or the caller could loop infinitely, while
    // waiting for a sequence of blobs in a line to end.
    // NextVerticalSearch alone does not guarantee this, as there may be
    // more than one blob in a grid cell. See comment in AlignTabs.
    if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box))
      continue;  // Only look in the required direction.
    if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize())
      return result;  // This result is clear.
    if (backup_result != nullptr && p.ragged && result == nullptr &&
        backup_result->bounding_box().y_gap(nbox) > gridsize())
      return backup_result;  // This result is clear.

    // If the neighbouring blob is the wrong side of a separator line, then it
    // "doesn't exist" as far as we are concerned.
    int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y();
    if (x_at_n_y < neighbour->left_crossing_rule() ||
        x_at_n_y > neighbour->right_crossing_rule())
      continue;  // Separator line in the way.
    int n_left = nbox.left();
    int n_right = nbox.right();
    int n_x = p.right_tab ? n_right : n_left;
    if (WithinTestRegion(2, x_start, start_y))
      tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n",
              nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
              n_x, n_y, x_at_n_y);
    if (p.right_tab &&
        n_left < x_at_n_y + p.min_gutter &&
        n_right > x_at_n_y + p.r_align_tolerance &&
        (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
      // In the gutter so end of line.
      if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED)
        bbox->set_right_tab_type(TT_DELETED);
      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("gutter\n");
      return nullptr;
    }
    if (!p.right_tab &&
        n_left < x_at_n_y - p.l_align_tolerance &&
        n_right > x_at_n_y - p.min_gutter &&
        (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
      // In the gutter so end of line.
      if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED)
        bbox->set_left_tab_type(TT_DELETED);
      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("gutter\n");
      return nullptr;
    }
    if ((p.right_tab && neighbour->leader_on_right()) ||
        (!p.right_tab && neighbour->leader_on_left()))
      continue;  // Neighbours of leaders are not allowed to be used.
    if (n_x <= x_at_n_y + p.r_align_tolerance &&
        n_x >= x_at_n_y - p.l_align_tolerance) {
      // Aligned so keep it. If it is a marked tab save it as result,
      // otherwise keep it as backup_result to return in case of later failure.
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("aligned, seeking%d, l=%d, r=%d\n",
                p.right_tab, neighbour->left_tab_type(),
                neighbour->right_tab_type());
      TabType n_type = p.right_tab ? neighbour->right_tab_type()
                                   : neighbour->left_tab_type();
      if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) {
        if (result == nullptr) {
          result = neighbour;
        } else {
          // Keep the closest neighbour by Euclidean distance.
          // This prevents it from picking a tab blob in another column.
          const TBOX& old_box = result->bounding_box();
          int x_diff = p.right_tab ? old_box.right() : old_box.left();
          x_diff -= x_at_n_y;
          int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y;
          int old_dist = x_diff * x_diff + y_diff * y_diff;
          x_diff = n_x - x_at_n_y;
          y_diff = n_y - start_y;
          int new_dist = x_diff * x_diff + y_diff * y_diff;
          if (new_dist < old_dist)
            result = neighbour;
        }
      } else if (backup_result == nullptr) {
        if (WithinTestRegion(2, x_start, start_y))
          tprintf("Backup\n");
        backup_result = neighbour;
      } else {
        TBOX backup_box = backup_result->bounding_box();
        if ((p.right_tab && backup_box.right() < nbox.right()) ||
            (!p.right_tab && backup_box.left() > nbox.left())) {
          if (WithinTestRegion(2, x_start, start_y))
            tprintf("Better backup\n");
          backup_result = neighbour;
        }
      }
    }
  }
  return result != nullptr ? result : backup_result;
}
示例#4
0
// Finds a vector corresponding to a set of vertically aligned blob edges
// running through the given box. The type of vector returned and the
// search parameters are determined by the AlignedBlobParams.
// vertical_x and y are updated with an estimate of the real
// vertical direction. (skew finding.)
// Returns nullptr if no decent vector can be found.
TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
                                              BLOBNBOX* bbox,
                                              int* vertical_x,
                                              int* vertical_y) {
  int ext_start_y, ext_end_y;
  BLOBNBOX_CLIST good_points;
  // Search up and then down from the starting bbox.
  TBOX box = bbox->bounding_box();
  bool debug = WithinTestRegion(2, box.left(), box.bottom());
  int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
  pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
  BLOBNBOX_C_IT it(&good_points);
  it.move_to_last();
  box = it.data()->bounding_box();
  int end_y = box.top();
  int end_x = align_params.right_tab ? box.right() : box.left();
  it.move_to_first();
  box = it.data()->bounding_box();
  int start_x = align_params.right_tab ? box.right() : box.left();
  int start_y = box.bottom();
  // Acceptable tab vectors must have a minimum number of points,
  // have a minimum acceptable length, and have a minimum gradient.
  // The gradient corresponds to the skew angle.
  // Ragged tabs don't need to satisfy the gradient condition, as they
  // will always end up parallel to the vertical direction.
  bool at_least_2_crossings = AtLeast2LineCrossings(&good_points);
  if ((pt_count >= align_params.min_points &&
      end_y - start_y >= align_params.min_length &&
      (align_params.ragged ||
          end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) ||
      at_least_2_crossings) {
    int confirmed_points = 0;
    // Count existing confirmed points to see if vector is acceptable.
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      if (align_params.right_tab) {
        if (bbox->right_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      } else {
        if (bbox->left_tab_type() == align_params.confirmed_type)
          ++confirmed_points;
      }
    }
    // Ragged vectors are not allowed to use too many already used points.
    if (!align_params.ragged ||
        confirmed_points + confirmed_points < pt_count) {
      const TBOX& box = bbox->bounding_box();
      if (debug) {
        tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
                pt_count, box.left(), box.bottom());
      }
      // Flag all the aligned neighbours as confirmed .
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        bbox = it.data();
        if (align_params.right_tab) {
          bbox->set_right_tab_type(align_params.confirmed_type);
        } else {
          bbox->set_left_tab_type(align_params.confirmed_type);
        }
        if (debug) {
          bbox->bounding_box().print();
        }
      }
      // Now make the vector and return it.
      TabVector* result = TabVector::FitVector(align_params.alignment,
                                               align_params.vertical,
                                               ext_start_y, ext_end_y,
                                               &good_points,
                                               vertical_x, vertical_y);
      result->set_intersects_other_lines(at_least_2_crossings);
      if (debug) {
        tprintf("Box was %d, %d\n", box.left(), box.bottom());
        result->Print("After fitting");
      }
      return result;
    } else if (debug) {
      tprintf("Ragged tab used too many used points: %d out of %d\n",
              confirmed_points, pt_count);
    }
  } else if (debug) {
    tprintf("Tab vector failed basic tests: pt count %d vs min %d, "
            "length %d vs min %d, min grad %g\n",
            pt_count, align_params.min_points, end_y - start_y,
            align_params.min_length, abs(end_x - start_x) * kMinTabGradient);
  }
  return nullptr;
}