Example #1
0
// Sets up displacement_modes_ with the top few modes of the perpendicular
// distance of each blob from the given direction vector, after rounding.
void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
  // Set of perpendicular displacements of the blob bottoms from the required
  // baseline direction.
  GenericVector<double> perp_blob_dists;
  displacement_modes_.truncate(0);
  // Gather the skew-corrected position of every blob.
  double min_dist = MAX_FLOAT32;
  double max_dist = -MAX_FLOAT32;
  BLOBNBOX_IT blob_it(blobs_);
  bool debug = false;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    const TBOX& box = blob->bounding_box();
#ifdef kDebugYCoord
    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
#endif
    FCOORD blob_pos((box.left() + box.right()) / 2.0f,
                    blob->baseline_position());
    double offset = direction * blob_pos;
    perp_blob_dists.push_back(offset);
    if (debug) {
      tprintf("Displacement %g for blob at:", offset);
      box.print();
    }
    UpdateRange(offset, &min_dist, &max_dist);
  }
  // Set up a histogram using disp_quant_factor_ as the bucket size.
  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
                   IntCastRounded(max_dist / disp_quant_factor_) + 1);
  for (int i = 0; i < perp_blob_dists.size(); ++i) {
    dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
  }
  GenericVector<KDPairInc<float, int> > scaled_modes;
  dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
  if (debug) {
    for (int i = 0; i < scaled_modes.size(); ++i) {
      tprintf("Top mode = %g * %d\n",
              scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
    }
  }
  for (int i = 0; i < scaled_modes.size(); ++i)
    displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
}
Example #2
0
// Fits a straight baseline to the points. Returns true if it had enough
// points to be reasonably sure of the fitted baseline.
// If use_box_bottoms is false, baselines positions are formed by
// considering the outlines of the blobs.
bool BaselineRow::FitBaseline(bool use_box_bottoms) {
  // Deterministic fitting is used wherever possible.
  fitter_.Clear();
  // Linear least squares is a backup if the DetLineFit produces a bad line.
  LLSQ llsq;
  BLOBNBOX_IT blob_it(blobs_);

  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (!use_box_bottoms) blob->EstimateBaselinePosition();
    const TBOX& box = blob->bounding_box();
    int x_middle = (box.left() + box.right()) / 2;
#ifdef kDebugYCoord
    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
      tprintf("Box bottom = %d, baseline pos=%d for box at:",
              box.bottom(), blob->baseline_position());
      box.print();
    }
#endif
    fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
    llsq.add(x_middle, blob->baseline_position());
  }
  // Fit the line.
  ICOORD pt1, pt2;
  baseline_error_ = fitter_.Fit(&pt1, &pt2);
  baseline_pt1_ = pt1;
  baseline_pt2_ = pt2;
  if (baseline_error_ > max_baseline_error_ &&
      fitter_.SufficientPointsForIndependentFit()) {
    // The fit was bad but there were plenty of points, so try skipping
    // the first and last few, and use the new line if it dramatically improves
    // the error of fit.
    double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
    if (error < baseline_error_ / 2.0) {
      baseline_error_ = error;
      baseline_pt1_ = pt1;
      baseline_pt2_ = pt2;
    }
  }
  int debug = 0;
#ifdef kDebugYCoord
  Print();
  debug = bounding_box_.bottom() < kDebugYCoord &&
      bounding_box_.top() > kDebugYCoord
            ? 3 : 2;
#endif
  // Now we obtained a direction from that fit, see if we can improve the
  // fit using the same direction and some other start point.
  FCOORD direction(pt2 - pt1);
  double target_offset = direction * pt1;
  good_baseline_ = false;
  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
  // Wild lines can be produced because DetLineFit allows vertical lines, but
  // vertical text has been rotated so angles over pi/4 should be disallowed.
  // Near vertical lines can still be produced by vertically aligned components
  // on very short lines.
  double angle = BaselineAngle();
  if (fabs(angle) > M_PI * 0.25) {
    // Use the llsq fit as a backup.
    baseline_pt1_ = llsq.mean_point();
    baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
    // TODO(rays) get rid of this when m and c are no longer used.
    double m = llsq.m();
    double c = llsq.c(m);
    baseline_error_ = llsq.rms(m, c);
    good_baseline_ = false;
  }
  return good_baseline_;
}