Exemple #1
0
// Computes the center of mass and second moments of the outline for the old
// baseline and 2nd moment normalizations.
//   center:         receives the mean of the accumulated outline points,
//                   translated back by the bottom-left of the bounding box.
//   second_moments: receives the per-axis standard deviation of the point
//                   positions, each never smaller than 1.0.
// Returns the outline length (the count held by the accumulator).
int TBLOB::ComputeMoments(FCOORD* center, FCOORD* second_moments) const {
  // Compute 1st and 2nd moments of the original outline.
  LLSQ accumulator;
  TBOX box = bounding_box();
  // Iterate the outlines, accumulating edges relative the box.botleft().
  CollectEdges(box, NULL, &accumulator, NULL, NULL);
  // The accumulated points are relative to box.botleft(), so add it back.
  *center = accumulator.mean_point() + box.botleft();
  // The 2nd moments are just the standard deviation of the point positions,
  // with a floor of 1.0. The floor is applied to the variance *before* the
  // square root: a variance that is NaN or marginally negative (floating
  // point cancellation) would turn into a NaN standard deviation, and a test
  // of the form "NaN < 1.0" is false, so the NaN would escape the clamp.
  // For every non-negative variance the result is the same as clamping the
  // root, since sqrt(v) < 1.0 exactly when v < 1.0.
  const double x_variance = accumulator.x_variance();
  const double y_variance = accumulator.y_variance();
  second_moments->set_x(x_variance > 1.0 ? sqrt(x_variance) : 1.0);
  second_moments->set_y(y_variance > 1.0 ? sqrt(y_variance) : 1.0);
  return accumulator.count();
}
Exemple #2
0
// Helper that derives a mean direction vector from the supplied stats.
// If dirs holds any samples, the result is built from its mean direction.
// Otherwise the result is the vector fitted to point_diffs, oriented so that
// it agrees with the vector running from start_pt to end_pt.
static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs,
                                  const FCOORD& start_pt,
                                  const FCOORD& end_pt) {
  if (dirs.count() > 0) {
    // Direction samples exist. dirs carries two accumulators to sidestep
    // wrap-around: x holds the plain directions, y holds the directions
    // offset by 128. Take the mean from whichever has the smaller variance,
    // undoing the offset when y is chosen.
    FCOORD dir_means = dirs.mean_point();
    double chosen_dir = dirs.x_variance() <= dirs.y_variance()
                            ? dir_means.x()
                            : dir_means.y() + 128;
    FCOORD from_dirs;
    from_dirs.from_direction(Modulo(IntCastRounded(chosen_dir), 256));
    return from_dirs;
  }

  // No direction samples: fall back on the vector fitted to the points.
  FCOORD feature_dir(end_pt - start_pt);
  FCOORD primary = point_diffs.vector_fit();
  if (primary.x() == 0.0f && primary.y() == 0.0f) {
    // A zero fit means there was only a single point, so the feature
    // direction itself is the answer.
    return feature_dir;
  }

  // With few points and scaling, the least mean squares fit can be wrong,
  // so the 90 degree rotation of the fit is kept as a second candidate.
  FCOORD rotated = !primary;
  // Each candidate is 180 degrees ambiguous; flip it when needed so that its
  // scalar product with feature_dir is not negative.
  if (primary % feature_dir < 0.0) {
    primary = -primary;
  }
  if (rotated % feature_dir < 0.0) {
    rotated = -rotated;
  }
  // The rotated candidate has the higher mean squared error, but it wins if
  // its scalar product with feature_dir is strictly larger.
  return rotated % feature_dir > primary % feature_dir ? rotated : primary;
}
Exemple #3
0
// Fits a straight baseline to the points. Returns true if it had enough
// points to be reasonably sure of the fitted baseline.
// If use_box_bottoms is false, baseline positions are formed by
// considering the outlines of the blobs (EstimateBaselinePosition() is called
// on every blob before its baseline_position() is read).
//
// Steps, in order:
//   1. Feed one point per blob into both fitter_ and a local LLSQ.
//   2. Fit with fitter_; if the error is too large and there are enough
//      points, retry while skipping kNumSkipPoints at each end and keep the
//      retry only if it at least halves the error.
//   3. Try a constrained fit in the direction found (FitConstrainedIfBetter).
//   4. If the resulting angle exceeds pi/4 in magnitude, replace the line with
//      the LLSQ fit and report the baseline as not good.
//
// Members written here: fitter_, baseline_error_, baseline_pt1_,
// baseline_pt2_, good_baseline_. The return value is good_baseline_, which
// this function itself only ever sets to false; a true result therefore has
// to come from a callee (presumably FitConstrainedIfBetter - its body is not
// visible here, confirm).
bool BaselineRow::FitBaseline(bool use_box_bottoms) {
  // Deterministic fitting is used wherever possible.
  fitter_.Clear();
  // Linear least squares is a backup if the DetLineFit produces a bad line.
  LLSQ llsq;
  BLOBNBOX_IT blob_it(blobs_);

  // One sample per blob: x is the horizontal middle of the bounding box, y is
  // the blob's baseline position.
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (!use_box_bottoms) blob->EstimateBaselinePosition();
    const TBOX& box = blob->bounding_box();
    int x_middle = (box.left() + box.right()) / 2;
#ifdef kDebugYCoord
    // Debug aid: report blobs whose box straddles the y coordinate of
    // interest.
    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
      tprintf("Box bottom = %d, baseline pos=%d for box at:",
              box.bottom(), blob->baseline_position());
      box.print();
    }
#endif
    // The second argument is half the box width; presumably the horizontal
    // uncertainty of the point - confirm against the fitter's Add().
    fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
    llsq.add(x_middle, blob->baseline_position());
  }
  // Fit the line.
  ICOORD pt1, pt2;
  baseline_error_ = fitter_.Fit(&pt1, &pt2);
  baseline_pt1_ = pt1;
  baseline_pt2_ = pt2;
  if (baseline_error_ > max_baseline_error_ &&
      fitter_.SufficientPointsForIndependentFit()) {
    // The fit was bad but there were plenty of points, so try skipping
    // the first and last few, and use the new line if it dramatically improves
    // the error of fit.
    double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
    // "Dramatically" means the error is less than half of the full fit's.
    if (error < baseline_error_ / 2.0) {
      baseline_error_ = error;
      baseline_pt1_ = pt1;
      baseline_pt2_ = pt2;
    }
  }
  // Debug level passed to the constrained fit: 0 in normal builds; with
  // kDebugYCoord defined it is 3 when the row's bounding box straddles that
  // coordinate and 2 otherwise.
  int debug = 0;
#ifdef kDebugYCoord
  Print();
  debug = bounding_box_.bottom() < kDebugYCoord &&
      bounding_box_.top() > kDebugYCoord
            ? 3 : 2;
#endif
  // Now we obtained a direction from that fit, see if we can improve the
  // fit using the same direction and some other start point.
  // NOTE(review): pt1/pt2 were also handed to the skip-points Fit above. If
  // that call writes them even when its result is rejected, the direction
  // here comes from the rejected fit rather than from baseline_pt1_ /
  // baseline_pt2_ - confirm this is intended.
  FCOORD direction(pt2 - pt1);
  // Presumably the cross product of direction with pt1 - confirm against
  // FCOORD's operator*.
  double target_offset = direction * pt1;
  good_baseline_ = false;
  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
  // Wild lines can be produced because DetLineFit allows vertical lines, but
  // vertical text has been rotated so angles over pi/4 should be disallowed.
  // Near vertical lines can still be produced by vertically aligned components
  // on very short lines.
  double angle = BaselineAngle();
  if (fabs(angle) > M_PI * 0.25) {
    // Use the llsq fit as a backup.
    // The line goes through the mean point with gradient llsq.m().
    baseline_pt1_ = llsq.mean_point();
    baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
    // TODO(rays) get rid of this when m and c are no longer used.
    double m = llsq.m();
    double c = llsq.c(m);
    baseline_error_ = llsq.rms(m, c);
    // A line taken from the backup fit is never reported as good.
    good_baseline_ = false;
  }
  return good_baseline_;
}