// Computes the center of mass and second moments for the old baseline and // 2nd moment normalizations. Returns the outline length. // The input denorm should be the normalizations that have been applied from // the image to the current state of this TBLOB. int TBLOB::ComputeMoments(FCOORD* center, FCOORD* second_moments) const { // Compute 1st and 2nd moments of the original outline. LLSQ accumulator; TBOX box = bounding_box(); // Iterate the outlines, accumulating edges relative the box.botleft(). CollectEdges(box, NULL, &accumulator, NULL, NULL); *center = accumulator.mean_point() + box.botleft(); // The 2nd moments are just the standard deviation of the point positions. double x2nd = sqrt(accumulator.x_variance()); double y2nd = sqrt(accumulator.y_variance()); if (x2nd < 1.0) x2nd = 1.0; if (y2nd < 1.0) y2nd = 1.0; second_moments->set_x(x2nd); second_moments->set_y(y2nd); return accumulator.count(); }
// Helper returns the mean direction vector from the given stats. Use the // mean direction from dirs if there is information available, otherwise, use // the fit_vector from point_diffs. static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs, const FCOORD& start_pt, const FCOORD& end_pt) { FCOORD fit_vector; if (dirs.count() > 0) { // There were directions, so use them. To avoid wrap-around problems, we // have 2 accumulators in dirs: x for normal directions and y for // directions offset by 128. We will use the one with the least variance. FCOORD mean_pt = dirs.mean_point(); double mean_dir = 0.0; if (dirs.x_variance() <= dirs.y_variance()) { mean_dir = mean_pt.x(); } else { mean_dir = mean_pt.y() + 128; } fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256)); } else { // There were no directions, so we rely on the vector_fit to the points. // Since the vector_fit is 180 degrees ambiguous, we align with the // supplied feature_dir by making the scalar product non-negative. FCOORD feature_dir(end_pt - start_pt); fit_vector = point_diffs.vector_fit(); if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) { // There was only a single point. Use feature_dir directly. fit_vector = feature_dir; } else { // Sometimes the least mean squares fit is wrong, due to the small sample // of points and scaling. Use a 90 degree rotated vector if that matches // feature_dir better. FCOORD fit_vector2 = !fit_vector; // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by // insisting that the scalar product with the feature_dir should be +ve. if (fit_vector % feature_dir < 0.0) fit_vector = -fit_vector; if (fit_vector2 % feature_dir < 0.0) fit_vector2 = -fit_vector2; // Even though fit_vector2 has a higher mean squared error, it might be // a better fit, so use it if the dot product with feature_dir is bigger. if (fit_vector2 % feature_dir > fit_vector % feature_dir) fit_vector = fit_vector2; } } return fit_vector; }
// Fits a straight baseline to the points. Returns true if it had enough // points to be reasonably sure of the fitted baseline. // If use_box_bottoms is false, baselines positions are formed by // considering the outlines of the blobs. bool BaselineRow::FitBaseline(bool use_box_bottoms) { // Deterministic fitting is used wherever possible. fitter_.Clear(); // Linear least squares is a backup if the DetLineFit produces a bad line. LLSQ llsq; BLOBNBOX_IT blob_it(blobs_); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (!use_box_bottoms) blob->EstimateBaselinePosition(); const TBOX& box = blob->bounding_box(); int x_middle = (box.left() + box.right()) / 2; #ifdef kDebugYCoord if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { tprintf("Box bottom = %d, baseline pos=%d for box at:", box.bottom(), blob->baseline_position()); box.print(); } #endif fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2); llsq.add(x_middle, blob->baseline_position()); } // Fit the line. ICOORD pt1, pt2; baseline_error_ = fitter_.Fit(&pt1, &pt2); baseline_pt1_ = pt1; baseline_pt2_ = pt2; if (baseline_error_ > max_baseline_error_ && fitter_.SufficientPointsForIndependentFit()) { // The fit was bad but there were plenty of points, so try skipping // the first and last few, and use the new line if it dramatically improves // the error of fit. double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2); if (error < baseline_error_ / 2.0) { baseline_error_ = error; baseline_pt1_ = pt1; baseline_pt2_ = pt2; } } int debug = 0; #ifdef kDebugYCoord Print(); debug = bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord ? 3 : 2; #endif // Now we obtained a direction from that fit, see if we can improve the // fit using the same direction and some other start point. FCOORD direction(pt2 - pt1); double target_offset = direction * pt1; good_baseline_ = false; FitConstrainedIfBetter(debug, direction, 0.0, target_offset); // Wild lines can be produced because DetLineFit allows vertical lines, but // vertical text has been rotated so angles over pi/4 should be disallowed. // Near vertical lines can still be produced by vertically aligned components // on very short lines. double angle = BaselineAngle(); if (fabs(angle) > M_PI * 0.25) { // Use the llsq fit as a backup. baseline_pt1_ = llsq.mean_point(); baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m()); // TODO(rays) get rid of this when m and c are no longer used. double m = llsq.m(); double c = llsq.c(m); baseline_error_ = llsq.rms(m, c); good_baseline_ = false; } return good_baseline_; }