// Sets up displacement_modes_ with the top few modes of the perpendicular // distance of each blob from the given direction vector, after rounding. void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { // Set of perpendicular displacements of the blob bottoms from the required // baseline direction. GenericVector<double> perp_blob_dists; displacement_modes_.truncate(0); // Gather the skew-corrected position of every blob. double min_dist = MAX_FLOAT32; double max_dist = -MAX_FLOAT32; BLOBNBOX_IT blob_it(blobs_); bool debug = false; for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); const TBOX& box = blob->bounding_box(); #ifdef kDebugYCoord if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true; #endif FCOORD blob_pos((box.left() + box.right()) / 2.0f, blob->baseline_position()); double offset = direction * blob_pos; perp_blob_dists.push_back(offset); if (debug) { tprintf("Displacement %g for blob at:", offset); box.print(); } UpdateRange(offset, &min_dist, &max_dist); } // Set up a histogram using disp_quant_factor_ as the bucket size. STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), IntCastRounded(max_dist / disp_quant_factor_) + 1); for (int i = 0; i < perp_blob_dists.size(); ++i) { dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1); } GenericVector<KDPairInc<float, int> > scaled_modes; dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes); if (debug) { for (int i = 0; i < scaled_modes.size(); ++i) { tprintf("Top mode = %g * %d\n", scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data); } } for (int i = 0; i < scaled_modes.size(); ++i) displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key); }
// Fits a straight baseline to the points. Returns true if it had enough // points to be reasonably sure of the fitted baseline. // If use_box_bottoms is false, baselines positions are formed by // considering the outlines of the blobs. bool BaselineRow::FitBaseline(bool use_box_bottoms) { // Deterministic fitting is used wherever possible. fitter_.Clear(); // Linear least squares is a backup if the DetLineFit produces a bad line. LLSQ llsq; BLOBNBOX_IT blob_it(blobs_); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (!use_box_bottoms) blob->EstimateBaselinePosition(); const TBOX& box = blob->bounding_box(); int x_middle = (box.left() + box.right()) / 2; #ifdef kDebugYCoord if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { tprintf("Box bottom = %d, baseline pos=%d for box at:", box.bottom(), blob->baseline_position()); box.print(); } #endif fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2); llsq.add(x_middle, blob->baseline_position()); } // Fit the line. ICOORD pt1, pt2; baseline_error_ = fitter_.Fit(&pt1, &pt2); baseline_pt1_ = pt1; baseline_pt2_ = pt2; if (baseline_error_ > max_baseline_error_ && fitter_.SufficientPointsForIndependentFit()) { // The fit was bad but there were plenty of points, so try skipping // the first and last few, and use the new line if it dramatically improves // the error of fit. double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2); if (error < baseline_error_ / 2.0) { baseline_error_ = error; baseline_pt1_ = pt1; baseline_pt2_ = pt2; } } int debug = 0; #ifdef kDebugYCoord Print(); debug = bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord ? 3 : 2; #endif // Now we obtained a direction from that fit, see if we can improve the // fit using the same direction and some other start point. FCOORD direction(pt2 - pt1); double target_offset = direction * pt1; good_baseline_ = false; FitConstrainedIfBetter(debug, direction, 0.0, target_offset); // Wild lines can be produced because DetLineFit allows vertical lines, but // vertical text has been rotated so angles over pi/4 should be disallowed. // Near vertical lines can still be produced by vertically aligned components // on very short lines. double angle = BaselineAngle(); if (fabs(angle) > M_PI * 0.25) { // Use the llsq fit as a backup. baseline_pt1_ = llsq.mean_point(); baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m()); // TODO(rays) get rid of this when m and c are no longer used. double m = llsq.m(); double c = llsq.c(m); baseline_error_ = llsq.rms(m, c); good_baseline_ = false; } return good_baseline_; }