// Given an initial estimate of line spacing (m_in) and the positions of each // baseline, computes the line spacing of the block more accurately in m_out, // and the corresponding intercept in c_out, and the number of spacings seen // in index_delta. Returns the error of fit to the line spacing model. // Uses a simple linear regression, but optimized the offset using the median. double BaselineBlock::FitLineSpacingModel( const GenericVector<double>& positions, double m_in, double* m_out, double* c_out, int* index_delta) { if (m_in == 0.0f || positions.size() < 2) { *m_out = m_in; *c_out = 0.0; if (index_delta != NULL) *index_delta = 0; return 0.0; } GenericVector<double> offsets; // Get the offset (remainder) linespacing for each line and choose the median. for (int i = 0; i < positions.size(); ++i) offsets.push_back(fmod(positions[i], m_in)); // Get the median offset. double median_offset = MedianOfCircularValues(m_in, &offsets); // Now fit a line to quantized line number and offset. LLSQ llsq; int min_index = MAX_INT32; int max_index = -MAX_INT32; for (int i = 0; i < positions.size(); ++i) { double y_pos = positions[i]; int row_index = IntCastRounded((y_pos - median_offset) / m_in); UpdateRange(row_index, &min_index, &max_index); llsq.add(row_index, y_pos); } // Get the refined line spacing. *m_out = llsq.m(); // Use the median offset rather than the mean. offsets.truncate(0); for (int i = 0; i < positions.size(); ++i) offsets.push_back(fmod(positions[i], *m_out)); // Get the median offset. if (debug_level_ > 2) { for (int i = 0; i < offsets.size(); ++i) tprintf("%d: %g\n", i, offsets[i]); } *c_out = MedianOfCircularValues(*m_out, &offsets); if (debug_level_ > 1) { tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, llsq.c(*m_out)); } // Index_delta is the number of hypothesized line gaps present. if (index_delta != NULL) *index_delta = max_index - min_index; // Use the regression model's intercept to compute the error, as it may be // a full line-spacing in disagreement with the median. double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); if (debug_level_ > 1) { tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, median_offset, *m_out, *c_out, rms_error); } return rms_error; }
void ResultIterator::CalculateTextlineOrder( bool paragraph_is_ltr, const LTRResultIterator &resit, GenericVector<StrongScriptDirection> *dirs_arg, GenericVectorEqEq<int> *word_indices) const { GenericVector<StrongScriptDirection> dirs; GenericVector<StrongScriptDirection> *directions; directions = (dirs_arg != NULL) ? dirs_arg : &dirs; directions->truncate(0); // A LTRResultIterator goes strictly left-to-right word order. LTRResultIterator ltr_it(resit); ltr_it.RestartRow(); if (ltr_it.Empty(RIL_WORD)) return; do { directions->push_back(ltr_it.WordDirection()); } while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE)); word_indices->truncate(0); CalculateTextlineOrder(paragraph_is_ltr, *directions, word_indices); }