示例#1
0
// Given an initial estimate of line spacing (m_in) and the positions of each
// baseline, computes the line spacing of the block more accurately in m_out,
// and the corresponding intercept in c_out, and the number of spacings seen
// in index_delta. Returns the error of fit to the line spacing model.
// Uses a simple linear regression, but optimized the offset using the median.
double BaselineBlock::FitLineSpacingModel(
    const GenericVector<double>& positions, double m_in,
    double* m_out, double* c_out, int* index_delta) {
  if (m_in == 0.0f || positions.size() < 2) {
    *m_out = m_in;
    *c_out = 0.0;
    if (index_delta != NULL) *index_delta = 0;
    return 0.0;
  }
  GenericVector<double> offsets;
  // Get the offset (remainder) linespacing for each line and choose the median.
  for (int i = 0; i < positions.size(); ++i)
    offsets.push_back(fmod(positions[i], m_in));
  // Get the median offset.
  double median_offset = MedianOfCircularValues(m_in, &offsets);
  // Now fit a line to quantized line number and offset.
  LLSQ llsq;
  int min_index = MAX_INT32;
  int max_index = -MAX_INT32;
  for (int i = 0; i < positions.size(); ++i) {
    double y_pos = positions[i];
    int row_index = IntCastRounded((y_pos - median_offset) / m_in);
    UpdateRange(row_index, &min_index, &max_index);
    llsq.add(row_index, y_pos);
  }
  // Get the refined line spacing.
  *m_out = llsq.m();
  // Use the median offset rather than the mean.
  offsets.truncate(0);
  for (int i = 0; i < positions.size(); ++i)
    offsets.push_back(fmod(positions[i], *m_out));
  // Get the median offset.
  if (debug_level_ > 2) {
    for (int i = 0; i < offsets.size(); ++i)
      tprintf("%d: %g\n", i, offsets[i]);
  }
  *c_out = MedianOfCircularValues(*m_out, &offsets);
  if (debug_level_ > 1) {
    tprintf("Median offset = %g, compared to mean of %g.\n",
            *c_out, llsq.c(*m_out));
  }
  // Index_delta is the number of hypothesized line gaps present.
  if (index_delta != NULL)
    *index_delta = max_index - min_index;
  // Use the regression model's intercept to compute the error, as it may be
  // a full line-spacing in disagreement with the median.
  double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
  if (debug_level_ > 1) {
    tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
            m_in, median_offset, *m_out, *c_out, rms_error);
  }
  return rms_error;
}
示例#2
0
void ResultIterator::CalculateTextlineOrder(
    bool paragraph_is_ltr,
    const LTRResultIterator &resit,
    GenericVector<StrongScriptDirection> *dirs_arg,
    GenericVectorEqEq<int> *word_indices) const {
  GenericVector<StrongScriptDirection> dirs;
  GenericVector<StrongScriptDirection> *directions;
  directions = (dirs_arg != NULL) ? dirs_arg : &dirs;
  directions->truncate(0);

  // A LTRResultIterator goes strictly left-to-right word order.
  LTRResultIterator ltr_it(resit);
  ltr_it.RestartRow();
  if (ltr_it.Empty(RIL_WORD)) return;
  do {
    directions->push_back(ltr_it.WordDirection());
  } while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE));

  word_indices->truncate(0);
  CalculateTextlineOrder(paragraph_is_ltr, *directions, word_indices);
}