Beispiel #1
0
// Adds sub-pixel resolution EdgeOffsets for the outline using only
// a binary image source.
// Runs a sliding window of 5 edge steps over the outline, maintaining a count
// of the number of steps in each of the 4 directions in the window, and a
// sum of the x or y position of each step (as appropriate to its direction.)
// Ignores single-count steps EXCEPT the sharp U-turn and smoothes out the
// perpendicular direction. Eg
// ___              ___       Chain code from the left:
//    |___    ___   ___|      222122212223221232223000
//        |___|  |_|          Corresponding counts of each direction:
//                          0   00000000000000000123
//                          1   11121111001111100000
//                          2   44434443443333343321
//                          3   00000001111111112111
// Count of direction at center 41434143413313143313
// Step gets used?              YNYYYNYYYNYYNYNYYYyY (y= U-turn exception)
// Path redrawn showing only the used points:
// ___              ___
//     ___    ___   ___|
//         ___    _
// Sub-pixel edge position cannot be shown well with ASCII-art, but each
// horizontal step's y position is the mean of the y positions of the steps
// in the same direction in the sliding window, which makes a much smoother
// outline, without losing important detail.
void C_OUTLINE::ComputeBinaryOffsets() {
  delete [] offsets;
  offsets = new EdgeOffset[stepcount];
  // Count of the number of steps in each direction in the sliding window.
  int dir_counts[4];
  // Sum of the positions (y for a horizontal step, x for vertical) in each
  // direction in the sliding window.
  int pos_totals[4];
  memset(dir_counts, 0, sizeof(dir_counts));
  memset(pos_totals, 0, sizeof(pos_totals));
  ICOORD pos = start;
  ICOORD tail_pos = pos;
  // tail_pos is the trailing position, with the next point to be lost from
  // the window.
  tail_pos -= step(stepcount - 1);
  tail_pos -= step(stepcount - 2);
  // head_pos is the leading position, with the next point to be added to the
  // window.
  ICOORD head_pos = tail_pos;
  // Set up the initial window with 4 points in [-2, 2)
  for (int s = -2; s < 2; ++s) {
    increment_step(s, 1, &head_pos, dir_counts, pos_totals);
  }
  for (int s = 0; s < stepcount; pos += step(s++)) {
    // At step s, s in in the middle of [s-2, s+2].
    increment_step(s + 2, 1, &head_pos, dir_counts, pos_totals);
    int dir_index = chain_code(s);
    ICOORD step_vec = step(s);
    int best_diff = 0;
    int offset = 0;
    // Use only steps that have a count of >=2 OR the strong U-turn with a
    // single d and 2 at d-1 and 2 at d+1 (mod 4).
    if (dir_counts[dir_index] >= 2 || (dir_counts[dir_index] == 1 &&
        dir_counts[Modulo(dir_index - 1, 4)] == 2 &&
        dir_counts[Modulo(dir_index + 1, 4)] == 2)) {
      // Valid step direction.
      best_diff = dir_counts[dir_index];
      int edge_pos = step_vec.x() == 0 ? pos.x() : pos.y();
      // The offset proposes that the actual step should be positioned at
      // the mean position of the steps in the window of the same direction.
      // See ASCII art above.
      offset = pos_totals[dir_index] - best_diff * edge_pos;
    }
    offsets[s].offset_numerator =
        static_cast<inT8>(ClipToRange(offset, -MAX_INT8, MAX_INT8));
    offsets[s].pixel_diff = static_cast<uinT8>(ClipToRange(best_diff, 0 ,
                                                           MAX_UINT8));
    // The direction is just the vector from start to end of the window.
    FCOORD direction(head_pos.x() - tail_pos.x(), head_pos.y() - tail_pos.y());
    offsets[s].direction = direction.to_direction();
    increment_step(s - 2, -1, &tail_pos, dir_counts, pos_totals);
  }
}
Beispiel #2
0
/**
 * Returns the bounding rectangle of the current object at the given level in
 * the coordinates of the working image that is pix_binary().
 * See comment on coordinate system above.
 * Returns false if there is no such object at the current position.
 */
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
                                       int* left, int* top,
                                       int* right, int* bottom) const {
  if (Empty(level))
    return false;
  TBOX box;
  PARA *para = NULL;
  switch (level) {
    case RIL_BLOCK:
      box = it_->block()->block->bounding_box();
      break;
    case RIL_PARA:
      para = it_->row()->row->para();
      // explicit fall-through.
    case RIL_TEXTLINE:
      box = it_->row()->row->bounding_box();
      break;
    case RIL_WORD:
      box = it_->word()->word->bounding_box();
      break;
    case RIL_SYMBOL:
      if (cblob_it_ == NULL)
        box = it_->word()->box_word->BlobBox(blob_index_);
      else
        box = cblob_it_->data()->bounding_box();
  }
  if (level == RIL_PARA) {
    PageIterator other = *this;
    other.Begin();
    do {
      if (other.it_->block() &&
          other.it_->block()->block == it_->block()->block &&
          other.it_->row() && other.it_->row()->row &&
          other.it_->row()->row->para() == para) {
        box = box.bounding_union(other.it_->row()->row->bounding_box());
      }
    } while (other.Next(RIL_TEXTLINE));
  }
  if (level != RIL_SYMBOL || cblob_it_ != NULL)
    box.rotate(it_->block()->block->re_rotation());
  // Now we have a box in tesseract coordinates relative to the image rectangle,
  // we have to convert the coords to a top-down system.
  const int pix_height = pixGetHeight(tesseract_->pix_binary());
  const int pix_width = pixGetWidth(tesseract_->pix_binary());
  *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
  *top = ClipToRange(pix_height - box.top(), 0, pix_height);
  *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
  *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
  return true;
}
Beispiel #3
0
// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x,
                                                        int y,
                                                        int theta) const {
  INT_FEATURE_STRUCT pos = {
      static_cast<uinT8>(ClipToRange(
          (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
          0, MAX_UINT8)),
      static_cast<uinT8>(ClipToRange(
          (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
          0, MAX_UINT8)),
      static_cast<uinT8>(ClipToRange(
          DivRounded(theta * kIntFeatureExtent, theta_buckets_),
          0, MAX_UINT8))};
  return pos;
}
Beispiel #4
0
// Helper for SetupNonLinear computes an image of shortest run-lengths from
// the x/y edges provided.
// Based on "A nonlinear normalization method for handprinted Kanji character
// recognition -- line density equalization" by Hiromitsu Yamada et al.
// Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding
//  ______________     input x_coords and y_coords.
// |  _________  |     <empty>
// | |    _    | |     1, 6
// | |   | |   | |     1, 3, 4, 6
// | |   | |   | |     1, 3, 4, 6
// | |   | |   | |     1, 3, 4, 6
// | |   |_|   | |     1, 3, 4, 6
// | |_________| |     1, 6
// |_____________|     <empty>
//  E 1 1 1 1 1 E
//  m 7 7 2 7 7 m
//  p     6     p
//  t     7     t
//  y           y
// The output image contains the min of the x and y run-length (distance
// between edges) at each coordinate in the image thus:
//  ______________
// |7 1_1_1_1_1 7|
// |1|5 5 1 5 5|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|5_5_1_5_5|1|
// |7_1_1_1_1_1_7|
// Note that the input coords are all integer, so all partial pixels are dealt
// with elsewhere. Although it is nice for outlines to be properly connected
// and continuous, there is no requirement that they be as such, so they could
// have been derived from a flaky source, such as greyscale.
// This function works only within the provided box, and it is assumed that the
// input x_coords and y_coords have already been translated to have the bottom-
// left of box as the origin. Although an output, the minruns should have been
// pre-initialized to be the same size as box. Each element will contain the
// minimum of x and y run-length as shown above.
static void ComputeRunlengthImage(
    const TBOX& box,
    const GenericVector<GenericVector<int> >& x_coords,
    const GenericVector<GenericVector<int> >& y_coords,
    GENERIC_2D_ARRAY<int>* minruns) {
  int width = box.width();
  int height = box.height();
  ASSERT_HOST(minruns->dim1() == width);
  ASSERT_HOST(minruns->dim2() == height);
  // Set a 2-d image array to the run lengths at each pixel.
  for (int ix = 0; ix < width; ++ix) {
    int y = 0;
    for (int i = 0; i < y_coords[ix].size(); ++i) {
      int y_edge = ClipToRange(y_coords[ix][i], 0, height);
      int gap = y_edge - y;
      // Every pixel between the last and current edge get set to the gap.
      while (y < y_edge) {
        (*minruns)(ix, y) = gap;
        ++y;
      }
    }
    // Pretend there is a bounding box of edges all around the image.
    int gap = height - y;
    while (y < height) {
      (*minruns)(ix, y) = gap;
      ++y;
    }
  }
  // Now set the image pixels the the MIN of the x and y runlengths.
  for (int iy = 0; iy < height; ++iy) {
    int x = 0;
    for (int i = 0; i < x_coords[iy].size(); ++i) {
      int x_edge = ClipToRange(x_coords[iy][i], 0, width);
      int gap = x_edge - x;
      while (x < x_edge) {
        if (gap < (*minruns)(x, iy))
          (*minruns)(x, iy) = gap;
        ++x;
      }
    }
    int gap = width - x;
    while (x < width) {
      if (gap < (*minruns)(x, iy))
        (*minruns)(x, iy) = gap;
      ++x;
    }
  }
}
Beispiel #5
0
void DENORM::LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const {
  FCOORD translated(pt.x() - x_origin_, pt.y() - y_origin_);
  if (x_map_ != NULL && y_map_ != NULL) {
    int x = ClipToRange(IntCastRounded(translated.x()), 0, x_map_->size()-1);
    translated.set_x((*x_map_)[x]);
    int y = ClipToRange(IntCastRounded(translated.y()), 0, y_map_->size()-1);
    translated.set_y((*y_map_)[y]);
  } else {
    translated.set_x(translated.x() * x_scale_);
    translated.set_y(translated.y() * y_scale_);
    if (rotation_ != NULL)
      translated.rotate(*rotation_);
  }
  transformed->set_x(translated.x() + final_xshift_);
  transformed->set_y(translated.y() + final_yshift_);
}
Beispiel #6
0
/**
 * Returns the bounding rectangle of the current object at the given level in
 * coordinates of the original image.
 * See comment on coordinate system above.
 * Returns false if there is no such object at the current position.
 */
bool PageIterator::BoundingBox(PageIteratorLevel level,
                               int* left, int* top,
                               int* right, int* bottom) const {
  if (!BoundingBoxInternal(level, left, top, right, bottom))
    return false;
  // Convert to the coordinate system of the original image.
  *left = ClipToRange(*left / scale_ + rect_left_,
                      rect_left_, rect_left_ + rect_width_);
  *top = ClipToRange(*top / scale_ + rect_top_,
                     rect_top_, rect_top_ + rect_height_);
  *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_,
                       *left, rect_left_ + rect_width_);
  *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_,
                        *top, rect_top_ + rect_height_);
  return true;
}
Beispiel #7
0
static void HistogramWeight(double weight, STATS* histogram) {
  int bucket = kHistogramBuckets - 1;
  if (weight != 0.0) {
    double logval = -log2(fabs(weight));
    bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
  }
  histogram->add(bucket, 1);
}
Beispiel #8
0
// Applies params model weights to the given features.
// Assumes that features is an array of size PTRAIN_NUM_FEATURE_TYPES.
// The cost is set to a number that can be multiplied by the outline length,
// as with the old ratings scheme. This enables words of different length
// and combinations of words to be compared meaningfully.
float ParamsModel::ComputeCost(const float features[]) const {
  float unnorm_score = 0.0;
  for (int f = 0; f < PTRAIN_NUM_FEATURE_TYPES; ++f) {
    unnorm_score += weights_vec_[pass_][f] * features[f];
  }
  return ClipToRange(-unnorm_score / kScoreScaleFactor,
                     kMinFinalCost, kMaxFinalCost);
}
// Constructs and returns a copy randomized by the method given by
// the randomizer index. If index is out of [0, kSampleRandomSize) then
// an exact copy is returned.
TrainingSample* TrainingSample::RandomizedCopy(int index) const {
  TrainingSample* sample = Copy();
  if (index >= 0 && index < kSampleRandomSize) {
    ++index;  // Remove the first combination.
    int yshift = kYShiftValues[index / kSampleScaleSize];
    double scaling = kScaleValues[index % kSampleScaleSize];
    for (int i = 0; i < num_features_; ++i) {
      double result = (features_[i].X - kRandomizingCenter) * scaling;
      result += kRandomizingCenter;
      sample->features_[i].X = ClipToRange(static_cast<int>(result + 0.5), 0,
                                           MAX_UINT8);
      result = (features_[i].Y - kRandomizingCenter) * scaling;
      result += kRandomizingCenter + yshift;
      sample->features_[i].Y = ClipToRange(static_cast<int>(result + 0.5), 0,
                                           MAX_UINT8);
    }
  }
  return sample;
}
Beispiel #10
0
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used. Returns false, and an empty range if the
// bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
// fits, but the top is impossible.
bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
                          const TBOX& bbox,
                          inT16* min_xht, inT16* max_xht) const {
  // Clip the top and bottom to the limit of normalized feature space.
  int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
  int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
  // A tolerance of yscale corresponds to 1 pixel in the image.
  double tolerance = y_scale();
  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                            &min_top, &max_top);
  // Default returns indicate a mis-fit.
  *min_xht = 0;
  *max_xht = 0;
  // Chars with a misfitting bottom might be sub/superscript/dropcap, or might
  // just be wrongly classified. Return an empty range so they have to be
  // good to be considered.
  if (bottom < min_bottom - tolerance || bottom > max_bottom + tolerance) {
    return false;
  }
  // To help very high cap/xheight ratio fonts accept the correct x-height,
  // and to allow the large caps in small caps to accept the xheight of the
  // small caps, add kBlnBaselineOffset to chars with a maximum max.
  if (max_top == kBlnCellHeight - 1)
    max_top += kBlnBaselineOffset;
  int height = top - kBlnBaselineOffset;
  double min_height = min_top - kBlnBaselineOffset - tolerance;
  double max_height = max_top - kBlnBaselineOffset + tolerance;
  if (min_height <= 0.0) {
    if (height <= 0 || max_height > 0)
      *max_xht = MAX_INT16;  // Anything will do.
  } else if (height > 0) {
    int result = IntCastRounded(height * kBlnXHeight / y_scale() / min_height);
    *max_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
  }
  if (max_height > 0.0 && height > 0) {
    int result = IntCastRounded(height * kBlnXHeight / y_scale() / max_height);
    *min_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
  }
  return true;
}
Beispiel #11
0
/** 
 * For each class in unicharset, computes the match between
 * norm_feature and the normalization protos for that class.
 * Converts this number to the range from 0 - 255 and stores it
 * into char_norm_array.  CharNormArray is indexed by unichar_id.
 *
 * Globals: 
 * - PreTrainedTemplates current set of built-in templates
 *
 * @param norm_feature character normalization feature
 * @param[out] char_norm_array place to put results of size unicharset.size()
 *
 * @note Exceptions: none
 * @note History: Wed Feb 20 11:20:54 1991, DSJ, Created.
 */
void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature,
                                       uinT8* char_norm_array) {
  for (int i = 0; i < unicharset.size(); i++) {
    if (i < PreTrainedTemplates->NumClasses) {
      int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
        ComputeNormMatch(i, norm_feature, FALSE));
      char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
    } else {
      // Classes with no templates (eg. ambigs & ligatures) default
      // to worst match.
      char_norm_array[i] = MAX_INT_CHAR_NORM;
    }
  }
}                                /* ComputeIntCharNormArray */
Beispiel #12
0
// Normalize in-place and record the normalization in the DENORM.
void TWERD::Normalize(ROW* row, float x_height, bool numeric_mode,
                      DENORM* denorm) {
  TBOX word_box = bounding_box();
  DENORM antidote((word_box.left() + word_box.right()) / 2.0,
                  kBlnXHeight / x_height, row);
  if (row == NULL) {
    antidote = DENORM(antidote.origin(), antidote.scale(), 0.0,
                      word_box.bottom(), 0, NULL, false, NULL);
  }
  int num_segments = 0;
  DENORM_SEG *segs = new DENORM_SEG[NumBlobs()];
  for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
    TBOX blob_box = blob->bounding_box();
    ICOORD translation(-static_cast<int>(floor(antidote.origin() + 0.5)),
                       -blob_box.bottom());
    float factor = antidote.scale();
    if (numeric_mode) {
      factor = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
                           factor, factor * 1.5f);
      segs[num_segments].xstart = blob->bounding_box().left();
      segs[num_segments].ycoord = blob_box.bottom();
      segs[num_segments++].scale_factor = factor;
    } else {
      float blob_x_center = (blob_box.left() + blob_box.right()) / 2.0;
      float y_shift = antidote.yshift_at_orig_x(blob_x_center);
      translation.set_y(-static_cast<int>(floor(y_shift + 0.5)));
    }
    blob->Move(translation);
    blob->Scale(factor);
    blob->Move(ICOORD(0, kBlnBaselineOffset));
  }
  if (num_segments > 0) {
    antidote.set_segments(segs, num_segments);
  }
  delete [] segs;
  if (denorm != NULL)
    *denorm = antidote;
}
Beispiel #13
0
WordFeature::WordFeature(const FCOORD& fcoord, uinT8 dir)
  : x_(IntCastRounded(fcoord.x())),
    y_(ClipToRange(IntCastRounded(fcoord.y()), 0, MAX_UINT8)),
    dir_(dir) {
}
Beispiel #14
0
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
// pix is 8-bit. Does nothing otherwise.
// Operation: Consider the following near-horizontal line:
// _________
//          |________
//                   |________
// At *every* position along this line, the gradient direction will be close
// to vertical. Extrapoaltion/interpolation of the position of the threshold
// that was used to binarize the image gives a more precise vertical position
// for each horizontal step, and the conflict in step direction and gradient
// direction can be used to ignore the vertical steps.
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) {
  if (pixGetDepth(pix) != 8) return;
  const l_uint32* data = pixGetData(pix);
  int wpl = pixGetWpl(pix);
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  bool negative = flag(COUT_INVERSE);
  delete [] offsets;
  offsets = new EdgeOffset[stepcount];
  ICOORD pos = start;
  ICOORD prev_gradient;
  ComputeGradient(data, wpl, pos.x(), height - pos.y(), width, height,
                  &prev_gradient);
  for (int s = 0; s < stepcount; ++s) {
    ICOORD step_vec = step(s);
    TPOINT pt1(pos);
    pos += step_vec;
    TPOINT pt2(pos);
    ICOORD next_gradient;
    ComputeGradient(data, wpl, pos.x(), height - pos.y(), width, height,
                    &next_gradient);
    // Use the sum of the prev and next as the working gradient.
    ICOORD gradient = prev_gradient + next_gradient;
    // best_diff will be manipulated to be always positive.
    int best_diff = 0;
    // offset will be the extrapolation of the location of the greyscale
    // threshold from the edge with the largest difference, relative to the
    // location of the binary edge.
    int offset = 0;
    if (pt1.y == pt2.y && abs(gradient.y()) * 2 >= abs(gradient.x())) {
      // Horizontal step. diff_sign == 1 indicates black above.
      int diff_sign = (pt1.x > pt2.x) == negative ? 1 : -1;
      int x = MIN(pt1.x, pt2.x);
      int y = height - pt1.y;
      int best_sum = 0;
      int best_y = y;
      EvaluateVerticalDiff(data, wpl, diff_sign, x, y, height,
                           &best_diff, &best_sum, &best_y);
      // Find the strongest edge.
      int test_y = y;
      do {
        ++test_y;
      } while (EvaluateVerticalDiff(data, wpl, diff_sign, x, test_y, height,
                                    &best_diff, &best_sum, &best_y));
      test_y = y;
      do {
        --test_y;
      } while (EvaluateVerticalDiff(data, wpl, diff_sign, x, test_y, height,
                                    &best_diff, &best_sum, &best_y));
      offset = diff_sign * (best_sum / 2 - threshold) +
          (y - best_y) * best_diff;
    } else if (pt1.x == pt2.x && abs(gradient.x()) * 2 >= abs(gradient.y())) {
      // Vertical step. diff_sign == 1 indicates black on the left.
      int diff_sign = (pt1.y > pt2.y) == negative ? 1 : -1;
      int x = pt1.x;
      int y = height - MAX(pt1.y, pt2.y);
      const l_uint32* line = pixGetData(pix) + y * wpl;
      int best_sum = 0;
      int best_x = x;
      EvaluateHorizontalDiff(line, diff_sign, x, width,
                             &best_diff, &best_sum, &best_x);
      // Find the strongest edge.
      int test_x = x;
      do {
        ++test_x;
      } while (EvaluateHorizontalDiff(line, diff_sign, test_x, width,
                                      &best_diff, &best_sum, &best_x));
      test_x = x;
      do {
        --test_x;
      } while (EvaluateHorizontalDiff(line, diff_sign, test_x, width,
                                      &best_diff, &best_sum, &best_x));
      offset = diff_sign * (threshold - best_sum / 2) +
          (best_x - x) * best_diff;
    }
    offsets[s].offset_numerator =
        static_cast<inT8>(ClipToRange(offset, -MAX_INT8, MAX_INT8));
    offsets[s].pixel_diff = static_cast<uinT8>(ClipToRange(best_diff, 0 ,
                                                           MAX_UINT8));
    if (negative) gradient = -gradient;
    // Compute gradient angle quantized to 256 directions, rotated by 64 (pi/2)
    // to convert from gradient direction to edge direction.
    offsets[s].direction =
        Modulo(FCOORD::binary_angle_plus_pi(gradient.angle()) + 64, 256);
    prev_gradient = next_gradient;
  }
}
int TextlineProjection::ImageYToProjectionY(int y) const {
  y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
  return y;
}
Beispiel #16
0
// Clip the given grid coordinates to fit within the grid.
void GridBase::ClipGridCoords(int* x, int* y) const {
  *x = ClipToRange(*x, 0, gridwidth_ - 1);
  *y = ClipToRange(*y, 0, gridheight_ - 1);
}
Beispiel #17
0
// Gets the properties for a grapheme string, combining properties for
// multiple characters in a meaningful way where possible.
// Returns false if no valid match was found in the unicharset.
// NOTE that script_id, mirror, and other_case refer to this unicharset on
// return and will need translation if the target unicharset is different.
bool UNICHARSET::GetStrProperties(const char* utf8_str,
                                  UNICHAR_PROPERTIES* props) const {
  props->Init();
  props->SetRangesEmpty();
  props->min_advance = 0;
  props->max_advance = 0;
  int total_unicodes = 0;
  GenericVector<UNICHAR_ID> encoding;
  if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
    return false;  // Some part was invalid.
  for (int i = 0; i < encoding.size(); ++i) {
    int id = encoding[i];
    const UNICHAR_PROPERTIES& src_props = unichars[id].properties;
    // Logical OR all the bools.
    if (src_props.isalpha) props->isalpha = true;
    if (src_props.islower) props->islower = true;
    if (src_props.isupper) props->isupper = true;
    if (src_props.isdigit) props->isdigit = true;
    if (src_props.ispunctuation) props->ispunctuation = true;
    if (src_props.isngram) props->isngram = true;
    if (src_props.enabled) props->enabled = true;
    // Min/max the tops/bottoms.
    UpdateRange(src_props.min_bottom, &props->min_bottom, &props->max_bottom);
    UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
    UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
    UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
    int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
                              -MAX_INT16, MAX_INT16);
    if (total_unicodes == 0 || bearing < props->min_bearing)
      props->min_bearing = bearing;
    bearing = ClipToRange(props->max_advance + src_props.max_bearing,
                          -MAX_INT16, MAX_INT16);
    if (total_unicodes == 0 || bearing < props->max_bearing)
      props->max_bearing = bearing;
    props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
                                     -MAX_INT16, MAX_INT16);
    props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
                                     -MAX_INT16, MAX_INT16);
    // With a single width, just use the widths stored in the unicharset.
    props->min_width = src_props.min_width;
    props->max_width = src_props.max_width;
    // Use the first script id, other_case, mirror, direction.
    // Note that these will need translation, except direction.
    if (total_unicodes == 0) {
      props->script_id = src_props.script_id;
      props->other_case = src_props.other_case;
      props->mirror = src_props.mirror;
      props->direction = src_props.direction;
    }
    // The normed string for the compound character is the concatenation of
    // the normed versions of the individual characters.
    props->normed += src_props.normed;
    ++total_unicodes;
  }
  if (total_unicodes > 1) {
    // Estimate the total widths from the advance - bearing.
    props->min_width = ClipToRange(props->min_advance - props->max_bearing,
                                   -MAX_INT16, MAX_INT16);
    props->max_width = ClipToRange(props->max_advance - props->min_bearing,
                                   -MAX_INT16, MAX_INT16);
  }
  return total_unicodes > 0;
}
// Transform tesseract image coordinates to coordinates used in the projection.
int TextlineProjection::ImageXToProjectionX(int x) const {
  x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
  return x;
}