// Helper returns the mean pixel value over the line between the start_pt and
// end_pt (inclusive), but shifted perpendicular to the line in the projection
// image by offset pixels. For simplicity, it is assumed that the vector is
// either nearly horizontal or nearly vertical. It works on skewed textlines!
// The end points are in external coordinates, and will be denormalized with
// the denorm if not NULL before further conversion to pix coordinates.
// After all the conversions, the offset is added to the direction
// perpendicular to the line direction. The offset is thus in projection image
// coordinates, which allows the caller to get a guaranteed displacement
// between pixels used to calculate gradients.
int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
                                                int offset,
                                                TPOINT start_pt,
                                                TPOINT end_pt) const {
  TransformToPixCoords(denorm, &start_pt);
  TransformToPixCoords(denorm, &end_pt);
  TruncateToImageBounds(&start_pt);
  TruncateToImageBounds(&end_pt);
  int wpl = pixGetWpl(pix_);
  uint32_t* data = pixGetData(pix_);
  int total = 0;
  int count = 0;
  int x_delta = end_pt.x - start_pt.x;
  int y_delta = end_pt.y - start_pt.y;
  if (abs(x_delta) >= abs(y_delta)) {
    if (x_delta == 0)
      return 0;
    // Horizontal line. Add the offset vertically.
    int x_step = x_delta > 0 ? 1 : -1;
    // Correct offset for rotation, keeping it anti-clockwise of the delta.
    offset *= x_step;
    start_pt.y += offset;
    end_pt.y += offset;
    TruncateToImageBounds(&start_pt);
    TruncateToImageBounds(&end_pt);
    x_delta = end_pt.x - start_pt.x;
    y_delta = end_pt.y - start_pt.y;
    count = x_delta * x_step + 1;
    for (int x = start_pt.x; x != end_pt.x; x += x_step) {
      int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
      total += GET_DATA_BYTE(data + wpl * y, x);
    }
  } else {
    // Vertical line. Add the offset horizontally.
    int y_step = y_delta > 0 ? 1 : -1;
    // Correct offset for rotation, keeping it anti-clockwise of the delta.
    // Pix holds the image with y=0 at the top, so the offset is negated.
    offset *= -y_step;
    start_pt.x += offset;
    end_pt.x += offset;
    TruncateToImageBounds(&start_pt);
    TruncateToImageBounds(&end_pt);
    x_delta = end_pt.x - start_pt.x;
    y_delta = end_pt.y - start_pt.y;
    count = y_delta * y_step + 1;
    for (int y = start_pt.y; y != end_pt.y; y += y_step) {
      int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
      total += GET_DATA_BYTE(data + wpl * y, x);
    }
  }
  return DivRounded(total, count);
}
示例#2
0
// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x,
                                                        int y,
                                                        int theta) const {
  INT_FEATURE_STRUCT pos(
      (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
      (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
      DivRounded(theta * kIntFeatureExtent, theta_buckets_));
  return pos;
}
示例#3
0
// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x,
                                                        int y,
                                                        int theta) const {
  INT_FEATURE_STRUCT pos = {
      static_cast<uinT8>(ClipToRange(
          (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
          0, MAX_UINT8)),
      static_cast<uinT8>(ClipToRange(
          (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
          0, MAX_UINT8)),
      static_cast<uinT8>(ClipToRange(
          DivRounded(theta * kIntFeatureExtent, theta_buckets_),
          0, MAX_UINT8))};
  return pos;
}
示例#4
0
// Returns a new x-height maximally compatible with the result in word_res.
// See comment above for overall algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res) {
  STATS top_stats(0, MAX_UINT8);
  TBLOB* blob = word_res->rebuild_word->blobs;
  int blob_id = 0;
  for (; blob != NULL; blob = blob->next, ++blob_id) {
    UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
    if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
      int top = blob->bounding_box().top();
      // Clip the top to the limit of normalized feature space.
      if (top >= INT_FEAT_RANGE)
        top = INT_FEAT_RANGE - 1;
      int bottom = blob->bounding_box().bottom();
      int min_bottom, max_bottom, min_top, max_top;
      unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
                                &min_top, &max_top);
      // Chars with a wild top range would mess up the result so ignore them.
      if (max_top - min_top > kMaxCharTopRange)
        continue;
      int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top,
                          top - (max_top + x_ht_acceptance_tolerance));
      int height = top - kBlnBaselineOffset;
      if (debug_x_ht_level >= 20) {
        tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d : ",
                unicharset.id_to_unichar(class_id),
                height, min_bottom, max_bottom, min_top, max_top,
                bottom, top);
      }
      // Use only chars that fit in the expected bottom range, and where
      // the range of tops is sensibly near the xheight.
      if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
          bottom - x_ht_acceptance_tolerance <= max_bottom &&
          min_top > kBlnBaselineOffset &&
          max_top - kBlnBaselineOffset >= kBlnXHeight &&
          misfit_dist > 0) {
        // Compute the x-height position using proportionality between the
        // actual height and expected height.
        int min_xht = DivRounded(height * kBlnXHeight,
                                 max_top - kBlnBaselineOffset);
        int max_xht = DivRounded(height * kBlnXHeight,
                                 min_top - kBlnBaselineOffset);
        if (debug_x_ht_level >= 20) {
          tprintf(" xht range min=%d, max=%d\n",
                  min_xht, max_xht);
        }
        // The range of expected heights gets a vote equal to the distance
        // of the actual top from the expected top.
        for (int y = min_xht; y <= max_xht; ++y)
          top_stats.add(y, misfit_dist);
      } else if (debug_x_ht_level >= 20) {
        tprintf(" already OK\n");
      }
    }
  }
  if (top_stats.get_total() == 0)
    return 0.0f;
  // The new xheight is just the median vote, which is then scaled out
  // of BLN space back to pixel space to get the x-height in pixel space.
  float new_xht = top_stats.median();
  if (debug_x_ht_level >= 20) {
    tprintf("Median xht=%f\n", new_xht);
    tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n",
            new_xht, new_xht / word_res->denorm.y_scale());
  }
  // The xheight must change by at least x_ht_min_change to be used.
  if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
    return new_xht / word_res->denorm.y_scale();
  else
    return 0.0f;
}
示例#5
0
// Returns a new x-height maximally compatible with the result in word_res.
// See comment above for overall algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
                                          float* baseline_shift) {
  STATS top_stats(0, MAX_UINT8);
  STATS shift_stats(-MAX_UINT8, MAX_UINT8);
  int bottom_shift = 0;
  int num_blobs = word_res->rebuild_word->NumBlobs();
  do {
    top_stats.clear();
    shift_stats.clear();
    for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
      TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
      UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
      if (unicharset.get_isalpha(class_id) ||
          unicharset.get_isdigit(class_id)) {
        int top = blob->bounding_box().top() + bottom_shift;
        // Clip the top to the limit of normalized feature space.
        if (top >= INT_FEAT_RANGE)
          top = INT_FEAT_RANGE - 1;
        int bottom = blob->bounding_box().bottom() + bottom_shift;
        int min_bottom, max_bottom, min_top, max_top;
        unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
                                  &min_top, &max_top);
        // Chars with a wild top range would mess up the result so ignore them.
        if (max_top - min_top > kMaxCharTopRange)
          continue;
        int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top,
                            top - (max_top + x_ht_acceptance_tolerance));
        int height = top - kBlnBaselineOffset;
        if (debug_x_ht_level >= 2) {
          tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ",
                  unicharset.id_to_unichar(class_id),
                  height, min_bottom, max_bottom, min_top, max_top,
                  bottom, top);
        }
        // Use only chars that fit in the expected bottom range, and where
        // the range of tops is sensibly near the xheight.
        if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
            bottom - x_ht_acceptance_tolerance <= max_bottom &&
            min_top > kBlnBaselineOffset &&
            max_top - kBlnBaselineOffset >= kBlnXHeight &&
            misfit_dist > 0) {
          // Compute the x-height position using proportionality between the
          // actual height and expected height.
          int min_xht = DivRounded(height * kBlnXHeight,
                                   max_top - kBlnBaselineOffset);
          int max_xht = DivRounded(height * kBlnXHeight,
                                   min_top - kBlnBaselineOffset);
          if (debug_x_ht_level >= 2) {
            tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht);
          }
          // The range of expected heights gets a vote equal to the distance
          // of the actual top from the expected top.
          for (int y = min_xht; y <= max_xht; ++y)
            top_stats.add(y, misfit_dist);
        } else if ((min_bottom > bottom + x_ht_acceptance_tolerance ||
                    bottom - x_ht_acceptance_tolerance > max_bottom) &&
                   bottom_shift == 0) {
          // Get the range of required bottom shift.
          int min_shift = min_bottom - bottom;
          int max_shift = max_bottom - bottom;
          if (debug_x_ht_level >= 2) {
            tprintf(" bottom shift min=%d, max=%d\n", min_shift, max_shift);
          }
          // The range of expected shifts gets a vote equal to the min distance
          // of the actual bottom from the expected bottom, spread over the
          // range of its acceptance.
          int misfit_weight = abs(min_shift);
          if (max_shift > min_shift)
            misfit_weight /= max_shift - min_shift;
          for (int y = min_shift; y <= max_shift; ++y)
            shift_stats.add(y, misfit_weight);
        } else {
          if (bottom_shift == 0) {
            // Things with bottoms that are already ok need to say so, on the
            // 1st iteration only.
            shift_stats.add(0, kBlnBaselineOffset);
          }
          if (debug_x_ht_level >= 2) {
            tprintf(" already OK\n");
          }
        }
      }
    }
    if (shift_stats.get_total() > top_stats.get_total()) {
      bottom_shift = IntCastRounded(shift_stats.median());
      if (debug_x_ht_level >= 2) {
        tprintf("Applying bottom shift=%d\n", bottom_shift);
      }
    }
  } while (bottom_shift != 0 &&
           top_stats.get_total() < shift_stats.get_total());
  // Baseline shift is opposite sign to the bottom shift.
  *baseline_shift = -bottom_shift / word_res->denorm.y_scale();
  if (debug_x_ht_level >= 2) {
    tprintf("baseline shift=%g\n", *baseline_shift);
  }
  if (top_stats.get_total() == 0)
    return bottom_shift != 0 ? word_res->x_height : 0.0f;
  // The new xheight is just the median vote, which is then scaled out
  // of BLN space back to pixel space to get the x-height in pixel space.
  float new_xht = top_stats.median();
  if (debug_x_ht_level >= 2) {
    tprintf("Median xht=%f\n", new_xht);
    tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n",
            new_xht, new_xht / word_res->denorm.y_scale());
  }
  // The xheight must change by at least x_ht_min_change to be used.
  if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
    return new_xht / word_res->denorm.y_scale();
  else
    return bottom_shift != 0 ? word_res->x_height : 0.0f;
}