Esempio n. 1
0
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used. Returns false, and an empty range if the
// bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
// fits, but the top is impossible.
bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
                          const TBOX& bbox,
                          inT16* min_xht, inT16* max_xht) const {
  // Clip the top and bottom to the limit of normalized feature space.
  int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
  int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
  // A tolerance of yscale corresponds to 1 pixel in the image.
  double tolerance = y_scale();
  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                            &min_top, &max_top);
  // Default returns indicate a mis-fit.
  *min_xht = 0;
  *max_xht = 0;
  // Chars with a misfitting bottom might be sub/superscript/dropcap, or might
  // just be wrongly classified. Return an empty range so they have to be
  // good to be considered.
  if (bottom < min_bottom - tolerance || bottom > max_bottom + tolerance) {
    return false;
  }
  // To help very high cap/xheight ratio fonts accept the correct x-height,
  // and to allow the large caps in small caps to accept the xheight of the
  // small caps, add kBlnBaselineOffset to chars with a maximum max.
  if (max_top == kBlnCellHeight - 1)
    max_top += kBlnBaselineOffset;
  int height = top - kBlnBaselineOffset;
  double min_height = min_top - kBlnBaselineOffset - tolerance;
  double max_height = max_top - kBlnBaselineOffset + tolerance;
  if (min_height <= 0.0) {
    if (height <= 0 || max_height > 0)
      *max_xht = MAX_INT16;  // Anything will do.
  } else if (height > 0) {
    int result = IntCastRounded(height * kBlnXHeight / y_scale() / min_height);
    *max_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
  }
  if (max_height > 0.0 && height > 0) {
    int result = IntCastRounded(height * kBlnXHeight / y_scale() / max_height);
    *min_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
  }
  return true;
}
Esempio n. 2
0
/* static */
ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug,
                                        const UNICHARSET& unicharset,
                                        const TBOX& blob_box,
                                        UNICHAR_ID unichar_id) {
  ScriptPos retval = tesseract::SP_NORMAL;
  int top = blob_box.top();
  int bottom = blob_box.bottom();
  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id,
                            &min_bottom, &max_bottom,
                            &min_top, &max_top);

  int sub_thresh_top = min_top - kMinSubscriptOffset;
  int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset;
  int sup_thresh_bot = max_bottom + kMinSuperscriptOffset;
  if (bottom <= kMaxDropCapBottom) {
    retval = tesseract::SP_DROPCAP;
  } else if (top < sub_thresh_top && bottom < sub_thresh_bot) {
    retval = tesseract::SP_SUBSCRIPT;
  } else if (bottom > sup_thresh_bot) {
    retval = tesseract::SP_SUPERSCRIPT;
  }

  if (print_debug) {
    const char *pos = ScriptPosToString(retval);
    tprintf("%s Character %s[bot:%d top: %d]  "
            "bot_range[%d,%d]  top_range[%d, %d] "
            "sub_thresh[bot:%d top:%d]  sup_thresh_bot %d\n",
            pos, unicharset.id_to_unichar(unichar_id),
            bottom, top,
            min_bottom, max_bottom, min_top, max_top,
            sub_thresh_bot, sub_thresh_top,
            sup_thresh_bot);
  }
  return retval;
}
Esempio n. 3
0
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used.
void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
                          const TBOX& bbox,
                          float* min_xht, float* max_xht, float* yshift) const {
  // Default return -- accept anything.
  *yshift = 0.0f;
  *min_xht = 0.0f;
  *max_xht = MAX_FLOAT32;

  if (!unicharset.top_bottom_useful())
    return;

  // Clip the top and bottom to the limit of normalized feature space.
  int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
  int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
  // A tolerance of yscale corresponds to 1 pixel in the image.
  double tolerance = y_scale();
  // If the script doesn't have upper and lower-case characters, widen the
  // tolerance to allow sloppy baseline/x-height estimates.
  if (!unicharset.script_has_upper_lower())
    tolerance = y_scale() * kSloppyTolerance;

  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                            &min_top, &max_top);

  // Calculate the scale factor we'll use to get to image y-pixels
  double midx = (bbox.left() + bbox.right()) / 2.0;
  double ydiff = (bbox.top() - bbox.bottom()) + 2.0;
  FCOORD mid_bot(midx, bbox.bottom()), tmid_bot;
  FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high;
  DenormTransform(NULL, mid_bot, &tmid_bot);
  DenormTransform(NULL, mid_high, &tmid_high);

  // bln_y_measure * yscale = image_y_measure
  double yscale = tmid_high.pt_to_pt_dist(tmid_bot) / ydiff;

  // Calculate y-shift
  int bln_yshift = 0, bottom_shift = 0, top_shift = 0;
  if (bottom < min_bottom - tolerance) {
    bottom_shift = bottom - min_bottom;
  } else if (bottom > max_bottom + tolerance) {
    bottom_shift = bottom - max_bottom;
  }
  if (top < min_top - tolerance) {
    top_shift = top - min_top;
  } else if (top > max_top + tolerance) {
    top_shift = top - max_top;
  }
  if ((top_shift >= 0 && bottom_shift > 0) ||
      (top_shift < 0 && bottom_shift < 0)) {
    bln_yshift = (top_shift + bottom_shift) / 2;
  }
  *yshift = bln_yshift * yscale;

  // To help very high cap/xheight ratio fonts accept the correct x-height,
  // and to allow the large caps in small caps to accept the xheight of the
  // small caps, add kBlnBaselineOffset to chars with a maximum max, and have
  // a top already at a significantly high position.
  if (max_top == kBlnCellHeight - 1 &&
      top > kBlnCellHeight - kBlnBaselineOffset / 2)
    max_top += kBlnBaselineOffset;
  top -= bln_yshift;
  int height = top - kBlnBaselineOffset;
  double min_height = min_top - kBlnBaselineOffset - tolerance;
  double max_height = max_top - kBlnBaselineOffset + tolerance;

  // We shouldn't try calculations if the characters are very short (for example
  // for punctuation).
  if (min_height > kBlnXHeight / 8 && height > 0) {
    float result = height * kBlnXHeight * yscale / min_height;
    *max_xht = result + kFinalPixelTolerance;
    result = height * kBlnXHeight * yscale / max_height;
    *min_xht = result - kFinalPixelTolerance;
  }
}