Ejemplo n.º 1
0
// Computes the DENORMS for bl(baseline) and cn(character) normalization
// during feature extraction. The input denorm describes the current state
// of the blob, which is usually a baseline-normalized word.
// The Transforms setup are as follows:
// Baseline Normalized (bl) Output:
//   We center the grapheme by aligning the x-coordinate of its centroid with
//   x=128 and leaving the already-baseline-normalized y as-is.
//
// Character Normalized (cn) Output:
//   We align the grapheme's centroid at the origin and scale it
//   asymmetrically in x and y so that the 2nd moments are a standard value
//   (51.2) ie the result is vaguely square.
// If classify_nonlinear_norm is true:
//   A non-linear normalization is setup that attempts to evenly distribute
//   edges across x and y.
//
// Some of the fields of fx_info are also setup:
// Length: Total length of outline.
// Rx:     Rounded y second moment. (Reversed by convention.)
// Ry:     rounded x second moment.
// Xmean:  Rounded x center of mass of the blob.
// Ymean:  Rounded y center of mass of the blob.
void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm,
                                DENORM* bl_denorm, DENORM* cn_denorm,
                                INT_FX_RESULT_STRUCT* fx_info) {
  // Compute 1st and 2nd moments of the original outline.
  FCOORD center, second_moments;
  int length = blob.ComputeMoments(&center, &second_moments);
  if (fx_info != nullptr) {
    fx_info->Length = length;
    fx_info->Rx = IntCastRounded(second_moments.y());
    fx_info->Ry = IntCastRounded(second_moments.x());

    fx_info->Xmean = IntCastRounded(center.x());
    fx_info->Ymean = IntCastRounded(center.y());
  }
  // Setup the denorm for Baseline normalization.
  bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f,
                                1.0f, 1.0f, 128.0f, 128.0f);
  // Setup the denorm for character normalization.
  if (nonlinear_norm) {
    GenericVector<GenericVector<int> > x_coords;
    GenericVector<GenericVector<int> > y_coords;
    TBOX box;
    blob.GetPreciseBoundingBox(&box);
    box.pad(1, 1);
    blob.GetEdgeCoords(box, &x_coords, &y_coords);
    cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX,
                              0.0f, 0.0f, x_coords, y_coords);
  } else {
    cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(),
                                  center.x(), center.y(),
                                  51.2f / second_moments.x(),
                                  51.2f / second_moments.y(),
                                  128.0f, 128.0f);
  }
}
Ejemplo n.º 2
0
// Helper gets the image of a rectangle, using the block.re_rotation() if
// needed to get to the image, and rotating the result back to horizontal
// layout. (CJK characters will be on their left sides) The vertical text flag
// is set in the returned ImageData if the text was originally vertical, which
// can be used to invoke a different CJK recognition engine. The revised_box
// is also returned to enable calculation of output bounding boxes.
ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
                                   int padding, TBOX* revised_box) const {
  TBOX wbox = box;
  wbox.pad(padding, padding);
  *revised_box = wbox;
  // Number of clockwise 90 degree rotations needed to get back to tesseract
  // coords from the clipped image.
  int num_rotations = 0;
  if (block.re_rotation().y() > 0.0f)
    num_rotations = 1;
  else if (block.re_rotation().x() < 0.0f)
    num_rotations = 2;
  else if (block.re_rotation().y() < 0.0f)
    num_rotations = 3;
  // Handle two cases automatically: 1 the box came from the block, 2 the box
  // came from a box file, and refers to the image, which the block may not.
  if (block.bounding_box().major_overlap(*revised_box))
    revised_box->rotate(block.re_rotation());
  // Now revised_box always refers to the image.
  // BestPix is never colormapped, but may be of any depth.
  Pix* pix = BestPix();
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  TBOX image_box(0, 0, width, height);
  // Clip to image bounds;
  *revised_box &= image_box;
  if (revised_box->null_box()) return NULL;
  Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(),
                            revised_box->width(), revised_box->height());
  Pix* box_pix = pixClipRectangle(pix, clip_box, NULL);
  if (box_pix == NULL) return NULL;
  boxDestroy(&clip_box);
  if (num_rotations > 0) {
    Pix* rot_pix = pixRotateOrth(box_pix, num_rotations);
    pixDestroy(&box_pix);
    box_pix = rot_pix;
  }
  // Convert sub-8-bit images to 8 bit.
  int depth = pixGetDepth(box_pix);
  if (depth < 8) {
    Pix* grey;
    grey = pixConvertTo8(box_pix, false);
    pixDestroy(&box_pix);
    box_pix = grey;
  }
  bool vertical_text = false;
  if (num_rotations > 0) {
    // Rotated the clipped revised box back to internal coordinates.
    FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y());
    revised_box->rotate(rotation);
    if (num_rotations != 2)
      vertical_text = true;
  }
  return new ImageData(vertical_text, box_pix);
}