Exemplo n.º 1
0
// Finds horizontal line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
void LineFinder::FindHorizontalLines(int resolution,  Pix* pix,
                                     TabVector_LIST* vectors) {
#ifdef HAVE_LIBLEPT
  Pix* line_pix;
  Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix);
  C_BLOB_LIST line_cblobs;
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs);
  // Make the BLOBNBOXes from the C_BLOBs.
  BLOBNBOX_LIST line_bblobs;
  C_BLOB_IT blob_it(&line_cblobs);
  BLOBNBOX_IT bbox_it(&line_bblobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    C_BLOB* cblob = blob_it.data();
    BLOBNBOX* bblob = new BLOBNBOX(cblob);
    bbox_it.add_to_end(bblob);
  }
  ICOORD bleft(0, 0);
  ICOORD tright(height, width);
  int vertical_x, vertical_y;
  FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
                  vectors);
  if (!vectors->empty()) {
    // Some lines were found, so erase the unused blobs from the line image
    // and then subtract the line image from the source.
    bbox_it.move_to_first();
    for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
      BLOBNBOX* blob = bbox_it.data();
      if (blob->left_tab_type() == TT_UNCONFIRMED) {
        const TBOX& box = blob->bounding_box();
        // Coords are in tess format so filp x and y and then covert
        // to leptonica by height -y.
        Box* pixbox = boxCreate(box.bottom(), height - box.right(),
                                box.height(), box.width());
        pixClearInRect(line_pix, pixbox);
        boxDestroy(&pixbox);
      }
    }
    pixDilateBrick(line_pix, line_pix, 3, 1);
    pixSubtract(pix, pix, line_pix);
    if (textord_tabfind_show_vlines)
      pixWrite("hlinesclean.png", line_pix, IFF_PNG);
    ICOORD vertical;
    vertical.set_with_shrink(vertical_x, vertical_y);
    TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
    // Iterate the vectors to flip them.
    TabVector_IT h_it(vectors);
    for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
      h_it.data()->XYFlip();
    }
  }
  pixDestroy(&line_pix);
#endif
}
Exemplo n.º 2
0
// Finds vertical line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
void LineFinder::FindVerticalLines(int resolution,  Pix* pix,
                                   int* vertical_x, int* vertical_y,
                                   TabVector_LIST* vectors) {
#ifdef HAVE_LIBLEPT
  Pix* line_pix;
  Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix);
  C_BLOB_LIST line_cblobs;
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs);
  // Make the BLOBNBOXes from the C_BLOBs.
  BLOBNBOX_LIST line_bblobs;
  C_BLOB_IT blob_it(&line_cblobs);
  BLOBNBOX_IT bbox_it(&line_bblobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    C_BLOB* cblob = blob_it.data();
    BLOBNBOX* bblob = new BLOBNBOX(cblob);
    bbox_it.add_to_end(bblob);
  }
  ICOORD bleft(0, 0);
  ICOORD tright(width, height);
  FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
  if (!vectors->empty()) {
    // Some lines were found, so erase the unused blobs from the line image
    // and then subtract the line image from the source.
    bbox_it.move_to_first();
    for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
      BLOBNBOX* blob = bbox_it.data();
      if (blob->left_tab_type() == TT_UNCONFIRMED) {
        const TBOX& box = blob->bounding_box();
        Box* pixbox = boxCreate(box.left(), height - box.top(),
                                box.width(), box.height());
        pixClearInRect(line_pix, pixbox);
        boxDestroy(&pixbox);
      }
    }
    pixDilateBrick(line_pix, line_pix, 1, 3);
    pixSubtract(pix, pix, line_pix);
    if (textord_tabfind_show_vlines)
      pixWrite("vlinesclean.png", line_pix, IFF_PNG);
    ICOORD vertical;
    vertical.set_with_shrink(*vertical_x, *vertical_y);
    TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
  }
  pixDestroy(&line_pix);
#endif
}
Exemplo n.º 3
0
// Helper erases false-positive line segments from the input/output line_pix.
// 1. Since thick lines shouldn't really break up, we can eliminate some false
//    positives by marking segments that are at least kMinThickLineWidth
//    thickness, yet have a length less than min_thick_length.
// 2. Lines that don't have at least 2 intersections with other lines and have
//    a lot of neighbouring non-lines are probably not lines (perhaps arabic
//    or Hindi words, or underlines.)
// Bad line components are erased from line_pix.
// Returns the number of remaining connected components.
static int FilterFalsePositives(int resolution, Pix* nonline_pix,
                                Pix* intersection_pix, Pix* line_pix) {
  int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
  Pixa* pixa = NULL;
  Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
  // Iterate over the boxes to remove false positives.
  int nboxes = boxaGetCount(boxa);
  int remaining_boxes = nboxes;
  for (int i = 0; i < nboxes; ++i) {
    Box* box = boxaGetBox(boxa, i, L_CLONE);
    l_int32 x, y, box_width, box_height;
    boxGetGeometry(box, &x, &y, &box_width, &box_height);
    Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
    int max_width = MaxStrokeWidth(comp_pix);
    pixDestroy(&comp_pix);
    bool bad_line = false;
    // If the length is too short to stand-alone as a line, and the box width
    // is thick enough, and the stroke width is thick enough it is bad.
    if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
        box_width < min_thick_length && box_height < min_thick_length &&
        max_width > kMinThickLineWidth) {
      // Too thick for the length.
      bad_line = true;
    }
    if (!bad_line &&
        (intersection_pix == NULL ||
        NumTouchingIntersections(box, intersection_pix) < 2)) {
      // Test non-line density near the line.
      int nonline_count = CountPixelsAdjacentToLine(max_width, box,
                                                    nonline_pix);
      if (nonline_count > box_height * box_width * kMaxNonLineDensity)
        bad_line = true;
    }
    if (bad_line) {
      // Not a good line.
      pixClearInRect(line_pix, box);
      --remaining_boxes;
    }
    boxDestroy(&box);
  }
  pixaDestroy(&pixa);
  boxaDestroy(&boxa);
  return remaining_boxes;
}
// Top-level method to perform splitting based on current settings.
// Returns true if a split was actually performed.
// split_for_pageseg should be true if the splitting is being done prior to
// page segmentation. This mode uses the flag
// pageseg_devanagari_split_strategy to determine the splitting strategy.
bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {
  SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :
      ocr_split_strategy_;
  if (split_strategy == NO_SPLIT) {
    return false;  // Nothing to do.
  }
  ASSERT_HOST(split_strategy == MINIMAL_SPLIT ||
              split_strategy == MAXIMAL_SPLIT);
  ASSERT_HOST(orig_pix_);
  if (devanagari_split_debuglevel > 0) {
    tprintf("Splitting shiro-rekha ...\n");
    tprintf("Split strategy = %s\n",
            split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
    tprintf("Initial pageseg available = %s\n",
            segmentation_block_list_ ? "yes" : "no");
  }
  // Create a copy of original image to store the splitting output.
  pixDestroy(&splitted_image_);
  splitted_image_ = pixCopy(NULL, orig_pix_);

  // Initialize debug image if required.
  if (devanagari_split_debugimage) {
    pixDestroy(&debug_image_);
    debug_image_ = pixConvertTo32(orig_pix_);
  }

  // Determine all connected components in the input image. A close operation
  // may be required prior to this, depending on the current settings.
  Pix* pix_for_ccs = pixClone(orig_pix_);
  if (perform_close_ && global_xheight_ != kUnspecifiedXheight &&
      !segmentation_block_list_) {
    if (devanagari_split_debuglevel > 0) {
      tprintf("Performing a global close operation..\n");
    }
    // A global measure is available for xheight, but no local information
    // exists.
    pixDestroy(&pix_for_ccs);
    pix_for_ccs = pixCopy(NULL, orig_pix_);
    PerformClose(pix_for_ccs, global_xheight_);
  }
  Pixa* ccs;
  Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
  boxaDestroy(&tmp_boxa);
  pixDestroy(&pix_for_ccs);

  // Iterate over all connected components. Get their bounding boxes and clip
  // out the image regions corresponding to these boxes from the original image.
  // Conditionally run splitting on each of them.
  Boxa* regions_to_clear = boxaCreate(0);
  for (int i = 0; i < pixaGetCount(ccs); ++i) {
    Box* box = ccs->boxa->box[i];
    Pix* word_pix = pixClipRectangle(orig_pix_, box, NULL);
    ASSERT_HOST(word_pix);
    int xheight = GetXheightForCC(box);
    if (xheight == kUnspecifiedXheight && segmentation_block_list_ &&
        devanagari_split_debugimage) {
      pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
    }
    // If some xheight measure is available, attempt to pre-eliminate small
    // blobs from the shiro-rekha process. This is primarily to save the CCs
    // corresponding to punctuation marks/small dots etc which are part of
    // larger graphemes.
    if (xheight == kUnspecifiedXheight ||
        (box->w > xheight / 3 && box->h > xheight / 2)) {
      SplitWordShiroRekha(split_strategy, word_pix, xheight,
                          box->x, box->y, regions_to_clear);
    } else if (devanagari_split_debuglevel > 0) {
      tprintf("CC dropped from splitting: %d,%d (%d, %d)\n",
              box->x, box->y, box->w, box->h);
    }
    pixDestroy(&word_pix);
  }
  // Actually clear the boxes now.
  for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
    Box* box = boxaGetBox(regions_to_clear, i, L_CLONE);
    pixClearInRect(splitted_image_, box);
    boxDestroy(&box);
  }
  boxaDestroy(&regions_to_clear);
  pixaDestroy(&ccs);
  if (devanagari_split_debugimage) {
    DumpDebugImage(split_for_pageseg ? "pageseg_split_debug.png" :
                   "ocr_split_debug.png");
  }
  return true;
}
// Returns a list of regions (boxes) which should be cleared in the original
// image so as to perform shiro-rekha splitting. Pix is assumed to carry one
// (or less) word only. Xheight measure could be the global estimate, the row
// estimate, or unspecified. If unspecified, over splitting may occur, since a
// conservative estimate of stroke width along with an associated multiplier
// is used in its place. It is advisable to have a specified xheight when
// splitting for classification/training.
// A vertical projection histogram of all the on-pixels in the input pix is
// computed. The maxima of this histogram is regarded as an approximate location
// of the shiro-rekha. By descending on the maxima's peak on both sides,
// stroke width of shiro-rekha is estimated.
// A horizontal projection histogram is computed for a sub-image of the input
// image, which extends from just below the shiro-rekha down to a certain
// leeway. The leeway depends on the input xheight, if provided, else a
// conservative multiplier on approximate stroke width is used (which may lead
// to over-splitting).
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy,
                                             Pix* pix,
                                             int xheight,
                                             int word_left,
                                             int word_top,
                                             Boxa* regions_to_clear) {
  if (split_strategy == NO_SPLIT) {
    return;
  }
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  // Statistically determine the yextents of the shiro-rekha.
  int shirorekha_top, shirorekha_bottom, shirorekha_ylevel;
  GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom,
                        &shirorekha_ylevel);
  // Since the shiro rekha is also a stroke, its width is equal to the stroke
  // width.
  int stroke_width = shirorekha_bottom - shirorekha_top + 1;

  // Some safeguards to protect CCs we do not want to be split.
  // These are particularly useful when the word wasn't eliminated earlier
  // because xheight information was unavailable.
  if (shirorekha_ylevel > height / 2) {
    // Shirorekha shouldn't be in the bottom half of the word.
    if (devanagari_split_debuglevel > 0) {
      tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n",
              word_left, word_top);
    }
    return;
  }
  if (stroke_width > height / 3) {
    // Even the boldest of fonts shouldn't do this.
    if (devanagari_split_debuglevel > 0) {
      tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n",
              word_left, word_top);
    }
    return;
  }

  // Clear the ascender and descender regions of the word.
  // Obtain a vertical projection histogram for the resulting image.
  Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3,
                                width, 5 * stroke_width / 3);
  Pix* word_in_xheight = pixCopy(NULL, pix);
  pixClearInRect(word_in_xheight, box_to_clear);
  // Also clear any pixels which are below shirorekha_bottom + some leeway.
  // The leeway is set to xheight if the information is available, else it is a
  // multiplier applied to the stroke width.
  int leeway_to_keep = stroke_width * 3;
  if (xheight != kUnspecifiedXheight) {
    // This is because the xheight-region typically includes the shiro-rekha
    // inside it, i.e., the top of the xheight range corresponds to the top of
    // shiro-rekha.
    leeway_to_keep = xheight - stroke_width;
  }
  box_to_clear->y = shirorekha_bottom + leeway_to_keep;
  box_to_clear->h = height - box_to_clear->y;
  pixClearInRect(word_in_xheight, box_to_clear);
  boxDestroy(&box_to_clear);

  PixelHistogram vert_hist;
  vert_hist.ConstructVerticalCountHist(word_in_xheight);
  pixDestroy(&word_in_xheight);

  // If the number of black pixel in any column of the image is less than a
  // fraction of the stroke width, treat it as noise / a stray mark. Perform
  // these changes inside the vert_hist data itself, as that is used later on as
  // a bit vector for the final split decision at every column.
  for (int i = 0; i < width; ++i) {
    if (vert_hist.hist()[i] <= stroke_width / 4)
      vert_hist.hist()[i] = 0;
    else
      vert_hist.hist()[i] = 1;
  }
  // In order to split the line at any point, we make sure that the width of the
  // gap is atleast half the stroke width.
  int i = 0;
  int cur_component_width = 0;
  while (i < width) {
    if (!vert_hist.hist()[i]) {
      int j = 0;
      while (i + j < width && !vert_hist.hist()[i+j])
        ++j;
      if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) {
        // Perform a shiro-rekha split. The intervening region lies from i to
        // i+j-1.
        // A minimal single-pixel split makes the estimation of intra- and
        // inter-word spacing easier during page layout analysis,
        // whereas a maximal split may be needed for OCR, depending on
        // how the engine was trained.
        bool minimal_split = (split_strategy == MINIMAL_SPLIT);
        int split_width = minimal_split ? 1 : j;
        int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i;
        if (!minimal_split || (i != 0 && i + j != width)) {
          Box* box_to_clear =
              boxCreate(word_left + split_left,
                        word_top + shirorekha_top - stroke_width / 3,
                        split_width,
                        5 * stroke_width / 3);
          if (box_to_clear) {
            boxaAddBox(regions_to_clear, box_to_clear, L_CLONE);
            // Mark this in the debug image if needed.
            if (devanagari_split_debugimage) {
              pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128);
            }
            boxDestroy(&box_to_clear);
            cur_component_width = 0;
          }
        }
      }
      i += j;
    } else {
      ++i;
      ++cur_component_width;
    }
  }
}
Exemplo n.º 6
0
main(int    argc,
     char **argv)
{
l_int32       i, j, w, h;
l_int32       minsum[5] =    { 2, 40, 50, 50, 70};
l_int32       skipdist[5] =  { 5,  5, 10, 10, 30};
l_int32       delta[5] =     { 2, 10, 10, 25, 40};
l_int32       maxbg[5] =     {10, 15, 10, 20, 40};
BOX          *box1, *box2, *box3, *box4;
BOXA         *boxa;
PIX          *pixs, *pixc, *pixt, *pixd, *pix32;
PIXA         *pixas, *pixad;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

        /* Generate and save 1 bpp masks */
    pixas = pixaCreate(0);
    pixs = pixCreate(300, 250, 1);
    pixSetAll(pixs);
    box1 = boxCreate(50, 0, 140, 25);
    box2 = boxCreate(120, 100, 100, 25);
    box3 = boxCreate(75, 170, 80, 20);
    box4 = boxCreate(150, 80, 25, 70);

    pixClearInRect(pixs, box1);
    pixaAddPix(pixas, pixs, L_COPY);
    pixt = pixRotateOrth(pixs, 1);
    pixaAddPix(pixas, pixt, L_INSERT);

    pixClearInRect(pixs, box2);
    pixaAddPix(pixas, pixs, L_COPY);
    pixt = pixRotateOrth(pixs, 1);
    pixaAddPix(pixas, pixt, L_INSERT);

    pixClearInRect(pixs, box3);
    pixaAddPix(pixas, pixs, L_COPY);
    pixt = pixRotateOrth(pixs, 1);
    pixaAddPix(pixas, pixt, L_INSERT);

    pixClearInRect(pixs, box4);
    pixaAddPix(pixas, pixs, L_COPY);
    pixt = pixRotateOrth(pixs, 1);
    pixaAddPix(pixas, pixt, L_INSERT);

    boxDestroy(&box1);
    boxDestroy(&box2);
    boxDestroy(&box3);
    boxDestroy(&box4);
    pixDestroy(&pixs);

        /* Do 5 splittings on each of the 8 masks */
    pixad = pixaCreate(0);
    for (j = 0; j < 8; j++) {
        pixt = pixaGetPix(pixas, j, L_CLONE);
        pixGetDimensions(pixt, &w, &h, NULL);
        pix32 = pixCreate(w, h, 32);
        pixSetAll(pix32);
        pixPaintThroughMask(pix32, pixt, 0, 0, 0xc0c0c000);
        pixSaveTiled(pix32, pixad, 1, 1, 30, 32);
        for (i = 0; i < 5; i++) {
            pixc = pixCopy(NULL, pix32);
            boxa = pixSplitComponentIntoBoxa(pixt, NULL, minsum[i], skipdist[i],
                                             delta[i], maxbg[i], 0, 1);
/*            boxaWriteStream(stderr, boxa); */
            pixd = pixBlendBoxaRandom(pixc, boxa, 0.4);
            pixRenderBoxaArb(pixd, boxa, 2, 255, 0, 0);
            pixSaveTiled(pixd, pixad, 1, 0, 30, 32);
            pixDestroy(&pixd);
            pixDestroy(&pixc);
            boxaDestroy(&boxa);
        }
        pixDestroy(&pixt);
        pixDestroy(&pix32);
    }

        /* Display results */
    pixd = pixaDisplay(pixad, 0, 0);
    regTestWritePixAndCheck(rp, pixd, IFF_PNG);  /* 0 */
    pixDisplayWithTitle(pixd, 100, 100, NULL, rp->display);
    pixDestroy(&pixd);
    pixaDestroy(&pixad);

        /* Put the 8 masks all together, and split 5 ways */
    pixad = pixaCreate(0);
    pixs = pixaDisplayOnLattice(pixas, 325, 325);
    pixGetDimensions(pixs, &w, &h, NULL);
    pix32 = pixCreate(w, h, 32);
    pixSetAll(pix32);
    pixPaintThroughMask(pix32, pixs, 0, 0, 0xc0c0c000);
    pixSaveTiled(pix32, pixad, 1, 1, 30, 32);
    for (i = 0; i < 5; i++) {
        pixc = pixCopy(NULL, pix32);
        boxa = pixSplitIntoBoxa(pixs, minsum[i], skipdist[i],
                                delta[i], maxbg[i], 0, 1);
/*        boxaWriteStream(stderr, boxa); */
        pixd = pixBlendBoxaRandom(pixc, boxa, 0.4);
        pixRenderBoxaArb(pixd, boxa, 2, 255, 0, 0);
        pixSaveTiled(pixd, pixad, 1, 0, 30, 32);
        pixDestroy(&pixd);
        pixDestroy(&pixc);
        boxaDestroy(&boxa);
    }
    pixDestroy(&pix32);
    pixDestroy(&pixs);

        /* Display results */
    pixd = pixaDisplay(pixad, 0, 0);
    regTestWritePixAndCheck(rp, pixd, IFF_PNG);  /* 1 */
    pixDisplayWithTitle(pixd, 600, 100, NULL, rp->display);
    pixDestroy(&pixd);
    pixaDestroy(&pixad);

    pixaDestroy(&pixas);
    regTestCleanup(rp);
    return 0;
}