Exemple #1
0
/*!
 *  boxaIntersectsBox()
 *
 *      Input:  boxas
 *              box (for intersecting)
 *      Return  boxad (boxa with all boxes in boxas that intersect box),
 *                     or null on error
 *
 *  Notes:
 *      (1) All boxes in boxa that intersect with box (i.e., are completely
 *          or partially contained in box) are retained.
 */
BOXA *
boxaIntersectsBox(BOXA  *boxas,
                  BOX   *box)
{
l_int32  i, n, val;
BOX     *boxt;
BOXA    *boxad;

    PROCNAME("boxaIntersectsBox");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!box)
        return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
    if ((n = boxaGetCount(boxas)) == 0)
        return boxaCreate(1);  /* empty */

    boxad = boxaCreate(0);
    for (i = 0; i < n; i++) {
        boxt = boxaGetBox(boxas, i, L_CLONE);
        boxIntersects(box, boxt, &val);
        if (val == 1)
            boxaAddBox(boxad, boxt, L_COPY);
        boxDestroy(&boxt);  /* destroy the clone */
    }

    return boxad;
}
Exemple #2
0
int hookIntersects()
{
	for (int i = 0; i < NUM_OF_BOXES; i++)
	{
		if (boxIntersects(boxes[i], hook.x+4, hook.y+12, hook.z))
			return (i);
	}
	return (-1);
}
Exemple #3
0
ColPartition* M_Utils::getTBoxColPart(ColPartitionGrid* cpgrid,
    TBOX t, PIX* img) {
  ColPartitionGridSearch colsearch(cpgrid);
  colsearch.StartFullSearch();
   ColPartition* curpart = NULL;
   while ((curpart = colsearch.NextFullSearch()) != NULL) {
     BOX* partbox;
     if(curpart->boxes_count() > 0)
       partbox = getColPartImCoords(curpart, img);
     else {
       TBOX b = curpart->bounding_box();
       partbox = tessTBoxToImBox(&b, img);
     }
     TBOX rtbox = t;
     BOX* box = tessTBoxToImBox(&rtbox, img);
     int intersects;
     boxIntersects(partbox, box, &intersects);
     if(intersects)
       return curpart;
     boxDestroy(&partbox);
     boxDestroy(&box);
   }
   return NULL;
}
Exemple #4
0
/*!
 *  pixFindPageForeground()
 *
 *      Input:  pixs (full resolution (any type or depth)
 *              threshold (for binarization; typically about 128)
 *              mindist (min distance of text from border to allow
 *                       cleaning near border; at 2x reduction, this
 *                       should be larger than 50; typically about 70)
 *              erasedist (when conditions are satisfied, erase anything
 *                         within this distance of the edge;
 *                         typically 30 at 2x reduction)
 *              pagenum (use for debugging when called repeatedly; labels
 *                       debug images that are assembled into pdfdir)
 *              showmorph (set to a negative integer to show steps in
 *                         generating masks; this is typically used
 *                         for debugging region extraction)
 *              display (set to 1  to display mask and selected region
 *                       for debugging a single page)
 *              pdfdir (subdirectory of /tmp where images showing the
 *                      result are placed when called repeatedly; use
 *                      null if no output requested)
 *      Return: box (region including foreground, with some pixel noise
 *                   removed), or null if not found
 *
 *  Notes:
 *      (1) This doesn't simply crop to the fg.  It attempts to remove
 *          pixel noise and junk at the edge of the image before cropping.
 *          The input @threshold is used if pixs is not 1 bpp.
 *      (2) There are several debugging options, determined by the
 *          last 4 arguments.
 *      (3) If you want pdf output of results when called repeatedly,
 *          the pagenum arg labels the images written, which go into
 *          /tmp/<pdfdir>/<pagenum>.png.  In that case,
 *          you would clean out the /tmp directory before calling this
 *          function on each page:
 *              lept_rmdir(pdfdir);
 *              lept_mkdir(pdfdir);
 */
BOX *
pixFindPageForeground(PIX         *pixs,
                      l_int32      threshold,
                      l_int32      mindist,
                      l_int32      erasedist,
                      l_int32      pagenum,
                      l_int32      showmorph,
                      l_int32      display,
                      const char  *pdfdir)
{
char     buf[64];
l_int32  flag, nbox, intersects;
l_int32  w, h, bx, by, bw, bh, left, right, top, bottom;
PIX     *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
BOX     *box, *boxfg, *boxin, *boxd;
BOXA    *ba1, *ba2;

    PROCNAME("pixFindPageForeground");

    if (!pixs)
        return (BOX *)ERROR_PTR("pixs not defined", procName, NULL);

        /* Binarize, downscale by 0.5, remove the noise to generate a seed,
         * and do a seedfill back from the seed into those 8-connected
         * components of the binarized image for which there was at least
         * one seed pixel.  Also clear out any components that are within
         * 10 pixels of the edge at 2x reduction. */
    flag = (showmorph) ? -1 : 0;  /* if showmorph == -1, write intermediate
                                   * images to /tmp/seq_output_1.pdf */
    pixb = pixConvertTo1(pixs, threshold);
    pixb2 = pixScale(pixb, 0.5, 0.5);
    pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.5", flag);
    pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
    pixSetOrClearBorder(pixsf, 10, 10, 10, 10, PIX_SET);
    pixm = pixRemoveBorderConnComps(pixsf, 8);
    if (display) pixDisplay(pixm, 100, 100);

        /* Now, where is the main block of text?  We want to remove noise near
         * the edge of the image, but to do that, we have to be convinced that
         * (1) there is noise and (2) it is far enough from the text block
         * and close enough to the edge.  For each edge, if the block
         * is more than mindist from that edge, then clean 'erasedist'
         * pixels from the edge. */
    pix1 = pixMorphSequence(pixm, "c50.50", flag - 1);
    ba1 = pixConnComp(pix1, NULL, 8);
    ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
    pixGetDimensions(pix1, &w, &h, NULL);
    nbox = boxaGetCount(ba2);
    if (nbox > 1) {
        box = boxaGetBox(ba2, 0, L_CLONE);
        boxGetGeometry(box, &bx, &by, &bw, &bh);
        left = (bx > mindist) ? erasedist : 0;
        right = (w - bx - bw > mindist) ? erasedist : 0;
        top = (by > mindist) ? erasedist : 0;
        bottom = (h - by - bh > mindist) ? erasedist : 0;
        pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
        boxDestroy(&box);
    }
    pixDestroy(&pix1);
    boxaDestroy(&ba1);
    boxaDestroy(&ba2);

        /* Locate the foreground region; don't bother cropping */
    pixClipToForeground(pixm, NULL, &boxfg);

        /* Sanity check the fg region.  Make sure it's not confined
         * to a thin boundary on the left and right sides of the image,
         * in which case it is likely to be noise. */
    if (boxfg) {
        boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
        boxIntersects(boxfg, boxin, &intersects);
        if (!intersects) {
            L_INFO("found only noise on page %d\n", procName, pagenum);
            boxDestroy(&boxfg);
        }
        boxDestroy(&boxin);
    }

    boxd = NULL;
    if (!boxfg) {
        L_INFO("no fg region found for page %d\n", procName, pagenum);
    } else {
        boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2);  /* tiny expansion */
        boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);

            /* Write image showing box for this page.  This is to be
             * bundled up into a pdf of all the pages, which can be
             * generated by convertFilesToPdf()  */
        if (pdfdir) {
            pixg2 = pixConvert1To4Cmap(pixb);
            pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
            snprintf(buf, sizeof(buf), "/tmp/%s/%05d.png", pdfdir, pagenum);
            if (display) pixDisplay(pixg2, 700, 100);
            pixWrite(buf, pixg2, IFF_PNG);
            pixDestroy(&pixg2);
        }
    }

    pixDestroy(&pixb);
    pixDestroy(&pixb2);
    pixDestroy(&pixseed);
    pixDestroy(&pixsf);
    pixDestroy(&pixm);
    boxDestroy(&boxfg);
    return boxd;
}
Exemple #5
0
/*!
 *  boxaCombineOverlaps()
 *
 *      Input:  boxas
 *      Return: boxad (where each set of boxes in boxas that overlap are
 *                     combined into a single bounding box in boxad), or
 *                     null on error.
 *
 *  Notes:
 *      (1) If there are no overlapping boxes, it simply returns a copy
 *          of @boxas.
 *      (2) The alternative method of painting each rectanle and finding
 *          the 4-connected components gives the wrong result, because
 *          two non-overlapping rectangles, when rendered, can still
 *          be 4-connected, and hence they will be joined.
 *      (3) A bad case is to have n boxes, none of which overlap.
 *          Then you have one iteration with O(n^2) compares.  This
 *          is still faster than painting each rectangle and finding
 *          the connected components, even for thousands of rectangles.
 */
BOXA *
boxaCombineOverlaps(BOXA  *boxas)
{
l_int32  i, j, n1, n2, inter, interfound, niters;
BOX     *box1, *box2, *box3;
BOXA    *boxat1, *boxat2;

    PROCNAME("boxaCombineOverlaps");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);

    boxat1 = boxaCopy(boxas, L_COPY);
    n1 = boxaGetCount(boxat1);
    niters = 0;
/*    fprintf(stderr, "%d iters: %d boxes\n", niters, n1); */
    while (1) {  /* loop until no change from previous iteration */
        niters++;
        boxat2 = boxaCreate(n1);
        for (i = 0; i < n1; i++) {
            box1 = boxaGetBox(boxat1, i, L_COPY);
            if (i == 0) {
                boxaAddBox(boxat2, box1, L_INSERT);
                continue;
            }
            n2 = boxaGetCount(boxat2);
                /* Now test box1 against all boxes already put in boxat2.
                 * If it is found to intersect with an existing box,
                 * replace that box by the union of the two boxes,
                 * and break to the outer loop.  If no overlap is
                 * found, add box1 to boxat2. */
            interfound = FALSE;
            for (j = 0; j < n2; j++) {
                box2 = boxaGetBox(boxat2, j, L_CLONE);
                boxIntersects(box1, box2, &inter);
                if (inter == 1) {
                    box3 = boxBoundingRegion(box1, box2);
                    boxaReplaceBox(boxat2, j, box3);
                    boxDestroy(&box1);
                    boxDestroy(&box2);
                    interfound = TRUE;
                    break;
                }
                boxDestroy(&box2);
            }
            if (interfound == FALSE)
                boxaAddBox(boxat2, box1, L_INSERT);
        }
        n2 = boxaGetCount(boxat2);
/*        fprintf(stderr, "%d iters: %d boxes\n", niters, n2); */
        if (n2 == n1)  /* we're done */
            break;
        else {
            n1 = n2;
            boxaDestroy(&boxat1);
            boxat1 = boxat2;
        }
    }
    boxaDestroy(&boxat1);
    return boxat2;
}